大神们帮我看看,这段代码是请高手写的。
我下载下来的图片顺序,跟网页上显示的图片顺序不同。查看网页的元素,看起来很普通:有些页面是正常的,有些则是第一张和最后一张对调了。
高手们有办法解决吗?谢谢!
http://2.
http://2.
http://2.
------------------------------------------------------
程序代码:
import os
import time

import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

# Workbook listing the pages to scrape; it is rewritten in place with the
# scraped text once every row has been processed.
EXCEL_PATH = '下载网址.xlsx'
BASE_URL = 'https://market.m.'
DOWNLOAD_ROOT = "D:/python/"

# Locators for the description spans and the gallery images of a page.
TEXT_XPATH = '//*[@id="root"]/div/div[3]/div[1]/div[2]/div[2]//span'
IMAGE_XPATH = '//*[@id="root"]/div/div[3]/div[1]/div[1]/div//img'

PAGE_LOAD_WAIT = 5    # seconds to sleep after driver.get() so the page can finish loading
REQUEST_TIMEOUT = 30  # seconds before a single image download is abandoned


def unique_in_order(values):
    """Return the non-empty items of *values* without duplicates.

    The first occurrence of each item keeps its position, so the result
    follows the order in which the items were produced.
    """
    seen = set()
    ordered = []
    for value in values:
        # Skip None / "" (an <img> without a usable src) and repeats.
        if value and value not in seen:
            seen.add(value)
            ordered.append(value)
    return ordered


def download_images(image_urls, folder_path):
    """Save each URL in *image_urls* as ``<folder_path>/<n>.jpg``.

    ``n`` is the 1-based position of the URL in *image_urls*. A download
    that fails (network error, timeout, HTTP error status) is reported and
    skipped; its number is left unused so the remaining files still match
    their position in the list.
    """
    for number, image_url in enumerate(image_urls, start=1):
        try:
            response = requests.get(image_url, timeout=REQUEST_TIMEOUT)
            # Without this an HTTP error page would be written out as a .jpg.
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f"skipped image {number} ({image_url}): {exc}")
            continue
        with open(folder_path + '/{}.jpg'.format(number), 'wb') as f:
            f.write(response.content)


def main():
    """Scrape every page listed in the workbook.

    For each row: column 8 holds a URL whose value after the first ``=`` is
    appended to ``BASE_URL``; column 0 names the download folder; the
    page's description text is written to column 1. The workbook is saved
    after all rows are done.
    """
    df = pd.read_excel(EXCEL_PATH, header=None)

    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(5)
        driver.maximize_window()

        for row in range(df.shape[0]):
            item_id = df.iloc[row, 8].split('=')[1]
            folder_path = DOWNLOAD_ROOT + df.iloc[row, 0]
            os.makedirs(folder_path, exist_ok=True)

            driver.get(BASE_URL + item_id)
            time.sleep(PAGE_LOAD_WAIT)  # wait for the page to finish loading

            spans = driver.find_elements(By.XPATH, TEXT_XPATH)
            df.iloc[row, 1] = ' '.join(span.text for span in spans)

            # find_elements returns matches in DOM order, and files are
            # numbered in that order.
            # NOTE(review): if the gallery is a looping carousel, the DOM may
            # hold cloned slides (a copy of the last image before the first
            # one), which would make DOM order differ from on-screen order
            # once duplicates are dropped. Confirm in DevTools and, if so,
            # exclude the cloned nodes in IMAGE_XPATH.
            images = driver.find_elements(By.XPATH, IMAGE_XPATH)
            image_urls = unique_in_order(
                image.get_attribute("src") for image in images
            )
            download_images(image_urls, folder_path)
    finally:
        # Always close the browser, even when a row fails part-way through.
        driver.quit()

    df.to_excel(EXCEL_PATH, index=False, header=False)


if __name__ == "__main__":
    main()