#2
广水一中肖聪2023-07-01 15:47
程序代码: import pandas as pd from selenium import webdriver from selenium. import By import time import random import ddddocr url = "http://58.49.47.121:81/n_score/index.jsp" columns=['姓名','身份证号','高考报名号','准考证号','语文','数学','英语','物理','化学','生物','总分'] #读取考生信息 df=pd.read_excel('info.xls') i=0 #按行数进行循环,df.shape[0]表示df的总行数,不包括表头 while i< df.shape[0]: gaokaobaominghao=df.iat[i,0] #没有进行字符转换时总报错,转换后顺利通过 gaokaobaominghao=str(gaokaobaominghao) name=df.iat[i,1] shenfenzhenghao=df.iat[i,2] #用Firefox浏览器打开url driver = webdriver.Firefox() driver.get(url) #窗口最大化 driver.maximize_window() #查找验证码位置 img=driver.find_element(By.ID, "img_randCode") #对验证码进行截屏并保存为图片 img.screenshot('yzm.png') #调用ddddocr对图片进行文字识别 ocr=ddddocr.DdddOcr() with open("yzm.png",'rb') as f: image=f.read() res=ocr.classification(image) #如果验证码图片识别不对,跳出本次循环 if len(res) != 4: continue; #使用driver.find_element找到需要输入数据的文本框,并输入对应文本 driver.find_element(By.ID, "gkbmh").send_keys(gaokaobaominghao) driver.find_element(By.ID, "sfzh").send_keys(shenfenzhenghao) driver.find_element(By.ID, "yzm").send_keys(res) #点击查询按钮 driver.find_element(By.ID, "cx").click() #找到查询结果,并用相应的变量存储数据 xm=driver.find_element(By.ID, "result_xm").text sfzh=driver.find_element(By.ID, "result_sfzh").text gkbmh=driver.find_element(By.ID, "result_zkzh").text zkzh=driver.find_element(By.ID, "result_gkbmh").text #km1=driver.find_element(By.ID, "result_mc1").text #km2=driver.find_element(By.ID, "result_mc2").text #km3=driver.find_element(By.ID, "result_mc3").text #km4=driver.find_element(By.ID, "result_mc4").text #km5=driver.find_element(By.ID, "result_mc5").text #km6=driver.find_element(By.ID, "result_mc6").text score1=driver.find_element(By.ID, "result_score1").text score2=driver.find_element(By.ID, "result_score2").text score3=driver.find_element(By.ID, "result_score3").text score4=driver.find_element(By.ID, "result_score4").text score5=driver.find_element(By.ID, "result_score5").text score6=driver.find_element(By.ID, "result_score6").text totalscore=driver.find_element(By.ID, "result_totalscore").text #将数据存储在data[i]列表 data[i]=[xm,sfzh,gkbmh,zkzh,score1.score2,score3,score4,score5,score6,totalscore] i+=1 #关闭浏览器 driver.quit() #创建新表格 df.DataFrame(data=data,columns=columns) df_merge.to_excel('result.xlsx') |
问题简要描述:
我写了一个循环,每次循环能得到一些数据,如何利用pandas将这些得到的数据写入excel表中?
查阅资料说是创建DataFrame对象,但是资料上给的例子都是一次性得到所有数据的情况下进行创建,我的问题在于每次循环里面得到的数据,下一轮循环时这些变量的值发生了变化,而且数据量有点大,用列举的方法创建DataFrame对象有点困难。
代码如下:
import pandas as pd
from selenium import webdriver
from selenium. import By
import time
import random
import ddddocr
url = "http://58.49.47.121:81/n_score/index.jsp"
columns=['姓名','身份证号','高考报名号','准考证号','语文','数学','英语','物理','化学','生物','总分']
#读取考生信息
df=pd.read_excel('info.xls')
i=0
#按行数进行循环,df.shape[0]表示df的总行数,不包括表头
while i< df.shape[0]:
gaokaobaominghao=df.iat[i,0]
#没有进行字符转换时总报错,转换后顺利通过
gaokaobaominghao=str(gaokaobaominghao)
name=df.iat[i,1]
shenfenzhenghao=df.iat[i,2]
#用Firefox浏览器打开url
driver = webdriver.Firefox()
driver.get(url)
#窗口最大化
driver.maximize_window()
#查找验证码位置
img=driver.find_element(By.ID, "img_randCode")
#对验证码进行截屏并保存为图片
img.screenshot('yzm.png')
#调用ddddocr对图片进行文字识别
ocr=ddddocr.DdddOcr()
with open("yzm.png",'rb') as f:
image=f.read()
res=ocr.classification(image)
#如果验证码图片识别不对,跳出本次循环
if len(res) != 4:
continue;
#使用driver.find_element找到需要输入数据的文本框,并输入对应文本
driver.find_element(By.ID, "gkbmh").send_keys(gaokaobaominghao)
driver.find_element(By.ID, "sfzh").send_keys(shenfenzhenghao)
driver.find_element(By.ID, "yzm").send_keys(res)
#点击查询按钮
driver.find_element(By.ID, "cx").click()
#找到查询结果,并用相应的变量存储数据
xm=driver.find_element(By.ID, "result_xm").text
sfzh=driver.find_element(By.ID, "result_sfzh").text
gkbmh=driver.find_element(By.ID, "result_zkzh").text
zkzh=driver.find_element(By.ID, "result_gkbmh").text
#km1=driver.find_element(By.ID, "result_mc1").text
#km2=driver.find_element(By.ID, "result_mc2").text
#km3=driver.find_element(By.ID, "result_mc3").text
#km4=driver.find_element(By.ID, "result_mc4").text
#km5=driver.find_element(By.ID, "result_mc5").text
#km6=driver.find_element(By.ID, "result_mc6").text
score1=driver.find_element(By.ID, "result_score1").text
score2=driver.find_element(By.ID, "result_score2").text
score3=driver.find_element(By.ID, "result_score3").text
score4=driver.find_element(By.ID, "result_score4").text
score5=driver.find_element(By.ID, "result_score5").text
score6=driver.find_element(By.ID, "result_score6").text
totalscore=driver.find_element(By.ID, "result_totalscore").text
#将数据存储在data[i]列表
data[i]=[xm,sfzh,gkbmh,zkzh,score1.score2,score3,score4,score5,score6,totalscore]
i+=1
#关闭浏览器
driver.quit()
#创建新表格
df.DataFrame(data=data,columns=columns)
df_merge.to_excel('result.xlsx')
错误提示信息如下:
Traceback (most recent call last):
File "C:/Users/广水一中巡考电脑/Desktop/20230625/pd001.py", line 59, in <module>
data[i]=[xm,sfzh,gkbmh,zkzh,score1.score2,score3,score4,score5,score6,totalscore]
AttributeError: 'str' object has no attribute 'score2'
应该就是最后几行代码写的有问题。
感谢大佬耐心指导小弟!!!!!!
只有本站会员才能查看附件,请 登录
只有本站会员才能查看附件,请 登录
只有本站会员才能查看附件,请 登录
[此贴子已经被作者于2023-7-1 15:49编辑过]