注册 登录
编程论坛 Python论坛

requests 一直出現錯誤

Z89 发布于 2022-05-12 21:06, 1254 次点击

requests   一直出現500錯誤

用selenium 就正常 ,requests哪裡寫錯了嗎?

程序代码:


import requests
import os
import re
import time

# One session is shared by every request below so that cookies handed out by
# the site are sent back on the later calls.
s = requests.Session()
r = s.get('https://www.ntuh.gov.tw/ntuh/Index.action')

# Doctor-schedule listing page; QueryName is already percent-encoded in the URL.
urla = 'https://reg.ntuh.gov.tw/webadministration/DoctorServiceQueryByDrName.aspx?HospCode=T0&QueryName=%E8%91%89%E9%99%B3%E7%92%9E'

ra = s.get(urla)

# The patterns are plain strings, so they must be passed to re.findall();
# calling .findall() on a str raises AttributeError.
# Every result below is a list of all captured values found in the page.

# Seven-digit ServiceIDSE of grid row ctl02.
patterna = r'\$ctl02.+ServiceIDSE" value="([0-9][0-9][0-9][0-9][0-9][0-9][0-9])"'
IDSE = re.findall(patterna, ra.text)
print(IDSE)

# Hidden __VIEWSTATE field of the listing page.
pattern = r'id="__VIEWSTATE" value="(.+)" />'
result1 = re.findall(pattern, ra.text)

# Hidden __EVENTVALIDATION field of the listing page.
patterna = r'id="__EVENTVALIDATION" value="(.+)" />'
result1a = re.findall(patterna, ra.text)

# ServiceEncryptCode of grid row ctl02.
patterna = r'\$ctl02.+ServiceEncryptCode" value="(.+?)"'
EncryptCode = re.findall(patterna, ra.text)

# Headers for the postback: a desktop-browser user agent, with the listing
# page itself as referer.
request_headers = dict([
    ('user-agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36'),
    ('referer', urla),
])

# Form fields of the postback, filled in the same order the original dict
# literal used. The scraped values (result1, result1a, IDSE, EncryptCode) are
# passed through exactly as they were extracted.
form_data = {}
form_data['__EVENTTARGET'] = 'DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$AdminTextShow'
form_data['__EVENTARGUMENT'] = ''
form_data['__VIEWSTATE'] = result1
form_data['__VIEWSTATEGENERATOR'] = '15A8B68C'
form_data['__EVENTVALIDATION'] = result1a
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$HospitalCode'] = 'T0'
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$ServiceIDSE'] = IDSE
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$ServiceEncryptCode'] = EncryptCode
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$FirstVisitQuotaFlag'] = 'n'

# Submit to the listing URL and remember the URL of the response that came
# back; the following steps use it as the address of the form page.
raa = s.post(urla, data=form_data, headers=request_headers)
rbb = raa.url
print(rbb)

# Fetch the form page again through the same session.
raba = s.get(rbb)


def _hidden_field(html, field_id):
    """Return the ``value`` of the input whose ``id`` is *field_id*.

    The value is captured up to its own closing quote, so neither an empty
    value nor further attributes on the same line change the result.

    Raises:
        ValueError: if *html* has no such field (for example because the
            server answered with an error page instead of the form).
    """
    found = re.search(r'id="%s" value="([^"]*)"' % re.escape(field_id), html)
    if found is None:
        raise ValueError('hidden field %r not found in the response' % field_id)
    return found.group(1)


# State fields that have to be echoed back when the form is submitted.
VIEWSTATE = _hidden_field(raba.text, '__VIEWSTATE')
print(VIEWSTATE)

VIEWSTATEGENERATOR = _hidden_field(raba.text, '__VIEWSTATEGENERATOR')
print(VIEWSTATEGENERATOR)

EVENTVALIDATION = _hidden_field(raba.text, '__EVENTVALIDATION')
print(EVENTVALIDATION)


# Headers for the final form submission; the form page is its own referer.
request_headerst = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
    'referer': rbb,
}

# Form fields of the submission. The three state fields come from the form
# page fetched just before; the rest are the values typed into the form.
form_datat = {
    'scrollLeft': '0',
    'scrollTop': '0',
    '__EVENTTARGET': '',
    '__EVENTARGUMENT': '',
    '__LASTFOCUS': '',
    '__VIEWSTATE': VIEWSTATE,
    '__VIEWSTATEGENERATOR': VIEWSTATEGENERATOR,
    '__EVENTVALIDATION': EVENTVALIDATION,
    'txtIuputID': 'G178009585',
    'radInputNum': '0',
    'ddlBirthYear': '1981',
    'ddlBirthMonth': '04',
    'ddlBirthDay': '16',
    'txtVerifyCode': '1111',
    # requests form-encodes every value itself, so the button text must be
    # given decoded. Passing the captured '%E8%99%95...' form would be encoded
    # a second time ('%' -> '%25') and reach the server as a different value.
    'btnOK': '處理中..',
}


ryy = s.post(rbb, headers=request_headerst, data=form_datat)

# Decode the body as UTF-8 regardless of what requests inferred.
ryy.encoding = 'utf-8'
print('回應狀態:', ryy.status_code)
print('回應標頭:', ryy.headers['content-type'])
print('回應編碼:', ryy.encoding)

print(ryy.text)






程序代码:

import requests
import os
import re
import time

# One session is shared by every request below so that cookies handed out by
# the site are sent back on the later calls.
s = requests.Session()
r = s.get('https://www.ntuh.gov.tw/ntuh/Index.action')

# Doctor-schedule listing page; QueryName is already percent-encoded in the URL.
urla = 'https://reg.ntuh.gov.tw/webadministration/DoctorServiceQueryByDrName.aspx?HospCode=T0&QueryName=%E8%91%89%E9%99%B3%E7%92%9E'

ra = s.get(urla)

# The patterns are plain strings, so they must be passed to re.findall();
# calling .findall() on a str raises AttributeError.
# Every result below is a list of all captured values found in the page.

# Seven-digit ServiceIDSE of grid row ctl02.
patterna = r'\$ctl02.+ServiceIDSE" value="([0-9][0-9][0-9][0-9][0-9][0-9][0-9])"'
IDSE = re.findall(patterna, ra.text)
print(IDSE)

# Hidden __VIEWSTATE field of the listing page.
pattern = r'id="__VIEWSTATE" value="(.+)" />'
result1 = re.findall(pattern, ra.text)

# Hidden __EVENTVALIDATION field of the listing page.
patterna = r'id="__EVENTVALIDATION" value="(.+)" />'
result1a = re.findall(patterna, ra.text)

# ServiceEncryptCode of grid row ctl02.
patterna = r'\$ctl02.+ServiceEncryptCode" value="(.+?)"'
EncryptCode = re.findall(patterna, ra.text)

# Headers for the postback: a desktop-browser user agent, with the listing
# page itself as referer.
request_headers = dict([
    ('user-agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36'),
    ('referer', urla),
])

# Form fields of the postback, filled in the same order the original dict
# literal used. The scraped values (result1, result1a, IDSE, EncryptCode) are
# passed through exactly as they were extracted.
form_data = {}
form_data['__EVENTTARGET'] = 'DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$AdminTextShow'
form_data['__EVENTARGUMENT'] = ''
form_data['__VIEWSTATE'] = result1
form_data['__VIEWSTATEGENERATOR'] = '15A8B68C'
form_data['__EVENTVALIDATION'] = result1a
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$HospitalCode'] = 'T0'
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$ServiceIDSE'] = IDSE
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$ServiceEncryptCode'] = EncryptCode
form_data['DoctorServiceListInSeveralDaysInput$GridViewDoctorServiceList$ctl02$FirstVisitQuotaFlag'] = 'n'

# Submit to the listing URL and remember the URL of the response that came
# back; the browser step that follows opens that address.
raa = s.post(urla, data=form_data, headers=request_headers)
rbb = raa.url
print(rbb)
# Short pause before the next step.
time.sleep(0.5)


from datetime import datetime, date

import pytest
import time
import json
from selenium import webdriver
# The module paths on the next four "common" imports were missing in the
# pasted script ("from selenium. import By"), which is a syntax error.
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from webdriver_manager.chrome import ChromeDriverManager

chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')  # enable to run without a window
chrome_options.add_argument('--disable-gpu')  # original note: required on Windows

# `options=` replaces the deprecated `chrome_options=` keyword.
# NOTE(review): the positional driver path is kept from the original; newer
# Selenium 4 releases expect a Service object instead - confirm against the
# installed version.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)

# Open the form page whose URL was obtained by the requests step.
driver.get(rbb)
driver.implicitly_wait(20)

# find_element(By.<kind>, value) is used instead of the find_element_by_*
# helpers, which no longer exist in recent Selenium releases. Each element is
# looked up once, right where it is used.

# Choose the first input-type radio button.
driver.find_element(By.ID, 'radInputNum_0').click()
time.sleep(0.5)

# Identification number.
driver.find_element(By.ID, 'txtIuputID').send_keys('G178009585')

# Birth date: year, month and day are chosen by their visible option text.
select = Select(driver.find_element(By.NAME, 'ddlBirthYear'))
select.select_by_visible_text(u"70")
time.sleep(0.5)

select = Select(driver.find_element(By.NAME, 'ddlBirthMonth'))
select.select_by_visible_text(u"04")
time.sleep(0.5)

select = Select(driver.find_element(By.NAME, 'ddlBirthDay'))
select.select_by_visible_text(u"16")
time.sleep(0.5)

# Verification code.
driver.find_element(By.ID, 'txtVerifyCode').send_keys("1111")
time.sleep(0.5)

# Submit the form.
driver.find_element(By.ID, 'btnOK').click()

# Dump the HTML of the page the browser shows after the click.
text = driver.page_source
print(text)

# Keep the script (and with it the browser window) alive.
time.sleep(1000000)



4 回复
#2
wp231957 2022-05-13 13:14
回复 楼主 Z89
selenium  和requests   是两种不同的机制    你想提取哪部分数据   说一下
#3
Z89 2022-05-13 19:52
回复 2楼 wp231957
我用 requests 抓包比對過數據,問題出在最後一次 requests post 的 print(ryy.text)。

希望回傳的內容與 selenium 一樣,其中應該要有「驗證碼輸入錯誤,請重新輸入」的訊息。



以下為 selenium 與 requests 的檔案(網址會變動),能不能幫忙看看哪裡有問題?謝謝。


只有本站会员才能查看附件,请 登录


[此贴子已经被作者于2022-5-13 20:28编辑过]

#4
Z89 2022-05-22 13:56
我想爬點擊 '確定' 後的數據 ,有人能幫幫忙嗎?requests的方式

driver.find_element_by_id('btnOK').click()#點選元素
#5
Z89 2022-05-26 12:42
解析後發現這些值是隱藏的~~~該怎麼取得???有人能幫忙看一下嗎?

<input id="scrollLeft" name="scrollLeft" type="hidden" value=""/>
<input id="scrollTop" name="scrollTop" type="hidden" value=""/>
<input id="__EVENTTARGET" name="__EVENTTARGET" type="hidden" value=""/>
<input id="__EVENTARGUMENT" name="__EVENTARGUMENT" type="hidden" value=""/>
<input id="__LASTFOCUS" name="__LASTFOCUS" type="hidden" value=""/>
1