我是VFP的老菜鸟,遇到问题总喜欢在这里寻求帮助,主要是因为这里人气旺,一般都有人解答。
下面是python+selenium+IE的解决办法:
from selenium import webdriver
#from selenium.webdriver.support.ui import Select
import time
import sys
import re
import random
from decimal import Decimal
import os
import json
import logging
import datetime
# Configure the root logger: records at INFO level and above are written to
# the file 'rizhi.txt', each line formatted as "<timestamp>: <level>:<message>".
logging.basicConfig(filename='rizhi.txt', level = logging.INFO, format = '%(asctime)s: %(levelname)s:%(message)s')
# The bare string below is a no-op expression that serves as a description of
# getdata(): "Shanghai/Shenzhen Stock Connect: get the northbound (Mainland
# Stock Connect) holdings list".
"""沪深股通获得北上(陆股通)持股列表"""
def getdata():
    """Scrape the northbound (Stock Connect) holdings table into a dict.

    Drives Internet Explorer through Selenium: loads the holdings page, reads
    the total page count from the pager, then visits every page in random
    order (with short random pauses between UI actions) and parses each row
    of the "dataview_bkcgmx" table.

    Returns:
        dict: maps the 6-digit stock code (str) to a ``Decimal`` parsed from
        the text of the row's 10th cell (presumably the holding ratio in
        percent -- confirm against the live page).

    Raises:
        RuntimeError: if the "dataview_bkcgmx" table is not present after the
            page has loaded.
        ValueError: if the pager's page-count text is not an integer.
    """
    # Compile once; raw strings avoid invalid-escape warnings for "\d".
    code_re = re.compile(r"\d{6,10}")
    ratio_re = re.compile(r"\d{1,2}\.\d\d")

    def pause():
        # Random 0.8-1.8 s delay between UI actions.
        time.sleep(random.randint(8, 18) / 10)

    gp_dict = {}
    ie = webdriver.Ie()
    try:
        ie.implicitly_wait(10)
        # Presumably parks the browser window mostly off-screen while scraping.
        ie.set_window_position(-900, 10)
        # Eastmoney northbound-holdings page.
        # NOTE(review): the URL literal is truncated in the original source
        # (only "http://data." survives); restore the full address before use.
        ie.get("http://data.")
        # find_element_by_id raises instead of returning a falsy value, so the
        # plural form is needed for this guard to be reachable.
        if not ie.find_elements_by_id("dataview_bkcgmx"):
            raise RuntimeError("目标网页没有打开!")

        # The second-to-last self-targeting pager link holds the last page number.
        last_page_text = (
            ie.find_element_by_class_name("pagerbox")
            .find_element_by_xpath(".//a[@target='_self'][last()-1]")
            .get_attribute("innerText")
        )
        # int() rather than eval(): this text comes from a remote web page.
        zong_ye_shu = int(last_page_text)

        pages = list(range(1, zong_ye_shu + 1))
        random.shuffle(pages)
        while pages:
            page = pages.pop()
            ie.find_element_by_id("gotopageindex").clear()
            pause()
            ie.find_element_by_id("gotopageindex").send_keys(str(page))
            pause()
            ie.find_element_by_class_name("gotopage").find_element_by_class_name("btn").click()
            pause()

            rows = (
                ie.find_element_by_id("dataview_bkcgmx")
                .find_element_by_xpath(".//tbody")
                .find_elements_by_xpath(".//tr")
            )
            # Bottom-up, matching the original pop()-based traversal order.
            for trx in reversed(rows):
                row_text = trx.get_attribute("innerText") or ""
                codes = code_re.findall(row_text)
                if not codes:
                    logging.warning("page %s: no stock code in row %r, skipped", page, row_text)
                    continue
                # Keep only the trailing six digits of the first numeric run.
                daima = codes[0][-6:]

                bili_text = trx.find_element_by_xpath(".//td[10]").get_attribute("innerText") or ""
                bili_match = ratio_re.match(bili_text)
                if bili_match is None:
                    logging.warning(
                        "page %s: unparsable value %r for code %s, skipped",
                        page, bili_text, daima,
                    )
                    continue
                gp_dict[daima] = Decimal(bili_match.group())
    finally:
        # Always shut the browser/driver down, even when scraping fails.
        ie.quit()

    print(len(gp_dict))
    return gp_dict