求大神帮忙解答:Python 抓取数据抓不出来
# coding: utf-8
import re
import urllib
import urllib2
def _parse_card_info_num(html):
    """Return the first ``card_infoNum`` counter found in *html* as an int.

    Raises:
        ValueError: if no ``card_infoNum`` span is present, or if its text
            is not a plain (optionally comma-grouped) integer.
    """
    # Parentheses capture only the text between the span tags.
    pattern = re.compile(
        r'<span class="card_infoNum">([\s\S]*?)</span>', re.IGNORECASE
    )
    matches = pattern.findall(html)
    if not matches:
        raise ValueError("no card_infoNum span found in the page")
    # Strip thousands separators, then convert with int() instead of eval():
    # the text comes from a remote page and must never be executed.
    return int(matches[0].replace(",", "").strip())


def gettiebalistnumbers(name):
    """Fetch the Baidu Tieba page for forum *name* and return its counter.

    The counter is the number inside the first
    ``<span class="card_infoNum">`` element of the page (presumably the
    forum's post count -- confirm against the live markup).

    The value is returned, not printed: the caller must invoke this
    function and print or otherwise use the result to see any output.

    Args:
        name: the forum name, sent as the ``kw`` query parameter.

    Returns:
        The counter as an integer.

    Raises:
        ValueError: if the counter cannot be found or parsed.
        urllib2.URLError: if the HTTP request fails.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0);"
    }
    # URL-encode the forum name and append it to the base URL.
    query = urllib.urlencode({"kw": name})
    url = "http://tieba.baidu.com/f?" + query

    request = urllib2.Request(url, headers=headers)
    # A single header can also be added or replaced via add_header().
    request.add_header("Connection", "keep-alive")

    response = urllib2.urlopen(request)
    try:
        data = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    return _parse_card_info_num(data)
环境是python2.7
PyCharm Community Edition 2020.1.3 x64
运行完成后没有数据显示,但是没有报错
返回信息:
D:\Python\py2.7\installpath\python27.exe E:/Py基础/py练习/yincheng爬虫/tieba/1tieba.py
Process finished with exit code 0