爬虫代码,想写入文件,但写不进,可以运行,不知道什么问题
程序代码:
import time import re import requests #请求头伪装浏览器 headers = { 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64)\ AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.\ 3396.99 Safari/537.36' } #初始化列表,转入爬虫信息 info_lists = [] #判断用户性别函数 def judgment_sex(class_name): if class_name == "manIcon": return '男' else: return '女' #获取信息 def get_info(url): res = requests.get(url,headers = headers) ids = re.findall('<h2>(.*?)</h2>',res.text,re.S) levels = re.findall('<div class="articleGender \D+Icon">\ (.*?)</div>',res.text,re.S) sexs = re.findall('<div class="articleGender (.*?)"', res.text,re.S) contents = re.findall('<span>(.*?)</span>',res.text,re.S) laughs = re.findall('<span class="stats-vote"><i class=\ "number">(\d+)</i> 好笑</span>',res. text,re.S) comments = re.findall('<i class="number">(\d+)</i> 评论' ,res.text,re.S) for id,level,sex,content,laugh,comment in zip(ids,levels, sexs,contents,laughs,comments): info = { 'id' : id, 'level' : level, 'sex' : judgment_sex(sex), 'content' : content, 'laugh' : laugh, 'comment' : comment } info_lists.append(info) urls = ['https://www.{}/'.format( str(i)) for i in range(1,14)] for url in urls: get_info(url) time.sleep(1) #写入文件 for info_list in info_lists: f = open('.\qiushi.txt','a+') try: f.write(info_list['id'] + '\n') f.write(info_list['level'] + '\n') f.write(info_list['sex'] + '\n') f.write(info_list['content'] + '\n') f.write(info_list['laugh'] + '\n') f.write(info_list['comment'] + '\n\n') f.close() except UnicodeEncodeError: pass