![]() |
#2
TysonKoothra2019-08-22 13:08
|
![](zzz/editor/img/code.gif)
import urllib.request
import re,xlwt
def get_content(pages=range(1, 5), base_url='https://web.'):
    """Download every listing page and return the combined HTML text.

    The original loop ended in a single ``return``, so only one page's
    markup was ever handed to the caller; here each page is collected and
    the pages are joined with newlines so the caller can scan all of them
    in one pass.

    Args:
        pages: Iterable of page numbers appended to ``base_url``.
            Defaults to pages 1 through 4.
        base_url: URL prefix that the page number is concatenated onto.

    Returns:
        The UTF-8 decoded bodies of all requested pages joined by ``"\\n"``
        (an empty string when ``pages`` is empty).

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        UnicodeDecodeError: If a response body is not valid UTF-8.
    """
    documents = []
    for page in pages:
        url = base_url + str(page)
        # The context manager closes the HTTP response even if read() or
        # decode() raises, which the original never did.
        with urllib.request.urlopen(url) as response:
            documents.append(response.read().decode('utf-8'))
    return "\n".join(documents)
def get():
    """Fetch the listing HTML and extract one tuple per news entry.

    The original bundled the pattern and ``re.S`` into a tuple and passed
    that tuple as the pattern argument of ``re.findall``, which raises
    ``TypeError``. The flag is now passed as the separate ``flags`` argument.

    Returns:
        A list of 3-tuples of strings, one per match, holding the three
        capture groups in order: the ``title`` attribute, the contents of
        the ``chengshi_wz_m`` div and the contents of the ``chengshi_wz_f``
        div.
    """
    html = get_content()
    # re.S lets ".*?" span line breaks between the matched elements.
    pattern = (
        r'class="chengshi_wz">.*?title="(.*?)".*?'
        r'class="chengshi_wz_m">(.*?)</div>.*?'
        r'<div class="chengshi_wz_f">(.*?)</div>'
    )
    return re.findall(pattern, html, re.S)
#创建表格
def excel_write(items):
    """Write the scraped entries to ``test.xls`` on a sheet named ``test1``.

    Row 0 holds three bold column headings; each entry in ``items`` then
    fills one row, starting at row 1, using its first three elements.

    Args:
        items: Iterable of indexable records with at least three elements
            each (index 0, 1 and 2 are written to columns 0, 1 and 2).
    """
    output_path = 'test.xls'  # output file name
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('test1')

    # Header row: arguments are row, column, value, cell style.
    headings = ['新闻标题', '摘要', '作者和时间']
    for column, heading in enumerate(headings):
        sheet.write(0, column, heading, xlwt.easyxf('font:bold on'))

    # Data rows start directly below the header.
    for row, record in enumerate(items, start=1):
        for column in range(3):
            sheet.write(row, column, record[column])

    workbook.save(output_path)
if __name__ == "__main__":
    # Script entry point: scrape the entries, then dump them to the workbook.
    excel_write(get())
import re,xlwt
def get_content(pages=range(1, 5), base_url='https://web.'):
    """Download every listing page and return the combined HTML text.

    The original loop ended in a single ``return``, so only one page's
    markup was ever handed to the caller; here each page is collected and
    the pages are joined with newlines so the caller can scan all of them
    in one pass.

    Args:
        pages: Iterable of page numbers appended to ``base_url``.
            Defaults to pages 1 through 4.
        base_url: URL prefix that the page number is concatenated onto.

    Returns:
        The UTF-8 decoded bodies of all requested pages joined by ``"\\n"``
        (an empty string when ``pages`` is empty).

    Raises:
        urllib.error.URLError: If a page cannot be fetched.
        UnicodeDecodeError: If a response body is not valid UTF-8.
    """
    documents = []
    for page in pages:
        url = base_url + str(page)
        # The context manager closes the HTTP response even if read() or
        # decode() raises, which the original never did.
        with urllib.request.urlopen(url) as response:
            documents.append(response.read().decode('utf-8'))
    return "\n".join(documents)
def get():
    """Fetch the listing HTML and extract one tuple per news entry.

    The original bundled the pattern and ``re.S`` into a tuple and passed
    that tuple as the pattern argument of ``re.findall``, which raises
    ``TypeError``. The flag is now passed as the separate ``flags`` argument.

    Returns:
        A list of 3-tuples of strings, one per match, holding the three
        capture groups in order: the ``title`` attribute, the contents of
        the ``chengshi_wz_m`` div and the contents of the ``chengshi_wz_f``
        div.
    """
    html = get_content()
    # re.S lets ".*?" span line breaks between the matched elements.
    pattern = (
        r'class="chengshi_wz">.*?title="(.*?)".*?'
        r'class="chengshi_wz_m">(.*?)</div>.*?'
        r'<div class="chengshi_wz_f">(.*?)</div>'
    )
    return re.findall(pattern, html, re.S)
#创建表格
def excel_write(items):
    """Write the scraped entries to ``test.xls`` on a sheet named ``test1``.

    Row 0 holds three bold column headings; each entry in ``items`` then
    fills one row, starting at row 1, using its first three elements.

    Args:
        items: Iterable of indexable records with at least three elements
            each (index 0, 1 and 2 are written to columns 0, 1 and 2).
    """
    output_path = 'test.xls'  # output file name
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet('test1')

    # Header row: arguments are row, column, value, cell style.
    headings = ['新闻标题', '摘要', '作者和时间']
    for column, heading in enumerate(headings):
        sheet.write(0, column, heading, xlwt.easyxf('font:bold on'))

    # Data rows start directly below the header.
    for row, record in enumerate(items, start=1):
        for column in range(3):
            sheet.write(row, column, record[column])

    workbook.save(output_path)
if __name__ == "__main__":
    # Script entry point: scrape the entries, then dump them to the workbook.
    excel_write(get())
[此贴子已经被作者于2019-8-22 13:37编辑过]