新手有关于bs4 requests爬虫问题
要求爬取https://turnoff.us的漫画 # -*- coding:utf-8 -*-
import requests
import os
import bs4
url = "https://turnoff.us"
os.makedirs("漫画",True)
while not url.endswith('.prev href[previous]'):
web = requests.get(url)
web.raise_for_status()
web_soup = bs4.BeautifulSoup(web.text)
comics = web_soup.select('.post-content img[src]')
if comics == []:
print("no comic exists")
continue
comics_linkage = comics[0].get("src")
comics_content = requests.get("https://turnoff.us" +comics_linkage)
comics_content.raise_for_status()
disk_file = os.path.join("漫画",os.path.basename(comics_linkage))
with open(disk_file,"wb") as f:
for i in comics_content.iter_content(100000):
f.write(i)
f.close()
prevs = web_soup.select('.prev a[href]')
if prevs ==[]:
print("cannot get prev linkage")
url = "http://turnoff.us/"+prevs[0].get('href')
哪里出了问题,创建文件夹内是空的,爬不到图片