求助:下列代码不能爬取每章小说的下一页
下列代码不能爬取每章小说的下一页(部分章节有下一页),请高手赐教,万分感谢!!!
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Target site and the chapter-index page of the novel.
# NOTE(review): the host part of base_url looks truncated/redacted
# ('https://www.') - fill in the real domain before running.
base_url = 'https://www.'
chapter_list_url = f'{base_url}/ml/116168/'

# Download the chapter index. A timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops early on an HTTP error
# instead of parsing an error page.
response = requests.get(chapter_list_url, timeout=10)
response.raise_for_status()
response.encoding = 'utf-8'

# Parse the HTML with BeautifulSoup.
soup = BeautifulSoup(response.text, 'html.parser')

# Every chapter entry is a <dd> tag inside the <div id="list"> element.
# find() returns None when the element is missing, so fail with a clear
# message rather than an AttributeError on None.
list_container = soup.find("div", id="list")
if list_container is None:
    raise RuntimeError(f'No <div id="list"> found at {chapter_list_url}')
chapter_list = list_container.find_all("dd")

# Directory that receives one .txt file per chapter; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
save_dir = 'novel_chapters'
os.makedirs(save_dir, exist_ok=True)
def get_full_chapter_content(url):
    """Fetch and concatenate the text of every page of one chapter.

    Starting at ``url``, each page is downloaded, the text of its
    ``<div id="content">`` element is collected, and the page's "下一页"
    (next page) link is followed until no such link is found.

    :param url: URL of the first page of the chapter.
    :return: The text of all pages joined together, each page followed by a
        newline; an empty string if no page contained a content element.
    """
    pages = []
    visited = set()
    # The visited set stops the loop if a "next page" link points back to a
    # page that was already fetched.
    while url and url not in visited:
        visited.add(url)
        response = requests.get(url, timeout=10)
        response.encoding = 'utf-8'
        chapter_soup = BeautifulSoup(response.text, 'html.parser')

        # The chapter text is looked up by id="content".
        chapter_content_tag = chapter_soup.find('div', id='content')
        if chapter_content_tag:
            pages.append(chapter_content_tag.get_text("\n", strip=True) + "\n")

        # Look for the "next page" link. Matching on a substring of the
        # link's full text tolerates surrounding whitespace, arrows or nested
        # tags; an exact match on the anchor's single string returns None in
        # all of those cases.
        # NOTE(review): if the site keeps the same link text on the last page
        # of a chapter (pointing at the next chapter), this would keep
        # following it - confirm against the real pages.
        next_page_tag = chapter_soup.find(
            lambda tag: tag.name == 'a'
            and tag.has_attr('href')
            and '下一页' in tag.get_text()
        )
        if next_page_tag is None:
            break

        # urljoin resolves absolute, root-relative ("/a/b.html") and
        # page-relative ("b_2.html") hrefs against the current page URL.
        next_url = urljoin(url, next_page_tag['href'])
        # Skip pseudo-links such as "javascript:;" that cannot be requested.
        if not next_url.startswith(('http://', 'https://')):
            break
        url = next_url
    return "".join(pages)
# Walk the <dd> entries of the chapter index and download every chapter from
# the first title containing "第1章" onwards; earlier entries are skipped.
start_collecting = False
for chapter in chapter_list:
    anchor = chapter.a
    # A <dd> without a usable link cannot be downloaded; skip it instead of
    # failing on None.
    if anchor is None or not anchor.has_attr('href'):
        continue

    title = anchor.get_text(strip=True)
    if "第1章" in title:
        start_collecting = True
    if not start_collecting:
        continue

    # Resolve the chapter link against the index page so that absolute,
    # root-relative and page-relative hrefs all work.
    link = urljoin(chapter_list_url, anchor['href'])
    full_chapter_content = get_full_chapter_content(link)

    # Save the chapter text to a .txt file. Characters that are not allowed
    # in file names are replaced so that open() does not fail on such titles.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', title)
    file_name = os.path.join(save_dir, f"{safe_title}.txt")
    with open(file_name, 'w', encoding='utf-8') as file:
        file.write(full_chapter_content)
    print(f'Saved:{file_name}')






