Python 京东手机爬虫
代码如下:
import parsel as parsel
import requests
import csv # 内置模块
from lxml import etree
def get_html(page):
    """Download the search page and wrap its body in a parsel Selector.

    Args:
        page: Page number requested by the caller.
            NOTE(review): currently unused -- the URL below carries no
            paging parameter, so every call fetches the same address.

    Returns:
        parsel.Selector built from the text of the HTTP response.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    # NOTE(review): this URL looks truncated -- confirm the full search address.
    url = 'https://search.'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.9231 SLBChan/30'
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an error status instead of silently parsing an error page.
    response.raise_for_status()
    return parsel.Selector(response.text)
def parse_data(selector):
    """Visit every product's detail page and collect its first parameter entry.

    Args:
        selector: parsel Selector of a search-result page; each product is
            expected to be an ``<li>`` carrying a ``data-sku`` attribute.

    Returns:
        list: one item per SKU found, holding the text of the first ``<li>``
        in the detail page's parameter list, or ``None`` when that element
        is missing from the downloaded HTML.

    Raises:
        requests.RequestException: on connection failure or timeout while
            fetching a detail page.
    """
    # .getall() extracts the attribute *strings*. Without it the loop would
    # iterate Selector objects and str() would put their repr into the URL.
    skus = selector.css('li::attr(data-sku) ').getall()
    # href = selector.css('.p-img a::attr(href)').getall()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.9231 SLBChan/30'
    }
    res = []
    for sku in skus:
        # NOTE(review): the host part looks truncated -- confirm the full address.
        index_url = 'https://item.' + str(sku) + '.html'
        # A timeout keeps one stalled detail page from blocking the whole run.
        response_1 = requests.get(url=index_url, headers=headers, timeout=10)
        selector_1 = parsel.Selector(response_1.text)
        # First entry of the parameter list:
        # #detail > div.tab-con > div:nth-child(1) > div.p-parameter
        #   > ul.parameter2.p-parameter-list > li:nth-child(1)
        price = selector_1.css('ul.parameter2.p-parameter-list li:nth-child(1)::text').get()
        print(price)
        res.append(price)
    # Return what was collected so the caller's `res = parse_data(html)` is useful.
    return res
if __name__ == '__main__':
    # Script entry point: fetch the first result page, then walk its products.
    page = 1
    res = parse_data(get_html(page))
求教:按照我的方法,想爬取 <li> 标签的 title 属性值,该怎么写?还是说我根本没有爬取到这个页面的 HTML?
京东的价格和评论怎么爬取不到啊?
[此贴子已经被作者于2022-11-2 22:44编辑过]