# coding=utf-8
"""Scrape movie detail pages linked from the dytt8.net listing pages."""
import requests
from lxml import etree

# Site root; hrefs scraped from the listing pages are appended to it.
a = 'https://www.dytt8.net'

# Headers sent with every request.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36'
    ),
    'Referer': 'https://www.dytt8.net/html/gndy/dyzz/list_23_3.html',
}

# Seconds to wait for the server; without a timeout requests can block
# forever on an unresponsive connection.
REQUEST_TIMEOUT = 10


def get_detail_urls(url):
    """Return the absolute detail-page URLs linked from one listing page.

    Args:
        url: URL of a listing page.

    Returns:
        A list of URLs built by prefixing the site root ``a`` to every
        ``href`` found inside a ``<table class="tbspan">``.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = etree.HTML(response.text)
    hrefs = html.xpath("//table[@class='tbspan']//a/@href")
    return [a + href for href in hrefs]


def spider(url):
    """Walk listing pages 1 through 7 and parse every linked detail page.

    Args:
        url: Unused. Kept only so existing callers keep working; the
            listing URLs are generated from a fixed template instead.

    Returns:
        A list with one movie dict per detail page visited.
    """
    base_url = "https://www.dytt8.net/html/gndy/dyzz/list_23_{}.html"
    movies = []
    for page in range(1, 8):
        page_url = base_url.format(page)
        for detail_url in get_detail_urls(page_url):
            movies.append(parse_detail_page(detail_url))
    return movies


def parse_detail_page(url):
    """Fetch one movie detail page and extract its fields.

    Args:
        url: Absolute URL of a movie detail page.

    Returns:
        A dict with the keys ``title``, ``covers`` and ``screenshot``. A
        value is ``None`` when the page lacks the corresponding element;
        only ``title`` is set when the ``zoom`` container is missing.
    """
    movie = {}
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    html = etree.HTML(response.text)

    # NOTE(review): the class value 'title all' is kept exactly as written
    # in the original; confirm it against the live markup.
    titles = html.xpath(
        "//div[@class='title all']//font[@color='#07519a']/text()"
    )
    title = titles[0] if titles else None
    movie['title'] = title
    print(title)

    zoom_nodes = html.xpath("//div[@id='zoom']")
    if not zoom_nodes:
        return movie
    zoom = zoom_nodes[0]

    # The leading "." keeps the search inside the zoom container; a bare
    # "//img" would scan the whole document.
    imgs = zoom.xpath(".//img/@src")
    movie['covers'] = imgs[0] if imgs else None
    movie['screenshot'] = imgs[1] if len(imgs) > 1 else None

    infos = zoom.xpath(".//text()")
    print(infos)
    for info in infos:
        # TODO(review): the original source is truncated at this loop
        # header; the per-line parsing that belongs here is not visible
        # and has deliberately not been reconstructed.
        pass

    # spider() assigns the result of this call, so the collected dict is
    # returned.
    return movie