一、获取IP地址 二、判断地址是否可用。并把可的地址保存。 import requests from lxml import etree import time headers = { "User-Agent" : "Mozilla/5.0(Windows NT 6.1;WOW64) AppleWebKit/537.36(KABUL, like Gecko) " "Chro
一、获取IP地址
二、判断地址是否可用，并把可用的地址保存。
# ---------------------------------------------------------------------------
# Script 1: scrape the proxy table (IP, port, location) from pages 1-10 of
# www.66ip.cn and save every row to "06-IP代理.txt".
# ---------------------------------------------------------------------------
import time

import requests
from lxml import etree

# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}
url = "http://www.66ip.cn/areaindex_10/"

# Open the output file once, before the page loop. Opening it with mode "w"
# inside the loop truncates it on every page, so only the last page survives.
with open("06-IP代理.txt", "w", encoding="utf-8") as f:
    for page in range(1, 11):
        page_url = url + str(page) + ".html"
        # A timeout keeps one unresponsive page from hanging the whole run.
        resp = requests.get(url=page_url, headers=headers, timeout=10)
        # Let requests guess the encoding from the body before decoding it.
        resp.encoding = resp.apparent_encoding
        e = etree.HTML(resp.text)
        # Columns 1-3 of each table row: IP, port, location.
        ips = e.xpath("//div[1]/table//tr/td[1]/text()")
        ports = e.xpath("//div[1]/table//tr/td[2]/text()")
        addrs = e.xpath("//div[1]/table//tr/td[3]/text()")
        for i, p, a in zip(ips, ports, addrs):
            f.write(f"---{i}---{p}---{a}\n")
        # Pause between pages to avoid hammering the site.
        time.sleep(5)
print("保存完毕")


# ---------------------------------------------------------------------------
# Script 2 (standalone): scrape the same pages, test each proxy against
# https://www.baidu.com, and append the working ones to "06-IP代理-可用.txt".
# ---------------------------------------------------------------------------
import time

import requests
from lxml import etree

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/86.0.4240.198 Safari/537.36"
}
url = "http://www.66ip.cn/areaindex_10/"

for page in range(1, 11):
    page_url = url + str(page) + ".html"
    resp = requests.get(url=page_url, headers=headers, timeout=10)
    resp.encoding = resp.apparent_encoding
    e = etree.HTML(resp.text)
    ips = e.xpath("//div[1]/table//tr/td[1]/text()")
    ports = e.xpath("//div[1]/table//tr/td[2]/text()")
    for ip, port in zip(ips, ports):
        # Strip stray whitespace so the header check and the proxy URL are clean.
        ip, port = ip.strip(), port.strip()
        if ip == 'ip':
            # Skip the table's header row.
            continue
        # NOTE(review): many free proxies only speak plain HTTP; the scheme here
        # may need to be "http://" for them to work -- confirm against the site.
        proxies_dict = {
            "https": "https://" + ip + ':' + port,
        }
        print(proxies_dict)
        try:
            res = requests.get(url="https://www.baidu.com", headers=headers,
                               proxies=proxies_dict, timeout=2)
        except requests.RequestException:
            # Catch only request failures (timeout, proxy/connection error).
            # A bare "except:" would also swallow Ctrl-C.
            print("当前代理不可用")
            continue
        if res.status_code == 200:
            # Append, so proxies found earlier are kept across iterations.
            with open("06-IP代理-可用.txt", "a", encoding="utf-8") as f:
                f.write(f"{ip}---{port}\n")
    # Pause between pages; the collapsed original does not show the exact
    # nesting of this call, once per page is assumed here.
    time.sleep(3)
print("保存完毕")

# 1. Parsing the data with XPath.
# Parse the decoded response body into an lxml element tree.
e = etree.HTML(resp.text)
# Collect the text of the first cell of every table row (the IP column).
ips = e.xpath("//div[1]/table//tr/td[1]/text()")

# 2. Saving the data as a plain-text (.txt) file.
# Write one "---ip---port---location" line per scraped row. The with-block
# closes the file even if a write fails. Mode "w" truncates the file, so this
# must run once per output file, not once per page.
with open("06-IP代理.txt", "w", encoding="utf-8") as f:
    for i, p, a in zip(ips, ports, addrs):
        f.write(f"---{i}---{p}---{a}\n")