一) import requests def download(url, num_tries=2, user_agent='wswp', proxies=None): ''' 下载指定url并返回网页内容 :param url: 请求URL :param num_tries: 重试次数 :param user_agent: 用户代理 :param proxies: 代理(字典
一)
import requests


def download(url, num_tries=2, user_agent='wswp', proxies=None):
    """Download ``url`` and return the page content as text.

    The response body is decoded as UTF-8. When the server answers with a
    5xx status the request is retried, up to ``num_tries`` additional
    times, using the same user agent and proxies as the first attempt.

    :param url: URL to request.
    :param num_tries: Number of retries allowed after a 5xx response.
    :param user_agent: Value sent in the ``User-Agent`` request header.
    :param proxies: Proxy mapping passed to ``requests.get``; keys are the
        scheme (``http`` / ``https``).
    :return: The page text, or ``None`` when the request raised a
        ``requests`` exception or the final response had a status >= 400.
    """
    headers = {'User-Agent': user_agent}
    try:
        # NOTE(review): no timeout is passed, so a stalled server can block
        # this call indefinitely; consider adding one.
        res = requests.get(url, headers=headers, proxies=proxies)
    except requests.exceptions.RequestException as e:
        print('Download error:', e)
        return None

    if res.status_code < 400:
        res.encoding = 'utf-8'
        return res.text

    # Only server-side (5xx) failures are worth retrying; 4xx will not change.
    # The `> 0` check stops a negative count from retrying without bound.
    if num_tries and num_tries > 0 and 500 <= res.status_code < 600:
        # Forward user_agent and proxies so the retry is the same request.
        return download(
            url,
            num_tries=num_tries - 1,
            user_agent=user_agent,
            proxies=proxies,
        )
    return None
# Demo: fetch the Baidu home page and print whatever download() returns.
print(download(url='http://www.baidu.com'))