01text """ 变量:变化的量 1 、变量值 value 2 、变量的内存地址 id 3 、变量的数据类型 type """ # import numpy as np# arr = np.arange( 10 )# print(type(arr))# print(arr)## l1 = [ 1 , 2 , 3 , 4 , 5 ]# print(type(l1))#
01text
"""Scratch notes on variables.

A variable is a quantity that changes. Each variable has:
1. a value
2. a memory address (id)
3. a data type (type)
"""
import re

# --- Disabled experiment: NumPy array vs. built-in list ---
# import numpy as np
# arr = np.arange(10)
# print(type(arr))
# print(arr)
#
# l1 = [1, 2, 3, 4, 5]
# print(type(l1))
# print(l1)

# --- Disabled experiment: fetch a page while sending a browser User-Agent ---
# import requests
#
# header = {
#     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
# }
#
# res = requests.get('https://www.bilibili.com/video/av68746541/?spm_id_from=333.334.b_63686965665f7265636f6d6d656e64.16',
#                    headers=header)
# res.encoding = res.apparent_encoding
# print(res.text)

# --- Disabled experiment: stepping through range() ---
# for i in range(0, 100, 25):
#     print(i)

# Non-greedy demo: every hit starts at an 'a' and stops at the nearest
# following 'c', so the sample text yields two separate matches.
matches = re.findall(r'a.*?c', 'a123c456dsdadac')
print(matches)
02模拟浏览器登录
"""Fetch the dig.chouti.com front page while sending a browser User-Agent."""
import requests

# Sent instead of the default User-Agent that requests would otherwise use.
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}

# Without a timeout, requests waits indefinitely on an unresponsive server.
res = requests.get('https://dig.chouti.com/', headers=header, timeout=10)
data = res.text
print(data)
03爬豆瓣again
"""Scrape the Douban Top 250 movie list with requests and a regular expression.

Page 1: https://movie.douban.com/top250?start=0&filter=
Page 2: https://movie.douban.com/top250?start=25&filter=

requests: HTTP request library
re: regular expressions
"""
import re

import requests

# One match per movie entry. ``.*?`` lazily skips markup that is not wanted
# and ``(.*?)`` captures, in order: movie URL, title, rating, rating count.
# re.S makes ``.`` match newlines too, so one match can span several lines.
# Compiled once here instead of being rebuilt for every page.
MOVIE_PATTERN = re.compile(
    r'<div class="item">.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>'
    r'.*?<span class="rating_num" property="v:average">(.*?)</span>'
    r'.*?<span>(.*?)人评价</span>',
    re.S,
)

# Filled with (name, url, rating, rating count), in that order.
MOVIE_TEMPLATE = '''
电影名称: %s
电影地址: %s
电影评分: %s
评价人数: %s
\n
'''

# Only the pages with start = 0, 25, 50 and 75 are requested.
for start in range(0, 100, 25):
    page_url = f'https://movie.douban.com/top250?start={start}&filter='
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(page_url, timeout=10)

    movies = MOVIE_PATTERN.findall(response.text)
    if not movies:
        continue

    # Mode 'a' appends, so entries from earlier pages and runs are kept.
    # The file is opened once per page rather than once per movie.
    with open('豆瓣.txt', 'a', encoding='utf-8') as f:
        for movie_url, name, point, count in movies:
            movie_data = MOVIE_TEMPLATE % (name, movie_url, point, count)
            print(movie_data)
            f.write(movie_data)
04友好爬豆瓣
"""Save Douban Top 250 entries to an Excel workbook.

Four list pages (start = 0, 25, 50, 75) are fetched with requests, parsed
with BeautifulSoup's 'html.parser', and written to '好评电影.xlsx' with
openpyxl: one header row followed by one row per movie.
"""
import time

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

wb = Workbook()
sheet = wb.active
sheet.title = '好评电影'
# Header row (A1..E1); written once instead of being rewritten for every page.
sheet.append(['序号', '电影名称', '电影评分', '电影链接', '电影图片'])

count = 0  # movies written so far; doubles as the serial number in column A

for start in range(0, 100, 25):
    page_url = f'https://movie.douban.com/top250?start={start}&filter='
    # Send the request and keep the response body as text.
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(page_url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # The movie list is the <ol> whose class is "grid_view".
    ol = soup.find(name='ol', attrs={'class': 'grid_view'})
    if ol is None:
        # Skip the page so the rows collected so far are still saved,
        # instead of failing with an AttributeError on None.
        print(f'No movie list found at {page_url} (HTTP {response.status_code}); page skipped')
    else:
        # Each <li> inside the list is treated as one movie entry.
        for li in ol.find_all(name='li'):
            title = li.find(name='span', attrs={'class': 'title'})
            link = li.find(name='a')
            rating = li.find(name='span', attrs={'class': 'rating_num'})
            img = li.find(name='img')

            count += 1
            # append() writes to the first empty row below the existing data.
            sheet.append([count, title.text, rating.text, link['href'], img['src']])

    # Pause between page requests.
    time.sleep(1)

wb.save('好评电影.xlsx')
05金山词霸翻译
"""Translate a word or phrase through the iciba (Kingsoft PowerWord) endpoint."""
import json

import requests


def main(key: str = ""):
    """Post *key* to the iciba translation endpoint and return the result.

    Args:
        key: Text to translate. Source and target language are both sent
            as 'auto'.

    Returns:
        The value of ``content['word_mean']`` from the JSON reply when that
        key exists, otherwise the value of ``content['out']``.
        NOTE(review): presumably 'word_mean' holds dictionary-style meanings
        and 'out' a plain translation -- confirm against the API.

    Raises:
        KeyError: If the reply has no 'content' entry, or 'content' has
            neither 'word_mean' nor 'out'.
        json.JSONDecodeError: If the reply body is not valid JSON.
        requests.RequestException: On connection failure or timeout.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    url = 'http://fy.iciba.com/ajax.php?a=fy'
    payload = {
        'f': 'auto',
        't': 'auto',
        'w': key,
    }

    # POST the form data with the browser-like headers. Without a timeout,
    # requests waits indefinitely on an unresponsive server.
    res = requests.post(url=url, headers=header, data=payload, timeout=10)

    # res.text is the decoded response body (a str); parse it as JSON.
    reply = json.loads(res.text)
    content = reply['content']

    # Catch only the missing-key case; a bare except would also swallow
    # KeyboardInterrupt and hide unrelated bugs.
    try:
        return content['word_mean']
    except KeyError:
        return content['out']


if __name__ == '__main__':
    # Read the text to translate from the user.
    key = input('请输入要翻译的词语:').strip()
    # Do not call the service for empty input.
    if not key:
        print('输入为空')
    else:
        data = main(key=key)
        print(data)