
day04 python


01text

"""
变量:变化的量
    1、变量值 value
    2、变量的内存地址 id
    3、变量的数据类型 type
"""
# import numpy as np

# arr = np.arange(10)
# print(type(arr))
# print(arr)
#
# l1 = [1, 2, 3, 4, 5]
# print(type(l1))
# print(l1)
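
A minimal sketch of the three properties listed in the docstring above (value, id, type); x is just a throwaway example name:

x = 10
print(x)        # 1. the value bound to the name
print(id(x))    # 2. the memory address (identity) of that value
print(type(x))  # 3. the data type, e.g. <class 'int'>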

# import requests
#
# header = {
#     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
# }
#
# res = requests.get('https://www.bilibili.com/video/av68746541/?spm_id_from=333.334.b_63686965665f7265636f6d6d656e64.16',
#                    headers=header)
# res.encoding = res.apparent_encoding
# print(res.text)


# for i in range(0, 100, 25):
#     print(i)
import re

print(re.findall('a.*?c', 'a123c456dsdadac'))  # non-greedy match -> ['a123c', 'adac']

02 Simulating browser login

import requests

# Send the GET request with a browser User-Agent so the site serves the normal page
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
}

res = requests.get('https://dig.chouti.com/', headers=header)
data = res.text

print(data)

03 Scraping Douban again

'''Scrape the Douban Movie TOP250 list
Page 1:
https://movie.douban.com/top250?start=0&filter=
Page 2:
https://movie.douban.com/top250?start=25&filter=

requests: HTTP request library
re: regular expressions
'''
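
Each page holds 25 titles, so the start offsets the loop below will request are easy to sanity-check; note that this only covers the first 100 titles, and range(0, 250, 25) would be needed for the full TOP250:

print(list(range(0, 100, 25)))   # -> [0, 25, 50, 75]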

import requests
import re

# Build the URL for each page of movies to crawl

for line in range(0, 100, 25):
    url = f'https://movie.douban.com/top250?start={line}&filter='
    response = requests.get(url)
    # data = response.text
    # 3. Parse and extract the data
    data = re.findall(
        '<div class="item">.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>.*?<span class="rating_num" property="v:average">(.*?)</span>.*?<span>(.*?)人评价</span>',
        response.text, re.S)  # re.S lets . match newlines too
    for d in data:
        url, name, point, count = d
        movie_data = '''
            电影名称: %s
            电影地址: %s
            电影评分: %s
            评价人数: %s
            \n
            ''' % (name, url, point, count)

        print(movie_data)
        with open('豆瓣.txt', 'a', encoding='utf-8') as f:
            f.write(movie_data)

# print(url)
# Send a request to the assembled URL to fetch the data
# response = requests.get(url)
# # print(response.text)  # get the response text
# # 3. Parse and extract the data
# # movie title, movie link, rating, number of ratings
# # re.findall(pattern, text, flags)  # pull the wanted fields out of the text
# # .*?  : skip over unwanted text, non-greedily, until the wanted part appears
# # (.*?): capture the wanted part (see the short sketch after this block)
# # Matching rule:
# # <div class="item">.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>.*?<span class="rating_num" property="v:average">(.*?)</span>.*?<span>(.*?)人评价</span>
# data = re.findall(
#     '<div class="item">.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>.*?<span class="rating_num" property="v:average">(.*?)</span>.*?<span>(.*?)人评价</span>',
#     response.text, re.S)  # re.S lets . match newlines too
# # print(data)
# for d in data:
#     # print(d)
#
#     url, name, point, count = d
#
#     movie_data = '''
#     电影名称: %s
#     电影地址: %s
#     电影评分: %s
#     评价人数: %s
#     \n
#     ''' % (name, url, point, count)
#
#     print(movie_data)
#
#     # 4. Save the data
#     # 'a': append mode
#     with open('豆瓣.txt', 'a', encoding='utf-8') as f:
#         f.write(movie_data)
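
As a quick illustration of the .*? / (.*?) rules described in the comments above, here is a minimal, self-contained sketch; the HTML snippet and values are made up for the example and are not the real Douban page:

import re

html = '<div class="item"><a href="/movie/1"><span class="title">Example Movie</span></a></div>'

# .*? skips over text non-greedily; (.*?) captures the part we actually want.
# With two capture groups, findall returns a list of (href, title) tuples.
pattern = '<div class="item">.*?<a href="(.*?)">.*?<span class="title">(.*?)</span>'
print(re.findall(pattern, html, re.S))   # -> [('/movie/1', 'Example Movie')]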

04 Scraping Douban politely

import requests
# import re
from bs4 import BeautifulSoup
from openpyxl import Workbook
import time

wb = Workbook()
sheet = wb.active

count = 1
# Build the URL for each page of movies to crawl
for line in range(0, 100, 25):
    url = f'https://movie.douban.com/top250?start={line}&filter='
    # Send the request to the server and get the response
    response = requests.get(url)
    # Get the response body as text
    data = response.text
    # Parse the HTML with the built-in 'html.parser'
    soup = BeautifulSoup(data, 'html.parser')

    # Find the <ol> tag whose class is grid_view
    ol = soup.find(name="ol", attrs={'class': 'grid_view'})

    # Grab the 25 <li> tags inside it as a list
    li_list = ol.find_all(name='li')

    # Header row (rewritten on every page, which is harmless)
    sheet.title = '好评电影'
    sheet['A1'].value = '序号'
    sheet['B1'].value = '电影名称'
    sheet['C1'].value = '电影评分'
    sheet['D1'].value = '电影链接'
    sheet['E1'].value = '电影图片'

    for li in li_list:
        # Find the <span> tag whose class is title
        name = li.find(name='span', attrs={'class': 'title'})
        url = li.find(name='a')
        rat = li.find(name='span', attrs={'class': 'rating_num'})
        img = li.find(name='img')
        count = count + 1
        sheet['A%s' % count].value = count - 1
        sheet['B%s' % count].value = name.text
        sheet['C%s' % count].value = rat.text
        sheet['D%s' % count].value = url['href']
        sheet['E%s' % count].value = img['src']
    time.sleep(1)  # pause between pages to be polite to the server
wb.save('好评电影.xlsx')
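
For reference, a minimal, self-contained sketch of the BeautifulSoup calls used above; the HTML snippet is made up for illustration, but find / find_all and attribute access work the same way on the real page:

from bs4 import BeautifulSoup

html = '''
<ol class="grid_view">
  <li><a href="/movie/1"><span class="title">Example Movie</span></a>
      <span class="rating_num">9.7</span></li>
</ol>
'''
soup = BeautifulSoup(html, 'html.parser')
ol = soup.find(name='ol', attrs={'class': 'grid_view'})   # first matching tag
for li in ol.find_all(name='li'):                          # all matching tags
    print(li.find(name='span', attrs={'class': 'title'}).text)  # -> Example Movie
    print(li.find(name='a')['href'])                             # -> /movie/1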

05 iciba (金山词霸) translation

import requests
import json


def main(key=""):
    header = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36'
    }
    url = 'http://fy.iciba.com/ajax.php?a=fy'

    data = {
        'f': 'auto',   # source language ('auto' = detect)
        't': 'auto',   # target language ('auto' = detect)
        'w': key       # the word or phrase to translate
    }

    # Send a POST request with the headers and form data
    res = requests.post(url=url, headers=header, data=data)
    data = res.text
    # Deserialize the JSON text into a Python dict
    data_list = json.loads(data)
    # print(data_list)
    try:
        val = data_list['content']['word_mean']
    except KeyError:
        val = data_list['content']['out']

    return val


if __name__ == '__main__':
    # Read the user's input
    key = input('请输入要翻译的词语:').strip()
    # If the input is empty, report it; otherwise translate it
    if not key:
        print('输入为空')
    else:
        data = main(key=key)
        print(data)
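
As a side note on what requests.post(url, headers=..., data=...) actually sends: passing a dict as data produces a form-encoded request body. A minimal sketch using requests' own Request/PreparedRequest objects to inspect it (the word 'hello' is just an example value):

import requests

req = requests.Request('POST', 'http://fy.iciba.com/ajax.php?a=fy',
                       data={'f': 'auto', 't': 'auto', 'w': 'hello'}).prepare()
print(req.body)   # -> f=auto&t=auto&w=hello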