需求: 第一次访问金华房网( https://www.0579fw.com/ )需要进行滑块校验,经过判断此验证码为阿里云验证码,但不同于12306、淘宝的验证码校验,现需要对网站滑块进行验证码破解并获取相应的cookie进行网站数据的采集,滑块样式如下:
经过抓包分析验证码来源于案例滑块验证码
- 通过selenium直接进行滑动
- 通过chrome debug模式启动 + selenium 进行滑动
- 通过chrome debug模式启动 + selenium + pupynput 进行滑动
- 通过ichrome直接进行滑动
- 通过ichrome + pupynput 进行滑动
- 通过chrome debug模式启动 + ichrome + pupynput 进行滑动
由于selenium的特征值比较多,且前端对于selenium的校验比较简单容易被识别,所以直接采用ichrome(基于谷歌的CDP协议开发出来的一个自动化框架)进行滑动。
实行方案四代码:

import asyncio
from ichrome import AsyncChromeDaemon
class Crawler:
    """Scheme 4: drive a visible Chrome via ichrome (CDP) to open the target
    site and drag the Aliyun slider captcha directly through the protocol."""

    def __init__(self, port):
        # CDP remote-debugging port used by AsyncChromeDaemon.
        self.port = port
        self.start_url = 'https://www.0579fw.com/'
        # CSS selector of the slider handle on the verification page.
        self.span_tab = '#nc_1__scale_text > span'
        # Placeholder for the harvested cookie (not filled in this scheme).
        self.cookie = ''

    async def get_cookie(self):
        # headless=False: the captcha must render in a real, visible window.
        async with AsyncChromeDaemon(host="127.0.0.1", port=self.port, headless=False, debug=False) as cd:
            async with cd.connect_tab(index=0, auto_close=True) as tab:
                await tab.goto(self.start_url, timeout=5)
                await asyncio.sleep(2)
                title = await tab.title
                # The anti-bot interstitial page carries this title.
                if "滑动验证页面" in title:
                    # Bounding box of the handle ({'x': ..., 'y': ...}).
                    aaa = await tab.get_element_clip(self.span_tab)
                    start_x = aaa['x']
                    start_y = aaa['y']
                    # Drag the slider 300px to the right over 1 second.
                    await tab.mouse_drag_rel_chain(start_x, start_y).move(300, 0, 1)
                    # Keep the window open for manual inspection.
                    await asyncio.sleep(300)
if __name__ == '__main__':
    crawler = Crawler(9678)
    # asyncio.run() creates and tears down the event loop itself; the
    # get_event_loop()/run_until_complete() pattern is deprecated for
    # scripts since Python 3.10.
    asyncio.run(crawler.get_cookie())
方案四存在的问题:
通过对上面的代码进行分析,虽然滑块可以正常的滑动到正确的位置,但是由于网站一部分的校验无法通过最终校验,一直重复刷新验证码,如果将移动滑块进行注释,手动进行滑动滑块可以正常通过最终校验,故放弃采用方案四
方案五存在的问题:方案五只是在方案四的基础上改变了滑动滑块实现的方法,经过测试依然存在与方案四相同的问题,无法通过最终校验,故放弃采用方案五
方案六代码思路:- 通过命令行启动chrome debug模式
- 通过ichrome中的Chrome连接到debug模式的chrome实例中
- 通过pywin32获取到窗口坐标及窗口界面的截图
- 查找滑块的位置
- 计算运动轨迹
- 通过pynput进行滑动
import os
import time
import random
import shutil
import aircv as ac
from ichrome import Chrome
import win32con, win32gui, win32ui
from pynput.mouse import Button, Controller as c1
# Chrome profile directory for the debug instance; wiped before each run.
USER_DATA_DIR = r"D:\ichrome_user_data\chrome_9222"
# Filled by get_cookie() with the anti-bot cookie "acw_sc__v3=<value>".
COOKIE_STR = ""
def get_trace(distance=260):
    """Build a human-like drag trajectory covering at least *distance* px.

    The pointer accelerates (a = +30) over roughly the first 70% of the
    distance and decelerates (a = -30) for the remainder, mimicking a human
    drag.  The original implementation tested ``start < the_distance`` inside
    a ``while start < the_distance`` loop, so the deceleration branch was
    unreachable and every step accelerated; this version applies the intended
    70%/30% accelerate/decelerate split.

    :param distance: total distance to cover, in pixels (default 260, the
        slider track width used by this site).
    :return: list of per-step pixel offsets, each rounded to 2 decimals.
    """
    trace = []
    # Switch from acceleration to deceleration after 70% of the distance.
    accel_end = distance * 0.7
    # Initial position, initial velocity, time step.
    start, v0, t = 0, 0, 1
    while start < distance:
        a = 30 if start < accel_end else -30
        # Displacement for this step: s = v0*t + a*t^2/2.
        move = v0 * t + a * t * t / 2
        # Velocity at the end of the step becomes the next initial velocity.
        v0 += a * t
        if move <= 0:
            # Decelerated to a stop before reaching the end; bail out rather
            # than emitting backwards moves.
            break
        start += move
        trace.append(round(move, 2))
    return trace
def get_run_chrome():
    """Launch Chrome in remote-debugging mode on port 9222 (Windows only).

    Uses ``start`` so the shell returns immediately; the dedicated profile
    directory keeps this instance isolated from the user's normal browser.
    """
    flags = [
        "start", "chrome.exe",
        "--remote-debugging-port=9222",
        "--user-data-dir={}".format(USER_DATA_DIR),
        "--disable-gpu",
        "--no-first-run",
    ]
    os.system(" ".join(flags))
def show_chrome_top(hwnd):
    """Bring the window *hwnd* to the foreground.

    The pynput mouse moves and the GDI screenshot operate on screen
    coordinates, so the Chrome window must be frontmost and unobscured.
    """
    win32gui.SetForegroundWindow(hwnd)
def save_chrome_image(hwnd, img_name='output.png'):
    """Capture the full window *hwnd* into an image file via GDI BitBlt.

    The original version leaked every GDI handle it created (the window DC,
    both memory DCs and the bitmap); they are now released in ``finally``.
    Leaked GDI handles eventually exhaust the per-process GDI quota.

    :param hwnd: Win32 window handle to capture.
    :param img_name: output file path (GDI writes bitmap data regardless of
        the extension).
    """
    # Window bounds in screen coordinates.
    left, top, right, bot = win32gui.GetWindowRect(hwnd)
    width = right - left
    height = bot - top
    hWndDC = win32gui.GetWindowDC(hwnd)
    mfcDC = win32ui.CreateDCFromHandle(hWndDC)
    saveDC = mfcDC.CreateCompatibleDC()
    saveBitMap = win32ui.CreateBitmap()
    try:
        saveBitMap.CreateCompatibleBitmap(mfcDC, width, height)
        saveDC.SelectObject(saveBitMap)
        # Copy the window surface into the in-memory bitmap.
        saveDC.BitBlt((0, 0), (width, height), mfcDC, (0, 0), win32con.SRCCOPY)
        saveBitMap.SaveBitmapFile(saveDC, img_name)
    finally:
        # Release all GDI resources acquired above, in reverse order.
        win32gui.DeleteObject(saveBitMap.GetHandle())
        saveDC.DeleteDC()
        mfcDC.DeleteDC()
        win32gui.ReleaseDC(hwnd, hWndDC)
def get_box_coordinate(imgsrc, imgobj, confidencevalue=0.5):
    """Locate *imgobj* (a template) inside *imgsrc* via aircv matching.

    :param imgsrc: path of the full screenshot.
    :param imgobj: path of the template image to find (partial image).
    :param confidencevalue: minimum match confidence passed to aircv.
    :return: the match's ``result`` entry, or ``None`` when no match.
    """
    source = ac.imread(imgsrc)
    template = ac.imread(imgobj)
    match = ac.find_template(source, template, confidencevalue)
    return None if match is None else match.get('result')
def close_chrome(port=9222):
    """Kill every process that netstat associates with *port* (Windows only).

    The original implementation split each line on single spaces and ran
    ``taskkill`` for every line of output, which produced calls with empty
    PIDs (netstat output contains blank lines) and duplicate kills (one PID
    appears once per connection).  This version extracts numeric PIDs only
    and kills each at most once.

    :param port: TCP port whose owning processes should be terminated.
    """
    with os.popen('netstat -aon | findstr %s' % port) as result:
        text = result.read()
    pids = set()
    for line in text.splitlines():
        fields = line.split()  # split on any whitespace run
        # The PID is the last column; skip blank/garbled lines.
        if fields and fields[-1].isdigit():
            pids.add(fields[-1])
    for pid in pids:
        os.popen('taskkill -f -pid %s' % pid)
def clear_chrome_logs():
    """Delete the Chrome debug profile so each run starts from a clean slate.

    ``ignore_errors=True`` keeps the very first run (when USER_DATA_DIR does
    not exist yet) from raising ``FileNotFoundError``.
    """
    shutil.rmtree(USER_DATA_DIR, ignore_errors=True)
def get_cookie():
    """Open the site in a debug-mode Chrome, solve the slider with real OS
    mouse events (pynput), and store the anti-bot cookie in COOKIE_STR.

    Pipeline: wipe the profile -> launch Chrome -> attach via ichrome ->
    if the slider page is shown, screenshot the window with GDI, locate the
    handle by template matching, drag it with pynput, then read the cookies.
    """
    global COOKIE_STR
    clear_chrome_logs()
    get_run_chrome()
    # Give Chrome time to start and open its debug port.
    time.sleep(3)
    chrome = Chrome(port=9222)
    tab = chrome.tabs[0]
    tab.set_url('https://www.0579fw.com/')
    time.sleep(1)
    title = tab.title
    # The anti-bot interstitial page carries this title.
    if "滑动验证页面" in title:
        # 1. Find the Chrome window by its exact title.
        hwnd = win32gui.FindWindow(0, "滑动验证页面 - Google Chrome")
        # 2. Bring it to the foreground so screen coordinates are valid.
        show_chrome_top(hwnd)
        # 3. Screenshot the whole browser window.
        save_chrome_image(hwnd, img_name='output.png')
        # 4. Template-match the slider handle (box.png) in the screenshot.
        coordinate = get_box_coordinate('output.png', 'box.png')
        if coordinate is not None:
            left, top, right, bot = win32gui.GetWindowRect(hwnd)
            mouse = c1()
            # The match is window-relative; offset by the window origin to
            # get absolute screen coordinates for pynput.
            x_coordinate = left + coordinate[0]
            y_coordinate = top + coordinate[1]
            # 5. Simulate the drag with real OS-level mouse events.
            mouse.position = (x_coordinate, y_coordinate)
            mouse.press(Button.left)
            time.sleep(1 + random.random())
            trace = get_trace()
            for d in trace:
                mouse.move(d, 0)
                # Small random pauses make the motion look human.
                time.sleep(random.random() / 10)
            time.sleep(random.random())
            mouse.release(Button.left)
            # Wait for the site to validate the slide and set cookies.
            time.sleep(3)
            cookies = tab.get_cookies()
            for cookie in cookies:
                # acw_sc__v3 is the cookie issued after passing the check.
                if cookie['name'] == 'acw_sc__v3':
                    COOKIE_STR = "{}={}".format(cookie['name'], cookie['value'])
                    break
if __name__ == "__main__":
    try:
        get_cookie()
    except Exception as e:
        # Top-level boundary: report and fall through to cleanup.
        print("error: {}".format(e))
    finally:
        # Always tear the debug Chrome down — including on KeyboardInterrupt
        # or unexpected exits — so port 9222 is free for the next run.
        close_chrome()
普通的阿里滑块(12306为例)
对于普通的阿里验证码,可以使用ichrome直接滑动进行破解(已经测试可以直接滑过去),代码如下:
import asyncio
from ichrome import AsyncChromeDaemon
class Crawler:
    """12306 login demo: its plain Aliyun slider passes with a direct CDP
    drag (tested), so ichrome alone suffices — unlike the 0579fw variant."""

    def __init__(self, port):
        # CDP remote-debugging port used by AsyncChromeDaemon.
        self.port = port
        self.start_url = 'https://kyfw.12306.cn/otn/resources/login.html'
        # Fill in real credentials before running.
        self.username = ''
        self.password = ''

    async def get_cookie(self):
        # headless=False: the login page and slider must render visibly.
        async with AsyncChromeDaemon(host="127.0.0.1", port=self.port, headless=False, debug=False) as cd:
            async with cd.connect_tab(index=0, auto_close=True) as tab:
                await tab.goto(self.start_url, timeout=5)
                await asyncio.sleep(2)
                # Type the account name.
                await tab.mouse_click_element_rect("#J-userName")
                await tab.keyboard_send(string=self.username)
                # Type the password.
                await tab.mouse_click_element_rect("#J-password")
                await tab.keyboard_send(string=self.password)
                # Click the login button, which brings up the slider.
                await tab.click("#J-login")
                await asyncio.sleep(2)
                # Bounding box of the slider knob ({'x': ..., 'y': ...}).
                aaa = await tab.get_element_clip("#nc_1_n1z")
                start_x = aaa['x']
                start_y = aaa['y']
                # Drag the knob 300px to the right over 1 second.
                await tab.mouse_drag_rel_chain(start_x, start_y).move(300, 0, 1)
                # Keep the window open for manual inspection.
                await asyncio.sleep(300)
if __name__ == '__main__':
    crawler = Crawler(9678)
    # asyncio.run() replaces the deprecated get_event_loop() /
    # run_until_complete() script pattern (deprecated since Python 3.10).
    asyncio.run(crawler.get_cookie())