一、介绍
今天主要介绍的是微博客户端在登录时出现的四宫格手绘验证码,不多说直接看看验证码长成什么样。
二、思路
1、由于微博上的手绘验证码只有四个宫格,且每个宫格之间都有有向线段连接,所以按四个宫格的连接顺序不同,验证码一共有 4! = 24 种,
我们将四个宫格进行标号,得到的结果如下:
则我们可以排列出24种不同手绘方向的验证码,分别为以下24种:
1234 | 2134 | 3124 | 4321 |
1243 | 2143 | 3142 | 4312 |
1342 | 2314 | 3214 | 4123 |
1324 | 2341 | 3241 | 4132 |
1423 | 2413 | 3412 | 4213 |
1432 | 2431 | 3421 | 4231 |
2、获取到微博客户端的24种手绘验证码之后,将它们作为模板;登录时把出现的验证码与这些模板逐一进行全图匹配,匹配成功后按模板对应的顺序进行滑动。
三、代码实现
1、首先是要通过微博移动端(https://passport.weibo.cn/signin/login)批量获取手绘验证码,但是这个验证码不一定出现,
只有在账号存在风险或者频繁登录的时候才会出现。获取手绘验证码的代码如下:
# -*- coding:utf-8 -*-
"""Collect samples of the Weibo four-grid pattern captcha.

The script logs in to the mobile Weibo sign-in page over and over and,
whenever the pattern captcha shows up, crops it out of a screenshot and
saves it as ``<n>.png``. The saved samples are meant to be renamed by hand
after their drawing order and used as matching templates.
"""
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class CrackWeiboSlide():
    """Drive a Chrome session that harvests pattern-captcha screenshots."""

    def __init__(self):
        """Start Chrome, maximise the window and prepare a 5-second wait."""
        self.url = "https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/"
        # NOTE(review): the driver path is passed positionally, as in the
        # original; confirm the installed Selenium release still accepts it.
        self.browser = webdriver.Chrome(r"D:\chromedriver.exe")
        self.browser.maximize_window()
        self.wait = WebDriverWait(self.browser, 5)

    def __del__(self):
        """Shut the browser session down when the object is collected."""
        # self.browser is missing when webdriver.Chrome() itself failed.
        browser = getattr(self, "browser", None)
        if browser is not None:
            # quit() ends the whole driver session; close() would only close
            # the current window.
            browser.quit()

    def open(self):
        """Load the sign-in page, fill in the credentials and submit."""
        self.browser.get(self.url)
        # User-name input.
        username = self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="loginName"]')))
        # Password input.
        password = self.wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="loginPassword"]')))
        # Login button.
        submit = self.wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="loginAction"]')))
        # Submit the form.
        username.send_keys("15612345678")
        password.send_keys("xxxxxxxxxxxx")
        submit.click()

    def get_image(self, name="captcha.png"):
        """Crop the captcha out of a page screenshot and save it.

        :param name: file name the cropped captcha is saved under
        :return: the cropped PIL image, or None when no captcha appeared
                 within the wait timeout
        """
        # Only the wait is expected to time out, so only it is guarded.
        try:
            img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, "patt-shadow")))
        except TimeoutException:
            print("没有出现验证码!!")
            # The caller decides whether to retry; main() re-opens the page
            # on its next iteration anyway.
            return None

        time.sleep(1)
        # Bounding box of the captcha element on the page.
        location = img.location
        size = img.size
        top = location["y"]
        bottom = location["y"] + size["height"]
        left = location["x"]
        right = location["x"] + size["width"]
        print("验证码的位置:", left, top, right, bottom)
        # Screenshot the window and cut the four-grid captcha out of it.
        screenshot = Image.open(BytesIO(self.browser.get_screenshot_as_png()))
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        print("微博登录验证码保存完成!!!")
        return captcha

    def main(self):
        """Log in repeatedly and save every captcha as ``<count>.png``.

        Runs until interrupted. The counter advances only after a captcha
        was actually saved, so the sample files are numbered without gaps.
        """
        count = 1
        while True:
            self.open()
            if self.get_image(str(count) + ".png") is not None:
                count += 1


if __name__ == '__main__':
    crack = CrackWeiboSlide()
    crack.main()
这样就得到了24种手绘验证码,同时需要根据上边的编号,按照滑动顺序对这些验证码图片进行命名(例如 1234.png),因为后面的匹配代码会从文件名中解析出拖动顺序。
上图就是我们需要的模板,接下来我们进行遍历模板匹配即可
2、模板匹配
通过遍历手绘验证码模板进行匹配
"""Solve the Weibo four-grid pattern captcha by template matching.

The captcha shown after login is cropped from a screenshot and compared
pixel by pixel with the images in ``TEMPLATES_FOLDER``. Each template file
is named after its drawing order (for example ``1234.png``); once one
matches, the mouse is dragged across the four circles in that order.
"""
import os
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from os import listdir

USERNAME = '13389185673'
PASSWORD = ''

TEMPLATES_FOLDER = 'templates/'


class CrackWeiboSlide():
    """Log in to mobile Weibo and draw the pattern captcha automatically."""

    def __init__(self, username=USERNAME, password=PASSWORD):
        """Start Chrome and remember the credentials.

        :param username: account name typed into the login form
        :param password: password typed into the login form
        """
        self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.username = username
        self.password = password

    def __del__(self):
        """Shut the browser session down when the object is collected."""
        # self.browser is missing when webdriver.Chrome() itself failed.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() ends the whole driver session; close() would only close
            # the current window.
            browser.quit()

    def open(self):
        """
        Open the sign-in page, type the user name and password, and submit.
        :return: None
        """
        self.browser.get(self.url)
        username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
        submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
        username.send_keys(self.username)
        password.send_keys(self.password)
        submit.click()

    def get_position(self, retries=3):
        """
        Locate the captcha element on the page.

        When the captcha does not appear within the wait timeout, the login
        is submitted again and the wait is repeated.
        :param retries: how many times to log in again before giving up
        :return: tuple (top, bottom, left, right) of the captcha
        :raises TimeoutException: the captcha never appeared
        """
        for attempt in range(retries + 1):
            try:
                img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'patt-shadow')))
            except TimeoutException:
                print('未出现验证码')
                if attempt == retries:
                    raise
                self.open()
            else:
                break
        time.sleep(2)
        location = img.location
        size = img.size
        top, bottom = location['y'], location['y'] + size['height']
        left, right = location['x'], location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the current page.
        :return: screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(screenshot))

    def get_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it.
        :param name: file name the cropped captcha is saved under
        :return: cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    @staticmethod
    def _is_rgb_close(pixel1, pixel2, threshold=20):
        """Return True when the first three channels each differ by less than threshold."""
        return all(abs(pixel1[i] - pixel2[i]) < threshold for i in range(3))

    @staticmethod
    def _split_offset(total, steps):
        """Split ``total`` into ``steps`` integer moves that add up to ``total``."""
        marks = [round(total * i / steps) for i in range(steps + 1)]
        return [after - before for before, after in zip(marks, marks[1:])]

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Tell whether two images have (nearly) the same colour at one pixel.
        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True when the pixels are close enough
        """
        return self._is_rgb_close(image1.load()[x, y], image2.load()[x, y])

    def same_image(self, image, template):
        """
        Tell whether a captcha matches a template.
        :param image: captcha to recognise
        :param template: template image
        :return: True when more than 99% of the captcha's pixels are close
                 to the template's
        """
        # Similarity threshold.
        threshold = 0.99
        total = image.width * image.height
        # A template smaller than the captcha cannot be indexed over the
        # captcha's full area, so it is treated as a non-match.
        if total == 0 or template.width < image.width or template.height < image.height:
            return False
        # Convert once so every pixel is an RGB tuple, and load the pixel
        # access objects once instead of once per pixel.
        rgb_image = image.convert('RGB')
        rgb_template = template.convert('RGB')
        image_pixels = rgb_image.load()
        template_pixels = rgb_template.load()
        count = sum(
            1
            for x in range(image.width)
            for y in range(image.height)
            if self._is_rgb_close(image_pixels[x, y], template_pixels[x, y])
        )
        if float(count) / total > threshold:
            print('成功匹配')
            return True
        return False

    def detect_image(self, image):
        """
        Find the template that matches the captcha.
        :param image: captcha image
        :return: drag order as a list of circle numbers, or None when no
                 template matches
        """
        for template_name in sorted(listdir(TEMPLATES_FOLDER)):
            order = template_name.split('.')[0]
            # Only files named after a permutation of 1-4 are templates;
            # anything else in the folder is skipped.
            if sorted(order) != list('1234'):
                continue
            print('正在匹配', template_name)
            with Image.open(os.path.join(TEMPLATES_FOLDER, template_name)) as template:
                matched = self.same_image(image, template)
            if matched:
                # The file name encodes the drag order.
                numbers = [int(number) for number in order]
                print('拖动顺序', numbers)
                return numbers
        return None

    def move(self, numbers):
        """
        Drag the mouse across the circles in the given order.
        :param numbers: 1-based circle numbers in drag order
        :return: None
        """
        # The four circles of the captcha.
        circles = self.browser.find_elements(By.CSS_SELECTOR, '.patt-wrap .patt-circ')
        path = [circles[number - 1] for number in numbers]
        # Number of small moves per segment.
        times = 30
        # Press the button on the centre of the first circle. The offset
        # origin of move_to_element_with_offset differs between Selenium
        # releases, so move_to_element is used to reach the centre instead.
        ActionChains(self.browser).move_to_element(path[0]).click_and_hold().perform()
        for current, following in zip(path, path[1:]):
            dx = following.location['x'] - current.location['x']
            dy = following.location['y'] - current.location['y']
            # Integer steps that sum exactly to (dx, dy), so no distance is
            # lost if the driver truncates fractional offsets.
            steps = zip(self._split_offset(dx, times), self._split_offset(dy, times))
            for step_x, step_y in steps:
                ActionChains(self.browser).move_by_offset(step_x, step_y).perform()
                time.sleep(1 / times)
        # Release the mouse after the last circle.
        ActionChains(self.browser).release().perform()

    def crack(self):
        """
        Entry point: log in, recognise the captcha and draw it.
        :return: None
        """
        self.open()
        # Grab the captcha image.
        image = self.get_image('captcha.png')
        numbers = self.detect_image(image)
        if numbers is None:
            # Without a matching template there is no order to draw.
            print('未找到匹配的模板')
        else:
            self.move(numbers)
            time.sleep(10)
        print('识别结束')


if __name__ == '__main__':
    crack = CrackWeiboSlide()
    crack.crack()
四、识别结果
循环四次后(第一次按下起点,后三次各拖动一段),依次绘出连接四个宫格的三条有向线段,最终得到效果图