# Python 识别验证码自动登录

# Python 3.5.0
# 通过 Chrome 浏览器发起请求
# 需要版本相互匹配的 Chrome 和 chromedriver
# 作者: linyouyi

from selenium import webdriver
# 引入Keys类包 发起键盘操作
from selenium.webdriver.common.keys import Keys
import threading
import time
import random
import requests
import eventlet
import _thread
from io import BytesIO
from PIL import Image
from PIL import ImageEnhance
import pytesseract
import re
# Tell pytesseract where the Tesseract executable lives (hard-coded Windows install path).
pytesseract.pytesseract.tesseract_cmd = 'D:\\Program Files\\Tesseract-OCR\\tesseract.exe'
# Tesseract command-line option string naming the tessdata directory.
# NOTE(review): nothing in the visible code passes this string to pytesseract —
# confirm whether it is still needed or should be supplied as the OCR config.
tessdata_dir_config = '--tessdata-dir "D:\\Program Files\\Tesseract-OCR\\tessdata"'

def chrome():
    """Start a Chrome session through chromedriver and return the driver.

    The chromedriver executable is looked up at a hard-coded location inside
    the Chrome installation directory on Windows, and Chrome is launched with
    the ``--disable-gpu`` switch.

    Returns:
        The ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()`` when finished.
    """
    print("启动第一个线程==============================")
    chromeOptions = webdriver.ChromeOptions()
    # Raw string: the backslashes are Windows path separators. In a normal
    # string literal "\P", "\G", "\C", "\A" and "\c" are invalid escape
    # sequences, which newer Python versions warn about.
    chrome_driver = r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe"
    chromeOptions.add_argument("--disable-gpu")
    # Optional switches that are intentionally left disabled:
    #   chromeOptions.add_argument("--headless")
    #   prefs = {"profile.managed_default_content_settings.images": 2}
    #   chromeOptions.add_experimental_option("prefs", prefs)  # skip image loading

    driver = webdriver.Chrome(chrome_options=chromeOptions, executable_path=chrome_driver)
    return driver
    
def read_file(filedir):
    """Open the link file at *filedir* for reading in text mode.

    The handle is returned still open; closing it is the caller's job.
    """
    return open(filedir, 'r')
    
def send_massage(filedir):
    """Yield the lines of the link file one at a time.

    This is a generator: the file is opened (through ``read_file``) when the
    first item is requested, and each yielded line is returned exactly as
    read, including its trailing newline.

    The file is closed when the generator finishes *or* is closed or
    garbage-collected early, so a consumer that stops part-way through (or
    raises) no longer leaks the open handle.

    Args:
        filedir: Path of the text file holding one link per line.

    Yields:
        Each line of the file, in order.
    """
    with read_file(filedir) as file:
        yield from file
    
def binaryzation(code_image,value):
    """Reduce *code_image* to a two-level image using a fixed threshold.

    Args:
        code_image: Image object to process (it must provide ``convert``).
        value: Threshold; grey levels below it map to 0, all others to 1.

    Returns:
        The image produced by ``point(table, '1')`` on the grey-scale copy.
    """
    # Work on a grey-scale ("L") copy of the input.
    grey = code_image.convert('L')
    # Contrast enhancer applied with a factor of 1; sharpness, colour and
    # brightness enhancers could be chained here the same way if tuning is
    # ever needed.
    grey = ImageEnhance.Contrast(grey).enhance(1)

    # 256-entry lookup table: one output value per possible grey level.
    lookup = [0 if level < value else 1 for level in range(256)]
    return grey.point(lookup,'1')
    
def discern(code_img):
    """Run OCR on a captcha image and return the recognised text.

    The image is binarised with a threshold of 127, handed to
    ``pytesseract.image_to_string``, and every non-word character (anything
    that is not a letter, digit or underscore) is removed from the result.

    Args:
        code_img: The captcha image to recognise.

    Returns:
        The cleaned text, or ``"6666"`` when nothing is left after cleaning,
        or the fixed failure message ``"识别验证码失败!!!"`` when any step
        raises an ordinary exception.
    """
    try:
        im = binaryzation(code_img, 127)
        code = pytesseract.image_to_string(im)
        # Raw string so that \W reaches the regex engine as written instead
        # of being an invalid string escape.
        code = re.sub(r"\W", "", code)
    except Exception:
        # Best-effort: report failure as text rather than raising. Catching
        # Exception (not a bare except) lets KeyboardInterrupt, SystemExit
        # and other BaseException-based signals propagate to the caller.
        return "识别验证码失败!!!"
    if code == '':
        return "6666"
    return code
    

    
def call_link(filedir):
    '''Open every link listed in *filedir* and drive the page's verification-code form.

    For each line yielded by send_massage(filedir) the browser loads the link.
    For every <img> whose src contains "captcha", the element is cut out of a
    full-page screenshot, enlarged, recognised with discern(), and the result
    is typed into inputs whose id/name look like a captcha field; every other
    input matched by //div//input receives the placeholder number
    '00000000000'. Inputs are only filled inside that image loop, so a page
    without a matching image gets nothing typed in.

    Afterwards the function clicks a link whose text contains "获取" ("get"),
    and any button, span or input whose text/value contains "获取", "发送"
    ("send") or "码" ("code").

    The driver is created by chrome() and quit after the loop. Returns None.

    NOTE(review): this submits a phone number to third-party verification-code
    forms and works around their CAPTCHA — confirm there is authorisation for
    every target and number before this is run.
    NOTE(review): driver.quit() is not in a finally block, so an exception
    raised outside the per-link try (e.g. while opening the link file) leaves
    the browser running.
    NOTE(review): every handler below is a bare "except:", which also catches
    KeyboardInterrupt and similar BaseException-based signals.
    '''
    driver = chrome()
    link = send_massage(filedir)
    # NOTE(review): the loop variable reuses the generator's name; this works
    # because the iterator is obtained before the name is rebound.
    for link in link:
        print(link)
        try:
            # Give each link at most 100 seconds; per the original author's
            # note a link that times out is skipped.
            # NOTE(review): monkey_patch() is re-invoked on every iteration.
            eventlet.monkey_patch()
            with eventlet.Timeout(100,False):
                # Load the page.
                driver.get(link)
                # Wait at most 10 seconds when locating elements.
                driver.implicitly_wait(10)
                # Collect the candidate controls once, up front.
                button = driver.find_elements_by_xpath('//button')
                span = driver.find_elements_by_xpath('//span')
                inp = driver.find_elements_by_xpath('//div//input')
                # The string literal below is disabled code kept as a bare
                # expression: it used to fill every input with the number.
                '''# 所有input都填上手机号码
                for aa in inp:
                    try:
                        aa.send_keys('00000000000')
                        time.sleep(random.randint(1,2))
                    except:
                        print("########")'''
                time.sleep(5)
                # Collect every <img> element on the page.
                images = driver.find_elements_by_xpath('//img')
                for img in images:
                    img_link = img.get_attribute("src")
                    # NOTE(review): if src can come back as None, this "in"
                    # test raises and the outer except abandons the whole
                    # link — confirm.
                    if ("captcha" in img_link):
                        print(img_link)
                        # x/y position of the captcha element.
                        img_location = img.location
                        # Width/height of the captcha element.
                        img_size = img.size
                        # Screenshot of the whole page, as PNG bytes.
                        code_img = driver.get_screenshot_as_png()
                        # (disabled) save the full screenshot to disk for debugging
                        code_img = Image.open(BytesIO(code_img))
                        # Crop the screenshot down to the captcha element.
                        # NOTE(review): assumes screenshot pixels line up 1:1
                        # with element coordinates (no scaling/scrolling) — confirm.
                        code_img = code_img.crop((img_location['x'],img_location['y'],int(img_location['x'] + img_size['width']),int(img_location['y'] + img_size['height'])))
                        # Enlarge the crop to twice the element size.
                        code_img = code_img.resize((img_size['width'] * 2,img_size['height'] * 2))
                        # (disabled) save the cropped captcha to disk for debugging
                        # Printed size is that of the enlarged crop.
                        print("验证码所在区域大小为:", code_img.size)
                        # OCR the captcha; discern() returns a fallback string
                        # instead of raising when recognition fails.
                        code_num = discern(code_img)
                        print(code_num)
                        # Type the OCR result into captcha-like inputs; all
                        # remaining inputs get the placeholder number.
                        for inp_num in inp:
                            try:
                                if ("captcha" in inp_num.get_attribute('id').lower() ):
                                    inp_num.send_keys(code_num)
                                # "ode" matches ids such as "code"/"vcode"
                                # (and any other id containing those letters).
                                elif ("ode" in inp_num.get_attribute('id').lower()):
                                    inp_num.send_keys(code_num)
                                elif ("captcha" in inp_num.get_attribute('name').lower()):
                                    inp_num.send_keys(code_num)
                                else:
                                    inp_num.send_keys('00000000000')
                                    # Pause 1-2 seconds between inputs.
                                    time.sleep(random.randint(1,2))
                            except:
                                print("########")
                        
                # Case 1: the trigger is an <a> link whose text contains "获取".
                try:
                    driver.find_element_by_partial_link_text("获取").click()
                except:
                    print("a标签失败")
                # Case 2: the trigger is a <button>; click every match.
                try:
                    
                    for button in button:
                        if ("获取" in button.text or "发送" in button.text or "码" in button.text):
                            button.click()
                except:
                    print("button失败!!!")
                # Case 3: the trigger is a <span>; click every match.
                try:
                    for span in span:
                        if ("获取" in span.text or "发送" in span.text or "码" in span.text):
                            span.click()
                except:
                    print("span失败!!!")
                # Case 4: the trigger is an <input>; match on its value attribute.
                try:
                    for inp in inp:
                        if ("获取" in inp.get_attribute("value") or "发送" in inp.get_attribute("value") or "码" in inp.get_attribute("value")):
                            inp.click()
                except:
                    print("input失败!!!")
                # (disabled) earlier attempts located the link by partial text
                # and sent ENTER to it / clicked a child element instead.
                print("短信发送完毕!!!!")
                time.sleep(5)
                
        except:
            print("获取文本失败!!!")
    driver.quit()
    
    

if __name__ == '__main__':
    # Other worker threads from the original script, left disabled:
    #t1 = threading.Thread(target=query_register)
    #t2 = threading.Thread(target=button)

    # Pass the function object and its argument separately. Writing
    # target=call_link(path) runs call_link right here on the main thread and
    # hands its return value (None) to Thread, so the thread itself does
    # nothing. The raw string keeps the Windows path backslashes literal
    # ("\p" and "\l" are invalid escape sequences in a normal literal).
    # NOTE(review): call_link invokes eventlet.monkey_patch(); with this fix
    # it runs on the worker thread instead of the main thread — confirm that
    # is acceptable for eventlet.
    t3 = threading.Thread(target=call_link, args=(r'D:\pythontest\lianjie1.txt',))

    #t1.start()
    #t2.start()
    t3.start()
# Python 识别验证码自动登录

# 猜你喜欢