python第7天作业

破解滑动验证登录

''''''
'''
破解极验滑动验证
博客园登录url:
https://account.cnblogs.com/signin?returnUrl=https%3A%2F%2Fwww.cnblogs.com%2F

代码逻辑:
1、输入用户名与密码,并点击登录
2、弹出滑动验证,获取有缺口与完整的图片
3、通过像素点进行比对,获取滑动位移距离
4、模拟人的行为轨迹
5、开始滑动

'''

# Screenshot helper
def cut_image(driver):
    """Screenshot the page and crop out the captcha canvas.

    Saves a full-page screenshot to 'snap.png', looks up the element with
    class 'geetest_canvas_img', prints its location, size and bounding box,
    and returns that region cropped from the screenshot.

    NOTE(review): ``Image`` is presumably PIL's ``Image`` module; it is not
    imported in the visible source - confirm.
    """
    screenshot_path = 'snap.png'
    driver.save_screenshot(screenshot_path)

    canvas = driver.find_element_by_class_name('geetest_canvas_img')
    origin, extent = canvas.location, canvas.size
    print(origin)
    print(extent)

    # Crop box in the (left, upper, right, lower) form that crop() expects.
    box = (
        origin['x'],
        origin['y'],
        origin['x'] + extent['width'],
        origin['y'] + extent['height'],
    )
    print(*box)

    return Image.open(screenshot_path).crop(box)

# Capture the complete image
def get_image1(driver):
    """Return a cropped screenshot of the complete captcha image.

    Waits two seconds, sets the first 'geetest_canvas_fullbg' element's
    display style to "block" via JavaScript, then delegates to cut_image().
    """
    time.sleep(2)

    # Showing the full-background canvas hides the gap in the screenshot.
    show_full_background = '''
        var x = document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display = "block";
    '''
    driver.execute_script(show_full_background)

    return cut_image(driver)

# Capture the image with the gap
def get_image2(driver):
    """Return a cropped screenshot of the captcha image that shows the gap.

    Waits two seconds, sets the first 'geetest_canvas_fullbg' element's
    display style to "none" via JavaScript, then delegates to cut_image().
    """
    time.sleep(2)

    # Hiding the full-background canvas leaves the gapped image visible.
    hide_full_background = '''
        var x = document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display = "none";
    '''
    driver.execute_script(hide_full_background)

    return cut_image(driver)

# Compute how far the slider has to travel
def get_distance(image1, image2, start=60, threshold=60, offset=7):
    """Locate the captcha gap by comparing two images pixel by pixel.

    Columns are scanned left to right beginning at ``start``; inside each
    column the rows are scanned top to bottom. The first pixel whose red,
    green or blue channel differs by ``threshold`` or more marks the gap.

    Args:
        image1: Complete image. Must expose ``size`` as ``(width, height)``
            and ``load()`` returning an object indexable by ``[x, y]``.
        image2: Image containing the gap, with the same interface.
        start: First column to inspect (the original comment describes it
            as the right edge of the slider piece).
        threshold: Per-channel difference at or above which two pixels are
            treated as different.
        offset: Pixels subtracted from the matching column (the original
            code notes an error of about 7 px).

    Returns:
        ``x - offset`` for the first differing column, or ``None`` when no
        pixel in the scanned area differs.
    """
    # load() is called once per image rather than once per pixel.
    pixels1 = image1.load()
    pixels2 = image2.load()

    # Only compare the area both images actually cover.
    width = min(image1.size[0], image2.size[0])
    height = min(image1.size[1], image2.size[1])

    for x in range(start, width):
        for y in range(height):
            rgb1 = pixels1[x, y]
            rgb2 = pixels2[x, y]
            # Only the first three channels are compared; alpha is ignored.
            if any(abs(c1 - c2) >= threshold
                   for c1, c2 in zip(rgb1[:3], rgb2[:3])):
                return x - offset

    return None

# Simulate a human-like drag track
def get_strck_move(distance):
    """Build a human-like drag track for the slider.

    The forward track aims 20 px past ``distance`` and is generated with
    uniformly accelerated motion over fixed 0.2 time-unit steps: a random
    acceleration from 1-4 while the travelled distance is below 3/5 of the
    target, and a random deceleration of the same magnitudes afterwards.
    Each step's displacement is rounded to an integer. The final step is
    clamped so the forward track ends on the target instead of beyond it;
    the fixed backward track (which sums to -20) then brings the slider
    back to ``distance``.

    Args:
        distance: Pixel distance from the slider's start to the gap.

    Returns:
        A dict with two lists of x offsets: ``'move_list'`` (forward steps)
        and ``'back_list'`` (backward steps, all negative).
    """
    import math
    import random

    # Deliberate overshoot; back_list below retracts exactly this much.
    target = distance + 20

    # Length of one simulation step.
    interval = 0.2

    # Velocity carried from one step to the next, starting at rest.
    velocity = 0

    # Distance covered so far (sum of the rounded steps).
    travelled = 0

    move_list = []

    # Switch from accelerating to decelerating at 3/5 of the target.
    mid = target / 5 * 3

    # Candidate acceleration magnitudes.
    magnitudes = [1, 2, 3, 4]

    while travelled < target:
        magnitude = random.choice(magnitudes)
        accel = magnitude if travelled < mid else -magnitude

        # Displacement for this step: s = v * t + 0.5 * a * t ** 2
        step = round(velocity * interval + 0.5 * accel * interval ** 2)

        # Velocity at the end of this step: v = v0 + a * t
        velocity += accel * interval

        # Never move beyond the target, or the fixed back track would
        # leave the slider past the gap.
        remaining = target - travelled
        if step > remaining:
            step = math.ceil(remaining)

        move_list.append(step)
        travelled += step

    # Hand-written backward track; every entry must be negative.
    back_list = [-1, -1, -2, -3, -2, -1, -1, -2, -3, -2, -1, -1]

    return {'move_list': move_list, 'back_list': back_list}

def main():
    """Log in to cnblogs and attempt to solve the slider captcha.

    Opens the sign-in page in Chrome, submits the credentials, captures the
    complete and gapped captcha images, derives the slide distance from
    them and drags the slider along a simulated human track. The browser
    session is always shut down afterwards.

    Raises:
        RuntimeError: If no difference is found between the two captcha
            images, so no slide distance can be determined.
    """
    driver = webdriver.Chrome()

    driver.implicitly_wait(10)
    try:
        driver.get('https://account.cnblogs.com/signin?returnUrl=https%3A%2F%2Fwww.cnblogs.com%2F')

        # 1. Enter the user name and password, then submit the form.
        # SECURITY: credentials are hard-coded in source; move them to
        # configuration or environment variables before sharing this file.
        user_input = driver.find_element_by_id('LoginName')
        user_input.send_keys('_tank_')
        time.sleep(0.2)

        pwd_input = driver.find_element_by_id('Password')
        pwd_input.send_keys('k46709394.')
        time.sleep(2)

        login_submit = driver.find_element_by_id('submitBtn')
        login_submit.click()

        # 2. Capture the complete image.
        image1 = get_image1(driver)

        # 3. Capture the image with the gap.
        image2 = get_image2(driver)

        # 4. Compare both images to get the slide distance.
        distance = get_distance(image1, image2)
        print(distance)
        if distance is None:
            # Fail with a clear message rather than a TypeError further down.
            raise RuntimeError('captcha gap not found: the two screenshots do not differ')

        # 5. Build the simulated human track.
        move_dict = get_strck_move(distance)
        # Forward steps.
        move_list = move_dict['move_list']
        # Backward steps.
        back_list = move_dict['back_list']

        # 6. Perform the drag.
        move_tag = driver.find_element_by_class_name('geetest_slider_button')
        # Press and hold the slider button.
        ActionChains(driver).click_and_hold(move_tag).perform()

        # Slide forward.
        for move in move_list:
            ActionChains(driver).move_by_offset(xoffset=move, yoffset=0).perform()
            time.sleep(0.1)

        time.sleep(0.1)

        # Slide backward.
        for back in back_list:
            ActionChains(driver).move_by_offset(xoffset=back, yoffset=0).perform()
            time.sleep(0.1)

        # Add a small wobble before letting go.
        ActionChains(driver).move_by_offset(xoffset=3, yoffset=0).perform()
        ActionChains(driver).move_by_offset(xoffset=-3, yoffset=0).perform()

        time.sleep(0.1)

        # Release the slider button.
        ActionChains(driver).release().perform()

        # Keep the browser open for a while so the result can be inspected.
        time.sleep(100)

    finally:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window and leave the driver process running.
        driver.quit()

# Run the captcha login flow only when this file is executed as a script.
if __name__ == '__main__':
    main()

  豌豆荚的网页主页爬取

'''
主页:
    图标地址、下载次数、大小、详情页地址

详情页:
    游戏名、图标名、好评率、评论数、小编点评、简介、网友评论、1-5张截图链接地址、下载地址
https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=1&ctoken=FRsWKgWBqMBZLdxLaK4iem9B

https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=2&ctoken=FRsWKgWBqMBZLdxLaK4iem9B

https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=3&ctoken=FRsWKgWBqMBZLdxLaK4iem9B

共 32 页(page=1 到 page=32)
'''
import requests
from bs4 import BeautifulSoup
# 1. Send the request
def get_page(url, timeout=10):
    """Issue a GET request and return the response object.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout requests may block indefinitely on an unresponsive host.

    Returns:
        The response returned by ``requests.get``.
    """
    response = requests.get(url, timeout=timeout)
    return response

# 2. Parse the response
# Parse the index page
def parse_index(data):
    """Print icon URL, download count, size and detail URL for each app card.

    Every field is looked up inside the card's own ``<li class="card">``
    element, so each card reports its own size and detail-page URL rather
    than those of the first card in the document.

    Args:
        data: HTML markup containing the app ``<li class="card">`` elements.
    """
    import re

    soup = BeautifulSoup(data, 'lxml')

    # Compiled once; raw string so \d is a regex digit class, not an escape.
    size_pattern = re.compile(r"\d+MB")

    # All app <li> tags.
    app_list = soup.find_all(name='li', attrs={"class": "card"})
    for app in app_list:
        # Icon URL
        img = app.find(name='img').attrs['data-original']
        print(img)

        # Download count
        down_num = app.find(name='span', attrs={"class": "install-count"}).text
        print(down_num)

        # Size; None when this card has no span matching the pattern.
        size_tag = app.find(name='span', text=size_pattern)
        size = size_tag.text if size_tag is not None else None
        print(size)

        # Detail page URL; None when this card has no detail link.
        detail_tag = app.find(name='a', attrs={"class": "detail-check-btn"})
        detail_url = detail_tag.attrs['href'] if detail_tag is not None else None
        print(detail_url)


def main():
    """Fetch pages 1-32 of the wandoujia category API and parse each one."""
    first_page, last_page = 1, 32
    for line in range(first_page, last_page + 1):
        url = f"https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page={line}&ctoken=FRsWKgWBqMBZLdxLaK4iem9B"

        # 1. Request the app listing API, then print a separator line.
        response = get_page(url)
        print('*' * 1000)

        # 2. The decoded JSON carries the app markup under data -> content;
        #    hand it to the index parser.
        parse_index(response.json()['data']['content'])


# Run the wandoujia crawl only when this file is executed as a script.
if __name__ == '__main__':
    main()

  

猜你喜欢

转载自www.cnblogs.com/xiaohuangxiong/p/11068027.html
今日推荐