appium 爬取抖音

1.MongoDB.py(注意:decode.py 中以 handle_mongo 为模块名导入本文件,实际保存时文件名应为 handle_mongo.py)

import pymongo
from pymongo.collection import Collection


# MongoDB client for the server that stores the crawl tasks and results.
client = pymongo.MongoClient('192.168.54.41', 27017)
# Handle to the 'douyin' database used by the collection helpers in this file.
db = client.douyin

def send_task():
    """Load share ids from ``douyin_hot_id.txt`` and store one task per id.

    Each non-empty line of the file becomes a task document of the form
    ``{'share_id': <line>, 'task_type': 'share_id'}`` which is printed and
    then handed to ``save_task``.

    Raises:
        OSError: if ``douyin_hot_id.txt`` cannot be opened.
    """
    with open('douyin_hot_id.txt', 'r') as f:
        # Iterate the file lazily instead of materialising every line first.
        for line in f:
            # strip() also removes '\r' and stray spaces, not just '\n'.
            share_id = line.strip()
            if not share_id:
                # A blank line would otherwise create a task with an empty id.
                continue
            task_info = {'share_id': share_id, 'task_type': 'share_id'}
            print('当前保存的task为%s:'%task_info)
            save_task(task_info)

def save_task(task):
    """Upsert a task document into the ``douyin_task`` collection.

    The document whose ``share_id`` equals ``task['share_id']`` is replaced
    by ``task``; if no such document exists, ``task`` is inserted.

    Args:
        task: dict that must contain a ``'share_id'`` key.

    Raises:
        KeyError: if ``task`` has no ``'share_id'`` key.
    """
    task_collection = Collection(db, 'douyin_task')
    # Collection.update() is deprecated in PyMongo 3 and removed in PyMongo 4;
    # replace_one(..., upsert=True) performs the same whole-document upsert.
    task_collection.replace_one({'share_id': task['share_id']}, task, upsert=True)

def get_task(task_type):
    """Remove and return one task of the given type from ``douyin_task``.

    Args:
        task_type: value matched against the ``task_type`` field.

    Returns:
        Whatever ``find_one_and_delete`` returns for that filter.
    """
    query = {'task_type': task_type}
    return Collection(db, 'douyin_task').find_one_and_delete(query)

def delete_task(task):
    """Placeholder that currently ignores ``task`` and does nothing."""
    return None

def save_data(item):
    """Insert scraped data into the ``douyin_data`` collection.

    Args:
        item: a single document (dict) or a list of documents.
    """
    data_collection = Collection(db, 'douyin_data')
    # Collection.insert() is deprecated in PyMongo 3 and removed in PyMongo 4.
    # It accepted either one document or a list, so both shapes are kept.
    if isinstance(item, list):
        data_collection.insert_many(item)
    else:
        data_collection.insert_one(item)

2.decode.py

import json
try:
    # Preferred form: resolves when ``douyin`` is importable as a package.
    from douyin.handle_mongo import save_task
except ImportError:
    # Only a failed import triggers the fallback; any other error raised
    # while loading the module is allowed to surface instead of being hidden.
    from handle_mongo import save_task

def response(flow):
    """Store every follower found in a follower-list API response as a task.

    NOTE(review): the name and the ``flow.request`` / ``flow.response``
    attributes look like a mitmproxy ``response`` hook - confirm.

    Responses whose URL does not contain ``aweme/v1/user/follower/list/`` are
    ignored. For matching responses the body is parsed as JSON and each entry
    of its ``followers`` list is saved via ``save_task`` as
    ``{'share_id': uid, 'douyin_id': short_id}``.

    Args:
        flow: object exposing ``request.url`` and ``response.text``.

    Raises:
        KeyError: if a follower entry lacks ``uid`` or ``short_id``.
    """
    if 'aweme/v1/user/follower/list/' not in flow.request.url:
        return

    try:
        payload = json.loads(flow.response.text)
    except (TypeError, ValueError):
        # Body was missing or not JSON (e.g. an error page): nothing to save.
        print('follower list response is not valid JSON, skipped')
        return

    # Error payloads may omit 'followers' or carry null; treat both as empty.
    followers = payload.get('followers') if isinstance(payload, dict) else None
    for user in followers or []:
        douyin_info = {'share_id': user['uid'], 'douyin_id': user['short_id']}
        save_task(douyin_info)

3.douyin.py

import time
from selenium.webdriver.support.ui import WebDriverWait
from appium import webdriver

# Capabilities sent to the Appium server when the session is created.
desired_caps = {
    'platformName': 'Android',
    'deviceName': 'TGIRPJOBFUZ9IJSW',
    'platformVersion': '6.0',
    'appPackage': 'com.ss.android.ugc.aweme',
    'appActivity': 'com.ss.android.ugc.aweme.splash.SplashActivity',
    'noReset': True,
    'unicodeKeyboard': True,
    'resetKeyboard': True,
}

# The remote session is created as soon as this module is loaded.
driver = webdriver.Remote('http://192.168.54.56:4723/wd/hub', desired_caps)


def get_size(driver):
    """Return the window size of ``driver`` as a ``(width, height)`` tuple.

    Args:
        driver: object whose ``get_window_size()`` returns a mapping with
            ``'width'`` and ``'height'`` keys.
    """
    # Query once: a second call is a redundant round trip and could report a
    # different size if the window changed between the two calls.
    size = driver.get_window_size()
    return (size['width'], size['height'])


def _wait_for_xpath(driver, xpath, timeout):
    """Wait up to ``timeout`` seconds for ``xpath`` and return the element found."""
    return WebDriverWait(driver, timeout).until(
        lambda d: d.find_element_by_xpath(xpath))


def handle_douyin(driver, douyin_id='lwnx1208'):
    """Search for a Douyin account and scroll through its follower list.

    Steps: accept permission dialogs while they keep appearing, open the
    search page, type ``douyin_id``, run the search, switch to the "用户"
    tab, open the first result's avatar, open the follower list and swipe
    up until the page source contains '没有更多了'.

    Args:
        driver: Appium driver for the running app session.
        douyin_id: account id typed into the search box. Defaults to the
            value that used to be hard-coded.

    Raises:
        selenium.common.exceptions.WebDriverException: if a required element
            (search box, tabs, avatar, follower button) cannot be found.
    """
    # Local import keeps the block self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import WebDriverException

    permission_title = "//android.widget.TextView[@resource-id='android:id/le_bottomsheet_default_title']"
    permission_allow = "//android.widget.Button[@resource-id='com.android.packageinstaller:id/permission_allow_button']"
    search_icon = "//android.widget.ImageView[@resource-id='com.ss.android.ugc.aweme:id/ab_']"
    search_box = "//android.widget.EditText[@resource-id='com.ss.android.ugc.aweme:id/a4p']"
    search_button = "//android.widget.TextView[@resource-id='com.ss.android.ugc.aweme:id/a4r']"
    user_tab = "//android.widget.TextView[@text='用户']"
    first_avatar = "//android.support.v7.widget.RecyclerView[@resource-id='com.ss.android.ugc.aweme:id/kh']/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.ImageView[1]"
    follower_button = "//android.widget.TextView[@resource-id='com.ss.android.ugc.aweme:id/a6a']"

    # Handle permission dialogs: keep pressing "allow" while one is shown.
    try:
        while _wait_for_xpath(driver, permission_title, 5):
            driver.find_element_by_xpath(permission_allow).click()
    except WebDriverException:
        # No (more) permission dialog within the timeout: expected, carry on.
        pass

    # Click the search icon.
    try:
        print('点击搜索')
        _wait_for_xpath(driver, search_icon, 3).click()
    except WebDriverException:
        # The icon could not be located by xpath; tap the screen region
        # [26,76][115,165] where the control sits instead.
        driver.tap([(26, 76), (115, 165)], 500)

    # Locate the search box and type the id to search for.
    _wait_for_xpath(driver, search_box, 3).send_keys(douyin_id)
    # Re-send until the box really contains the id; the element is looked up
    # again on every pass so that the text read is current.
    while driver.find_element_by_xpath(search_box).text != douyin_id:
        driver.find_element_by_xpath(search_box).send_keys(douyin_id)
        time.sleep(0.1)

    # Run the search.
    driver.find_element_by_xpath(search_button).click()
    # Switch to the "用户" tab.
    _wait_for_xpath(driver, user_tab, 3).click()
    # Open the first result by clicking its avatar.
    _wait_for_xpath(driver, first_avatar, 3).click()
    # Open the follower list.
    _wait_for_xpath(driver, follower_button, 3).click()

    # Swipe from 75% to 25% of the screen height along the vertical centre
    # line until the page source contains the '没有更多了' marker.
    width, height = get_size(driver)
    x = int(width * 0.5)
    start_y = int(height * 0.75)
    end_y = int(height * 0.25)
    while '没有更多了' not in driver.page_source:
        driver.swipe(x, start_y, x, end_y)
        time.sleep(0.5)


# Run the crawl only when this file is executed as a script.
if __name__ == "__main__":
    handle_douyin(driver=driver)

  

  

猜你喜欢

转载自www.cnblogs.com/yoyo1216/p/11114031.html
今日推荐