Python 伪代码之自动化工作 1【内向即蛔虫--王奕君】

代码很伪,不适于学习

# -*- coding:utf-8 -*-
import os
import time
from math import isnan

import pandas as pd
import pyautogui
import requests
import xlrd
import xlwt
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from xlutils.copy import copy


def judge_name(row: str):
    """Return the account name to look up for the raw cell text *row*.

    Surrounding whitespace is stripped first. Two names are known to be
    spelled differently between the workbooks and are translated to the
    spelling used as the lookup key; every other name is returned stripped
    but otherwise unchanged.
    """
    aliases = {
        '夜听与你真情入眠(夜听与您温情入眠)': '夜听与您温情入眠',
        '来学最美广场': '来学最美广场舞(原名:夜听好眠入梦)',
    }
    cleaned = row.strip()
    return aliases.get(cleaned, cleaned)


# --- Run parameters (edited by hand before each run) -----------------------
# TASK_DAY is the ISO date searched for in the analysis page source;
# `date` is the same day in the short form used inside the spreadsheet
# column headers, e.g. '(8.21)总粉丝数'.
TASK_DAY = '2018-08-21'
date = '8.21'
# Rows are taken from position `number - 2` onwards in the target sheet.
number = 231

# origin_file: source workbook (sheets 1 and 3 are read).
# new_file:    workbook to be filled in.
# end_file:    result of the first stage; its existence skips that stage.
origin_file = r'C:\Users\小明\Desktop\【数据组】变现号及闲置号统计表8.22汇总.xlsx'
new_file = r'C:\Users\小明\Desktop\上海账号统计2018.8.22.xlsx'
end_file = './上海.xlsx'

# Widen pandas' console output so whole frames are printed while debugging.
for _option, _value in (
        ('display.max_columns', 1000),
        ('display.max_rows', 10000),
        ('expand_frame_repr', False),
):
    pd.set_option(_option, _value)
if not os.path.exists(end_file):
    # Stage one (skipped when end_file already exists): copy the statistics
    # that are already present in origin_file into a copy of new_file and
    # save the result as end_file.
    df = pd.read_excel(origin_file, sheet_name=[1, 3])
    bianxian, xianzhi = df[1], df[3]

    # Column headers carry the short date, e.g. '(8.21)总粉丝数'.
    stat_columns = [
        '({})总粉丝数'.format(date),
        '({})取消关注人数'.format(date),
        '({})新增关注人数'.format(date),
    ]

    # account name -> [total followers, unfollows, new followers].
    # Sheet 3 is processed after sheet 1, so on a duplicate name it wins.
    origin_container = {}
    for sheet in (bianxian, xianzhi):
        for index, row in sheet.iterrows():
            origin_container[row["公众号名称"].strip()] = [row[col] for col in stat_columns]

    # `sheet_name` is the supported keyword (the old `sheetname` spelling has
    # been removed from pandas) and matches the other read_excel calls here.
    d = pd.read_excel(new_file, sheet_name='男号+女号')
    df2 = d.reset_index()
    df2 = df2[number - 2::]

    # NOTE(review): xlrd >= 2.0 no longer opens .xlsx files and xlwt writes
    # the legacy .xls format regardless of the file extension - confirm the
    # library versions this script is meant to run with.
    rb = xlrd.open_workbook(new_file)
    wb = copy(rb)
    ws = wb.get_sheet(2)
    for index, row in df2.iterrows():
        name = row['level_1']
        # Empty cells come back as float NaN; there is no name to look up.
        if isinstance(name, float):
            continue
        data = origin_container.get(judge_name(name))
        if data is None:
            continue
        # index + 1 compensates for the header row of the sheet.
        ws.write(index + 1, 3, data[0])
        ws.write(index + 1, 4, data[1])
        ws.write(index + 1, 5, data[-1])

    # Save once, after all rows are written. Saving unconditionally also
    # guarantees end_file exists for the next stage even if no name matched.
    wb.save(end_file)
# Collect the accounts that still have no total-follower value after stage
# one: web_dict maps the 'level_2' cell of such a row to its row position.
df = pd.read_excel(end_file, sheet_name=2)
df2 = df.reset_index()
web_data = df2.iloc[number - 2:]

# Accounts that are never looked up online.
skipped_accounts = ('一齐来学习广场舞', '魅力广场舞等着您', '一起来约广场舞')

web_dict = {}
for index, row in web_data.iterrows():
    row_key = row['level_2']
    if row['level_1'] in skipped_accounts:
        continue
    # Rows without a textual 'level_2' value cannot be used as a key.
    if not isinstance(row_key, str):
        continue
    if isnan(row['总粉数']):
        web_dict[row_key] = index
print(len(web_dict))
# Endpoints used by the scraper. SEARCH_URL takes the account key taken from
# the spreadsheet, ANALYSICS_URL takes the token found in the browser URL.
ACCOUNT_URL = 'http://ws.tuwme.com/smaccount/lists'
SEARCH_URL = 'http://ws.tuwme.com/smaccount/gotoken/pid/{}'
EXTENTIONS_URL = 'chrome://extensions/'
ANALYSICS_URL = 'https://mp.weixin.qq.com/misc/useranalysis?&token={}'

# Chrome preferences: value 2 for the image content setting blocks images.
# NOTE(review): the bare "javascript" key is not a full preference path
# (compare the images key) - confirm whether it has any effect.
chrome_options = webdriver.ChromeOptions()
prefs = {"profile.managed_default_content_settings.images": 2,
         "javascript": 2, }
chrome_options.add_experimental_option("prefs", prefs)
# `options=` replaces the deprecated `chrome_options=` keyword, which current
# Selenium releases no longer accept.
browser = webdriver.Chrome(options=chrome_options)
browser.implicitly_wait(10)

# Open the extensions page and drive the mouse with pyautogui.
# NOTE(review): the coordinates below are absolute screen positions, so they
# depend on the display and window layout - presumably they move/resize the
# window and click a control on the extensions page; verify on the target PC.
browser.get(url=EXTENTIONS_URL)
pyautogui.moveTo(300, 40, duration=0.5)
pyautogui.dragRel(100, 0, duration=0.5)
pyautogui.moveTo(40, 40, duration=0.5)
pyautogui.dragRel(300, 300, duration=0.5)
pyautogui.moveTo(630, 260, duration=3)
pyautogui.click()

# Cookies can only be added for the domain currently loaded, so the account
# list page is opened first and refreshed after the cookies are set.
browser.get(url=ACCOUNT_URL)
# NOTE(review): hard-coded session identifiers - they expire and should not
# live in source control; consider reading them from the environment.
browser.add_cookie({'name': 'PHPSESSID', 'value': 'je8crao0g3pke77rq2apiqvnc3'})
browser.add_cookie({'name': 'crm_uuid', 'value': '28a7de409e61728dc128c2f6f46ae3cc'})
browser.refresh()


def get_pagecode(key_, max_retries=None):
    """Load the page for account *key_* and wait until it has rendered.

    The browser is pointed at ``SEARCH_URL`` formatted with *key_*; the call
    then waits up to 10 seconds for an element with class
    ``weui-desktop-mass-media__data__inner`` to become visible. On any
    failure the whole navigation is retried.

    Args:
        key_: value substituted into ``SEARCH_URL``.
        max_retries: maximum number of attempts. ``None`` (the default)
            retries forever, which is the historical behaviour.

    Raises:
        Exception: the last error is re-raised once *max_retries* attempts
            have failed (never when *max_retries* is ``None``).
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            browser.get(url=SEARCH_URL.format(key_))
            WebDriverWait(browser, timeout=10, poll_frequency=0.5) \
                .until(EC.visibility_of_element_located((By.CLASS_NAME, 'weui-desktop-mass-media__data__inner')))
        except Exception as e:
            # Report the failure instead of retrying silently, so a stuck
            # run can be diagnosed from the console output.
            print('get_pagecode attempt {} for {} failed: {!r}'.format(attempt, key_, e))
            if max_retries is not None and attempt >= max_retries:
                raise
            continue
        print('pagecode獲取成功')
        return


# Stage two: for every account still missing statistics, open its page in
# the browser, fetch the analysis page with the browser's cookies and write
# the three figures for TASK_DAY into the workbook.

# Upper bound on retries per account, so that a URL without a token or a
# page without a record for TASK_DAY cannot hang the run forever.
MAX_ATTEMPTS = 20
# Pause between attempts, in seconds, to avoid hammering browser and server.
RETRY_DELAY = 1

rb = xlrd.open_workbook(end_file)
wb = copy(rb)
ws = wb.get_sheet(2)
for key_, value_ in web_dict.items():
    print('当前的key_', key_)
    get_pagecode(key_)
    # Reuse the browser session for the plain HTTP request below.
    cookie = {item['name']: item['value'] for item in browser.get_cookies()}

    # The token is the text following 'token' in the last path segment of
    # the current URL; it may not be there until the browser has navigated.
    token = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            token = os.path.basename(browser.current_url).split('token')[1].replace('=', '')
            break
        except Exception as e:
            print('token attempt {} for {} failed: {!r}'.format(attempt, key_, e))
            time.sleep(RETRY_DELAY)
    if token is None:
        print('no token found for', key_, '- account skipped')
        continue

    analysis_url = ANALYSICS_URL.format(token)
    # Extract the text between 'date: "<TASK_DAY>"' and the next '}'.
    pagecode = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            response = requests.get(analysis_url, cookies=cookie, timeout=10)
            pagecode = response.text.split('date: "{}"'.format(TASK_DAY))[1].split('}')[0]
            break
        except Exception as e:
            print('analysis attempt {} for {} failed: {!r}'.format(attempt, key_, e))
            time.sleep(RETRY_DELAY)
    if pagecode is None:
        print('no record for', TASK_DAY, 'found for', key_, '- account skipped')
        continue

    # Drop whitespace so the 'name:value' pairs can be split reliably.
    p = pagecode.replace(' ', '').replace('\r', '').replace('\n', '').replace('\t', '')
    # Each value ends at the next comma; new_user is cut the same way as the
    # other fields so a trailing comma or field cannot leak into the cell.
    new_user = p.split('new_user:')[-1].split(',')[0]
    cumulate_user = p.split('cumulate_user:')[-1].split(',')[0]
    cancel_user = p.split('cancel_user:')[-1].split(',')[0].split(':')[-1]
    print(key_, value_)
    # value_ is the row position recorded in web_dict; +1 skips the header.
    ws.write(value_ + 1, 3, cumulate_user)
    ws.write(value_ + 1, 4, cancel_user)
    ws.write(value_ + 1, 5, new_user)
    # Saved after every account so an interrupted run keeps its progress.
    wb.save('./red.xlsx')
 

猜你喜欢

转载自blog.csdn.net/qq_37995231/article/details/81979246
今日推荐