# NOTE: this code is rough and hacky; it is not suitable as learning material.
# -*- coding:utf-8 -*-
import os
import time
from math import isnan

import pandas as pd
import pyautogui
import requests
import xlrd
import xlwt
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from xlutils.copy import copy
# Account names that are spelled differently between the two workbooks:
# key = spelling to be replaced, value = spelling returned in its place.
_NAME_ALIASES = {
    '夜听与你真情入眠(夜听与您温情入眠)': '夜听与您温情入眠',
    '来学最美广场': '来学最美广场舞(原名:夜听好眠入梦)',
}


def judge_name(row: str) -> str:
    """Return the normalised account name for *row*.

    Surrounding whitespace is stripped first. If the stripped name is one of
    the known aliases, its replacement spelling is returned; otherwise the
    stripped name itself is returned.
    """
    row = row.strip()
    return _NAME_ALIASES.get(row, row)
# ---- Run configuration -------------------------------------------------
# The day being processed, in the two spellings the script needs:
# `date` is interpolated into spreadsheet column headers, `TASK_DAY` is
# searched for in the scraped analytics page.
TASK_DAY = '2018-08-21'
date = '8.21'

# Rows before position `number - 2` are dropped when the sheets are sliced.
number = 231

# Input workbooks and the intermediate workbook produced by the first pass.
origin_file = r'C:\Users\小明\Desktop\【数据组】变现号及闲置号统计表8.22汇总.xlsx'
new_file = r'C:\Users\小明\Desktop\上海账号统计2018.8.22.xlsx'
end_file = './上海.xlsx'

# Widen pandas' console output so printed frames are not truncated or wrapped.
for _option, _value in (
    ('display.max_columns', 1000),
    ('display.max_rows', 10000),
    ('expand_frame_repr', False),
):
    pd.set_option(_option, _value)
# First pass: build the intermediate workbook. It is only built when it does
# not exist yet; an existing file is reused untouched.
if not os.path.exists(end_file):
    # Load the sheets at positions 1 and 3 of the summary workbook.
    df = pd.read_excel(origin_file, sheet_name=[1, 3])
    bianxian, xianzhi = df[1], df[3]

    # Column headers carry the processed day as a "(M.D)" prefix.
    total_col = '({})总粉丝数'.format(date)
    cancel_col = '({})取消关注人数'.format(date)
    new_col = '({})新增关注人数'.format(date)

    # account name -> [total fans, unfollows, new follows].
    # The second sheet is processed last, so it wins on duplicate names.
    origin_container = {}
    for sheet in (bianxian, xianzhi):
        for _, row in sheet.iterrows():
            account = row['公众号名称']
            if not isinstance(account, str):
                # Blank cells come back as NaN and have no .strip(); skip them.
                continue
            origin_container[account.strip()] = [row[total_col], row[cancel_col], row[new_col]]

    d = pd.read_excel(new_file, sheet_name='男号+女号')
    # reset_index() exposes the index levels as columns ('level_1' is read below).
    df2 = d.reset_index()
    df2 = df2[number - 2:]

    # NOTE(review): xlrd 2.0+ reads only .xls files; opening this .xlsx input
    # presumably relies on an older xlrd - confirm the pinned version.
    rb = xlrd.open_workbook(new_file)
    wb = copy(rb)
    # NOTE(review): sheet index 2 is presumably the '男号+女号' sheet read above - confirm.
    ws = wb.get_sheet(2)
    for index, row in df2.iterrows():
        raw_name = row['level_1']
        if not isinstance(raw_name, str):
            # Empty name cells (NaN) and other non-text values cannot be matched.
            continue
        new_row = judge_name(raw_name)
        if new_row not in origin_container:
            continue
        total, cancelled, added = origin_container[new_row]
        # Worksheet row is the frame index shifted by one; columns 3-5 receive
        # total fans, unfollows and new follows respectively.
        ws.write(index + 1, 3, total)
        ws.write(index + 1, 4, cancelled)
        ws.write(index + 1, 5, added)
    wb.save(end_file)
# Second pass input: reload sheet 2 of the intermediate workbook and collect
# the rows that still have no total-fans value.
df = pd.read_excel(end_file, sheet_name=[2])[2]
df2 = df.reset_index()
web_data = df2[number - 2:]

# Accounts that are never looked up online.
_SKIPPED_ACCOUNTS = {'一齐来学习广场舞', '魅力广场舞等着您', '一起来约广场舞'}

# 'level_2' value -> frame index of the row that has to be filled in.
web_dict = {}
for index, row in web_data.iterrows():
    if row['level_1'] in _SKIPPED_ACCOUNTS:
        continue
    row_key = row['level_2']
    # pd.isna accepts any cell value; math.isnan would raise TypeError on a
    # cell that holds text instead of a number.
    if isinstance(row_key, str) and pd.isna(row['总粉数']):
        web_dict[row_key] = index
print(len(web_dict))
# Endpoints used by the scraping pass. The '{}' placeholders are filled with
# an account key (SEARCH_URL) and a session token (ANALYSICS_URL).
ACCOUNT_URL = 'http://ws.tuwme.com/smaccount/lists'
SEARCH_URL = 'http://ws.tuwme.com/smaccount/gotoken/pid/{}'
EXTENTIONS_URL = 'chrome://extensions/'
ANALYSICS_URL = 'https://mp.weixin.qq.com/misc/useranalysis?&token={}'

chrome_options = webdriver.ChromeOptions()
# Value 2 for the images content setting asks Chrome to block image loading.
# NOTE(review): the bare "javascript" key does not look like a full Chrome
# preference path, so it may have no effect - confirm what was intended.
prefs = {"profile.managed_default_content_settings.images": 2,
         "javascript": 2, }
chrome_options.add_experimental_option("prefs", prefs)
# `options=` is the supported keyword; `chrome_options=` is deprecated.
browser = webdriver.Chrome(options=chrome_options)
browser.implicitly_wait(10)

# Open the extensions page and drive the real mouse with fixed screen
# coordinates: two move-and-drag gestures followed by one click.
# NOTE(review): presumably this arranges the window and toggles something on
# the extensions page; the coordinates depend on the screen layout - confirm.
browser.get(url=EXTENTIONS_URL)
pyautogui.moveTo(300, 40, duration=0.5)
pyautogui.dragRel(100, 0, duration=0.5)
pyautogui.moveTo(40, 40, duration=0.5)
pyautogui.dragRel(300, 300, duration=0.5)
pyautogui.moveTo(630, 260, duration=3)
pyautogui.click()

# Load the account list first, then attach the session cookies and reload so
# the page is requested again with them.
browser.get(url=ACCOUNT_URL)
# SECURITY: these are session identifiers committed in source. Anyone with
# the file can reuse them while they are valid; load them from the
# environment or a local untracked config instead.
browser.add_cookie({'name': 'PHPSESSID', 'value': 'je8crao0g3pke77rq2apiqvnc3'})
browser.add_cookie({'name': 'crm_uuid', 'value': '28a7de409e61728dc128c2f6f46ae3cc'})
browser.refresh()
def get_pagecode(key_, max_attempts=None):
    """Open the page for *key_* in the shared browser and wait until it has rendered.

    The module-level ``browser`` is pointed at ``SEARCH_URL`` formatted with
    *key_*; the call then waits up to 10 seconds for the element with class
    ``weui-desktop-mass-media__data__inner`` to become visible. On any
    failure the whole navigation is retried.

    Args:
        key_: Value substituted into ``SEARCH_URL``.
        max_attempts: Maximum number of tries. ``None`` (the default) keeps
            retrying until the page loads.

    Returns:
        None. The result of the call is the state of ``browser``.

    Raises:
        Exception: The error of the last attempt, only when *max_attempts*
            is set and has been reached.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            browser.get(url=SEARCH_URL.format(key_))
            WebDriverWait(browser, timeout=10, poll_frequency=0.5) \
                .until(EC.visibility_of_element_located((By.CLASS_NAME, 'weui-desktop-mass-media__data__inner')))
        except Exception as e:
            # Report the failure instead of hiding it, then try again.
            print('get_pagecode attempt {} failed for {}: {!r}'.format(attempt, key_, e))
            if max_attempts is not None and attempt >= max_attempts:
                raise
            continue
        else:
            print('pagecode獲取成功')
            break
# Scraping pass: for every account still missing data, read the figures for
# TASK_DAY from the analytics page and write them into a copy of the
# intermediate workbook.
rb = xlrd.open_workbook(end_file)
wb = copy(rb)
ws = wb.get_sheet(2)

# Bounds for the two polling loops below, so that one account whose page never
# yields a token or the expected data cannot stall the whole run or flood the
# server with back-to-back requests.
MAX_ATTEMPTS = 5
RETRY_DELAY_SECONDS = 2

# Deletes spaces, carriage returns, newlines and tabs in a single pass.
_STRIP_WHITESPACE = str.maketrans('', '', ' \r\n\t')

try:
    for key_, value_ in web_dict.items():
        print('当前的key_', key_)
        get_pagecode(key_)

        # Reuse the browser session for the plain HTTP request below.
        cookie = {c['name']: c['value'] for c in browser.get_cookies()}

        # The session token is the `token` query parameter of the current URL.
        # Cutting at the next '&' keeps any following parameters out of it.
        token = ''
        for _ in range(MAX_ATTEMPTS):
            token = browser.current_url.partition('token=')[2].split('&')[0]
            if token:
                break
            time.sleep(RETRY_DELAY_SECONDS)
        if not token:
            print('no token in URL, skipping', key_, browser.current_url)
            continue

        analysis_url = ANALYSICS_URL.format(token)

        # Fetch the page and cut out the text between the marker for TASK_DAY
        # and the next '}'.
        # NOTE(review): assumes the page embeds one record per day containing
        # `date: "YYYY-MM-DD"` followed by the user counters - confirm.
        pagecode = None
        for attempt in range(1, MAX_ATTEMPTS + 1):
            try:
                response = requests.get(analysis_url, cookies=cookie, timeout=10)
                pagecode = response.text.split('date: "{}"'.format(TASK_DAY))[1].split('}')[0]
                break
            except (requests.RequestException, IndexError) as e:
                # IndexError means the marker for TASK_DAY was not in the page.
                print('analysis fetch attempt {} failed for {}: {!r}'.format(attempt, key_, e))
                time.sleep(RETRY_DELAY_SECONDS)
        if pagecode is None:
            print('no data for {} on {}, skipping'.format(key_, TASK_DAY))
            continue

        p = pagecode.translate(_STRIP_WHITESPACE)
        # `new_user` is taken through the end of the snippet; the other two
        # values end at the next comma.
        new_user = p.split('new_user:')[-1]
        cumulate_user = p.split('cumulate_user:')[-1].split(',')[0]
        cancel_user = p.split('cancel_user:')[-1].split(',')[0].split(':')[-1]
        print(key_, value_)
        # Worksheet row is the stored frame index shifted by one; columns 3-5
        # receive cumulative, cancelled and new users. The values are written
        # as the strings that were scraped.
        ws.write(value_ + 1, 3, cumulate_user)
        ws.write(value_ + 1, 4, cancel_user)
        ws.write(value_ + 1, 5, new_user)
finally:
    # Save whatever has been collected even if the loop is interrupted.
    wb.save('./red.xlsx')