# 代码很伪,不适于学习  ("this code is rough and not suitable as a learning reference")
# -*- coding:utf-8 -*-
import datetime
import json
import os
import pyautogui
import requests
import xlrd
import xlwt
from lxml import html
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from xlutils.copy import copy
def get_week_day(date):
    """Return the Chinese weekday name (e.g. ``'星期二'``) for *date*.

    *date* is any object exposing ``weekday()`` (``datetime.date`` or
    ``datetime.datetime``), where Monday is 0 and Sunday is 6.
    """
    names = ('星期一', '星期二', '星期三', '星期四', '星期五', '星期六', '星期日')
    return names[date.weekday()]
def get_pagecode():
    """Load the mass-send history page for the current account and return its HTML.

    Relies on module-level globals: ``browser`` (selenium webdriver),
    ``SEARCH_URL``, and the main-loop variable ``value_`` (uses
    ``value_['ghid']``).  TODO(review): passing these in as parameters would
    decouple this function from the main loop.

    Retries forever until the read-count element
    (``weui-desktop-mass-media__data__inner``) becomes visible.
    """
    while True:
        try:
            browser.get(url=SEARCH_URL.format(value_['ghid']))
            WebDriverWait(browser, timeout=20, poll_frequency=0.5) \
                .until(EC.visibility_of_element_located(
                    (By.CLASS_NAME, 'weui-desktop-mass-media__data__inner')))
            print('pagecode獲取成功')
        except Exception as e:
            # FIX: the original swallowed every exception silently, making an
            # endless retry loop invisible.  Log the error before retrying.
            print('get_pagecode retrying after error:', e)
            continue
        else:
            return browser.page_source
def get_read(cookie, week, pagecode, token):
    """Scrape per-article read counts for the day matching *week*, plus the
    account's new/cumulative user figures for TASK_DAY.

    Parameters:
        cookie (dict): cookies for mp.weixin.qq.com requests.
        week (str): Chinese weekday name ('星期二', ...) of the target day.
        pagecode (str): HTML of the mass-send history page (from get_pagecode()).
        token (str): mp.weixin.qq.com session token for the analysis URL.

    Returns a dict::

        {'task': [[read_count, tag], ...],   # tag is '小说' or ''
         'state': '' if the day was found else 'disappear',
         'new_user': str, 'cumulate_user': str}

    Also uses module-level globals ANALYSICS_URL, TASK_DAY and get_pagecode().
    """
    task_list = {'task': [], 'state': 'disappear', 'new_user': '', 'cumulate_user': ''}
    tree = html.fromstring(pagecode)
    trees = tree.xpath('//em[@class="weui-desktop-mass__time"]')
    for tr in trees:
        day = tr.xpath('./text()')[0].split(' ')[0]
        if day in week:
            task_list['state'] = ''
            y = tr.xpath('../following-sibling::div[1]//div[@class="weui-desktop-mass-appmsg__bd"]')
            for x in y:
                read_state = []
                read_number = x.xpath(
                    'normalize-space(./ul//span[@class="weui-desktop-mass-media__data__inner"]/text())')
                href = x.xpath('./a/@href')
                if read_number == '':
                    # Page was stale or incomplete: reload and re-parse.
                    # BUG FIX: the original discarded the recursive call's
                    # result and kept processing the stale page; return the
                    # fresh result instead.
                    pagecode = get_pagecode()
                    return get_read(cookie, week, pagecode, token)
                # Retry the article fetch forever on network errors.
                while True:
                    try:
                        response = requests.get(url=href[0].replace(' ', ''), timeout=20, cookies=cookie)
                        break
                    except Exception:
                        continue
                # Articles carrying the novel-deletion notice are tagged '小说'.
                if '小说我们会定时删文的哦' in response.text.encode('gbk', 'ignore').decode('gbk'):
                    read_state.extend([read_number, '小说'])
                else:
                    read_state.extend([read_number, ''])
                task_list['task'].append(read_state)
            # Fetch the user-analysis page and cut out the TASK_DAY record.
            analysis_url = ANALYSICS_URL.format(token)
            while True:
                try:
                    response = requests.get(analysis_url, cookies=cookie, timeout=20)
                    pagecode = response.text.split('date: "{}"'.format(TASK_DAY))[1].split('}')[0]
                    break
                except Exception:
                    continue
            # Strip whitespace, then pull the two user counters out of the
            # JS-object-literal fragment.
            p = pagecode.replace(' ', '').replace('\r', '').replace('\n', '').replace('\t', '')
            new_user = p.split('new_user:')[-1]
            cumulate_user = p.split('cumulate_user:')[-1].split(',')[0]
            task_list['new_user'] = new_user
            task_list['cumulate_user'] = cumulate_user
            return task_list
    # No entry matched the requested weekday: state stays 'disappear'.
    return task_list
# --- Script-level configuration and constants -------------------------------
FAILE = []  # failed accounts; not used in the visible code — TODO confirm
null = ''   # shim so that eval() of JSON-like response text resolves `null`
TASK = {}   # wechatname -> {'ghid', 'reason', 'position'} work table
TASK_DAY = '2018-08-21'  # publish date being tallied (YYYY-MM-DD)
# WORK_EXCEL=R'C:\Users\小明\Desktop\男号模板阅读数统计.xlsx'
ACCOUNT_URL = 'http://ws.tuwme.com/smaccount/lists'
SEARCH_URL = 'http://ws.tuwme.com/smaccount/gotoken/pid/{}'  # per-account token/history page; {} = ghid
ANALYSICS_URL = 'https://mp.weixin.qq.com/misc/useranalysis?&token={}'  # user-analysis endpoint; {} = token
STATISTICS_FILE = r'E:\public_statistics\work_1.xlsx'  # output workbook (read then rewritten)
ACCOUNT_FILE = r'E:\public_statistics\account_1.json'  # wechatname -> spreadsheet row map
EXTENTIONS_URL = 'chrome://extensions/'
# WEI_URL = 'https://mp.weixin.qq.com'
# MISTAKE_TXT=r'E:\public_statistics\{}.txt'
HEADERS = {'Host': 'ws.tuwme.com', 'Connection': 'keep-alive',
           'Accept': 'application/json, text/javascript, */*; q=0.01',
           'X-Requested-With': 'XMLHttpRequest',
           'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'}
# Account-list endpoint; pageSize=1699 presumably covers all accounts — TODO confirm
TOTAL_STATE = 'http://ws.tuwme.com/smaccount/lists?offset=0&pageSize=1699&order=asc&wechatname='
# --- Browser bring-up and CRM login -----------------------------------------
session = requests.Session()  # NOTE(review): created but never used below
chrome_options = webdriver.ChromeOptions()
# Disable image loading and JavaScript to speed up page loads.
prefs = {"profile.managed_default_content_settings.images": 2,
         "javascript": 2, }
chrome_options.add_experimental_option("prefs", prefs)
browser = webdriver.Chrome(chrome_options=chrome_options)
browser.implicitly_wait(10)
browser.get(url=EXTENTIONS_URL)
# Blind GUI automation on chrome://extensions — the coordinates are screen-
# resolution dependent.  Presumably this toggles/positions an extension;
# TODO(review): confirm what is actually clicked, this is very fragile.
pyautogui.moveTo(300, 40, duration=0.5)
pyautogui.dragRel(100, 0, duration=0.5)
pyautogui.moveTo(40, 40, duration=0.5)
pyautogui.dragRel(300, 300, duration=0.5)
pyautogui.moveTo(630, 260, duration=3)
pyautogui.click()
# Authenticate against the CRM by injecting hard-coded session cookies,
# then refresh so the session takes effect.
# NOTE(review): hard-coded session ids expire; consider a real login flow.
browser.get(url='http://ws.tuwme.com/smaccount/lists')
browser.add_cookie({'name': 'PHPSESSID', 'value': 'je8crao0g3pke77rq2apiqvnc3'})
browser.add_cookie({'name': 'crm_uuid', 'value': '28a7de409e61728dc128c2f6f46ae3cc'})
browser.refresh()
# Copy the browser's cookies into a plain dict usable by requests.
cookies = {}
for i in browser.get_cookies():
    cookies[i['name']] = i['value']
# --- Fetch the account list and build the TASK work table -------------------
# Retry the account-list request forever on network errors.
while True:
    try:
        response = requests.get(url=TOTAL_STATE, cookies=cookies, timeout=20, headers=HEADERS)
        break
    except Exception as e:
        continue
# SECURITY FIX: the response body is untrusted text — parse it with
# json.loads instead of eval().  eval() would execute arbitrary code from
# the server and only worked because of the module-level `null = ''` shim
# (json.loads maps JSON null to None natively).
rows = json.loads(response.text)['rows']
with open(file=ACCOUNT_FILE) as folder:
    data = json.load(folder)  # wechatname -> spreadsheet row position
# Keep only accounts that have a row position in the mapping file.
for row in rows:
    TASK_ = {}
    if row['wechatname'] in data:
        TASK_['ghid'] = row['ghid']
        TASK_['reason'] = row['reason']
        TASK_['position'] = data[row['wechatname']]
        TASK[row['wechatname']] = TASK_
# --- Main loop: scrape each account and write results into the workbook -----
# workbook = xlwt.Workbook(encoding='utf-8')
rb = xlrd.open_workbook(STATISTICS_FILE)
wb = copy(rb)  # xlutils.copy: writable clone of the existing workbook
print('--------------------------------现在开始----------------------------', TASK_DAY)
ws = wb.get_sheet(0)
for key_, value_ in TASK.items():
    pagecode = ''
    # Only accounts whose cookie auto-fetch succeeded are processed.
    if value_['reason'] == '自动获取cookie成功':
        cookie = {}
        pagecode = get_pagecode()  # reads the global loop variable value_
        for i in browser.get_cookies():
            cookie[i['name']] = i['value']
        # Extract the token query parameter from the redirected URL.
        # NOTE(review): if 'token' never appears in the URL this retries
        # forever on an unchanging value — consider a retry cap.
        while True:
            try:
                token = os.path.basename(browser.current_url).split('token')[1].replace('=', '')
                break
            except Exception as e:
                pass
        # '2018-08-21' -> [2018, 8, 21] (lstrip('0') drops the zero padding).
        task_all = [int(i.lstrip('0')) for i in TASK_DAY.split('-')]
        week = get_week_day(datetime.datetime(task_all[0], task_all[1], task_all[2]))
        task_res = get_read(cookie, week, pagecode, token)
        lateral = int(data[key_]) - 1  # 0-based spreadsheet row for this account
        if not task_res['state']:
            # Day found: write each article's read count starting at column 2;
            # novel-tagged articles get their tag appended to the count.
            vertical = 2
            for j in task_res['task']:
                if not j[-1]:
                    ws.write(int(lateral), vertical, j[0])
                    vertical += 1
                else:
                    ws.write(int(lateral), vertical, j[0] + j[-1])
                    vertical += 1
            ws.write(lateral, 10, task_res['cumulate_user'])
            ws.write(lateral, 11, task_res['new_user'])
        else:
            # Day not found on the history page: mark the account as blocked.
            ws.write(lateral, 2, '屏蔽')
        print(task_res, lateral, key_)
wb.save(STATISTICS_FILE)
print('----------------------------------------结束-------------------------------------------------')