Python project: Getting WeChat friend information (1) - Scraping WeChat friend data

These are notes on the problems I ran into while trying to capture WeChat friend data myself.

First, here is the complete source code for fetching the WeChat friend list.

The code logs in to the web version of WeChat with a QR code, downloads each friend's avatar, and saves the friends' profile data to a CSV file for further processing.

The environment I use is Python 3.5.


import codecs
import csv
import json
import math
import os
import re
import ssl
import subprocess
import sys
import threading
import time
import urllib.error
import urllib.parse
import urllib.request
import xml.dom.minidom

import requests

# Set to True when testing: the raw JSON replies are then dumped to disk and
# every interface result is printed.
DEBUG = False

MAX_GROUP_NUM = 2  # Number of people in each group
# Interval between interface calls. If it is too short the server answers
# "operations are too frequent" and limits the account for about half an hour.
INTERFACE_CALLING_INTERVAL = 5
MAX_PROGRESS_LEN = 50

# Location where the login QR code is stored; does not need to be changed.
QRImagePath = os.path.join(os.getcwd(), 'qrcode.jpg')

# Login-polling state: `tip` is sent with every login poll, `uuid` identifies
# the QR-code login session.
tip = 0
uuid = ''

# Endpoints discovered once the login has been confirmed.
base_uri = ''
redirect_uri = ''
push_uri = ''

# Session credentials parsed from the login redirect response.
skey = ''
wxsid = ''
wxuin = ''
pass_ticket = ''
deviceId = 'e000000000000000'

# Credential bundle sent along with the JSON interface calls.
BaseRequest = {}

# Filled in by webwxinit() from the server reply (My and SyncKey are replaced
# by the mappings found under 'User' and 'SyncKey').
ContactList = []
My = []
SyncKey = []

try:
    # Python 2: use the lazy xrange wherever this module calls range.
    range = xrange
except NameError:
    # Python 3: xrange does not exist and range is already lazy.
    pass


def responseState(func, BaseResponse):
    """Report whether an interface reply signals success.

    Args:
        func: Name of the interface that was called; only used in the
            printed diagnostic line.
        BaseResponse: The ``BaseResponse`` mapping of a decoded reply. Its
            ``Ret`` and ``ErrMsg`` entries are read.

    Returns:
        True when ``Ret`` is 0, False otherwise.

    Raises:
        KeyError: If ``Ret`` or ``ErrMsg`` is missing from ``BaseResponse``.
    """
    ErrMsg = BaseResponse['ErrMsg']
    Ret = BaseResponse['Ret']
    succeeded = Ret == 0

    # Failures are always reported; successes only when DEBUG is enabled.
    if not succeeded or DEBUG:
        print('func: %s, Ret: %d, ErrMsg: %s' % (func, Ret, ErrMsg))

    return succeeded


def getUUID():
    """Ask the login server for a new QR-code session and store its uuid.

    The uuid found in the reply is written to the module-level ``uuid``.

    Returns:
        True when the reply carries code 200, False when it carries any other
        code or cannot be parsed at all.
    """
    global uuid

    url = 'https://login.weixin.qq.com/jslogin'
    params = {
        'appid': 'wx782c26e4c19acffb',
        'fun': 'new',
        'lang': 'zh_CN',
        '_': int(time.time()),
    }

    r = myRequests.get(url=url, params=params)
    r.encoding = 'utf-8'
    data = r.text

    # Expected reply:
    # window.QRLogin.code = 200; window.QRLogin.uuid = "oZwt_bFfRg==";
    regx = r'window.QRLogin.code = (\d+); window.QRLogin.uuid = "(\S+?)"'
    pm = re.search(regx, data)
    if pm is None:
        # Unparseable reply: report failure instead of crashing on None.
        return False

    code = pm.group(1)
    uuid = pm.group(2)

    return code == '200'


def showQRImage():
    """Download the login QR code, save it to QRImagePath and open it.

    Sets the module-level ``tip`` to 1 once the image has been fetched. The
    image is opened with ``open`` on macOS, ``xdg-open`` on Linux and
    ``os.startfile`` on every other platform.
    """
    global tip

    url = 'https://login.weixin.qq.com/qrcode/' + uuid
    params = {
        't': 'webwx',
        '_': int(time.time()),
    }

    r = myRequests.get(url=url, params=params)

    tip = 1

    # The context manager closes the file even if the write fails.
    with open(QRImagePath, 'wb') as f:
        f.write(r.content)
    time.sleep(1)

    if 'darwin' in sys.platform:
        subprocess.call(['open', QRImagePath])
    elif 'linux' in sys.platform:
        subprocess.call(['xdg-open', QRImagePath])
    else:
        os.startfile(QRImagePath)

    print('Please use WeChat to scan the QR code to log in')


def waitForLogin():
    """Poll the login server once and record the outcome.

    On code '201' (QR code scanned) the module-level ``tip`` is reset to 0.
    On code '200' (login confirmed) the module-level ``redirect_uri``,
    ``base_uri`` and ``push_uri`` are filled in from the reply.

    Returns:
        The status code found in the reply, as a string ('200', '201',
        '408', ...).

    Raises:
        RuntimeError: If the reply does not contain the expected fields.
    """
    # These names are assigned below, so they must be declared global;
    # otherwise `tip` would be a local that is read before assignment.
    global tip, base_uri, redirect_uri, push_uri

    url = 'https://login.weixin.qq.com/cgi-bin/mmwebwx-bin/login?tip=%s&uuid=%s&_=%s' % (
        tip, uuid, int(time.time()))

    r = myRequests.get(url=url)
    r.encoding = 'utf-8'
    data = r.text

    # Expected reply: window.code=500;
    regx = r'window.code=(\d+);'
    pm = re.search(regx, data)
    if pm is None:
        raise RuntimeError('Unexpected login response: %r' % data)

    code = pm.group(1)

    if code == '201':  # scanned
        print('Successful scan, please click on the phone to confirm to log in')
        tip = 0
    elif code == '200':  # logged in
        print('Login...')
        regx = r'window.redirect_uri="(\S+?)";'
        pm = re.search(regx, data)
        if pm is None:
            raise RuntimeError('Login response has no redirect_uri: %r' % data)
        redirect_uri = pm.group(1) + '&fun=new'
        base_uri = redirect_uri[:redirect_uri.rfind('/')]

        # Correspondence between base_uri hosts and push hosts. Order matters:
        # the first entry whose host occurs in base_uri wins.
        services = [
            ('wx2.qq.com', 'webpush2.weixin.qq.com'),
            ('qq.com', 'webpush.weixin.qq.com'),
            ('web1.wechat.com', 'webpush1.wechat.com'),
            ('web2.wechat.com', 'webpush2.wechat.com'),
            ('wechat.com', 'webpush.wechat.com'),
            ('web1.wechatapp.com', 'webpush1.wechatapp.com'),
        ]
        # Fall back to the base URI when no known host matches.
        push_uri = base_uri
        for (searchUrl, pushUrl) in services:
            if searchUrl in base_uri:
                push_uri = 'https://%s/cgi-bin/mmwebwx-bin' % pushUrl
                break

        # Close the QR image viewer (macOS Preview only).
        if 'darwin' in sys.platform:
            os.system("osascript -e 'quit app \"Preview\"'")
    elif code == '408':  # timeout
        pass
    # Codes '400' and '500' are not handled specially.

    return code


def login():
    """Fetch the login redirect page and extract the session credentials.

    The reply is parsed as XML; the text of the ``skey``, ``wxsid``, ``wxuin``
    and ``pass_ticket`` children of the root element is stored in the
    module-level variables of the same name, and ``BaseRequest`` is rebuilt
    from them.

    Returns:
        True when all four credentials are non-empty, False otherwise.
    """
    global skey, wxsid, wxuin, pass_ticket, BaseRequest

    r = myRequests.get(url=redirect_uri)
    r.encoding = 'utf-8'
    data = r.text

    root = xml.dom.minidom.parseString(data).documentElement

    wanted = ('skey', 'wxsid', 'wxuin', 'pass_ticket')
    found = {}
    for node in root.childNodes:
        # An element without content has no child nodes; treat it as missing
        # instead of failing on childNodes[0].
        if node.nodeName in wanted and node.childNodes:
            found[node.nodeName] = node.childNodes[0].data

    # Credentials that were not found keep their previous value.
    skey = found.get('skey', skey)
    wxsid = found.get('wxsid', wxsid)
    wxuin = found.get('wxuin', wxuin)
    pass_ticket = found.get('pass_ticket', pass_ticket)

    if not all((skey, wxsid, wxuin, pass_ticket)):
        return False

    BaseRequest = {
        'Uin': int(wxuin),
        'Sid': wxsid,
        'Skey': skey,
        'DeviceID': deviceId,
    }

    return True


def webwxinit():
    """Call the ``webwxinit`` interface and cache its result.

    Stores the ``ContactList``, ``User`` and ``SyncKey`` entries of the reply
    in the module-level ``ContactList``, ``My`` and ``SyncKey``. When DEBUG is
    set, the raw reply is also written to ``webwxinit.json`` in the current
    working directory.

    Returns:
        The result of responseState() for the reply's ``BaseResponse``.
    """
    global ContactList, My, SyncKey

    url = (base_uri +
           '/webwxinit?pass_ticket=%s&skey=%s&r=%s' % (
               pass_ticket, skey, int(time.time())))
    params = {'BaseRequest': BaseRequest}
    headers = {'content-type': 'application/json; charset=UTF-8'}

    r = myRequests.post(url=url, data=json.dumps(params), headers=headers)
    r.encoding = 'utf-8'
    data = r.json()

    if DEBUG:
        with open(os.path.join(os.getcwd(), 'webwxinit.json'), 'wb') as f:
            f.write(r.content)

    dic = data
    ContactList = dic['ContactList']
    My = dic['User']
    SyncKey = dic['SyncKey']

    state = responseState('webwxinit', dic['BaseResponse'])
    return state


def webwxgetcontact():
    """Fetch the contact list and keep only ordinary friends.

    Entries are dropped when they are an official/service account
    (``VerifyFlag & 8``), one of the built-in special accounts, a group chat
    (``UserName`` contains ``@@``) or the logged-in user. When DEBUG is set,
    the raw reply is also written to ``webwxgetcontact.json`` in the current
    working directory.

    Returns:
        The ``MemberList`` list of the reply, filtered in place.
    """
    url = (base_uri +
           '/webwxgetcontact?pass_ticket=%s&skey=%s&r=%s' % (
               pass_ticket, skey, int(time.time())))
    headers = {'content-type': 'application/json; charset=UTF-8'}

    r = myRequests.post(url=url, headers=headers)
    r.encoding = 'utf-8'
    data = r.json()

    if DEBUG:
        with open(os.path.join(os.getcwd(), 'webwxgetcontact.json'), 'wb') as f:
            f.write(r.content)

    dic = data
    MemberList = dic['MemberList']

    # A set gives constant-time membership tests.
    SpecialUsers = {"newsapp", "fmessage", "filehelper", "weibo", "qqmail", "tmessage", "qmessage", "qqsync",
                    "floatbottle", "lbsapp", "shakeapp", "medianote", "qqfriend", "readerapp", "blogapp", "facebookapp",
                    "masssendapp",
                    "meishiapp", "feedsapp", "voip", "blogappweixin", "weixin", "brandsessionholder", "weixinreminder",
                    "wxid_novlwrv3lqwv11", "gh_22b87fa7cb3c", "officialaccounts", "notification_messages", "wxitil",
                    "userexperience_alarm"}

    def isFriend(Member):
        """Return True when the entry is an ordinary friend."""
        if Member['VerifyFlag'] & 8 != 0:  # Official account/service account
            return False
        if Member['UserName'] in SpecialUsers:  # Special account
            return False
        if '@@' in Member['UserName']:  # Group chat
            return False
        if Member['UserName'] == My['UserName']:  # Myself
            return False
        return True

    # Filter in a single pass; slice assignment keeps the same list object.
    MemberList[:] = [Member for Member in MemberList if isFriend(Member)]

    return MemberList


def syncKey():
    """Render the module-level SyncKey as a ``Key_Val|Key_Val|...`` string.

    Returns:
        One ``Key_Val`` pair per entry of ``SyncKey['List']``, joined by ``|``.
    """
    pairs = []
    for entry in SyncKey['List']:
        pairs.append('%s_%s' % (entry['Key'], entry['Val']))
    return '|'.join(pairs)


def syncCheck():
    """Ask the push server whether anything is waiting to be synchronised.

    Returns:
        The ``selector`` value of the reply, as a string.
    """
    query = {
        'skey': BaseRequest['Skey'],
        'sid': BaseRequest['Sid'],
        'uin': BaseRequest['Uin'],
        'deviceId': BaseRequest['DeviceID'],
        'synckey': syncKey(),
        'r': int(time.time()),
    }

    reply = myRequests.get(url=push_uri + '/synccheck?', params=query)
    reply.encoding = 'utf-8'

    # Expected reply: window.synccheck={retcode:"0",selector:"2"}
    match = re.search(
        r'window.synccheck={retcode:"(\d+)",selector:"(\d+)"}', reply.text)

    # Only the selector is reported to the caller.
    retcode, selector = match.groups()
    return selector


def webwxsync():
    """Call the ``webwxsync`` interface and store the new sync key.

    The module-level ``SyncKey`` is replaced by the ``SyncKey`` entry of the
    reply.

    Returns:
        The result of responseState() for the reply's ``BaseResponse``.
    """
    global SyncKey

    url = base_uri + '/webwxsync?lang=zh_CN&skey=%s&sid=%s&pass_ticket=%s' % (
        BaseRequest['Skey'], BaseRequest['Sid'], urllib.parse.quote_plus(pass_ticket))
    params = {
        'BaseRequest': BaseRequest,
        'SyncKey': SyncKey,
        # Bitwise complement of the current timestamp.
        'rr': ~int(time.time()),
    }
    headers = {'content-type': 'application/json; charset=UTF-8'}

    # The body is JSON, so send the JSON content type as webwxinit() does.
    r = myRequests.post(url=url, data=json.dumps(params), headers=headers)
    r.encoding = 'utf-8'
    data = r.json()

    dic = data
    SyncKey = dic['SyncKey']

    state = responseState('webwxsync', dic['BaseResponse'])
    return state


def heartBeatLoop():
    """Poll syncCheck() forever, once per second.

    webwxsync() is called whenever the reported selector is not '0'. This
    function never returns.
    """
    while True:
        if syncCheck() != '0':
            webwxsync()
        time.sleep(1)


def _orDefault(value, default):
    """Return *default* when *value* is the empty string, else *value*."""
    return default if value == '' else value


def _gbkSafe(text):
    """Drop every character of *text* that cannot be encoded as GBK."""
    return text.encode('gbk', 'ignore').decode('gbk')


def main():
    """Log in via QR code, then download avatars and export friends to CSV.

    Steps: create the shared HTTP session, obtain a uuid, show the QR code,
    wait for the login to be confirmed, initialise the session, fetch the
    contact list, save every friend's avatar as a JPEG file and write one row
    per friend to ``friend2.csv``. Returns early, after printing a message,
    when obtaining the uuid, logging in or initialising fails.
    """
    global myRequests

    # NOTE(review): this switches the ssl module's default HTTPS context to an
    # unverified one, i.e. certificate checks are disabled for code relying on
    # that default - confirm this is still required.
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

    headers = {
        'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.125 Safari/537.36'}
    myRequests = requests.Session()
    myRequests.headers.update(headers)

    if not getUUID():
        print('Failed to get uuid')
        return

    print('Getting the QR code image...')
    showQRImage()

    while waitForLogin() != '200':
        pass

    os.remove(QRImagePath)

    if not login():
        print('Login failed')
        return

    if not webwxinit():
        print('Initialization failed')
        return

    MemberList = webwxgetcontact()

    # Actually start the heartbeat. It is a daemon thread so that its endless
    # loop does not keep the process alive once main() returns.
    threading.Thread(target=heartBeatLoop, daemon=True).start()

    MemberCount = len(MemberList)
    print('There are %s friends in the address book' % MemberCount)
    print(MemberList)

    # The with-block closes the CSV file even if a download or write fails.
    with open('friend2.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['name', 'city', 'male', 'star', 'signature', 'remark', 'alias', 'nick'])
        for imageIndex, Member in enumerate(MemberList, 1):
            # NOTE(review): hard-coded Windows path; the directory must exist.
            imagePath = 'D:\\Python\\Demo\\image\\image' + str(imageIndex) + '.jpg'
            imageUrl = 'https://wx.qq.com' + Member['HeadImgUrl']
            r = myRequests.get(url=imageUrl, headers=headers)
            with open(imagePath, 'wb') as fileImage:
                fileImage.write(r.content)
            print('Downloading: ' + str(imageIndex) + "A friend's avatar")

            # Empty profile fields are replaced by a placeholder.
            city = _orDefault(Member['City'], 'nocity')
            name = _orDefault(Member['NickName'], 'noname')
            sign = _orDefault(Member['Signature'], 'nosign')
            remark = _orDefault(Member['RemarkName'], 'noremark')
            alias = _orDefault(Member['Alias'], 'noalias')
            nick = _orDefault(Member['NickName'], 'nonick')
            print(name, '  ^+*+^  ', city, '  ^+*+^  ', Member['Sex'], ' ^+*+^ ', Member['StarFriend'], ' ^+*+^ ', sign,
                  ' ^+*+^ ', remark, ' ^+*+^ ', alias, ' ^+*+^ ', nick)

            # Text columns are reduced to GBK-encodable characters first.
            writer.writerow([_gbkSafe(name), _gbkSafe(city),
                             Member['Sex'],
                             Member['StarFriend'], _gbkSafe(sign),
                             _gbkSafe(remark),
                             _gbkSafe(alias),
                             _gbkSafe(nick)])

# Script entry point: run the export only when executed directly, not on import.
if __name__ == '__main__':
    main()
    # Printed after main() returns, including when it returned early.
    print('The program has exited safely...')

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324618588&siteId=291194637