爬取微信自动登录并初始化

这两天从早上写代码到半夜,终于找回一点做程序员的感觉,人闲太久了真没劲,所以没事可以多定定计划,找一找奋斗的感觉挺好。

闲话不多说,今天这篇笔记主要记录爬取微信网页版的整个过程。

爬取分为几个步骤:

1.实现登录

2.实现用户初始化,获取最近联系人以及所有用户信息

3.实现对用户发送消息

4.实现对消息的接收

本文主要使用 Flask 框架、requests 库以及 bs4(BeautifulSoup)来实现爬取。

代码结构:

步骤:

1.创建名为 Wechat 的 Flask 项目,在 manage.py 里定义登录函数 login,以及轮询扫码状态的 check_login 和解析返回 XML 的 xml_parser:

#-*-coding:utf-8-*-
from flask import Flask,request,render_template,session,jsonify
import time
import requests,re
import json
from bs4 import BeautifulSoup
# Create the Flask application object that the route decorators below attach to.
app = Flask(__name__)
# Debug mode is switched on here.
# NOTE(review): presumably meant for local development only — confirm before deploying.
app.debug = True

# Key assigned to the app for Flask's cookie-based session, which the views use to keep login state.
# NOTE(review): hard-coded placeholder value — replace with a private random key outside of a demo.
app.secret_key='abcdefghigklmn'
@app.route('/login',methods=['GET','POST'])
def login():
    """Render the QR-code login page.

    Requests a fresh login ``uuid`` from the WeChat web login endpoint,
    stores it in the session under ``qcode`` (``check_login`` reads it from
    there) and renders ``login.html``, which shows the QR code for that uuid.

    Returns:
        The rendered ``login.html`` page for GET; a 405 response for POST;
        a 502 response when no uuid can be extracted from the upstream reply.
    """
    if request.method != 'GET':
        # The POST branch used to fall through and return None, which Flask
        # rejects as an invalid response; answer explicitly instead.
        return 'Method Not Allowed', 405

    # Millisecond timestamp used as the cache-busting "_" query parameter.
    ctime = str(int(time.time() * 1000))
    qcode_url = (
        'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb'
        '&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage'
        '&fun=new&lang=zh_CN&_={0}'
    ).format(ctime)
    res = requests.get(qcode_url)

    # Take the first uuid in the reply, but do not crash with IndexError
    # when the reply does not contain one.
    match = re.search(r'uuid = "(.*)";', res.text)
    if match is None:
        return 'Failed to obtain a login uuid from WeChat', 502

    qcode = match.group(1)
    session['qcode'] = qcode
    return render_template('login.html', qcode=qcode)


@app.route('/check_login')
def check_login():
    '''
    Send a GET request to check whether the QR code has been scanned / login confirmed.

    Example of the polled URL:
    https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=gbG3TQrkaA==&tip=0&r=-925318273&_=1529933650035

    :return: JSON object with a ``code`` field: 408 (nothing happened yet),
             201 (scanned; ``src`` then holds the user's avatar when the reply
             contains one) or 200 (login confirmed; the ticket fields and
             cookies have been saved in the session).
    '''
    response = {'code': 408}
    qcode = session.get('qcode')
    ctime = str(int(time.time() * 1000))
    check_url = 'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip=0&r=-925318273&_={1}'.format(qcode, ctime)
    req = requests.get(check_url)

    if "code=201" in req.text:
        # The user has scanned the code: pass the avatar on to the page.
        # re.search avoids an IndexError when the avatar is absent.
        avatar = re.search(r"userAvatar = '(.*)';", req.text)
        response['code'] = 201
        if avatar is not None:
            response['src'] = avatar.group(1)
    elif 'code=200' in req.text:
        # Login confirmed: follow the redirect URI to collect the ticket.
        target = re.search(r'redirect_uri="(.*)";', req.text)
        if target is not None:
            redirect_uri = target.group(1) + '&fun=new&version=v2'
            ticket_ret = requests.get(redirect_uri)
            # Keep both the parsed ticket fields and the cookies; the later
            # views read them back from the session.
            session['ticket_dict'] = xml_parser(ticket_ret.text)
            session['ticket_cookie'] = ticket_ret.cookies.get_dict()
            response['code'] = 200
        # Without a redirect URI the code stays 408, so the page keeps polling.
    return jsonify(response)



def xml_parser(text):
    """Flatten the ``<error>`` element of an XML reply into a dict.

    Args:
        text: Markup containing an ``<error>`` element whose direct children
            hold the values of interest.

    Returns:
        A dict mapping each direct child's tag name to its text. An empty
        dict is returned when ``text`` has no ``<error>`` element (the
        previous version raised AttributeError on ``None`` in that case).
    """
    soup = BeautifulSoup(text, 'html.parser')
    root = soup.find(name='error')
    if root is None:
        return {}
    # recursive=False: only direct children, not nested descendants.
    return {child.name: child.text for child in root.find_all(recursive=False)}
 

创建login.html,代码如下:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<div style="width:200px;margin:0 auto">
   <h1 style="text-align: center">  登陆</h1>
    <!-- QR code for the login uuid; swapped for the user's avatar once the code has been scanned.
         Uses the standard img element instead of the non-standard image tag. -->
    <img id="img" style="height:200px;width:200px;" src="https://login.wx.qq.com/qrcode/{{qcode}}" alt="">
</div>
<script src="/static/jquery-1.12.4.min.js"></script>
<script>
    $(function () {
        checkLogin();
    });

    // Poll /check_login until the server reports a confirmed login (code 200).
    function checkLogin(){
        $.ajax({
            url:'/check_login',
            type:'GET',
            dataType:'JSON',
            success:function(arg){
                if (arg.code === 200){
                    // Login confirmed: go to the user page.
                    location.href = '/index';
                    return;
                }
                if (arg.code === 201 && arg.src){
                    // Code scanned: show the user's avatar in place of the QR code.
                    $('#img').attr('src',arg.src);
                }
                // Not confirmed yet: ask again.
                checkLogin();
            },
            error:function(){
                // A failed request used to end the polling silently; retry after a short pause.
                setTimeout(checkLogin, 1000);
            }
        });
    }
</script>

</body>
</html>

2.用户初始化并获取用户头像代码实现:

# User initialisation step
@app.route('/index')
def index():
    """Initialise the WeChat web session and show the user's landing page.

    Posts the ticket fields saved at login to the ``webwxinit`` endpoint,
    stores the returned ``User`` and ``SyncKey`` entries in the session
    (``get_img``, ``send`` and ``get_msg`` read them back) and renders
    ``index.html`` with the full reply.

    Returns:
        The rendered ``index.html`` page, or a 401 response when the session
        holds no login ticket.
    """
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not ticket_dict:
        # Reached without a completed login: there is nothing to initialise.
        return 'Not logged in', 401

    init_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=-2132117709&pass_ticket={0}'.format(ticket_dict.get('pass_ticket'))
    data_dict = {
        'BaseRequest': {
            'DeviceID': "e292711087499063",
            'Sid': ticket_dict.get('wxsid'),
            # The ticket field is 'wxuin' (the key get_msg reads); the former
            # 'wxuid' lookup never matched, so Uin was always sent as None.
            'Uin': ticket_dict.get('wxuin'),
            'Skey': ticket_dict.get('skey'),
        },
    }
    init_ret = requests.post(
        url=init_url,
        # json= makes requests serialise the dict and send it as a JSON body.
        json=data_dict,
        cookies=ticket_cookie,
    )

    init_ret.encoding = 'utf-8'
    user_dict = init_ret.json()
    session['current_user'] = user_dict['User']
    session['synckey'] = user_dict['SyncKey']
    return render_template('index.html', user_dict=user_dict)

@app.route('/get_img')
def get_img():
    """Proxy the logged-in user's avatar image.

    Downloads the image named by the ``HeadImgUrl`` entry of the user stored
    in the session, sending the login cookies along, and returns its bytes.
    Example upstream URL:
    https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgeticon?seq=1182160498&username=@f04bb7e4d7821f504a4992ca85be95aa3e9957c7e3dfb224dc467af8639450e7&skey=@crypt_a1d89414_e0cf3503fac08d5ac1bf9fadcae86c0d

    Returns:
        ``(body, status, headers)`` carrying the image bytes, or a 401
        response when no user is stored in the session.
    """
    current_user = session.get('current_user')
    if not current_user:
        # Indexing session['current_user'] raised KeyError before login.
        return 'Not logged in', 401

    ticket_cookie = session.get('ticket_cookie')
    head_url = "https://wx.qq.com" + current_user["HeadImgUrl"]
    img_ret = requests.get(head_url, cookies=ticket_cookie, headers={'Content-Type': 'image/jpg'})

    # Returning bare bytes made Flask label the image with its default text
    # content type; forward the upstream type (JPEG when it is missing).
    content_type = img_ret.headers.get('Content-Type', 'image/jpeg')
    return img_ret.content, 200, {'Content-Type': content_type}

前端代码index.html如下:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!-- Landing page rendered by the /index view; user_dict is the template variable that view passes in -->
<h1>欢迎登陆{{user_dict.User.NickName}}</h1>
<div>
    <!-- The avatar is loaded through this app's own /get_img route -->
    <img src="/get_img" alt="">
    <h2>{{user_dict.User.NickName}}</h2>
    <h2>{{user_dict.User.UserName}}</h2>

</div>
<h3>最近登录联系人</h3>
<!-- One list item (nickname) per entry of user_dict.ContactList -->
<ul>
    {%  for user in user_dict.ContactList %}
    <li>{{ user.NickName}}</li>
    {% endfor %}
</ul>
<!-- Link to the /user_list route -->
<a href="/user_list">查看所有联系人</a>
</body>
</html>

3.到这里就能够实现自动登录并获取到最近联系人,接着我们获取所有联系人及信息

@app.route('/user_list')
def user_list():
    """Fetch the full contact list and render it.

    Calls the ``webwxgetcontact`` endpoint with the ticket fields and cookies
    saved at login and passes the decoded JSON reply to ``user_list.html``.

    Returns:
        The rendered ``user_list.html`` page, or a 401 response when the
        session holds no login ticket.
    """
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not ticket_dict:
        # Without this guard, .get() on None raised AttributeError before login.
        return 'Not logged in', 401

    # Millisecond timestamp sent as the "r" query parameter.
    ctime = int(time.time() * 1000)
    skey = ticket_dict.get('skey')
    pass_ticket = ticket_dict.get('pass_ticket')
    user_list_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket={0}&r={1}&seq=0&skey={2}".format(pass_ticket, ctime, skey)

    r1 = requests.get(user_list_url, cookies=ticket_cookie)
    # Decode the reply as UTF-8 rather than relying on the guessed encoding.
    r1.encoding = 'utf-8'
    wx_user_dict = r1.json()

    return render_template('user_list.html', wx_user_dict=wx_user_dict)

前端代码 user_list.html 如下:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
     <div>
         <!-- Left column: contact count followed by one line per contact -->
         <div style="width:30%;float:left;">
             <h3>{{wx_user_dict.MemberCount}}</h3>
             <ul>
                 {% for item in wx_user_dict.MemberList %}
                 <li>{{ item.NickName }} ===== {{item.UserName}}</li>
                 {% endfor %}
             </ul>
         </div>
         <!-- Right column, currently empty; the style attribute was missing its closing quote -->
         <div style="width:7%;float:right;">

         </div>
     </div>
</body>
</html>

4.接下来可以实现发送消息的功能

首先创建前端send.html页面

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<!-- Message form: the empty action posts back to the URL that served this page -->
<form action="" method="post">
    <!-- "to" and "content" are the field names the /send view reads from the submitted form -->
    <input type="text" name="to">
    <input type="text" name="content">
    <input type="submit" value="发送">
</form>
</body>
</html>

后台实现逻辑如下:

@app.route('/send',methods=['GET','POST'])
def send():
    """Show the message form (GET) or send a text message (POST).

    On POST the form fields ``to`` and ``content`` are sent to the
    ``webwxsendmsg`` endpoint on behalf of the user stored in the session.

    Returns:
        The rendered ``send.html`` form for GET; the raw text of the
        upstream reply for POST; a 401 response when the session holds no
        login data.
    """
    if request.method == "GET":
        return render_template('send.html')

    current_user = session.get('current_user')
    ticket_dict = session.get('ticket_dict')
    ticket_cookie = session.get('ticket_cookie')
    if not current_user or not ticket_dict:
        # Posting before login used to fail with KeyError / AttributeError.
        return 'Not logged in', 401

    pass_ticket = ticket_dict.get('pass_ticket')
    from_user = current_user["UserName"]
    to = request.form.get('to')
    content = request.form.get('content')
    # Integer millisecond timestamp, formatted as in login()/check_login();
    # str() of the raw float gave a fractional (on Python 2, exponent-notation) id.
    ctime = str(int(time.time() * 1000))
    msg_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?pass_ticket={0}'.format(pass_ticket)
    data_dict = {
        'BaseRequest':{
            'DeviceID': "e956888515941054",
            'Sid': ticket_dict.get('wxsid'),
            # The ticket field is 'wxuin' (the key get_msg reads); the former
            # 'wxuid' lookup never matched, so Uin was always sent as None.
            'Uin': ticket_dict.get('wxuin'),
            'Skey': ticket_dict.get('skey'),
        },
        'Msg':{
            'ClientMsgId':ctime,
            'LocalID':ctime,
            'FromUserName':from_user,
            'ToUserName':to,
            'Content':content,
            'Type':1
        },
        'scene':0
    }
    # ensure_ascii=False keeps non-ASCII text as real characters instead of
    # \uXXXX escapes; encode explicitly so the body is sent as UTF-8 bytes
    # rather than left to the HTTP layer's default text encoding.
    body = json.dumps(data_dict, ensure_ascii=False).encode('utf-8')
    ret = requests.post(
        url=msg_url,
        data=body,
        headers={'Content-Type': 'application/json; charset=UTF-8'},
        cookies=ticket_cookie,
    )
    return ret.text

5.实现获取消息代码:

首先定义get_msg.html文件

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
<ul>
    {# The raw message dict used to be dumped into an HTML comment here; removed so it no longer leaks into the page source. #}
    {% for item in content.AddMsgList %}
        <li>
            {{ item['Content']}} From-->  {{item['FromUserName']}}  To-->  {{item['ToUserName']}}
        </li>
    {% endfor %}
</ul>
<!-- The tag name was misspelled "scrip", so jQuery never loaded and the polling script below failed -->
<script src="/static/jquery-1.12.4.js"></script>
<script>
    $(function(){
        fetchMessage();
    });

    // Keep requesting /get_msg; each completed request triggers the next one.
    function fetchMessage(){
        $.ajax({
            url:'/get_msg',
            type: 'GET',
            success:function(arg){
                fetchMessage();
            },
            error:function(){
                // Do not stop polling after one failed request; retry after a short pause.
                setTimeout(fetchMessage, 1000);
            }
        });
    }
</script>
</body>
</html>

后台实现逻辑如下:

@app.route('/get_msg')
def get_msg():
    """Check for new messages and render them.

    First asks the ``synccheck`` endpoint whether anything new has arrived,
    using the sync key stored in the session. When the reply contains
    ``selector:"2"`` the messages are fetched from ``webwxsync`` and handed
    to ``get_msg.html``; otherwise the page is rendered with no messages.

    Returns:
        The rendered ``get_msg.html`` page, or a 401 response when the
        session holds no login data.
    """
    ticket_dict = session.get('ticket_dict')
    sync_key = session.get('synckey')
    if not ticket_dict or not sync_key:
        # Reached before login / initialisation: nothing to poll with.
        return 'Not logged in', 401
    ticket_cookie = session.get('ticket_cookie')

    # Check whether a new message has arrived. The sync key is sent as
    # "Key_Val" pairs joined with "|".
    sync_url = "https://webpush.wx.qq.com/cgi-bin/mmwebwx-bin/synccheck"
    sync_data_str = "|".join(
        "%s_%s" % (item['Key'], item['Val']) for item in sync_key['List']
    )
    sync_dict = {
        "r": int(time.time()),
        "skey": ticket_dict['skey'],
        "sid": ticket_dict['wxsid'],
        "uin": ticket_dict['wxuin'],
        "deviceid": "e590082815481369",
        "synckey": sync_data_str,
    }
    response_sync = requests.get(sync_url, params=sync_dict, cookies=ticket_cookie)

    # Default to "no messages" so the view always returns a page; it used to
    # fall off the end and return None, which Flask rejects.
    content = {}
    if 'selector:"2"' in response_sync.text:
        # Fetch the message content.
        pass_ticket = ticket_dict.get('pass_ticket')
        fetch_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid={0}&skey={1}&lang=zh_CN&pass_ticket={2}".format(ticket_dict['wxsid'], ticket_dict['skey'], pass_ticket)
        form_data = {
            'BaseRequest':{
                'DeviceID':"e616487029833324",
                'Sid':ticket_dict['wxsid'],
                'Skey':ticket_dict['skey'],
                'Uin':ticket_dict['wxuin'],
            },
            'SyncKey':sync_key,
            'rr':str(time.time())
        }
        # Send the login cookies like every other request in this file does;
        # this call was the only one that omitted them.
        response_fetch_msg = requests.post(fetch_msg_url, json=form_data, cookies=ticket_cookie)
        response_fetch_msg.encoding = 'utf-8'
        content = response_fetch_msg.json()

        # Remember the sync key returned with the messages so the next poll
        # continues from here instead of re-reading the same messages.
        new_sync_key = content.get('SyncKey')
        if new_sync_key and new_sync_key.get('List'):
            session['synckey'] = new_sync_key

    return render_template('get_msg.html', content=content)

最后:

# Start the Flask server only when this file is executed directly (not when it is imported).
if __name__ == '__main__':
    app.run()

执行代码即可实现微信网页版自动登录,获取联系人信息,发送并接收消息

猜你喜欢

转载自www.cnblogs.com/ymb2580/p/9288071.html