坦白说自动获取有效好友

收到的QQ坦白说通常会提供两条信息,比如‘一个天蝎座的男生’,‘一个认识5年的女生’,那如何找出符合以上条件的好友呢?如果你有几百甚至几千个好友,一个一个找不现实。自动寻找方法当然是通过网络爬虫获取信息再分析过滤实现。
思路:
1. 登录自己QQ空间,获取所有好友QQ号
2. 通过好友QQ号,爬好友空间,获得好友的星座、性别、年龄等个人信息
3. 通过自己QQ号和好友QQ号,获取认识天数,因为认识天数是由你和好友的两个QQ号共同决定的
4. 如果遇到好友空间不让你访问,这个不影响以上信息,因为就算你不能访问好友空间,也可以看见好友的个人信息
5. 如果好友没有填写信息,你当然获取不到相关信息
编程语言:Python
用到的库:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import re 
import datetime

用到的知识:
1. 通过网址访问云端获取好友信息
2. 用selenium和浏览器配合抓取网页
3. 用正则表达式等处理网页数据,获得有效信息

注意:频繁抓取网页会被腾讯当成违规操作,导致封号两个小时。我的QQ已经被封号两次了,现在还在封号中。每获取5个好友信息就退出登录,等两分钟后再登录,也许(只是也许)可以解决问题。

代码如下(改进登录时间):
下载代码也可以在链接:https://download.csdn.net/download/gengli2017/10619273
对于想了解代码含义的初学者,我会尽快补写一份代码讲解说明。
我自己初学时也找不到相关资料。

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import re 
import datetime

def frankSpeak(account, password):
    """Collect profile data for every friend and print the ones matching a hint.

    Logs in with ``loginQQ``, fetches the friend list with ``getFriends``,
    then for each friend calls ``getFriendInfo`` and ``getKnownTime``.  The
    friends whose constellation is '双鱼座' and whose gender is '0' are
    printed first, followed by the full table of collected records.

    Args:
        account: QQ number used to log in; also used as the owner's QQ number
            when building the friend-list and friendship URLs.
        password: Password for ``account``.

    Returns:
        None.  All results are written to stdout.
    """
    myQQ = account
    driver = loginQQ(account, password)
    gtk, g_qzonetoken = getGtk_Token(driver)
    friendsDict = getFriends(driver, myQQ, gtk, g_qzonetoken)

    # Each record is
    # (QQ number, remark, age, gender, constellation, known time, province, city)
    # with gender '1' = male, '0' = female, '2' = not stated.
    friendInfoVector = []
    totalFriends = len(friendsDict)
    for fetched, (friendQQ, remark) in enumerate(friendsDict.items(), start=1):
        age, city, constellation, gender, province = getFriendInfo(
            driver, friendQQ, gtk, g_qzonetoken)
        knownTime = getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken)
        friendInfoVector.append(
            (friendQQ, remark, age, gender, constellation, knownTime, province, city))

        # To reduce the risk of the account being locked for crawling too
        # fast, close the session after every 5 friends, wait two minutes and
        # log in again.  Skipped after the last friend, where a fresh session
        # would never be used.
        if fetched % 5 == 0 and fetched < totalFriends:
            driver.quit()  # do not leave the old browser running
            time.sleep(120)
            driver = loginQQ(account, password)
            gtk, g_qzonetoken = getGtk_Token(driver)

    driver.quit()

    print('一个双鱼座的女生')
    for friendInfo in friendInfoVector:
        # Test the gender stored in this record (index 3), not the loop
        # variable left over from the last crawled friend.
        if friendInfo[4] == '双鱼座' and friendInfo[3] == '0':
            print(friendInfo[1])

    print('所有好友信息')
    print('(QQ号, 备注, 年龄, 性别, 星座, 认识时间, 省份, 城市)')
    print('女:性别=0   男:性别=1   未注明:性别=2')
    for friendInfo in friendInfoVector:
        print(friendInfo)

 #  print(gtk)
 #  print(g_qzonetoken)
 #  print(friendsDick)


#登录QQ,获取QQ页面 
def loginQQ(account, password):
    """Open Chrome, submit the Qzone login form and return the driver.

    Args:
        account: QQ number typed into the element with id ``u``.
        password: Password typed into the element with id ``p``.

    Returns:
        The ``webdriver.Chrome`` instance after the login button was clicked.
        The caller owns it and is responsible for calling ``quit()``.

    NOTE(review): the ``find_element_by_id`` calls are the Selenium 3 API
    this script was written against; confirm the installed Selenium version
    still provides them.
    """
    chrome_options = Options()
    chrome_options.add_argument("--disable-infobars")
    # The options only take effect when they are handed to the constructor.
    driver = webdriver.Chrome(options=chrome_options)
    driver.get('https://qzone.qq.com/')

    # The login form lives inside an iframe named 'login_frame'.
    driver.switch_to.frame('login_frame')
    # Presumably switches the form from QR-code to account/password login.
    driver.find_element_by_id('switcher_plogin').click()

    accountBox = driver.find_element_by_id('u')
    accountBox.clear()
    accountBox.send_keys(account)

    passwordBox = driver.find_element_by_id('p')
    passwordBox.clear()
    passwordBox.send_keys(password)

    driver.find_element_by_id('login_button').click()
    # Fixed pause after submitting, before the caller reads cookies/page source.
    time.sleep(2)
    return driver


#从Cookie获取GTK
def getGTKFromCookie(cookie):
    """Compute the g_tk value from the ``p_skey`` entry of a cookie dict.

    Starting from 5381, every character of ``cookie['p_skey']`` is folded in
    as ``acc = acc * 33 + ord(ch)``; the result is masked to 31 bits.

    Args:
        cookie: Mapping of cookie names to values; must contain ``'p_skey'``.

    Returns:
        The hash as a non-negative int no larger than 0x7fffffff.

    Raises:
        KeyError: If ``cookie`` has no ``'p_skey'`` entry.
    """
    acc = 5381
    for ch in cookie['p_skey']:
        # acc + (acc << 5) is the same as acc * 33 for Python integers.
        acc = acc * 33 + ord(ch)
    return acc & 0x7fffffff


#获取gtk和g_qzonetoken    
def getGtk_Token(driver):
    """Return ``(gtk, g_qzonetoken)`` for the session held by ``driver``.

    ``gtk`` is computed by ``getGTKFromCookie`` from the browser's cookies.
    ``g_qzonetoken`` is the first double-quoted string returned by the
    ``window.g_qzonetoken = (function(){ try{return ...;} catch(e)`` snippet
    in the current page source.

    Args:
        driver: Selenium driver providing ``get_cookies()`` and ``page_source``.

    Returns:
        A ``(gtk, g_qzonetoken)`` tuple: an int and a str.

    Raises:
        KeyError: From ``getGTKFromCookie`` when there is no ``p_skey`` cookie.
        ValueError: If the page source does not contain the g_qzonetoken
            snippet (for example because the login did not complete).
    """
    # Flatten the list of cookie records into a name -> value dict.
    cookie = {elem['name']: elem['value'] for elem in driver.get_cookies()}
    gtk = getGTKFromCookie(cookie)

    html = driver.page_source
    # Raw string so the regex escapes are not read as string escapes.
    tokenMatch = re.search(
        r'window\.g_qzonetoken = \(function\(\)\{ try\{return (.*?);\} catch\(e\)',
        html)
    if tokenMatch is None:
        raise ValueError('g_qzonetoken not found in page source; is the login complete?')

    # The captured expression is the quoted token; keep the text between the
    # first pair of double quotes.
    g_qzonetoken = tokenMatch.group(1).split('"')[1]
    return gtk, g_qzonetoken


#获得好友列表 
def getFriends(driver, myQQ, gtk, g_qzonetoken):
    """Fetch the friend list of ``myQQ`` and return it as a dict.

    Loads the ``friend_hat_get.cgi`` URL in ``driver`` and scans the page
    source for entries of the form ``"<id>":{<newline>"realname":"<name>"}``.

    Args:
        driver: Selenium driver providing ``get(url)`` and ``page_source``.
        myQQ: QQ number whose friends are listed (sent as ``uin``).
        gtk: g_tk value for the session.
        g_qzonetoken: qzonetoken value for the session.

    Returns:
        Dict mapping each friend's QQ number (str) to the ``realname`` text
        (str) found for it.  Empty if nothing in the page matches.
    """
    friendUrl = ('https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/friend_hat_get.cgi?hat_seed=1&uin='
                 + str(myQQ) + '&fupdate=1&g_tk=' + str(gtk)
                 + '&qzonetoken=' + str(g_qzonetoken) + '&g_tk=' + str(gtk))

    driver.get(friendUrl)
    friend_list = str(driver.page_source)

    # Raw string so that \d, \{ and \n reach the regex engine unchanged
    # (\n is interpreted by the regex as a newline).
    pattern = re.compile(r'"(.\d*)":\{\n"realname":"(.*?)"}', re.S)
    return {str(qq): str(name) for qq, name in pattern.findall(friend_list)}


#通过好友QQ号获取好友信息    
def getFriendInfo(driver, friendQQ, gtk, g_qzonetoken):
    """Load a friend's mobile profile and pull out five profile fields.

    Args:
        driver: Selenium driver providing ``get(url)`` and ``page_source``.
        friendQQ: QQ number of the friend (sent as ``hostuin``).
        gtk: g_tk value for the session.
        g_qzonetoken: qzonetoken value for the session.

    Returns:
        A tuple of strings ``(age, city, constellation, gender, province)``
        taken from the first match in the page, or
        ``('-1', 'NULL', 'NULL', '2', 'NULL')`` when the page does not match.
        Gender: '1' = male, '0' = female, '2' = not stated.
    """
    profileUrl = ''.join((
        'https://mobile.qzone.qq.com/profile_get?qzonetoken=',
        str(g_qzonetoken),
        '&g_tk=',
        str(gtk),
        '&format=json&hostuin=',
        str(friendQQ),
    ))

    driver.get(profileUrl)
    pageText = str(driver.page_source)

    # The fields must appear in this order in the page for the match to succeed.
    profileRegex = re.compile( r'"age":(\d*).*"city":"(\w*)".*"constellation":"(\w*).*"gender":(-?\d*).*"province":"(\w*)"')
    firstHit = profileRegex.search(pageText)
    if firstHit is None:
        return ('-1','NULL','NULL','2','NULL')
    return firstHit.groups()
    #usefulInfo=[(age,city,constellation,gender,province)]
    #usefulInfo[0]=(age,city,constellation,gender,province)
    #male:gender=1, famale:gender=0, else:gender=2


#通过自己QQ和好友QQ获取认识的时间    
def getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken):
    """Return how long ``myQQ`` and ``friendQQ`` have been friends, as text.

    Loads the ``cgi_friendship`` URL in ``driver`` and reads the
    ``addFriendTime`` and ``systemTime`` Unix timestamps from the page
    source.  Both are converted to local calendar dates and compared by the
    coarsest unit in which they differ.

    Args:
        driver: Selenium driver providing ``get(url)`` and ``page_source``.
        myQQ: The logged-in user's QQ number (sent as ``activeuin``).
        friendQQ: The friend's QQ number (sent as ``passiveuin``).
        gtk: g_tk value for the session.
        g_qzonetoken: qzonetoken value for the session.

    Returns:
        ``'<n>年'`` when the calendar years differ, otherwise ``'<n>月'`` when
        the months differ, otherwise ``'<n>日'``.  ``n`` is the difference of
        the respective calendar fields plus one (the started unit is counted).
        Returns ``'NULL'`` when either timestamp is missing from the page.
    """
    knownDaysUrl = ('https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/friendship/cgi_friendship?activeuin='
                    + str(myQQ) + '&passiveuin=' + str(friendQQ)
                    + '&situation=1&isCalendar=1&g_tk=' + str(gtk)
                    + '&qzonetoken=' + str(g_qzonetoken) + '&g_tk=' + str(gtk))

    driver.get(knownDaysUrl)
    knownDaysInfo = str(driver.page_source)

    beginMatch = re.search(r'"addFriendTime":(\d+)', knownDaysInfo)
    lastMatch = re.search(r'"systemTime":(\d+)', knownDaysInfo)
    if beginMatch is None or lastMatch is None:
        # Same placeholder that getFriendInfo uses for unavailable fields.
        return 'NULL'

    beginTime = datetime.date.fromtimestamp(int(beginMatch.group(1)))
    lastTime = datetime.date.fromtimestamp(int(lastMatch.group(1)))

    if lastTime.year > beginTime.year:
        return str(lastTime.year - beginTime.year + 1) + '年'
    # Same year: compare the current month with the month the friend was added.
    if lastTime.month > beginTime.month:
        return str(lastTime.month - beginTime.month + 1) + '月'
    return str(lastTime.day - beginTime.day + 1) + '日'


# Run the program: replace 'QQNumber' with the QQ account to log in with and
# 'password' with its password.  Guarded so that importing this module does
# not start a browser and log in.
if __name__ == '__main__':
    frankSpeak('QQNumber', 'password')

猜你喜欢

转载自blog.csdn.net/gengli2017/article/details/81940707