bing查询旁站脚本

 1 #!/usr/bin/env python
 2 # -*- coding: UTF-8 -*-
 3 #by i3ekr
 4 
 5 import re,optparse,sys,requests,time,os
 6 
 7 parse = optparse.OptionParser(usage="python %prog -i '127.0.0.1'",version="%prog 1.0")
 8 parse.add_option('-i','--ip',action='store',dest='ip',help='ip parse...')
 9 parse.add_option('-o','--out',action='store',dest='out',help='this parse is out result file exp:-o "/tmp/result.txt""')
10 parse.set_defaults(v=1.2)
11 options,args=parse.parse_args()
12 
13 
def gethtml(ip, page, timeout=10):
    """Fetch one Bing result page for the query ``ip:<ip>``.

    Args:
        ip: IP address to look up; concatenated verbatim into the query.
        page: Value sent as Bing's ``first`` parameter (converted with
            ``str``).
        timeout: Seconds to wait for the server before giving up; handed
            straight to ``requests.get`` so a stalled connection cannot
            block the script forever.

    Returns:
        The raw response body (``Response.content``) on success, or the
        placeholder string ``"访问错误"`` ("access error") when the request
        fails.
    """
    url = ("https://www.bing.com/search?q=ip:" + ip +
           "&qs=ds&first=" + str(page) + "&FORM=PERE4")
    try:
        return requests.get(url, timeout=timeout).content
    except requests.RequestException as e:
        # Report the failure instead of hiding it, but keep returning the
        # placeholder text: callers only regex-scan the result, so they
        # simply find no matches.
        sys.stderr.write("request failed: %s\n" % e)
        return "访问错误"
21 
def chongfu(src="./tmp.txt", dst="ok.txt"):
    """Append the unique lines of *src* to *dst*.

    Args:
        src: Path of the file holding one collected URL per line.
        dst: Path of the result file; it is opened in append mode, so
            existing content is kept.

    Each distinct, non-empty line is written once, terminated by
    ``"\\r\\n"``, in the order it first appears in *src*.

    Raises:
        IOError/OSError: if *src* cannot be read or *dst* cannot be written.
    """
    with open(src, "r") as f:
        # splitlines() copes with "\r\n" as well as "\n" (text-mode reads may
        # already have translated the line endings) and does not produce a
        # trailing empty element.
        lines = f.read().splitlines()

    seen = set()
    unique = []
    for line in lines:
        if line and line not in seen:
            seen.add(line)
            unique.append(line)

    # Open the output once instead of once per line.
    with open(dst, "a") as out:
        out.writelines(entry + "\r\n" for entry in unique)
29 
def geturl(html, tmp_path="./tmp.txt"):
    """Extract the text of every ``<cite>`` element and append it to a file.

    Args:
        html: Page source to scan. A byte string is accepted as well as
            text; on Python 3 bytes are decoded as UTF-8 first because a
            text pattern cannot be matched against bytes.
        tmp_path: File that receives the matches, one per line terminated
            by ``"\\r\\n"``. Opened in append mode.

    Returns:
        The list of matched strings (empty when nothing matched). The list
        is also printed, as before.
    """
    # On Python 2 ``bytes`` is ``str``, so this branch only runs on Python 3.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    urls = re.findall(r"(?<=<cite>).*?(?=</cite>)", html)
    print(urls)

    # Only touch the file when there is something to write, and open it once
    # rather than once per match.
    if urls:
        with open(tmp_path, "a") as f:
            f.writelines(u + "\r\n" for u in urls)
    return urls
40 
def result_page(page_html=None):
    """Work out how many result pages the reported hit count spans.

    The hit count is read from the ``sb_count`` span of the page and divided
    by 10 (results per page), rounding up so a final partial page is counted.

    Args:
        page_html: Page source to inspect (text or bytes). When omitted, the
            module-level variable ``html`` is used, which is how the script's
            main section calls this function.

    Returns:
        Number of pages as an ``int``.

    Exits:
        Prints "没有与此相关的结果" ("no related results") and terminates the
        process when no hit count can be found.
    """
    if page_html is None:
        page_html = globals().get("html", "")
    # On Python 2 ``bytes`` is ``str``, so this branch only runs on Python 3.
    if isinstance(page_html, bytes) and not isinstance(page_html, str):
        page_html = page_html.decode("utf-8", "replace")

    found = re.search(
        r"<span class=\"sb_count\">(.*?)</span><span class=\"ftrB\"",
        page_html,
    )
    # Take the last number in the span text so that surrounding words and
    # thousands separators do not matter.
    numbers = re.findall(r"[0-9][0-9,]*", found.group(1)) if found else []
    if not numbers:
        print("没有与此相关的结果")
        sys.exit()

    total = int(numbers[-1].replace(",", ""))
    # Ceiling division: 1-9 hits still occupy one page.
    return (total + 9) // 10
50 
51 
52 
53 
if __name__ == "__main__":
    # Script entry point: look up every site Bing lists for the given IP,
    # collect the <cite> entries page by page, then de-duplicate them.
    print("""
            =========================
            [+] by i3ekr
            [+] Blog nul1.cnblogs.com
            [+] Time 2018/6/13
            =========================
    """)

    # Check the parsed option rather than the raw argument count, so that
    # "--ip=1.2.3.4" is accepted and a missing IP shows the help text.
    if not options.ip:
        parse.print_help()
        sys.exit(1)

    ip = options.ip

    # result_page() reads the module-level name ``html``, so the first page
    # must be bound to it before the call.
    html = gethtml(ip, 1)
    pages = int(result_page())

    pg = 1
    for i in range(pages):
        # The first page was already fetched to read the hit count.
        if i:
            html = gethtml(ip, pg)
        geturl(html)
        print("第[%s]页" % (i + 1))
        # result_page() counts 10 results per page, so the offset advances
        # by the same amount.
        pg += 10

    # geturl() only creates the temporary file when it found something.
    if os.path.exists("./tmp.txt"):
        chongfu()
        os.remove("./tmp.txt")
    sys.exit()

猜你喜欢

转载自www.cnblogs.com/nul1/p/9189692.html