# Subdomain enumeration tool: collects subdomains of a target via Bing and Baidu search results.
import sys
from threading import Thread
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
def bing_search(site, page):
    """Walk Bing (cn.bing.com) search results for ``site:`` queries and collect
    unique ``scheme://netloc`` origins into the module-level ``Subdomain`` list.

    :param site: target domain, e.g. ``baidu.com`` (used in a ``site:`` query).
    :param page: number of result pages to fetch (10 results per page).
    """
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '85.0.4183.102 Safari/537.36',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'en-US,en;q=0,5',
        'Referer': 'https://cn.bing.com/search?q=site%3Abaidu.com&qs=n&form=QBLH&sp=-1&pq=site%3Abaidu.com'
                   '&sc=0-14&sk=&cvid=852BA524E035477EBE906058D68F4D70',
        'cookie': 'SRCHD=AF=WNSGPH; SRCHUID=V=2&GUID=D1F8852A6B034B4CB229A2323F653242&dmnchg=1; _EDGE_V=1; '
                  'MUID=304D7AA1FB94692B1EB575D7FABA68BD; MUIDB=304D7AA1FB94692B1EB575D7FABA68BD; '
                  '_SS=SID=1C2F6FA53C956FED2CBD60D33DBB6EEE&bIm=75:; ipv6=hit=1604307539716&t=4; '
                  '_EDGE_S=F=1&SID=1C2F6FA53C956FED2CBD60D33DBB6EEE&mkt=zh-cn; SRCHUSR=DOB=20200826&T=1604303946000;'
                  ' SRCHHPGUSR=HV=1604303950&WTS=63739900737&CW=1250&CH=155&DPR=1.5&UTC=480&DM=0&BZA=0&BRW=N&BRH=S'
    }
    for i in range(1, int(page) + 1):
        # Bing's result offset is 1-based: first=1, 11, 21, ...
        url = ("https://cn.bing.com/search?q=site:" + site
               + "&go=Search&qs=ds&first=" + str((i - 1) * 10 + 1))
        try:
            # Timeout so a stalled request cannot hang this worker thread forever.
            html = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException:
            continue  # best effort: skip a failed page instead of killing the thread
        soup = BeautifulSoup(html.content, 'html.parser')
        for h2 in soup.findAll('h2'):
            a = h2.a  # result titles are <h2><a href=...>; some <h2> carry no link
            if a is None:
                continue  # original code crashed here with AttributeError
            link = a.get('href')
            if not link:
                continue
            parsed = urlparse(link)
            domain = parsed.scheme + "://" + parsed.netloc
            if domain not in Subdomain:
                Subdomain.append(domain)
def baidu_search(site, page):
    """Walk Baidu search results for ``site:`` queries, follow each result's
    redirect link to the real URL, and collect unique ``scheme://netloc``
    origins into the module-level ``Subdomain`` list.

    :param site: target domain, e.g. ``baidu.com`` (used in a ``site:`` query).
    :param page: number of result pages to fetch (10 results per page).
    """
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '85.0.4183.102 Safari/537.36',
        'Referer': 'https://www.baidu.com/s?wd=nsfocus'
    }
    # One session reused for every request (the original rebuilt it per page
    # and bypassed it entirely for the per-result fetches).
    conn = requests.session()
    for i in range(1, int(page) + 1):
        baidu_url = ("https://www.baidu.com/s?wd=site:" + site
                     + "&pn=" + str((i - 1) * 10)
                     + "&oq=site:" + site + "&ie=utf-8")
        try:
            resp = conn.get(baidu_url, headers=headers, timeout=10)
        except requests.RequestException:
            continue  # best effort: skip a failed page instead of killing the thread
        soup = BeautifulSoup(resp.text, 'lxml')
        for h3 in soup.findAll('h3'):
            a = h3.find('a')
            if a is None:
                continue  # original code crashed here with AttributeError
            href = a.get('href')
            if not href:
                continue
            try:
                # Baidu result hrefs are redirect links; follow them so
                # resp_site.url is the real destination URL.
                resp_site = conn.get(href, headers=headers, timeout=10)
            except requests.RequestException:
                continue
            parsed = urlparse(resp_site.url)
            domain = parsed.scheme + "://" + parsed.netloc
            if domain not in Subdomain:
                Subdomain.append(domain)
def read_file(path=r'c:\users\xxxx\desktop\xxx.txt'):
    """Print each line of *path* stripped of surrounding whitespace.

    :param path: file to read; defaults to the original hard-coded path for
        backward compatibility.
    :return: list of the stripped lines, in file order (the original returned
        nothing, so this is a backward-compatible addition).
    """
    lines = []
    with open(path, mode='r') as f:
        for line in f:  # iterate lazily instead of materializing via readlines()
            stripped = line.strip()
            print(stripped)
            lines.append(stripped)
    return lines
def write_file(path=r'c:\users\xxx\desktop\xxx.txt', domains=None):
    """Write one collected domain per line to *path*.

    :param path: output file; defaults to the original hard-coded path for
        backward compatibility.
    :param domains: iterable of domain strings; defaults to the module-level
        ``Subdomain`` list filled by the search threads.
    """
    if domains is None:
        domains = Subdomain  # fall back to the shared crawler result list
    with open(path, mode='w') as f:
        for domain in domains:
            f.write(domain + '\n')
if __name__ == '__main__':
    # Expect exactly two CLI arguments: target domain and number of pages.
    if len(sys.argv) != 3:
        print("Usage: %s baidu.com 10" % sys.argv[0])
        sys.exit(-1)
    domain, num = sys.argv[1], sys.argv[2]

    # Shared result list that both search threads append to.
    Subdomain = []

    workers = [
        Thread(target=bing_search, args=(domain, num)),
        Thread(target=baidu_search, args=(domain, num)),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Persist everything the two crawlers collected.
    write_file()