【信息安全】工具篇--子域名收集

工具篇--子域名收集

# 导入模块
import sys
from threading import Thread
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup


# bing搜索子域名
# Collect subdomains of `site` from Bing web-search results.
def bing_search(site, page):
    """Scrape Bing result pages for subdomains of *site*.

    Args:
        site: domain to query, e.g. "baidu.com" (searched as ``site:<domain>``).
        page: number of result pages to fetch (Bing returns 10 results/page).

    Side effects:
        Appends each newly seen "scheme://netloc" string to the module-level
        ``Subdomain`` list (created in the ``__main__`` block).
    """
    headers = {
        # Browser-like UA plus captured cookies so Bing serves normal HTML
        # (not a bot-challenge page). Cookie values are from a real session.
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '85.0.4183.102 Safari/537.36',
        'Accept-Encoding': 'gzip,deflate',
        'Accept-Language': 'en-US,en;q=0,5',
        'Referer': 'https://cn.bing.com/search?q=site%3Abaidu.com&qs=n&form=QBLH&sp=-1&pq=site%3Abaidu.com'
                   '&sc=0-14&sk=&cvid=852BA524E035477EBE906058D68F4D70',
        'cookie': 'SRCHD=AF=WNSGPH; SRCHUID=V=2&GUID=D1F8852A6B034B4CB229A2323F653242&dmnchg=1; _EDGE_V=1; '
                  'MUID=304D7AA1FB94692B1EB575D7FABA68BD; MUIDB=304D7AA1FB94692B1EB575D7FABA68BD; '
                  '_SS=SID=1C2F6FA53C956FED2CBD60D33DBB6EEE&bIm=75:; ipv6=hit=1604307539716&t=4; '
                  '_EDGE_S=F=1&SID=1C2F6FA53C956FED2CBD60D33DBB6EEE&mkt=zh-cn; SRCHUSR=DOB=20200826&T=1604303946000;'
                  ' SRCHHPGUSR=HV=1604303950&WTS=63739900737&CW=1250&CH=155&DPR=1.5&UTC=480&DM=0&BZA=0&BRW=N&BRH=S'
    }
    for i in range(1, int(page) + 1):
        # Bing paginates with `first=` — the 1-based index of the first result.
        url = ("https://cn.bing.com/search?q=site:" + site
               + "&go=Search&qs=ds&first=" + str((i - 1) * 10 + 1))
        html = requests.get(url, headers=headers)
        soup = BeautifulSoup(html.content, 'html.parser')

        # Organic result titles are <h2> elements wrapping an <a href=...>.
        for h2 in soup.findAll('h2'):
            anchor = h2.a
            if anchor is None:
                # Non-result <h2> (e.g. a section heading) — original code
                # crashed here with AttributeError; skip it instead.
                continue
            link = anchor.get('href')
            if not link:
                continue
            parsed = urlparse(link)
            domain = parsed.scheme + "://" + parsed.netloc
            if domain not in Subdomain:
                Subdomain.append(domain)

# 百度搜索
# Collect subdomains of `site` from Baidu web-search results.
def baidu_search(site, page):
    """Scrape Baidu result pages for subdomains of *site*.

    Baidu result links are redirect URLs, so each one is fetched and the
    final ``resp.url`` is parsed to recover the real subdomain.

    Args:
        site: domain to query, e.g. "baidu.com" (searched as ``site:<domain>``).
        page: number of result pages to fetch (Baidu paginates by ``pn``).

    Side effects:
        Appends each newly seen "scheme://netloc" string to the module-level
        ``Subdomain`` list (created in the ``__main__`` block).
    """
    headers = {
        # Browser-like UA/Referer so Baidu serves normal HTML results.
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '85.0.4183.102 Safari/537.36',
        'Referer': 'https://www.baidu.com/s?wd=nsfocus'
    }

    # One session for all pages (the original rebuilt it per iteration);
    # this also reuses the underlying TCP connection.
    conn = requests.session()
    for i in range(1, int(page) + 1):
        # Build the paginated search URL (`pn` is the 0-based result offset).
        baidu_url = ("https://www.baidu.com/s?wd=site:" + site
                     + "&pn=" + str((i - 1) * 10)
                     + "&oq=site:" + site + "&ie=utf-8")
        resp = conn.get(baidu_url, headers=headers)
        soup = BeautifulSoup(resp.text, 'lxml')
        for h3 in soup.findAll('h3'):
            anchor = h3.find('a')
            if anchor is None:
                # Non-result <h3> — original crashed with AttributeError here.
                continue
            href = anchor.get('href')
            if not href:
                continue
            # Follow Baidu's redirect link to obtain the true destination URL.
            resp_site = requests.get(href, headers=headers)
            parsed = urlparse(resp_site.url)
            domain = parsed.scheme + "://" + parsed.netloc
            # Record each subdomain only once.
            if domain not in Subdomain:
                Subdomain.append(domain)



# 从保存的文件中读取内容
# Print previously saved results from a file.
def read_file(path=r'c:\users\xxxx\desktop\xxx.txt'):
    """Print each line of *path* with surrounding whitespace stripped.

    Args:
        path: file to read; defaults to the original hard-coded location
            so existing no-argument callers keep working.
    """
    with open(path, mode='r') as f:
        # Iterate the file object directly instead of readlines() —
        # same output, no full-file list in memory.
        for line in f:
            print(line.strip())


#    将结果写入文件
def write_file():
    with open(r'c:\users\xxx\desktop\xxx.txt', mode='w') as f:
        for domain in Subdomain:
            f.write(domain)
            f.write('\n')


if __name__ == '__main__':
    # CLI contract: <script> <target-domain> <pages-per-engine>.
    # Guard clause: bail out early on a bad argument count.
    if len(sys.argv) != 3:
        print("Usage: %s baidu.com 10" % sys.argv[0])
        sys.exit(-1)
    domain, num = sys.argv[1], sys.argv[2]
    # Shared accumulator both scraper threads append into.
    Subdomain = []
    # Query Bing and Baidu concurrently, then wait for both to finish.
    workers = [
        Thread(target=bing_search, args=(domain, num)),
        Thread(target=baidu_search, args=(domain, num)),
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    # Persist the combined results.
    write_file()

猜你喜欢

转载自blog.csdn.net/Nicky_Zheng/article/details/109469879