需求:想通过脚本实现爬取fofa查询后的目标ip地址,然后保存在本地。昨天花了几个小时写的,还有些不足之处,以后再慢慢完善增加功能,比如用多线程实现之类的,不过现在已经够用了。
fofa_ip.py运行结果如下
脚本内容如下
import requests
from requests.packages import urllib3
urllib3.disable_warnings()
import base64
import argparse
from lxml import etree
import os
def para():
    """Parse command-line options and publish them as module globals.

    Sets the globals read by pag_num()/fofa():
      word   -- the fofa search expression
      cookie -- session cookie of a logged-in fofa account
      pro    -- optional proxy "host:port"; '' when not supplied
    """
    global word, cookie, pro
    parser = argparse.ArgumentParser(description='fofa查询脚本')
    parser.add_argument('--word', type=str, required=True, help='fofa查询的语句')
    parser.add_argument('--cookie', type=str, required=True, help='登录fofa账号后的cookie')
    parser.add_argument('--pro', type=str, required=False, help='代理服务器ip和端口(可选参数),如:xx.xx.xx.xx:7890')
    opts = parser.parse_args()
    word = opts.word
    cookie = opts.cookie
    # argparse yields None when --pro is omitted; normalise to '' so the
    # proxies mapping passed to requests stays harmless.
    pro = opts.pro if opts.pro else ''
    print('[+]author:xcc')
    print("[-]fofa查询,批量获取目标ip",)
#获取页面数量
def pag_num():
    """Probe the first fofa result page and ask how many pages to crawl.

    Sets the globals consumed by fofa():
      s       -- base64-encoded query string (fofa's qbase64 parameter)
      headers -- request headers carrying the login cookie
      num     -- page count entered by the user, converted to int

    Exits the process on any request/parse error or non-integer input.
    Reads globals ``word``, ``cookie`` and ``pro`` set by para().
    """
    global s, headers, num
    # fofa expects the search expression base64-encoded in qbase64.
    s = (base64.b64encode(word.encode('utf-8'))).decode('utf-8')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; PCRT00 Build/N2G48H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/66.0.3359.158 Safari/537.36 fanwe_app_sdk sdk_type/android sdk_version_name/4.0.1 sdk_version/2020042901 screen_width/720 screen_height/1280',
        'cookie': cookie
    }
    # Probe page 1 (the original hard-coded page=4, which fails for
    # queries with fewer than four result pages: the pager xpath then
    # matches nothing and the [-1] index raises).
    url = f"https://fofa.so/result?page=1&qbase64={s}"
    try:
        # pro is always a string ('' when no proxy was given), so it can
        # be passed straight into the proxies mapping.
        text = requests.get(url=url, headers=headers, verify=False, timeout=15,
                            proxies={'https': pro}).text
        #获取页面数量
        tree = etree.HTML(text)
        # The last pager link at the bottom of the page holds the total
        # number of result pages.
        pag_num = tree.xpath('//*[@id="will_page"]/a/text()')[-1]
        print('fofa爬取页面数量为:' + pag_num)
        num = input('请输入您想要爬取的页面数量:')
        if num.isdigit():
            num = int(num)
        else:
            print('请输入一个整数')
            os._exit(0)
    except Exception as error:
        print('发生错误:', error)
        os._exit(0)
def fofa():
    """Crawl ``num`` fofa result pages and save the target addresses.

    Reads the globals set by para() and pag_num(): ``word``, ``s``,
    ``headers``, ``pro`` and ``num``.  Writes the deduplicated addresses,
    one per line, to '<word>.txt' in the current directory.  Exits the
    process on the first request/parse error.
    """
    file_name = word + '.txt'
    # Start from a clean file so reruns do not append to stale results.
    try:
        if os.path.exists(file_name):
            os.remove(file_name)
    except Exception as error:
        print('发生错误:', error)
        os._exit(0)
    ip_list = []
    for i in range(1, num + 1):
        #获取页面源码
        url = f"https://fofa.so/result?page={i}&qbase64={s}"
        try:
            text = requests.get(url=url, headers=headers, verify=False, timeout=15,
                                proxies={'https': pro}).text
            tree = etree.HTML(text)
            #提取页面ip地址
            r = tree.xpath('//*[@id="ajax_content"]/div[@class="right-list-view-item clearfix"]//div[@class="re-domain"]/a/@href')
            # Keep only entries that look like full URLs (contain '//');
            # scheme-less entries are skipped, matching the original filter.
            ip_list.extend(str(n) for n in r if '//' in str(n))
            print(f'[-]第{i}页爬取完毕!')
        except Exception as error:
            print('[-]发生错误:', error)
            os._exit(0)
    # Deduplicate; set() does not preserve order, same as the original.
    ip_list_new = list(set(ip_list))
    # Open the file once and write every address, instead of reopening
    # it in append mode for each individual line.
    with open(file_name, 'a', encoding='utf-8') as f:
        for addr in ip_list_new:
            f.write(addr + '\r')
    print('[-]全部页面爬取完毕')
    print(f'[+]共爬取{len(ip_list_new)}个ip地址')
    print(f'[+]所有目标ip保存完毕,保存地址为当前目录下:{file_name}')
if __name__ == '__main__':
    # Entry point: parse CLI arguments, discover the page count
    # (interactive), then crawl and save the results.
    para()
    pag_num()
    fofa()