爬取fofa查询的目标ip地址

需求:想通过脚本实现爬取fofa查询后的目标ip地址,然后保存在本地。昨天花了几个小时写的,还有些不足之处,以后再慢慢完善增加功能,比如用多线程实现之类的,不过现在已经够用了。

fofa_ip.py运行结果如下

脚本内容如下

import requests
from requests.packages import urllib3
urllib3.disable_warnings()
import base64
import argparse
from lxml import etree 
import os
def para(argv=None):
	"""Parse command-line arguments and publish them as module globals.

	Parameters:
		argv: optional list of argument strings; defaults to sys.argv[1:]
		      when None (passing an explicit list makes this testable).

	Returns:
		(word, cookie, pro) — also stored in the module-level globals
		consumed by pag_num() and fofa().
	"""
	parser = argparse.ArgumentParser(description='fofa查询脚本')
	parser.add_argument('--word', type=str, required=True, help='fofa查询的语句')
	parser.add_argument('--cookie', type=str, required=True, help='登录fofa账号后的cookie')
	parser.add_argument('--pro', type=str, required=False, help='代理服务器ip和端口(可选参数),如:xx.xx.xx.xx:7890')

	args = parser.parse_args(argv)
	global word
	global cookie
	global pro
	word = args.word
	cookie = args.cookie
	# normalize a missing proxy to '' so later "'%s' % pro" formatting is safe
	pro = args.pro or ''
	print('[+]author:xcc')
	print("[-]fofa查询,批量获取目标ip")
	return word, cookie, pro
# Discover how many result pages exist and ask the user how many to scrape.
def pag_num():
	"""Fetch one result page, read the total page count from the pagination
	widget, and prompt the user for the number of pages to scrape.

	Sets globals: s (base64-encoded query for fofa's qbase64 parameter),
	headers (User-Agent + session cookie), num (int, user-chosen count).
	Exits the process on any request/parse error or non-integer input.
	"""
	# fofa expects the query string base64-encoded in the qbase64 parameter
	global s
	s = (base64.b64encode(word.encode('utf-8'))).decode('utf-8')
	# request headers reused by fofa() as well
	global headers
	headers = {
			'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; PCRT00 Build/N2G48H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/66.0.3359.158 Safari/537.36 fanwe_app_sdk sdk_type/android sdk_version_name/4.0.1 sdk_version/2020042901 screen_width/720 screen_height/1280',
			'cookie':cookie
		}
	# page=1 always exists, so it is a safe page to probe for the pagination
	# widget (the original hard-coded page=4, which breaks on small result
	# sets; an unused `param` dict was also removed)
	url = f"https://fofa.so/result?page=1&qbase64={s}"
	try:
		text = requests.get(url=url,headers=headers,verify=False,timeout=15,proxies={'https':'%s' %(pro)}).text

		# the last pagination link holds the total page count
		tree = etree.HTML(text)
		pag_num = tree.xpath('//*[@id="will_page"]/a/text()')[-1]

		print('fofa爬取页面数量为:'+pag_num)
		global num
		num = input('请输入您想要爬取的页面数量:')
		if num.isdigit():
			num = int(num)
		else:
			print('请输入一个整数')
			os._exit(0)
	except Exception as error:
		print('发生错误:',error)
		os._exit(0)

def fofa():
	"""Scrape `num` result pages, extract the target URLs, deduplicate them,
	and save them to <word>.txt in the current directory.

	Relies on globals set by para()/pag_num(): word, s, headers, pro, num.
	Exits the process on any request/parse/file error.
	"""
	file_name = word+'.txt'
	try:
		# start from a clean file so reruns do not append to stale results
		if os.path.exists(file_name):
			os.remove(file_name)
	except Exception as error:
		print('发生错误:',error)
		os._exit(0)

	ip_list = []
	for i in range(1,num+1):
		# fetch page i of the search results (the original built an unused
		# `param` dict here that even carried the wrong page number)
		url = f"https://fofa.so/result?page={i}&qbase64={s}"
		try:
			text = requests.get(url=url,headers=headers,verify=False,timeout=15,proxies={'https':'%s' %(pro)}).text
			tree = etree.HTML(text)
			# href attributes of the result links; entries containing '//'
			# are full URLs — the targets we want to keep
			r = tree.xpath('//*[@id="ajax_content"]/div[@class="right-list-view-item clearfix"]//div[@class="re-domain"]/a/@href')
			ip_list.extend(str(n) for n in r if "//" in str(n))
			print(f'[-]第{i}页爬取完毕!')
		except Exception as error:
			print('[-]发生错误:',error)
			os._exit(0)

	# deduplicate (set() does not preserve order, matching the original)
	ip_list_new = list(set(ip_list))
	# open the file once instead of reopening it for every single address
	# (the original re-opened the file inside the loop)
	with open(file_name,'a',encoding='utf-8') as f:
		for addr in ip_list_new:
			f.write(addr+'\r')
	print('[-]全部页面爬取完毕')
	print(f'[+]共爬取{len(ip_list_new)}个ip地址')
	print(f'[+]所有目标ip保存完毕,保存地址为当前目录下:{file_name}')

if __name__ == '__main__':
	# 1) parse CLI args into globals (word/cookie/pro)
	para()
	# 2) probe fofa for the page count and ask how many pages to scrape
	pag_num()
	# 3) scrape the pages and save the deduplicated targets to <word>.txt
	fofa()

猜你喜欢

转载自blog.csdn.net/qq_44159028/article/details/114685623