ipip.net IP位置信誉信息爬取

批量查询IP位置和信誉信息

import requests
from lxml import etree
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

requests.packages.urllib3.disable_warnings()  # Ignore urllib3 warnings (presumably the ones triggered by the verify=False requests made below).
# Input list: read by the main script, one IP address per line.
filename = "ip_black_file.txt"
# Output file: the main script appends one "ip;details" row per lookup.
result = "ipip.csv"
# Lookup page that every IP address is POSTed to.
url = "https://www.ipip.net/ip.html"

# HTTP headers sent with every lookup request (form-encoded POST,
# browser-like User-Agent).
page_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0',
    'Connection': 'keep-alive',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
}

def url_open(check_ip, timeout=10):
    """POST one IP address to the lookup page and return the raw response.

    Args:
        check_ip: IP address string, sent as the ``ip`` form field.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole batch run.

    Returns:
        A ``(status, content)`` tuple holding the HTTP status code and the
        response body text. ``(0, 0)`` is returned when the request itself
        fails (connection error, timeout, ...).
    """
    try:
        # Certificate verification is disabled (verify=False) for this call.
        res = requests.post(
            url,
            headers=page_headers,
            data={'ip': check_ip},
            verify=False,
            timeout=timeout,
        )
    except requests.RequestException as e:
        # Report the failure instead of swallowing it silently; callers still
        # receive the (0, 0) sentinel and can test ``status == 200``.
        print(str(e))
        return 0, 0
    return res.status_code, res.text

def check_ipip(check_ip):
    """Look up one IP address and return its details as a single string.

    The result page is fetched with ``url_open``. The text of every
    ``<span>`` found in the cells of the search-result table is collected,
    the first entry is dropped, and the printed form of the remaining list
    is cleaned up with a fixed sequence of textual replacements so that the
    fields end up separated by ``;``.

    Args:
        check_ip: IP address string to query.

    Returns:
        The cleaned-up field string, or the fixed failure message when the
        HTTP status is not 200 or any exception occurs (the exception text
        is printed).
    """
    failure = "查询失败"
    # Replacements applied, in this exact order, to ``str()`` of the span
    # list. The order matters: later steps rely on the quoting left behind
    # by earlier ones.
    cleanup_steps = (
        ("（更多数据请查看', '）", ""),  # boilerplate split across two adjacent spans
        (", ' '", ""),                  # list items that are a single space
        ("(', ')", ""),                 # "(" / ")" split across two adjacent spans
        ("', '", "';'"),                # item separator -> ";"
        (" ", ""),                      # every remaining space
        ("['", ""),                     # opening bracket + quote of the list repr
        ("']", ""),                     # closing quote + bracket of the list repr
        ("'", ""),                      # any quotes still left
    )
    try:
        status, content = url_open(check_ip)
        if status != 200:
            return failure
        document = etree.HTML(content)
        spans = document.xpath('//div[@class="outer tableNormal ipSearch"]//div[@class="inner"]//table//tr//td/span/text()')
        flattened = str(spans[1:])
        for old, new in cleanup_steps:
            flattened = flattened.replace(old, new)
        return flattened
    except Exception as e:
        print(str(e))
        return failure
        
if __name__ == "__main__":
    # Batch driver: query every IP listed in ``filename`` and append one
    # "ip;details" row per address to ``result``.

    # Load the whole list first so the input file is closed before the slow
    # network loop starts. Surrounding whitespace is stripped so padded
    # lines are not sent to the server with stray spaces, and whitespace-only
    # lines are treated as blank.
    with open(filename, 'r', encoding='utf-8') as fh:
        ips = [line.strip() for line in fh]

    with open(result, 'a+', encoding='utf-8') as f:
        for ip in ips:
            if not ip:
                continue  # skip blank lines
            print("正在查询IP：" + ip)
            tag = check_ipip(ip)
            f.write(ip + ";" + tag + '\n')
ipip.net IP位置信誉信息爬取

批量查询IP位置和信誉信息

猜你喜欢