利用python抓取淘宝提供的ip库信息并保存.
淘宝ip库网站:http://ip.taobao.com
淘宝提供的API地址为:http://ip.taobao.com/service/getIpInfo.php?ip=
该接口的访问频率限制为QPS=10(即每秒最多请求10次)
以下为用Python的简单实现(剔除了私有IP地址段;在判断时只取了IP的前三段,第四段统一设置为0,因为只要根据前三段就可以判断IP的归属地信息了)
# -*- coding: utf-8 -*-
"""Crawl the Taobao IP-library API and append the raw responses to a file.

One address of the form ``a.b.c.0`` is queried for every combination of
``a`` in 1..255 and ``b``, ``c`` in 0..255, skipping the private ranges
10.x.x.x, 172.16-31.x.x and 192.168.x.x.  Each decoded response body is
stored as one line of ``ip.txt``.
"""
import sys
import time
from urllib import request

API_URL = 'http://ip.taobao.com/service/getIpInfo.php?ip='
OUTPUT_FILE = 'ip.txt'
BATCH_SIZE = 100         # number of responses buffered before a disk write
REQUEST_INTERVAL = 0.2   # seconds slept after every request (5 requests/s)
REQUEST_TIMEOUT = 10     # seconds before a stalled request is abandoned


def writefile(L=(), path=OUTPUT_FILE):
    """Append every string in *L* to *path*, one string per line.

    Args:
        L: iterable of strings to write; defaults to nothing.
        path: file to append to; defaults to ``ip.txt``.
    """
    # The responses were decoded as UTF-8, so write them back as UTF-8
    # instead of relying on the platform default encoding.
    with open(path, 'a', encoding='utf-8') as f:
        f.writelines(s + '\n' for s in L)


def is_private_prefix(a, b):
    """Return True when ``a.b.*.*`` lies in a private range that is skipped."""
    return a == 10 or (a == 172 and 16 <= b <= 31) or (a == 192 and b == 168)


def iter_target_ips():
    """Yield every ``a.b.c.0`` address to query, in ascending order."""
    for a in range(1, 256):
        for b in range(256):
            if is_private_prefix(a, b):
                continue
            for c in range(256):
                yield '{}.{}.{}.0'.format(a, b, c)


def fetch_ip_info(ip):
    """Return the API response body for *ip*, decoded as UTF-8.

    Raises:
        OSError: on network failure or timeout (``URLError`` is a subclass).
        UnicodeDecodeError: if the body is not valid UTF-8.
    """
    with request.urlopen(API_URL + ip, timeout=REQUEST_TIMEOUT) as resp:
        return resp.read().decode('utf-8')


def main():
    """Query every target address and persist the responses in batches."""
    pending = []
    try:
        for ip in iter_target_ips():
            try:
                pending.append(fetch_ip_info(ip))
            except (OSError, UnicodeDecodeError) as err:
                # A single failed lookup must not abort the whole crawl.
                print('skip %s: %s' % (ip, err), file=sys.stderr)
            if len(pending) >= BATCH_SIZE:
                writefile(pending)
                pending = []
            time.sleep(REQUEST_INTERVAL)
    finally:
        # Flush whatever is still buffered, even on Ctrl-C or a crash.
        if pending:
            writefile(pending)


if __name__ == '__main__':
    main()