简单粗暴地提取西刺IP和端口(附源码,爬虫小白,求勿喷)

import urllib
import re
import time
from urllib import request
from urllib import parse
import chardet
# Fetch the xicidaili "nn" listing page through an HTTP proxy, pull the IP
# and port values out of its <td> cells, and print them as "ip:port" strings.

# Install a global opener so every urllib request goes through this proxy.
proxy = {"http": "123.207.30.131:80"}
proxy_support = request.ProxyHandler(proxy)
opener = request.build_opener(proxy_support)
request.install_opener(opener)

url = "http://www.xicidaili.com/nn"
# Send a browser-like User-Agent header with the request.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}

# patternIP: a dotted quad of 1-3 digit groups directly after "<td>".
# patternPORT: a 2-5 digit number that fills an entire <td>...</td> cell.
patternIP = re.compile(r'(?<=<td>)[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}\.[\d]{1,3}')
patternPORT = re.compile(r'(?<=<td>)[\d]{2,5}(?=</td>)')

req = request.Request(url, headers=headers)
# Close the HTTP response as soon as the body has been read.
with request.urlopen(req) as response:
    html = response.read()

# Decode the raw bytes once instead of matching against the bytes repr.
# chardet may report None for the encoding, so fall back to UTF-8.
charset = chardet.detect(html)['encoding']
text = html.decode(charset or "utf-8", errors="replace")

findIP = patternIP.findall(text)
findPORT = patternPORT.findall(text)

# Pair each IP with the port found at the same position. zip() stops at the
# shorter list, so a missing port no longer raises IndexError, and each pair
# is added exactly once (the old loop re-appended the whole list every pass).
# NOTE(review): pairing is purely positional; an extra numeric-only cell in
# the page would shift the alignment - confirm against the live markup.
IP_data = [ip + ":" + port for ip, port in zip(findIP, findPORT)]

print(charset)
print(IP_data)

猜你喜欢

转载自blog.csdn.net/lzz781699880/article/details/81072022