需要用的工具
PyCharm、Python 3.7、selenium 库、requests 库、os 文件库、re 模块的正则表达式运用;另需自行下载 chromedriver(自动化测试驱动)
代碼如下
具體的描述代碼可以看
from selenium import webdriver # 从selenium导入webdriver
from selenium.webdriver.chrome.options import Options
import requests
import re
from bs4 import BeautifulSoup
# 获取免费的代理并验证代理的可用性
from lxml import etree
# Default request headers: present a desktop Chrome User-Agent instead of the
# library's default one when fetching pages.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.100 Safari/537.36"
    ),
}
# Scrape a free-proxy listing page and yield every proxy it advertises.
def get_free_proxy(url='https://www.xicidaili.com/nn/', *, timeout=10):
    """Yield ``"ip:port"`` strings scraped from a proxy listing page.

    The page is searched for ``<table id="ip_list">``; its first row is
    skipped as the header, and for every following row the IP is read from
    the 2nd ``<td>`` and the port from the 3rd.

    Args:
        url: Listing page to fetch. Defaults to the address that was
            previously hard-coded here.
        timeout: Seconds to wait for the listing page. Without a timeout
            ``requests.get`` may block indefinitely on an unresponsive host.

    Yields:
        One ``"ip:port"`` string per table row that has both cells. Nothing
        is yielded when the table is missing or empty.

    Raises:
        requests.RequestException: if the page cannot be fetched (connection
            failure, timeout, ...). The HTTP status is not checked; whatever
            body comes back is parsed.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    document = etree.HTML(response.content)
    # etree.HTML may hand back None for an empty/unparseable body; treat that
    # as "no proxies" rather than failing on None.xpath.
    if document is None:
        return

    rows = document.xpath('//table[@id="ip_list"]//tr')
    # Slice instead of pop(0): an absent table gives an empty list, and
    # popping from it would raise IndexError.
    for row in rows[1:]:
        ip_texts = row.xpath('./td[2]/text()')
        port_texts = row.xpath('./td[3]/text()')
        # Skip rows that lack either cell instead of crashing on [0].
        if not ip_texts or not port_texts:
            continue
        # Strip surrounding whitespace so the result is usable as a proxy URL.
        yield ip_texts[0].strip() + ':' + port_texts[0].strip()
# Check whether a proxy can actually relay a request.
def validate_proxy(proxy_str, url='https://www.baidu.com', timeout=2):
    """Return True if a GET to *url* through the proxy answers with HTTP 200.

    Args:
        proxy_str: Proxy address without scheme, e.g. ``"1.2.3.4:8080"``.
            It is used as an ``http://`` proxy for both http and https
            traffic.
        url: Page requested through the proxy as the liveness probe.
        timeout: Seconds to wait before the proxy is considered dead.

    Returns:
        bool: ``True`` only when the request completes with status 200;
        ``False`` for any other status or when the request fails.
    """
    proxy_url = 'http://' + proxy_str
    proxies = {
        'http': proxy_url,
        'https': proxy_url,
    }
    try:
        response = requests.get(url, proxies=proxies, timeout=timeout)
    except (requests.RequestException, ValueError):
        # Network/proxy failures and malformed proxy addresses mean "unusable".
        # A bare except here would also swallow KeyboardInterrupt, making a
        # long validation loop impossible to cancel with Ctrl+C.
        return False
    # Explicit bool so a non-200 answer returns False rather than None.
    return response.status_code == 200
# Script entry point: scrape the proxy list and print each proxy that passes
# validation.
if __name__ == '__main__':
    # filter() is lazy, so each candidate is validated only as the generator
    # produces it, exactly like an explicit for/if pair.
    for working_proxy in filter(validate_proxy, get_free_proxy()):
        print('可用的代理IP:')
        print(working_proxy)