"""
目标:python 爬取ip代理池
所需模块:requests bs4 lxml
模块安装:
pip install requests bs4 lxml
开发环境:Pycharm python3.8
爬虫代码必备步骤:
1.确定需求
2.发送请求
3.获取数据
4.解析数据
5.打印数据
"""
import requests
from bs4 import BeautifulSoup
import time
# Accumulates {protocol: "ip:port"} dicts across all crawled pages.
proxies_list = []


def download(response):
    """Parse one kuaidaili free-proxy-list page and append its proxies.

    Each table row becomes a dict of the form {protocol: "ip:port"},
    e.g. {"HTTP": "1.2.3.4:8080"}, appended to the module-level
    ``proxies_list``.

    :param response: ``requests.Response`` for one proxy-list page.
    """
    global proxies_list
    soup = BeautifulSoup(response.text, "lxml")
    # Parse once and select the table body directly, instead of the
    # original str(select-result) -> re-parse round trip.
    tbody = soup.select_one("#freelist > table > tbody")
    if tbody is None:
        # Page layout changed or the request was blocked; nothing to add.
        return
    for tr in tbody.find_all("tr"):
        type_cell = tr.find(attrs={"data-title": "类型"})
        ip_cell = tr.find(attrs={"data-title": "IP"})
        port_cell = tr.find(attrs={"data-title": "PORT"})
        if type_cell is None or ip_cell is None or port_cell is None:
            continue  # skip malformed rows instead of raising AttributeError
        # "类型" may list several comma-separated protocols; the original
        # behavior keeps the second entry when a comma is present,
        # otherwise the whole text (was a bare except around IndexError).
        parts = type_cell.text.split(",")
        http_type = parts[1].strip() if len(parts) > 1 else type_cell.text
        proxies_list.append({http_type: ip_cell.text + ":" + port_cell.text})
if __name__ == "__main__":
    # Crawl the first three pages of the free proxy list.
    for page in range(1, 4):
        url = "https://www.kuaidaili.com/ops/proxylist/" + str(page)
        response = requests.get(url=url)
        download(response)
        time.sleep(1)  # polite delay between page requests (time was imported but unused)
    # Report every collected proxy with a 1-based index.
    # (The original single-quoted f-strings were split across lines by the
    # web extraction, which is a SyntaxError in Python; rejoined here.)
    for num, proxies in enumerate(proxies_list, start=1):
        print(f'NO.{num}.{proxies}')
    print(f'爬取ip代理个数{len(proxies_list)}')
# NOTE: the lines below are web-page extraction residue (article title,
# sidebar headings, repost attribution) — not code. Commented out so the
# file remains valid Python; kept for provenance.
# 【python爬虫】 爬取ip代理池
# 猜你喜欢
# 转载自blog.csdn.net/weixin_52049271/article/details/127175599
# 今日推荐
# 周排行