用 Python 从 IP 代理网站爬取可用的代理 IP

版权声明:归所有菜鸟所有 https://blog.csdn.net/weixin_41752475/article/details/89439719

目标网站(代理列表第 1 页):https://www.xicidaili.com/nn/1

import re
import requests

# Harvest proxy IPs so that our own IP does not get blocked by anti-scraping
# measures. A browser-like User-Agent is sent with every listing request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36"}

# Listing pages to crawl, inclusive on both ends (pages 1-100).
FIRST_PAGE = 1
LAST_PAGE = 100

# Matches an IPv4 cell followed by an all-digit cell and captures both, so an
# address and its port always come from the same place in the markup instead
# of being zipped together from two independent lists.
# NOTE(review): assumes the port <td> directly follows the IP <td> (only
# whitespace in between) -- confirm against the live page markup.
PROXY_ROW = re.compile(
    r"<td>(\d{1,3}(?:\.\d{1,3}){3})</td>\s*<td>(\d{1,5})</td>"
)


def parse_proxies(html):
    """Return the ``(ip, port)`` string pairs found in a listing page.

    An empty list is returned when the page contains no matching cells.
    """
    return PROXY_ROW.findall(html)


def proxy_works(ip, port):
    """Return True if a test request through ``ip:port`` succeeds in time."""
    # Both schemes are routed through the same plain-HTTP proxy endpoint;
    # the proxy URL scheme describes how we talk to the proxy itself.
    proxy_url = "http://" + ip + ":" + port
    proxies = {"http": proxy_url, "https": proxy_url}
    try:
        requests.get("http://www.baidu.com", proxies=proxies, timeout=3)
    except requests.RequestException:
        return False
    return True


def record_proxy(path, ip, port):
    """Append ``ip:port`` to ``path`` on a line of its own."""
    with open(path, mode="a+", encoding="utf-8") as f:
        f.write(ip + ":" + port + "\n")


def main():
    """Crawl every listing page and sort the proxies into ok / not-ok files."""
    for i in range(FIRST_PAGE, LAST_PAGE + 1):
        url = "https://www.xicidaili.com/nn/{}".format(i)

        # The timeout keeps one stalled listing page from hanging the crawl.
        response = requests.get(url, headers=headers, timeout=10)
        pairs = parse_proxies(response.text)
        print(pairs)

        for pair in pairs:
            # Check whether the proxy is actually usable.
            if proxy_works(*pair):
                print(pair, "能使用")
                record_proxy("ipok.text", *pair)
            else:
                record_proxy("ipnot.text", *pair)
                print(pair, "不能使用")


if __name__ == "__main__":
    main()

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/weixin_41752475/article/details/89439719