改进 Python IP 地址获取与验证

# -*- coding: utf-8 -*-
import requests
import random
import re
import json
import demjson # Python 对象编码成 JSON 字符串
from requests.packages.urllib3.exceptions import InsecureRequestWarning #进行GET时,关闭证书验证
from requests.exceptions import ReadTimeout,ConnectionError,RequestException #异常链接模块

# Silence urllib3's InsecureRequestWarning. This only hides the warning; it does
# not itself turn certificate verification off (that is done per request with
# verify=False).
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)



# Pool of browser User-Agent header values to choose from when sending requests.
UAPOOLS = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0',
]

def get_one_page(url, timeout=10):
    """Download ``url`` using a randomly chosen User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The response body as text when the server answers with HTTP 200;
        ``None`` for any other status code or when the request fails with a
        ``RequestException`` (connection error, timeout, ...).
    """
    # Pick a random User-Agent from the pool for this request.
    headers = {'User-Agent': random.choice(UAPOOLS)}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except RequestException:
        print('请求ip代理页面出错')
        return None
    if response.status_code == 200:
        return response.text
    return None

def parse_page_index(html):
    """Extract proxy addresses from the HTML of a proxy listing page.

    For every ``<tr class="odd">`` row, the contents of the first two plain
    ``<td>`` cells are captured and treated as (ip, port).

    Args:
        html: Page source as a string. ``None`` or an empty string is
            accepted and yields an empty result.

    Returns:
        A dict mapping the first captured cell (ip) to the second (port).
        If the same ip appears more than once, the last port wins.
    """
    if not html:
        # The page fetch returns None on failure; re.findall would raise
        # TypeError on it, so report "no proxies found" instead.
        return {}
    ip_pattern = re.compile(
        r'<tr class="odd">.*?<td>(.*?)</td>.*?<td>(.*?)</td>', re.S
    )
    # findall yields (ip, port) tuples, which dict() turns into ip -> port.
    return dict(ip_pattern.findall(html))


def main():
    """Scrape the proxy listing page and keep the proxies that respond.

    Each scraped ``ip:port`` is used as an HTTP proxy for a request to
    ``http://httpbin.org/ip``; a proxy is kept when that request returns
    HTTP 200.

    Returns:
        A dict mapping ip -> port for every proxy that passed the check.
        Empty when the listing page could not be fetched or had no entries.
    """
    url = 'http://www.xicidaili.com/nt'  # Xici proxy listing page
    html = get_one_page(url)
    if not html:
        # get_one_page returns None on failure; parsing None would raise.
        return {}
    page = parse_page_index(html)

    ip_dic2 = {}
    for key, zhi in page.items():
        dic = {'http': key + ':' + zhi}
        try:
            # A timeout is essential here: dead proxies would otherwise
            # stall the loop. Timeouts are RequestException subclasses,
            # so they are reported as unusable below.
            response = requests.get(
                "http://httpbin.org/ip", proxies=dic, verify=False, timeout=5
            )
        except RequestException:
            print("不可用")
            continue
        if response.status_code == 200:
            print(dic)
            ip_dic2[key] = zhi
            print(ip_dic2)
    return ip_dic2



if __name__ == '__main__':
    # Script entry point: run the proxy check and print the resulting dict.
    print(main())

猜你喜欢

转载自blog.csdn.net/qq_15907907/article/details/80311047