请求十个 IP 地址

import base64
import re

import lxml.html
import requests
from fake_useragent import UserAgent

# The 64 characters of the standard Base64 alphabet (indices 0-63) followed by
# the '=' padding character at index 64.
encoderchars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="

# Python re-implementation of the algorithm used to generate the session values.
def f1(a):
    """Return the standard Base64 encoding (with ``=`` padding) of *a*.

    Each character of *a* contributes exactly one byte: the low 8 bits of its
    code point. For ASCII input this is ordinary Base64 of the text.

    Args:
        a: The string to encode. An empty string yields an empty string.

    Returns:
        The Base64 text as a ``str``.
    """
    # Truncate every code point to one byte so that characters above U+00FF
    # encode the same way as the original bit-twiddling implementation did.
    raw = bytes(ord(ch) & 0xff for ch in a)
    return base64.b64encode(raw).decode("ascii")

# Seconds to wait for the server before giving up. Without a timeout a
# request can block indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 10

s = requests.session()
# The first request is made to obtain the `session` cookie set by the server.
first_response = s.get('http://datamining.comratings.com/exam', timeout=REQUEST_TIMEOUT)
session_id = first_response.cookies.get_dict()['session']
# Build the Cookie header by hand: c1 is f1() of characters 1..3 of the
# session id, c2 is f1() of the whole session id.
r_cookies = 'session={};c1={}; c2={}; path=/'.format(session_id,f1(session_id[1:4]),f1(session_id))

headers = {
    "User-Agent": UserAgent().random,
    "Cookie": r_cookies,
}

ss = s.get('http://datamining.comratings.com/exam3', headers=headers, timeout=REQUEST_TIMEOUT).text

# Compile every pattern once, up front, rather than inside the parsing loop.
# NOTE(review): the body pattern is closed by '<body>' rather than '</body>';
# kept unchanged — confirm against the page's actual markup.
body_pattern = re.compile(r'<body>(.*?)<body>',re.DOTALL)
style_pattern = re.compile(r'<style>(.*?)</style>',re.DOTALL)
# NOTE(review): the leading '.' is an unescaped regex wildcard; presumably it
# is meant to match the '.' of a CSS class selector — kept unchanged.
class_pattern = re.compile(r'.(.*?){')
number_pattern = re.compile(r'\d+')

# Cut out the body content and split it on <br>; the first fragment is kept
# verbatim and every following fragment is expected to hold one IP address.
body_matches = body_pattern.findall(ss)
fragments = body_matches[0].split('<br>')

# Names captured from the rules of the first <style> element. Only the first
# two are used below, as markers of lines to ignore.
style_blocks = style_pattern.findall(ss)
class_names = class_pattern.findall(style_blocks[0])

data = [fragments[0]]  # output lines: the leading fragment, then the IPs

for fragment in fragments[1:]:
    ip_parts = []  # the numbers that make up this fragment's IP address
    for line in fragment.split('\n'):
        # Skip lines that mention either of the first two style names or
        # contain the text 'none'.
        if class_names[0] in line or class_names[1] in line or 'none' in line:
            continue
        numbers = number_pattern.findall(line)
        if numbers:
            ip_parts.append(numbers[0])

    # Fail with a descriptive error instead of a bare IndexError when a
    # fragment does not provide the four numbers of an IPv4 address.
    if len(ip_parts) < 4:
        raise ValueError(
            'expected at least 4 numbers in fragment, found {}: {!r}'.format(
                len(ip_parts), fragment))

    # Join the first four numbers into a dotted IP address.
    data.append('.'.join(ip_parts[:4]))

for entry in data:
    print(entry)

猜你喜欢

转载自www.cnblogs.com/wdz1226/p/10434290.html