[Python] Crawler: using authenticated (account/password) IP proxies and setting up a group of sessions with custom request headers

1: Request proxy IPs and store them in Redis:

# Request a fresh batch of proxy IPs and store it in Redis.
def proxy_redis():
    """Fetch the proxy list text from the provider and cache it in Redis.

    The raw response body is written unchanged to the ``"proxy_list"`` key,
    which is the key ``get_proxy_list`` reads back.
    """
    sr = redis.Redis(connection_pool=Pool)
    # The URL below is a placeholder: substitute your proxy provider's endpoint.
    proxys_text = requests.get(" Your request proxy IP address ").text
    # Overwrite the cached list; the key must match the one read elsewhere.
    sr.set("proxy_list", proxys_text)


# Check whether the proxy IPs are usable: returns True if usable, False otherwise.
def check_proxy(proxy_list):
    """Probe every proxy in *proxy_list* with a test request.

    Args:
        proxy_list: iterable of proxy addresses; each one is substituted
            into the authenticated proxy URL template below.

    Returns:
        True if a request through every proxy completed without raising
        (an empty *proxy_list* therefore also yields True); False as soon
        as any request raises.
    """
    print(" ip cell detector is ", proxy_list)
    try:
        for proxy in proxy_list:
            requests.get(
                "https://www.baidu.com/",
                # "账号:密码" is the account:password placeholder for the proxy.
                proxies={"https": "http://账号:密码@{}".format(proxy)},
                # Without a timeout a dead proxy can block this check forever.
                timeout=10,
            )
    except Exception:
        # Best-effort probe: any failure simply marks the pool as unusable.
        return False
    print("IP可用")
    return True

# Return a usable proxy pool.
def get_proxy_list():
    """Return the cached proxy list once it passes ``check_proxy``.

    Reads the ``"proxy_list"`` key from Redis, splits it on CRLF and checks
    the resulting proxies. Whenever the key is missing or the check fails,
    the cache is refreshed via ``proxy_redis()`` and the loop tries again;
    the function only returns when a list passes the check.

    Returns:
        The list of proxy address strings that passed ``check_proxy``.
    """
    sr = redis.Redis(connection_pool=Pool)
    while True:
        ip_list = sr.get("proxy_list")  # cached proxy text, or None if unset
        if ip_list is None:
            # Nothing cached yet: populate Redis, then re-read instead of
            # calling .split() on None.
            proxy_redis()
            continue
        # NOTE(review): assumes sr.get returns str (i.e. Pool was created with
        # decode_responses=True); with bytes this split raises TypeError - confirm.
        proxy_list = ip_list.split("\r\n")  # one proxy per CRLF-terminated line
        if check_proxy(proxy_list):
            print(" request Broker pool: ", proxy_list)
            return proxy_list
        # The cached proxies are no longer usable: fetch a new batch and retry.
        proxy_redis()
        print(" Redis pool updated successfully .")

2: Set up a group of N sessions, each with its own proxy and request headers:

    # . 1: 
        session requests.Session = ()   # Set a group session request 
        session.proxies = {
             " HTTP " : " HTTP: // login: password} {@ " .format (Proxy),
             " HTTPS " : " HTTP: @ account: password} {@ " .format (Proxy), 
        } 
        headers = head () 
        session.headers.update (headers)   # update request header custom request header 
        session.post ( " HTTPS: //www.amazon. COM / GP / Delivery / Ajax / address-change.html " , Data)       # 设置cookeis
        ret = session.get("https://www.amazon.com/dp/B0047CJZLM", headers=headers).text
        IUSC.append(session)
    
        # 1

        # 2
        session = requests.Session()  # set up one session of the group
        # NOTE(review): the proxy credentials are hard-coded in this URL;
        # consider loading them from configuration and rotating them.
        authed_proxy = "http://sellerbdata:lv6nv17v@{}".format(proxy)
        session.proxies = dict.fromkeys(("http", "https"), authed_proxy)
        headers = head()
        session.headers.update(headers)
        # Set the postal code through the address-change endpoint.
        address_change_url = "https://www.amazon.com/gp/delivery/ajax/address-change.html"
        session.post(address_change_url, data)
        IUSC.append(session)
        # 2

 

Guess you like

Origin www.cnblogs.com/wanghong1994/p/11994064.html