# Python GET/POST request examples with User-Agent / IP spoofing (urllib2, httplib)

# GET request:

# Method 1: urllib2
import urllib
import urllib2

def getUrllibFun(url):
    """Fetch *url* with a GET request and return the raw response body.

    A Firefox User-Agent header is sent so the server does not reject the
    request as coming from a script.

    Raises urllib2.URLError (including socket timeouts after 20 s) on failure.
    """
    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0'}
    req = urllib2.Request(url, headers = headers)
    res_data = urllib2.urlopen(req,timeout=20)
    try:
        # try/finally guarantees the socket is released even if read() raises
        # mid-stream (the original leaked the connection in that case).
        return res_data.read()
    finally:
        res_data.close()


if __name__ == "__main__":
    url = "https://blog.csdn.net/weixin_42749765/article/details/81625924"
    restdata = getUrllibFun(url)
    print restdata


# Method 2: httplib
import httplib

def getUrllibFun(urlf,port):
    """Fetch *urlf* with a raw httplib GET and return the response body.

    ``port`` is the server address for the TCP connection. The demo caller
    passes a scheme-prefixed URL ("https://blog.csdn.net"), which
    httplib.HTTPConnection cannot use directly — it expects a bare
    "host[:port]" and cannot speak TLS — so the scheme is stripped here and
    HTTPSConnection is chosen for https targets.

    NOTE(review): this definition shadows the earlier getUrllibFun; consider
    renaming one of them.
    """
    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0'}
    # Normalize the address and pick the right connection class (bug fix:
    # the original fed "https://host" straight into HTTPConnection).
    if port.startswith("https://"):
        host = port[len("https://"):]
        conn = httplib.HTTPSConnection(host)
    elif port.startswith("http://"):
        host = port[len("http://"):]
        conn = httplib.HTTPConnection(host)
    else:
        conn = httplib.HTTPConnection(port)
    try:
        conn.request(method="GET",url=urlf,headers = headers)
        res = conn.getresponse()
        resdata = res.read()
        res.close()
        return resdata
    finally:
        # Always release the TCP connection, even if request/read fails.
        conn.close()

if __name__ == "__main__":
    url = "https://blog.csdn.net/weixin_42749765/article/details/81625924"
    restdata = getUrllibFun(url,"https://blog.csdn.net")
    print restdata

# POST request:
# Method 1: urllib2
import urllib
import urllib2


def postHttpFun(requrl,datapamse):
    """POST *datapamse* (a dict of form fields) to *requrl*, return the body.

    The dict is form-url-encoded; supplying ``data=`` makes urllib2 issue a
    POST instead of a GET. A Firefox User-Agent is sent so the request is
    not rejected as a bot.

    Raises urllib2.URLError (including socket timeouts after 20 s) on failure.
    """
    data_urlencode = urllib.urlencode(datapamse)
    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:22.0) Gecko/20100101 Firefox/22.0'}
    req = urllib2.Request(url = requrl,data =data_urlencode,headers = headers)
    res_data = urllib2.urlopen(req,timeout=20)
    try:
        # try/finally guarantees the socket is released even if read() raises
        # mid-stream (the original leaked the connection in that case).
        return res_data.read()
    finally:
        res_data.close()


if __name__=="__main__":
    requrl = "https://blog.csdn.net/weixin_42749765/article/details/81625924"
    test_data = {'qwe': 'qqq', 'qqqqw': 'qwew'}
    resdata = postHttpFun(requrl,test_data)
    print resdata


# IP spoofing via the X-Forwarded-For header
import random
import urllib2

def download_html(url):
    """GET *url* while pretending to come from a random spoofed client IP.

    Sends a desktop-Chrome User-Agent plus a randomly chosen fake
    X-Forwarded-For value, so naive servers log one of the decoy IPs.
    Returns the raw response body.
    """
    fake_ips = ['121.31.159.197', '175.30.238.78', '124.202.247.110']
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36',
        # random.choice replaces ip[random.randint(0, 2)]: same behavior, but
        # it cannot go out of range if the list length ever changes.
        'X-Forwarded-For': random.choice(fake_ips)}
    request = urllib2.Request(url, None, header)
    # timeout added for consistency with the other helpers in this file;
    # the original also leaked the connection (response was never closed).
    response = urllib2.urlopen(request, timeout=20)
    try:
        return response.read()
    finally:
        response.close()


if __name__ == "__main__":
    # Demo: fetch the page through the IP-spoofing helper.
    page_url = "https://blog.csdn.net/weixin_42749765/article/details/81625924"
    html = download_html(page_url)



## Request through a proxy server

#encoding=utf8
import urllib2
import BeautifulSoup
import random

## Takes the target URL and the proxy server address
def dlgetFun(url,dlPort):
    """GET *url* through the HTTP proxy *dlPort* ("host:port") and return
    the response parsed as a BeautifulSoup document.

    Uses a 5-second timeout. Raises urllib2.URLError on connection or
    timeout failure.
    """
    header = {
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36",
        "Accept": " text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        # BUG FIX: the original advertised "gzip,deflate" but never
        # decompressed the body, so BeautifulSoup could receive compressed
        # bytes. Request an uncompressed response instead.
        "Accept-Encoding": "identity",
        "Accept-Language": "zh-CN,zh;q=0.8"
    }
    proxy={"http":dlPort}
    proxy_support = urllib2.ProxyHandler(proxy)  # register the proxy

    opener = urllib2.build_opener(proxy_support)
    # NOTE(review): install_opener mutates urllib2's process-wide opener —
    # kept as-is because callers may rely on that side effect.
    urllib2.install_opener(opener)

    req = urllib2.Request(url, headers=header)
    response = urllib2.urlopen(req, None,5)  # data=None, timeout=5 s
    try:
        # BeautifulSoup (BS3) consumes the file-like response on construction.
        soup = BeautifulSoup.BeautifulSoup(response)
        return soup
    finally:
        # Fixes the connection leak in the original (response never closed).
        response.close()


if __name__=="__main__":
    ##爬去网页地址
    urlHttp = "https://ssxkkd.com"
    ##代理服务器ip端口
    dlport = "111.114.613.191:9000"
    resdata = dlgetFun(urlHttp,dlport)
    print resdata



# Adapted from: blog.csdn.net/weixin_42749765/article/details/81625924