Python库的解析--requests常用指令(requests库)

import requests
import re
import urllib.request as ur
import http.cookiejar
import logging
from requests.auth import HTTPBasicAuth
from requests_oauthlib import OAuth1


def basic_message():
    """Fetch the Baidu home page and print the main attributes of the response.

    Printed, in order: the response object, its type, URL, headers, status
    code, decoded text, raw bytes content, cookies and encoding.
    """
    response = requests.get(url='http://www.baidu.com')

    # Each entry is (label, value). ``text`` is the decoded body, whereas
    # ``content`` is the same body as a raw bytes stream.
    report = (
        ("res is ", response),
        ("type of res is ", type(response)),
        ("the url of res is ", response.url),
        ("the headers of res is ", response.headers),
        ("the status code of res is ", response.status_code),
        ("the text of res is ", response.text),
        ("the content of res is ", response.content),
        ("the cookies of res is ", response.cookies),
        ("the encoding of res is ", response.encoding),
    )
    for label, value in report:
        print(label, value)

    # The other HTTP verbs are issued the same way:
    #     requests.post(url=url), requests.head(url=url), requests.put(url=url),
    #     requests.delete(url=url), requests.options(url=url)
    # Rough urllib counterpart:
    #     urllib.request.urlopen(urllib.request.Request(url=url, method='...'))


def basic_get_params():
    """Send a GET request with query parameters to httpbin and print the reply.

    The body is printed twice: once as raw text and once decoded from JSON.
    """
    query = dict(name='Mike', age=22)
    reply = requests.get('http://httpbin.org/get', params=query)
    print(reply.text)
    # The returned body is JSON, so json() turns it directly into a dict.
    print(reply.json())


# 案例 爬取知乎探索上的连接标题
# 添加headers参数
def exa_zhihu_link():
    """Print the link titles found on Zhihu's explore page.

    The request carries a browser-like ``User-Agent`` header, and the titles
    are extracted from the returned HTML with a regular expression.
    """
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/86.0.4209.2 Safari/537.36')
    page = requests.get(url='https://zhihu.com/explore',
                        headers={'User-Agent': user_agent})
    # re.S lets '.' match newlines, so a title spanning several lines is
    # still captured.
    title_re = re.compile('<a class="ExploreSpecialCard-contentTitle".*?>(.*?)</a>', re.S)
    print(title_re.findall(page.text))


# 获取图片、视频等文件
def get_img_file(url):
    """Download a binary resource (image, video, ...) and save it to disk.

    The response is printed both as decoded text and as raw bytes, then the
    raw bytes are written to ``logo.icon`` in the current directory.

    Args:
        url: Address of the resource to download.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = requests.get(url)
    print(r.text)
    print(r.content)
    # Fail loudly on an error status instead of saving the server's error
    # page as if it were the requested file.
    r.raise_for_status()
    with open('logo.icon', 'wb') as f:
        f.write(r.content)


def basic_post_params():
    """Send a form-encoded POST request to httpbin and print the reply.

    Prints the raw body, then the JSON-decoded body, then a success message.

    Raises:
        SystemExit: If the response status is not 200 OK. The exit message
            names the status code, so the process ends with a non-zero
            status instead of silently exiting with success.
    """
    url = 'http://httpbin.org/post'
    data = {
        'name': 'Mike',
        'age': 22,
    }
    r = requests.post(url, data=data)
    print(r.text)

    # Check the status before decoding: an error response is not guaranteed
    # to carry a JSON body, and r.json() would raise on it.
    if r.status_code != requests.status_codes.codes.ok:
        raise SystemExit('Request failed with status code {}'.format(r.status_code))

    print(r.json())
    print("Request Successfully!")


# 高级用法 文件上传
def high_file_up():
    """Upload ``logo.icon`` to httpbin as a multipart file and print the reply.

    Raises:
        OSError: If ``logo.icon`` cannot be opened for reading.
    """
    url = 'http://httpbin.org/post'
    # Keep the file open only for the duration of the upload so the handle
    # is always released, even if the request raises.
    with open('logo.icon', 'rb') as upload:
        r = requests.post(url=url, files={'file': upload})
    print(r.text)


# 获取cookie
def high_get_cookie():
    """Print the cookies Baidu sets, first via requests and then via urllib.

    Each variant prints the cookie container followed by one ``name=value``
    line per cookie.
    """
    url = 'https://www.baidu.com'

    # Variant 1: requests exposes the cookies on the response object.
    r = requests.get(url=url)
    print(r.cookies)
    for key, value in r.cookies.items():
        print('{}={}'.format(key, value))

    # Variant 2: urllib.request fills a CookieJar through an opener that has
    # an HTTPCookieProcessor installed.
    cookie = http.cookiejar.CookieJar()
    cookie_handler = ur.HTTPCookieProcessor(cookie)
    opener = ur.build_opener(cookie_handler)
    # Only the cookies are of interest; the context manager closes the
    # response so the connection is not left open.
    with opener.open(url):
        pass
    print(cookie)
    for item in cookie:
        # format() rather than '+' so a cookie whose value is None does not
        # raise TypeError.
        print('{}={}'.format(item.name, item.value))


# cookie封装
def high_decorate_cookie():
    """Request Zhihu twice, passing cookies in two different ways.

    First the cookie string is sent inside the ``Cookie`` request header.
    Then the same kind of string is parsed into a ``RequestsCookieJar`` and
    handed to the ``cookies`` argument. The body of each response is printed.
    The cookie strings are empty placeholders to be filled in by the user.
    """
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/86.0.4209.2 Safari/537.36')

    # Variant 1: the cookie travels as an ordinary request header.
    headers = {
        'Cookie': '',
        'Host': "www.zhihu.com",
        'User-Agent': user_agent,
    }
    r = requests.get(url='https://zhihu.com', headers=headers)
    print(r.text)

    # Variant 2: pass the cookies through the ``cookies`` argument, which
    # requires building a RequestsCookieJar first.
    cookies = ''
    jar = requests.cookies.RequestsCookieJar()
    headers = {
        'Host': "www.zhihu.com",
        'User-Agent': user_agent,
    }
    for cookie in cookies.split(';'):
        # Browsers separate pairs with '; ', so strip the padding. Skip
        # segments without a name or '=' (including the single empty segment
        # an empty cookie string produces) instead of failing to unpack.
        key, sep, value = cookie.strip().partition('=')
        key = key.strip()
        if not sep or not key:
            continue
        jar.set(key, value.strip())
    r = requests.get(url='https://zhihu.com', headers=headers, cookies=jar)
    print(r.text)


# session会话
def high_set_session():
    """Contrast independent requests with requests sharing a ``Session``.

    Both halves first ask httpbin to set a cookie and then ask it to report
    the cookies it received, printing that report.
    """
    # Without a session: the two calls are unrelated, so the cookie set by
    # the first one is not sent with the second.
    requests.get('http://httpbin.org/cookies/set/number/123456789')
    r = requests.get('http://httpbin.org/cookies')
    print(r.text)

    # With a session: cookies persist across calls made through it. The
    # context manager closes the session once we are done with it.
    with requests.Session() as s:
        s.get('http://httpbin.org/cookies/set/number/123456789')
        r = s.get('http://httpbin.org/cookies')
    print(r.text)


# SSL证书验证
def high_ssl_sign():
    """Request 12306 over HTTPS, first unverified and then with a client cert.

    The status code of each response is printed.
    """
    target = 'https://www.12306.cn/'

    # Route warnings into the logging system; this is how the warning
    # triggered by verify=False is dealt with here.
    logging.captureWarnings(True)
    unverified = requests.get(target, verify=False)
    # Alternative way to silence the warning:
    #     from requests.packages import urllib3
    #     urllib3.disable_warnings()
    print(unverified.status_code)

    # A local certificate can also be supplied as the client certificate.
    # The key passed in ``cert`` must be in decrypted form.
    client_cert = ('path/server.crt', 'path/key')
    with_client_cert = requests.get(target, cert=client_cert)
    print(with_client_cert.status_code)


# 代理设置
def high_proxy_set():
    """Request Taobao while passing a per-scheme proxy mapping.

    The proxy URLs are empty placeholders to be filled in by the user.
    """
    # One entry per URL scheme. For a proxy that needs HTTP Basic Auth, embed
    # the credentials:   http://user:password@host:port
    # For a SOCKS proxy use the same shape with a SOCKS scheme, e.g.
    #     socks5://user:password@host:port
    # (requests needs its optional SOCKS support installed for that).
    proxy_map = dict.fromkeys(('http', 'https'), '')
    _response = requests.get('https://www.taobao.com', proxies=proxy_map)


# 超时设置
def high_set_timeout():
    """Request Taobao with a one-second timeout."""
    # ``timeout`` covers two phases, connecting and reading:
    #   * a single number is used for both the connect and the read timeout;
    #   * a (connect, read) tuple sets the two phases separately;
    #   * None, or omitting the argument, waits indefinitely.
    seconds = 1
    _response = requests.get('https://www.taobao.com', timeout=seconds)


# 身份验证
def high_person_identify():
    """Call a local server using HTTP Basic authentication and print the status."""
    # A server protected by Basic Auth is expected to answer 200 when the
    # username and password are accepted and 401 when they are rejected.
    # A plain (username, password) tuple may be passed as ``auth`` as well;
    # requests treats it as HTTPBasicAuth.
    credentials = HTTPBasicAuth('username', 'password')
    reply = requests.get('http://localhost:5000', auth=credentials)
    print(reply.status_code)


# Oauth身份验证
def high_oauth_person_identify():
    """Call a local server using OAuth 1 authentication and print the status.

    All four credential strings are placeholders to be replaced by the user.
    """
    oauth_credentials = (
        'your app key',
        'your app secret',
        'user Oauth token',
        'user Oauth token secret',
    )
    reply = requests.get('http://localhost:5000', auth=OAuth1(*oauth_credentials))
    print(reply.status_code)

猜你喜欢

转载自blog.csdn.net/hide_in_darkness/article/details/108257758