会话对象

# A Session keeps cookies between requests made through it: the cookie set
# by the first call is sent back on the second one.
s = requests.Session()
s.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
r = s.get("http://httpbin.org/cookies")
print(r.text)

# Values set on the session act as defaults for every request it sends.
s = requests.Session()
s.auth = ('user', 'pass')
s.headers.update({'x-test': 'true'})

# both 'x-test' and 'x-test2' are sent
s.get('http://httpbin.org/headers', headers={'x-test2': 'true'})

s = requests.Session()

# Cookies passed to a single call are used for that call only.
r = s.get('http://httpbin.org/cookies', cookies={'from-my': 'browser'})
print(r.text)
# '{"cookies": {"from-my": "browser"}}'

# The next call on the same session no longer carries them.
r = s.get('http://httpbin.org/cookies')
print(r.text)
# '{"cookies": {}}'

# Using the session as a context manager closes it when the block exits,
# even if the request raises.
with requests.Session() as s:
    s.get('http://httpbin.org/cookies/set/sessioncookie/123456789')

请求与响应对象

r = requests.get('http://en.wikipedia.org/wiki/Monty_Python')
r.headers
{'content-length': '56170', 'x-content-type-options': 'nosniff', 'x-cache':
'HIT from cp1006.eqiad.wmnet, MISS from cp1010.eqiad.wmnet', 'content-encoding':
'gzip', 'age': '3080', 'content-language': 'en', 'vary': 'Accept-Encoding,Cookie',
'server': 'Apache', 'last-modified': 'Wed, 13 Jun 2012 01:33:50 GMT',
'connection': 'close', 'cache-control': 'private, s-maxage=0, max-age=0,
must-revalidate', 'date': 'Thu, 14 Jun 2012 12:59:39 GMT', 'content-type':
'text/html; charset=UTF-8', 'x-cache-lookup': 'HIT from cp1006.eqiad.wmnet:3128,
MISS from cp1010.eqiad.wmnet:80'}

准备的请求（Prepared Request）

from requests import Request, Session

# Build a Request by hand, prepare it, optionally adjust the prepared
# body/headers, then send it through a session.
# `url`, `data`, `headers` and the send() options are placeholders that the
# caller is expected to define.
s = Session()
req = Request(
    'GET',
    url,
    data=data,
    headers=headers,
)
prepped = req.prepare()

# do something with prepped.body
# do something with prepped.headers

resp = s.send(
    prepped,
    stream=stream,
    verify=verify,
    proxies=proxies,
    cert=cert,
    timeout=timeout,
)

print(resp.status_code)

from requests import Request, Session

# Same flow as a manual Request.prepare(), but the request is prepared by the
# session (Session.prepare_request) instead of by the Request itself.
# `url`, `data`, `headers` and the send() options are placeholders that the
# caller is expected to define.
s = Session()
req = Request(
    'GET',
    url,
    data=data,
    headers=headers,
)

prepped = s.prepare_request(req)

# do something with prepped.body
# do something with prepped.headers

resp = s.send(
    prepped,
    stream=stream,
    verify=verify,
    proxies=proxies,
    cert=cert,
    timeout=timeout,
)

print(resp.status_code)

SSL 证书验证

requests.get('https://requestb.in')
requests.exceptions.SSLError: hostname 'requestb.in' doesn't match either of '*.herokuapp.com', 'herokuapp.com'

requests.get('https://github.com', verify=True)
<Response [200]>

# verify may be given the path of a CA bundle instead of a boolean...
requests.get('https://github.com', verify='/path/to/certfile')

# ...and can be set once on a session so that it applies to its requests.
s = requests.Session()
s.verify = '/path/to/certfile'

requests.get('https://kennethreitz.org', verify=False)
<Response [200]>

客户端证书

requests.get('https://kennethreitz.org', cert=('/path/client.cert', '/path/client.key'))
<Response [200]>

s = requests.Session()
s.cert = '/path/client.cert'

requests.get('https://kennethreitz.org', cert='/wrong_path/client.pem')
SSLError: [Errno 336265225] _ssl.c:347: error:140B0009:SSL routines:SSL_CTX_use_PrivateKey_file:PEM lib

CA 证书

Requests 默认附带了一套它信任的根证书，来自于 Mozilla trust store。然而它们在每次 Requests 更新时才会更新。这意味着如果你固定使用某一版本的 Requests，你的证书有可能已经太旧了。

从 Requests 2.4.0 版之后，如果系统中装了 certifi 包，Requests 会试图使用它里边的证书。这样用户就可以在不修改代码的情况下更新他们的可信任证书。

为了安全起见，我们建议你经常更新 certifi！

响应体内容工作流

tarball_url = 'https://github.com/kennethreitz/requests/tarball/master'
# stream=True defers downloading the body until r.content is accessed, so
# the headers can be inspected first.
r = requests.get(tarball_url, stream=True)

# TOO_LONG is a size limit (in bytes) the caller is expected to define.
if int(r.headers['content-length']) < TOO_LONG:
    content = r.content
    ...

流式上传

# Pass the open file object as the body so it is streamed rather than read
# into memory. Binary mode is used because Requests may derive
# Content-Length from the file, and text mode can make that value wrong.
with open('massive-body', 'rb') as f:
    requests.post('http://some.url/streamed', data=f)

块编码请求

def gen():
    """Yield the two body chunks ``'hi'`` and ``'there'``, in that order."""
    for chunk in ('hi', 'there'):
        yield chunk

requests.post('http://some.url/chunked', data=gen())

POST 多个分块编码的文件

url = 'http://httpbin.org/post'
# Each entry is (form field name, (filename, file object, content type)).
# Repeating the field name 'images' sends both files under the same field.
multiple_files = [
    ('images', ('foo.png', open('foo.png', 'rb'), 'image/png')),
    ('images', ('bar.png', open('bar.png', 'rb'), 'image/png')),
]

r = requests.post(url, files=multiple_files)
r.text
{
…
‘files’: {‘images’: ‘data:image/png;base64,iVBORw …’}
‘Content-Type’: ‘multipart/form-data; boundary=3131623adb2043caaeb5538cc7aa0b3a’,
…
}

事件挂钩

hooks=dict(response=print_url)
def print_url(r, *args, **kwargs):
    """Response hook that prints the URL of the response it receives.

    Args:
        r: Object exposing a ``url`` attribute (the response).
        *args: Accepted and ignored.
        **kwargs: Accepted and ignored.
    """
    print(r.url)
requests.get('http://httpbin.org', hooks=dict(response=print_url))
http://httpbin.org
<Response [200]>

自定义身份验证

from requests.auth import AuthBase

class PizzaAuth(AuthBase):
    """Auth handler that stamps a request with an ``X-Pizza`` header."""

    def __init__(self, username):
        # Keep the value that __call__ later writes into the header.
        self.username = username

    def __call__(self, r):
        # Tag the request in place, then hand the same object back.
        request_headers = r.headers
        request_headers['X-Pizza'] = self.username
        return r
>>> requests.get('http://pizzabin.org/admin', auth=PizzaAuth('kenneth'))
<Response [200]>

流式请求

import json
import requests

r = requests.get('http://httpbin.org/stream/20', stream=True)

# Walk the streamed body line by line; each non-empty line is decoded from
# UTF-8 and parsed as JSON.
for line in r.iter_lines():
    if not line:
        # skip keep-alive new lines
        continue
    decoded_line = line.decode('utf-8')
    print(json.loads(decoded_line))


r = requests.get('http://httpbin.org/stream/20', stream=True)

# Fall back to UTF-8 when the response carries no encoding.
if r.encoding is None:
    r.encoding = 'utf-8'

# filter(None, ...) drops the empty lines before each one is parsed as JSON.
for line in filter(None, r.iter_lines(decode_unicode=True)):
    print(json.loads(line))

代理

import requests

# Map each URL scheme to the proxy that should carry its requests.
proxies = dict(
    http="http://10.10.1.10:3128",
    https="http://10.10.1.10:1080",
)

requests.get("http://example.org", proxies=proxies)

SOCKS

$ pip install requests[socks]
# Same scheme-to-proxy mapping as for HTTP proxies, using socks5:// URLs.
# user, pass, host and port are placeholders.
proxies = {
    'http': 'socks5://user:pass@host:port',
    'https': 'socks5://user:pass@host:port',
}

合规性

Requests 符合所有相关的规范和 RFC，这样不会为用户造成不必要的困难。但这种对规范的考虑导致一些行为对于不熟悉相关规范的人来说看似有点奇怪。

编码方式

当你收到一个响应时，Requests 会猜测响应的编码方式，用于在你调用 Response.text 方法时对响应进行解码。Requests 首先在 HTTP 头部检测是否存在指定的编码方式，如果不存在，则会使用 charade 来尝试猜测编码方式。

只有当 HTTP 头部不存在明确指定的字符集，并且 Content-Type 头部字段包含 text 值之时， Requests 才不去猜测编码方式。在这种情况下， RFC 2616 指定默认字符集必须是 ISO-8859-1 。Requests 遵从这一规范。如果你需要一种不同的编码方式，你可以手动设置 Response.encoding 属性，或使用原始的 Response.content。

HTTP动词

import requests
r = requests.get('https://api.github.com/repos/requests/requests/git/commits/a050faf084662f3a352dd1a941f2c7c9f886d4ad')
if (r.status_code == requests.codes.ok):
...     print r.headers['content-type']
...
application/json; charset=utf-8

commit_data = r.json()

print commit_data.keys()
[u’committer’, u’author’, u’url’, u’tree’, u’sha’, u’parents’, u’message’]

print commit_data[u'committer']
{u'date': u'2012-05-10T11:10:50-07:00', u'email': u'me@kennethreitz.com', u'name': u'Kenneth Reitz'}

print commit_data[u’message’]
makin’ history

verbs = requests.options(r.url)
verbs.status_code
500

verbs = requests.options(‘http://a-good-website.com/api/cats’)
print verbs.headers[‘allow’]
GET,HEAD,POST,OPTIONS

r = requests.get('https://api.github.com/repos/requests/requests/issues/482')
r.status_code
200

issue = json.loads(r.text)
print(issue[u’title’])
Feature any http verb in docs

print(issue[u’comments’])
3

r = requests.get(r.url + u’/comments’)
r.status_code
200

comments = r.json()
print comments[0].keys()
[u’body’, u’url’, u’created_at’, u’updated_at’, u’user’, u’id’]

print comments[2][u’body’]
Probably in the “advanced” section

print comments[2][u’user’][u’login’]
kennethreitz

body = json.dumps({u"body": u"Sounds great! I’ll get right on it!"})
url = u"https://api.github.com/repos/requests/requests/issues/482/comments"
r = requests.post(url=url, data=body)
r.status_code
404

from requests.auth import HTTPBasicAuth
auth = HTTPBasicAuth('fake@example.com', 'not_a_real_password')
r = requests.post(url=url, data=body, auth=auth)
r.status_code
201

content = r.json()
print(content[u'body'])
Sounds great! I'll get right on it!

print(content[u"id"])
5804413

body = json.dumps({u"body": u"Sounds great! I’ll get right on it once I feed my cat."})
url = u"https://api.github.com/repos/requests/requests/issues/comments/5804413"
r = requests.patch(url=url, data=body, auth=auth)
r.status_code
200

r = requests.head(url=url, auth=auth)
print r.headers
…
‘x-ratelimit-remaining’: ‘4995’
‘x-ratelimit-limit’: ‘5000’
…

定制动词

r = requests.request('MKCOL', url, data=data)
r.status_code
200 # Assuming your call was correct

响应头链接字段

url = 'https://api.github.com/users/kennethreitz/repos?page=1&per_page=10'
r = requests.head(url=url)
r.headers['link']
'<https://api.github.com/users/kennethreitz/repos?page=2&per_page=10>; rel="next", <https://api.github.com/users/kennethreitz/repos?page=6&per_page=10>; rel="last"'

r.links["next"]
{'url': 'https://api.github.com/users/kennethreitz/repos?page=2&per_page=10', 'rel': 'next'}

r.links["last"]
{'url': 'https://api.github.com/users/kennethreitz/repos?page=6&per_page=10', 'rel': 'last'}

传输适配器

# Register an adapter on the session for the given URL prefix.
# MyAdapter is a placeholder for a transport adapter class defined elsewhere.
s = requests.Session()
s.mount('http://www.github.com', MyAdapter())

示例: 指定的 SSL 版本

import ssl

from requests.adapters import HTTPAdapter
from requests.packages.urllib3.poolmanager import PoolManager


class Ssl3HttpAdapter(HTTPAdapter):
    """Transport adapter whose pool manager is created with SSLv3."""

    def init_poolmanager(self, connections, maxsize, block=False):
        # Forward the pool settings unchanged and pin the SSL version.
        # NOTE(review): ssl.PROTOCOL_SSLv3 may be missing on Python/OpenSSL
        # builds compiled without SSLv3 -- confirm before relying on it.
        pool_options = dict(
            num_pools=connections,
            maxsize=maxsize,
            block=block,
            ssl_version=ssl.PROTOCOL_SSLv3,
        )
        self.poolmanager = PoolManager(**pool_options)

阻塞和非阻塞

使用默认的传输适配器，Requests 不提供任何形式的非阻塞 IO。Response.content 属性会阻塞，直到整个响应下载完成。如果你需要更精细的控制，该库的数据流功能（见流式请求）允许你每次只接收响应的一小部分，不过这些调用依然是阻塞式的。

如果你对于阻塞式 IO 有所顾虑，还有很多项目可以供你使用，它们结合了 Requests 和 Python 的某个异步框架。典型的优秀例子是 grequests 和 requests-futures。

超时（timeout）

# A single number is used for both the connect and the read timeout.
r = requests.get('https://github.com', timeout=5)
# A (connect, read) tuple sets the two timeouts separately.
r = requests.get('https://github.com', timeout=(3.05, 27))
# None disables the timeout: the call waits indefinitely for a response.
r = requests.get('https://github.com', timeout=None)

爬不下来就自闭

发布了65 篇原创文章 · 获赞 41 · 访问量 4万+

私信关注

requests库高级用法

会话对象

请求与响应对象

准备的请求（Prepared Request）

SSL 证书验证

客户端证书

CA 证书

响应体内容工作流

流式上传

块编码请求

POST 多个分块编码的文件

事件挂钩

自定义身份验证

流式请求

代理

SOCKS

合规性

编码方式

HTTP动词

定制动词

响应头链接字段

传输适配器

示例: 指定的 SSL 版本

阻塞和非阻塞

超时（timeout）

猜你喜欢

requests库高级用法

会话对象

请求与响应对象

准备的请求 （Prepared Request）

SSL 证书验证

客户端证书

CA 证书

响应体内容工作流

流式上传

块编码请求

POST 多个分块编码的文件

事件挂钩

自定义身份验证

流式请求

代理

SOCKS

合规性

编码方式

HTTP动词

定制动词

响应头链接字段

传输适配器

示例: 指定的 SSL 版本

阻塞和非阻塞

超时（timeout）

猜你喜欢

准备的请求（Prepared Request）