【spider02】requests

什么是Requests?

Requests是使用Python语言编写、基于urllib3、采用Apache2 Licensed开源协议的HTTP库。
它比urllib更加方便,完全满足HTTP测试需求。

import requests

# Fetch the Baidu home page and inspect the basic parts of the response.
resp = requests.get('https://www.baidu.com/')
print(type(resp))
print(resp.status_code)
body = resp.text
print(type(body))
print(body)
print(resp.cookies)
print(resp.headers)
import requests

# One helper per HTTP verb; issue the requests in the listed order.
verb_calls = (
    (requests.post, 'http://httpbin.org/post'),
    (requests.put, 'http://httpbin.org/put'),
    (requests.delete, 'http://httpbin.org/delete'),
    (requests.head, 'http://httpbin.org/get'),
    (requests.options, 'http://httpbin.org/get'),
)
for send, url in verb_calls:
    send(url)
<Response [200]>
import requests

# Plain GET request; print the response body.
print(requests.get('http://httpbin.org/get').text)
{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.18.4"
  }, 
  "origin": "117.184.110.231", 
  "url": "http://httpbin.org/get"
}

带参数GET请求

import requests

# Query parameters written directly into the URL.
url = 'http://httpbin.org/get?name=HM&age=22'
resp = requests.get(url)
print(resp.text)
{
  "args": {
    "age": "22", 
    "name": "HM"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.18.4"
  }, 
  "origin": "117.184.110.231", 
  "url": "http://httpbin.org/get?name=HM&age=22"
}
import requests

# Pass the query parameters as a mapping through `params`.
query = dict(name='Germany', age=22)
resp = requests.get('http://httpbin.org/get', params=query)
print(resp.text)
{
  "args": {
    "age": "22", 
    "name": "Germany"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.18.4"
  }, 
  "origin": "117.184.110.231", 
  "url": "http://httpbin.org/get?name=Germany&age=22"
}

解析json

import requests
import json

resp = requests.get('http://httpbin.org/get')
print(type(resp.text))
# Parse the body twice: via the response helper and via json.loads on the text.
parsed_by_requests = resp.json()
print(parsed_by_requests)
parsed_by_json = json.loads(resp.text)
print(parsed_by_json)
# `json` is not called on this line, so the type printed is that of the
# bound method itself, not of the parsed data.
print(type(resp.json))
<class 'str'>
{'args': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'close', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.18.4'}, 'origin': '117.184.110.231', 'url': 'http://httpbin.org/get'}
{'args': {}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'close', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.18.4'}, 'origin': '117.184.110.231', 'url': 'http://httpbin.org/get'}
<class 'method'>

获取二进制数据

import requests

# Compare the two views of the same body: `.text` and `.content`.
icon = requests.get('http://github.com/favicon.ico')
as_text, as_bytes = icon.text, icon.content
print(type(as_text), type(as_bytes))
print(as_text)
print(as_bytes)
import requests

# Download the icon and save its raw bytes to disk.
response = requests.get('http://github.com/favicon.ico')
with open('favicon.ico', 'wb') as f:
    # The with-block closes the file on exit, so no explicit close() is needed.
    f.write(response.content)

添加headers

import requests

# No custom headers are sent here; Zhihu checks the browser information.
page = requests.get('http://www.zhihu.com/explore')
print(page.text)
import requests

# Send a browser-like User-Agent header along with the request.
browser_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
page = requests.get('http://www.zhihu.com/explore', headers={'User-Agent': browser_ua})
print(page.text)

基本POST请求

import requests

# Submit the fields through `data` in a POST request.
form = dict(name='Germany', age=22)
resp = requests.post('http://httpbin.org/post', data=form)
print(resp.text)
import requests

# POST the same fields, this time with a custom User-Agent header,
# and print the parsed JSON reply.
form = dict(name='Germany', age=22)
browser_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
resp = requests.post(
    'http://httpbin.org/post',
    data=form,
    headers={'User-Agent': browser_ua},
)
print(resp.json())

响应

response属性

import requests

resp = requests.get('http://www.jianshu.com')
# Print the type and value of each response attribute, in this order.
for attr in ('status_code', 'headers', 'history', 'cookies', 'url'):
    value = getattr(resp, attr)
    print(type(value), value)

状态码判断

import sys

import requests

response = requests.get('http://www.jianshu.com/hello.html')
# Report the 404; any other status ends the script.
# A plain if/else replaces the conditional expression that was used only for
# its side effects, and sys.exit() replaces the site-injected exit() helper.
if response.status_code == requests.codes.not_found:
    print("404 not found")
else:
    sys.exit()
import sys

import requests

response = requests.get('http://www.jianshu.com')
# Report success on status 200; any other status ends the script.
# A plain if/else replaces the conditional expression that was used only for
# its side effects, and sys.exit() replaces the site-injected exit() helper.
if response.status_code == 200:
    print("Requests Successfully")
else:
    sys.exit()

高级操作

文件上传

import requests

# Open the file in a with-block so the handle is closed once the upload
# request has been sent, instead of leaking it.
with open('favicon.ico', 'rb') as fh:
    response = requests.post('http://httpbin.org/post', files={'file': fh})
print(response.text)

获取cookie

import requests

resp = requests.get('http://www.zhihu.com')
jar = resp.cookies
print(jar)
# Print every cookie as name=value.
for name, val in jar.items():
    print('='.join((name, val)))

会话维持

//模拟登录

import requests

# Two independent requests: the cookie set by the first one is not seen by
# the second, much like setting and reading cookies in two different browsers.
requests.get('http://httpbin.org/cookies/set/number/123456789')
cookie_view = requests.get('http://httpbin.org/cookies')
print(cookie_view.text)
import requests

# A Session simulates several requests made from one browser, so the cookie
# set by the first request is available to the second.
# Using it as a context manager closes the session when the block ends.
with requests.Session() as s:
    s.get('http://httpbin.org/cookies/set/number/123456789')
    response = s.get('http://httpbin.org/cookies')
print(response.text)

证书验证

import requests

# Per the original note: 12306's certificate is expired, so this request
# aborts the program.
status = requests.get('https://www.12306.cn').status_code
print(status)
import requests
import urllib3

# Silence only the warning raised for unverified HTTPS requests rather than
# every urllib3 warning; urllib3 is imported directly instead of through the
# requests.packages compatibility alias.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# verify=False turns certificate verification off for this request.
response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)
import requests

# Connect using a local certificate, given as a pair of file paths.
client_cert = ('/path/server.crt', '/path/key')
resp = requests.get('https://www.12306.cn', cert=client_cert)
print(resp.status_code)

代理设置

import requests

# Proxy settings keyed by URL scheme, passed through `proxies`.
proxy_map = dict(
    http='http://127.0.0.1:9743',
    https='https://127.0.0.1:9743',
)
resp = requests.get("https://www.taobao.com", proxies=proxy_map)
print(resp.status_code)
# Proxy that requires a username and password.
import requests

# Credentials are embedded in the proxy URL as user:password@host:port
# ('user' and 'password' are placeholders).
proxy_url = 'http://user:password@127.0.0.1:9743/'
# The target URL below is https, so an 'https' entry is required for the
# proxy to actually be used for it.
proxies = {
    'http': proxy_url,
    'https': proxy_url,
}

response = requests.get('https://www.taobao.com', proxies=proxies)
print(response.status_code)
pip install 'requests[socks]'#socks代理
import requests

# Use the same SOCKS5 proxy for both http and https URLs.
socks_proxy = 'socks5://127.0.0.1:9743'
proxy_map = {'http': socks_proxy, 'https': socks_proxy}

resp = requests.get("https://www.taobao.com", proxies=proxy_map)
print(resp.status_code)

超时设置

import requests
from requests.exceptions import Timeout

try:
    response = requests.get("http://httpbin.org/get", timeout=0.2)
except Timeout:
    # Timeout is the common base of ConnectTimeout and ReadTimeout. Catching
    # only ReadTimeout lets a connect timeout escape as an unhandled
    # exception, so "Timeout" would never be printed in that case.
    print("Timeout")
else:
    print(response.status_code)

认证设置

import requests
from requests.auth import HTTPBasicAuth

# Authenticate with an explicit HTTPBasicAuth object.
credentials = HTTPBasicAuth('user', '123')
reply = requests.get('http://120.27.34.24:9001', auth=credentials)
print(reply.status_code)
import requests

# Same request, with the credentials passed as a (user, password) tuple.
credentials = ('user', '123')
reply = requests.get('http://120.27.34.24:9001', auth=credentials)
print(reply.status_code)

异常处理

import requests
from requests.exceptions import ReadTimeout, ConnectionError, RequestException

# Handlers are tried from top to bottom; the first matching one prints its
# message. The status code is printed only when no exception was raised.
try:
    resp = requests.get('http://httpbin.org/get', timeout=0.1)
except ReadTimeout:
    print('Timeout')
except ConnectionError:
    print("Connection error")
except RequestException:
    print("Error")
else:
    print(resp.status_code)
Connection error

猜你喜欢

转载自blog.csdn.net/cs_xuhuan/article/details/82860272
今日推荐