# -*- coding: utf-8 -*-
import re

import requests

# Fetch a page and decode its body using the charset advertised in the
# Content-Type response header, falling back to UTF-8 when none is given.
r = requests.get('https://mp.csdn.net/editor/html/109675834')

# The header may be missing entirely; default to '' so the regex search
# below never receives None (which would raise TypeError).
headers = r.headers.get('Content-Type', '')

# Match any charset token case-insensitively (e.g. "UTF-8", "gbk", "GB2312"),
# tolerate an optional opening quote, and use only the first occurrence
# instead of concatenating every match.
charset_match = re.search(r'charset=["\']?([\w.:-]+)', headers, re.IGNORECASE)
get_encoding = charset_match.group(1) if charset_match else ''
encoding = get_encoding or 'utf-8'

# Tell requests which codec to use for r.text, and decode the raw bytes
# ourselves with the same codec. An unknown codec name still raises
# LookupError here, as it did before.
r.encoding = encoding
html = r.content.decode(encoding)

# Print the text decoded with the detected encoding (previously the body was
# re-decoded with a hard-coded 'utf-8', ignoring the detection above).
print(html)
# In Python 3.8, str is Unicode; raw bytes are turned into text with decode().
# Windows defaults to GBK, while macOS and Linux default to UTF-8.
import re

import requests

# https://httpbin.org/get is a test endpoint.
r = requests.get('https://httpbin.org/get')

# .text / .content only give the raw body; .json() parses it, and the
# original author notes that type(res) here is <class 'dict'>.
res = r.json()

# Look the value up by key with dict.get().
requested_url = res.get('url')
print(requested_url)
# Passing form parameters with the `data` argument
import requests

# Custom request headers sent along with the POST.
headers = {'User-Agent': 'leo/1.1'}

# The form fields may be given as a dict, e.g.
#   {'name': 'leo', 'age': '22', 'yzm': 'asdn'}
# or as a tuple of (key, value) pairs; per the original note, both forms end
# up as the same form mapping in the POST.
form = (
    ('name', 'leo'),
    ('age', '22'),
)

res = requests.post(
    'https://httpbin.org/post',
    data=form,
    headers=headers,
)

# When the target returns JSON, .json() converts the body into a dict.
body = res.json()
print(body)

# Individual values are read by key with dict.get(),
# e.g. 'origin': '113.118.215.74'.
print(body.get('origin'))
# Detecting the page encoding with chardet; building URL query parameters with `params`
import chardet
import requests

# Build the query-string parameters for the GET request.
url = 'http://www.irobotq.com/website2/index.html'
params = dict(name='leo', age='123')

r = requests.get(url, params=params)
# r.url shows the final URL with the parameters appended.
print(r.url)

# Let chardet guess the encoding from the raw response bytes, then hand that
# guess to requests so r.text is decoded with it.
detected = chardet.detect(r.content)
encoding = detected.get('encoding')
r.encoding = encoding
print(r.text)
# Uploading an image to a URL
import requests

# Upload a local image as multipart/form-data and print the echoed response.
url = 'https://httpbin.org/post'

# Open the file in binary mode inside a context manager so the handle is
# closed as soon as the upload finishes (previously it was never closed).
with open("111.png", "rb") as image:
    f = {"file": image}
    res = requests.post(url, files=f)

print(res.json())
# Using a proxy IP
import re

import requests

# Route the request through a proxy and print the page, or a short message
# if the request fails.
proxies = {
    'http': 'http://182.34.34.20:9999',  # proxy address
    'https': 'http://182.34.34.20:9999',
}
url = 'http://www.baidu.com'

try:
    # Fixed typo: this was `reque.get`, which raised NameError (not caught by
    # the handler below) before any request was made.
    r = requests.get(url, timeout=3, proxies=proxies)
except requests.RequestException:
    # NOTE: the message says "request timed out", but this handler is reached
    # for any requests failure (connection error, proxy error, timeout, ...).
    print('请求超时')
else:
    # Only runs when the request succeeded: force UTF-8 and print the body.
    r.encoding = 'utf-8'
    print(r.text)