Python--requests

requests库:

学习视频

小规模 爬取网页(requests库)-->中规模 爬取系列网站(Scrapy库)-->爬取全网(定制、搜索引擎)

#HTTP协议: URL是通过HTTP协议存取资源的Internet路径,一个URL对应一个数据资源
# patch:节约网络带宽
# head:获取概要信息
# post: post(字典) 形成表单 // post(字符串) 形成data
# put:
# 参数:params
#http://www.qq.com/robots.txt

import requests
# Fetch the Baidu home page. The timeout keeps the script from blocking
# indefinitely when the server never answers.
r = requests.get("http://www.baidu.com", timeout=30)
print(r.status_code)  # HTTP status code of the response; 200 means OK
# Force UTF-8 decoding of the body before printing it.
r.encoding = 'utf-8'
print(r.text)
# 通用代码:
import requests

def getHTMLText(url):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.

    Returns:
        The decoded response body, or the string ``"erro"`` when the
        request fails (invalid URL, connection problem, timeout, or a
        4xx/5xx status).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
        # Decode using the encoding detected from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures map to the sentinel; KeyboardInterrupt,
        # SystemExit and programming errors are no longer swallowed.
        # The "erro" spelling is kept so existing callers keep working.
        return "erro"

# Demo: fetch one well-formed URL and one without a scheme, printing
# whatever getHTMLText returns for each.
if __name__ == "__main__":
    for url in ("http://www.baidu.com", "baidu.com"):
        print(getHTMLText(url))
import requests
# get(): url / params / response / request
# Response attributes used below: status_code, encoding,
# apparent_encoding and text.
# The timeout keeps the script from blocking indefinitely when the server
# never answers.
r = requests.get("http://www.baidu.com", timeout=30)
print(r.status_code)  # HTTP status code of the response; 200 means OK
print(r.encoding)  # e.g. ISO-8859-1 by default, which garbles Chinese text
print(r.apparent_encoding)  # more accurate, e.g. utf-8
# Force UTF-8 decoding of the body before printing it.
r.encoding = 'utf-8'
print(r.text)

# r=requests.get(url)  #万能的
import requests
# Fetch a JD product page and print the first 1000 characters of the body.
url = "https://item.jd.com/51961258926.html"
try:
    # The timeout keeps the script from blocking indefinitely.
    r = requests.get(url, timeout=30)
    r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
    # Decode using the encoding detected from the body content.
    r.encoding = r.apparent_encoding
    print(r.text[:1000])
except requests.RequestException:
    # Only request failures are reported here; KeyboardInterrupt and
    # programming errors propagate instead of being hidden.
    print("error")


import requests
# Search Baidu for "Python": the dict is sent as the query string (?wd=Python).
kv = {'wd': 'Python'}
# The timeout keeps the script from blocking indefinitely when the server
# never answers.
r = requests.get("http://www.baidu.com/s", params=kv, timeout=30)
print(r.request.url)  # final URL that was actually requested
r.raise_for_status()  # raises HTTPError for 4xx/5xx responses
print(r.text[:1000])
发布了46 篇原创文章 · 获赞 3 · 访问量 1848

猜你喜欢

转载自blog.csdn.net/QXK_Jack/article/details/104522880
今日推荐