使用 requests 获取 HTML
安装:
pip install requests
使用:
import requests
HTTP请求:GET、POST、PUT、DELETE、HEAD、OPTIONS
- get
res = requests.get("https://www.baidu.com")
- post
res = requests.post("https://www.baidu.com")
- put
res = requests.put("https://www.baidu.com")
- delete
res = requests.delete("https://www.baidu.com")
- head
res = requests.head("https://www.baidu.com")
- options
res = requests.options("https://www.baidu.com")
#获取文章的html
from urllib import request
import random
import time
import lxml
import re
from bs4 import BeautifulSoup as bs
def Get_Target_ip(url=r'#这里写你csdn主页的url'):
    """Download a page and build one ``Request`` per article link found on it.

    Despite the name, no IP addresses are involved: the function fetches
    ``url``, looks up ``<h4>`` elements via
    ``find_all(name='h4', attrs={'class': ''})``, and wraps the ``href`` of
    the first ``<a>`` inside each one in a ``urllib.request.Request``.

    Args:
        url: Address of the page to scan. The default is the original
            placeholder text and must be replaced with a real URL;
            ``urllib`` is expected to reject the placeholder with
            ``ValueError`` because it has no scheme.

    Returns:
        A list of ``urllib.request.Request`` objects, one per heading that
        contains a link with an ``href``. Empty when nothing matches.
    """
    # A browser-like User-Agent is sent with the page request.
    # NOTE(review): presumably needed so the site does not reject the
    # default urllib client - confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    req = request.Request(url, headers=headers)

    # Close the connection as soon as the body has been read.
    with request.urlopen(req) as response:
        html = response.read().decode('utf-8')

    soup = bs(html, 'lxml')
    hList = soup.find_all(name='h4', attrs={'class': ''})

    # The original appended to a list that was never created here, so the
    # first matching heading raised NameError.
    requestList = []
    for h in hList:
        anchor = h.find(name='a')
        if anchor is None:
            # Heading without a link: nothing to request.
            continue
        href = anchor.get('href')
        if not href:
            continue
        requestList.append(request.Request(href))
    return requestList
if __name__ == '__main__':
    # Script entry point: collect the article requests only when this file
    # is executed directly, not when it is imported.
    requestList = Get_Target_ip()
BeautifulSoup 可以用来从 HTML 中提取所需字段的文本。