Beautiful Soup 是 Python 的一个库，最主要的功能是从网页（HTML 或 XML 文档）中解析并提取数据。
Beautiful Soup 会自动将输入文档转换为 Unicode 编码，并将输出文档转换为 UTF-8 编码。
#coding:utf-8
import requests
from bs4 import BeautifulSoup
def get_all_websites(url="https://www.baidu.com", timeout=10):
    """Fetch a page, print the target of every link on it, and return them.

    Args:
        url: Address of the page to scan. Defaults to the Baidu home page,
            the address this function previously hard-coded.
        timeout: Seconds to wait for the server before ``requests`` gives
            up, so a stalled connection cannot hang the caller forever.

    Returns:
        A list with the ``href`` value of every ``<a>`` tag that has one,
        in document order.

    Raises:
        requests.exceptions.RequestException: If the page cannot be
            fetched (connection failure, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    # Hand the raw bytes to Beautiful Soup so it detects the document
    # encoding itself instead of relying on the charset that requests
    # guesses from the HTTP headers.
    soup = BeautifulSoup(response.content, "html.parser")
    # find_all collects every matching tag into a list; href=True skips
    # anchors without an href attribute, which would otherwise print "None".
    links = [node["href"] for node in soup.find_all("a", href=True)]
    for link in links:
        print(link)
    return links
# Run the scraper right away: this top-level call executes whenever the file
# is run or imported.
get_all_websites()
def visit_website(web_list):
    """Send one HTTP GET request to every URL in ``web_list``.

    Args:
        web_list: Iterable of URL strings to request, visited in order.

    Returns:
        None. The responses are discarded; only issuing the requests
        matters here.
    """
    # Imported locally so the function does not depend on module-level
    # imports of these third-party packages being present.
    import urllib3
    from tqdm import tqdm

    # One pool manager is shared by all requests so connections are reused;
    # the custom User-Agent header is sent with every request.
    http = urllib3.PoolManager(num_pools=5, headers={'User-Agent': 'ABCDE'})
    # tqdm wraps the iterable to display a progress bar while iterating.
    for url in tqdm(web_list):
        http.request('GET', url)