爬取简书图片(使用BeautifulSoup)

import os
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
url_list = []
kv = {'User-Agent':'Mozilla/5.0'}
r = requests.get('https://www.jianshu.com/p/42df25cfc1ec',timeout=30,headers = kv)
# print(r.request.headers)
dome = r.text
soup = BeautifulSoup(dome,'html.parser')
# print(soup.prettify())

img_ = soup.find_all('img')
for i in img_:
    # print(i.get('src'))
    if i.get('data-original-src') != '//cdn2.jianshu.io/assets/web/nav-logo-4c7bbafe27adc892f3046e6978459bac.png' and i.get('data-original-src') != None:
        url_list.append(i.get('data-original-src'))
for i in url_list:
    image_name = i.split('/')[-1]
    response = requests.get(f'https:{i}')
    image_data = response.content
    with open(f"./image/{image_name}",'wb') as fw:
        fw.write(image_data)

猜你喜欢

转载自www.cnblogs.com/kuck/p/11325922.html