爬虫遇到HTTP Error 403的问题（原因：请求头缺少浏览器 User-Agent，服务器据此拒绝脚本请求；解决办法：在 requests 与图片下载请求的 headers 中加入浏览器 User-Agent）

# coding=utf-8


import urllib
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
x = 1
y = 1

def crawl(url, out_dir='C:/Users/Administrator/Desktop/alien/pachong/xnt'):
    """Fetch *url*, save its HTML, and download every <img> it contains.

    The page is written to ``<out_dir>/<y>.txt`` and each image to
    ``<out_dir>/<x>.jpg``, where ``x`` and ``y`` are module-level counters
    shared across calls.

    Args:
        url: Page to crawl.
        out_dir: Destination directory; default preserves the original
            hard-coded path.

    Raises:
        requests.HTTPError: If the page or an image answers a bad status
            (e.g. the 403 this script originally hit).
    """
    global x, y
    # Many servers answer HTTP 403 to the default python-requests
    # User-Agent (and urlretrieve sent none at all) — sending a
    # browser-like UA is the fix for the 403 this article is about.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/90.0.4430.212 Safari/537.36'
    }
    res = requests.get(url, headers=headers, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    with open(f'{out_dir}/{y}.txt', 'w', encoding="utf-8") as f:
        f.write(str(soup))
        y += 1

    print()
    for tag in soup.select('img'):
        print(tag)
        link = tag.get('src')
        print(link)
        if not link:
            # <img> without a src attribute — nothing to download.
            continue
        # Resolve relative and protocol-relative URLs against the page.
        link = urljoin(url, link)
        # Download via requests with the same browser headers instead of
        # urllib.request.urlretrieve (no UA -> 403; urllib.request was
        # also never reliably imported by `import urllib`).
        img = requests.get(link, headers=headers, timeout=30)
        img.raise_for_status()
        with open(f'{out_dir}/{x}.jpg', 'wb') as img_file:
            img_file.write(img.content)
        print(f'正在下载第{x}张图片')
        x += 1
        
# Crawl pages 1-4 of the gallery. A ValueError skips that page silently
# (original behavior); any other failure is printed and the loop goes on,
# so one bad page never aborts the whole run.
for i in range(1, 5):
    url = "https://acg.fi/hentai/23643.htm/" + str(i)

    try:
        crawl(url)
    except ValueError:
        # Deliberate best-effort skip; no binding needed (was `as f`, unused).
        continue
    except Exception as e:
        print(e)

猜你喜欢

转载自www.cnblogs.com/rener0424/p/10970096.html