Crawler practice: scraping an actor's filmography from Douban

Preface

For an official-account article, I need to crawl a single page listing an actor's works together with the year of each work
Insert picture description here

Code

import time
import urllib.request
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Start a Chrome session. It is left open on purpose so that later steps can
# keep working with the page loaded below.
browser = webdriver.Chrome()
headers = {  # Browser-like identity header to send along with plain HTTP requests
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
}
# Works page of one Douban celebrity; the query string asks for sorting by
# votes and for the picture layout.
browser.get("https://movie.douban.com/celebrity/1050210/movies?sortby=vote&format=pic")

# Every work title is the link inside an <h6> heading of the content area.
info_list = browser.find_elements(By.XPATH, "//div[@id='content']//h6/a")

for info in info_list:
    # Look the year up relative to the title's own heading instead of zipping
    # two independently collected lists: a heading without a year span would
    # otherwise shift every following title/year pair.
    year_spans = info.find_elements(By.XPATH, "../span[1]")
    year = year_spans[0].text if year_spans else ""
    print(info.text, year)

Got it.
Insert picture description here
Next, download the posters.

# Download the poster image of every work shown on the currently loaded page
# and save the files as D:/base/<index>.jpg.
poster_dir = Path("D:/base")
poster_dir.mkdir(parents=True, exist_ok=True)  # writing fails if the folder is missing

try:
    url_list = browser.find_elements(By.XPATH, "//a[@class='nbg']/img")
    for i, img_element in enumerate(url_list):
        src = img_element.get_attribute("src")
        if not src:
            # No usable address on this <img>; nothing to download.
            continue
        # Request the JPEG variant instead of the WebP one.
        imgurl = src.replace("webp", "jpg")
        print(imgurl)
        # Fetch with the standard library, sending the browser-like headers
        # defined at the top of the script.
        request = urllib.request.Request(imgurl, headers=headers)
        try:
            with urllib.request.urlopen(request, timeout=30) as response:
                img = response.read()
        except OSError as exc:
            # URLError/HTTPError and timeouts are OSError subclasses; one bad
            # poster should not abort the remaining downloads.
            print("Failed to download %s: %s" % (imgurl, exc))
            continue
        (poster_dir / ("%s.jpg" % i)).write_bytes(img)
        print("正在下载海报%s" % i)
finally:
    # Always close Chrome and its driver process, even if a download failed.
    browser.quit()

Insert picture description here
Then you can copy the results into the official account article

Guess you like

Origin blog.csdn.net/qq_51598376/article/details/114036871