豆瓣电影页面 Python 爬虫解析

https://movie.douban.com/celebrity/1396751/

from bs4 import BeautifulSoup

# `data` is the HTML text of a page structured like the Douban page to parse.
def douban_subject_parser(data):
    """Parse three sections out of a Douban page, print them and return them.

    The page is parsed with BeautifulSoup's ``html.parser`` and three
    ``<div>`` elements are looked up by id:

    * ``info`` -- its text is printed unchanged.
    * ``interest_sectl`` -- its text has ``'引用'`` and all spaces removed,
      is split on line breaks, and the non-empty pieces are printed as a
      list. (Presumably this is the rating box; confirm against a real page.)
    * ``link-report`` -- its text is printed with ``'©豆瓣'`` removed.
      (Presumably the synopsis; confirm against a real page.)

    A section that is missing from the page is skipped instead of raising
    ``AttributeError``; its entry in the result is ``None``.

    Args:
        data: HTML markup (anything ``BeautifulSoup`` accepts).

    Returns:
        A dict with keys ``'info'`` (str or None), ``'score'`` (list of str
        or None) and ``'report'`` (str or None).
    """
    soup_html = BeautifulSoup(data, "html.parser")

    def _section_text(element_id):
        # find() returns None when no matching <div> exists, so guard before
        # touching .text.
        node = soup_html.find(name='div', attrs={'id': element_id})
        return None if node is None else node.text

    info = _section_text('info')
    if info is not None:
        print(info)

    score = _section_text('interest_sectl')
    if score is not None:
        # Drop the '引用' label and every space first, then turn line breaks
        # into the only separator so each non-empty line becomes one token.
        flattened = score.replace('引用', '').replace(' ', '').replace('\n', ' ')
        score = [token for token in flattened.split(' ') if token != '']
        print(score)

    report = _section_text('link-report')
    if report is not None:
        report = report.replace('©豆瓣', '')
        print(report)

    return {'info': info, 'score': score, 'report': report}
发布了 23 篇原创文章 · 获赞 0 · 访问量 2026

猜你喜欢

转载自blog.csdn.net/qq_33913982/article/details/100671862