# https://movie.douban.com/celebrity/1396751/
from bs4 import BeautifulSoup
# data is the HTML source of a page like the one linked above
def douban_subject_parser(data):
    """Print the info, rating and summary sections of a Douban page.

    Parses ``data`` with BeautifulSoup's ``html.parser`` and prints, in order:

    1. the text of ``<div id="info">``;
    2. the non-empty lines of ``<div id="interest_sectl">`` as a list, after
       removing every space and every occurrence of '引用';
    3. the text of ``<div id="link-report">`` with '©豆瓣' removed.

    A section whose ``<div>`` is absent from the page is printed as empty
    (``''`` or ``[]``) instead of raising ``AttributeError``.

    Args:
        data: HTML markup of the page (anything BeautifulSoup accepts).

    Returns:
        None. The extracted values are only printed.
    """
    soup_html = BeautifulSoup(data, "html.parser")

    def section_text(div_id):
        # find() returns None when the div is missing; treat that as no text
        # so one absent section does not abort the whole parse.
        node = soup_html.find(name='div', attrs={'id': div_id})
        return node.text if node is not None else ''

    info = section_text('info')
    print(info)

    # With all spaces stripped, each remaining line is a single token;
    # blank lines are discarded.
    score_text = section_text('interest_sectl').replace('引用', '').replace(' ', '')
    score = [token for token in score_text.split('\n') if token]
    print(score)

    report = section_text('link-report').replace('©豆瓣', '')
    print(report)