哔哩哔哩番剧列表爬取

import re
import requests
import json
#from urllib import parse
#from lxml import etree
# Request headers: present a desktop Chrome User-Agent on every HTTP request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/57.0.2987.133 Safari/537.36"
    ),
}


def _extract_initial_state(html):
    """Decode the JSON value assigned to ``__INITIAL_STATE__`` in *html*.

    The value is decoded in place with ``raw_decode`` rather than cut out
    with a regex, so semicolons, quotes and backslash escapes inside JSON
    strings cannot truncate or corrupt the payload.

    Raises:
        ValueError: If the assignment is absent, or the text following it is
            not valid JSON (``json.JSONDecodeError`` subclasses ValueError).
    """
    marker = re.search(r'__INITIAL_STATE__\s*=\s*', html)
    if marker is None:
        raise ValueError('__INITIAL_STATE__ assignment not found in page')
    # raw_decode stops at the end of the first complete JSON value and
    # ignores whatever script code follows it.
    state, _end = json.JSONDecoder().raw_decode(html, marker.end())
    return state


def bilibili_get_list(url):
    """Download a bangumi page and print its title, summary and episode list.

    The page text is expected to contain ``__INITIAL_STATE__=<json>``; the
    decoded object's ``h1Title``, ``mediaInfo`` and ``epList`` entries are
    printed to standard output.

    Args:
        url: Address of the page to download.

    Returns:
        None.

    Raises:
        ValueError: If the embedded state is missing or is not valid JSON.
        KeyError: If the decoded state lacks one of the fields read below.
        requests.RequestException: If the HTTP request fails or times out.
    """
    # A timeout keeps an unresponsive server from blocking the call forever.
    response = requests.get(url, headers=headers, timeout=10).text
    bjson = _extract_initial_state(response)

    separator = "-" * 20

    print(separator)
    print('标题:' + bjson['h1Title'])

    mediaInfo = bjson['mediaInfo']

    print(separator)
    print('简介:' + mediaInfo['evaluate'])

    # Original author's note: mediaInfo['cover'] is the cover image.
    pub = mediaInfo['pub']

    print(separator)
    print('上架时间:' + pub['timeShow'])
    # Original author's note: mediaInfo['time'] is the release time.
    newestEp = mediaInfo['newestEp']

    print(separator)
    print('状态:' + newestEp['desc'])

    for episode in bjson['epList']:
        print(episode['titleFormat'] + ':' + episode['badge'])
        # Original author's note: episode['cover'] is the episode cover.

# Example run: print the listing for bangumi season ss27992.
season_url = (
    'https://www.bilibili.com/bangumi/play/ss27992/'
    '?from=search&seid=5161697106725892816'
)
bilibili_get_list(season_url)

直接复制即可使用（大概）。

猜你喜欢

转载自www.cnblogs.com/wjbk/p/11306531.html