# -*- coding: utf-8 -*-
"""
Created on Mon May 07 11:05:49 2018

Station B (bilibili) crawler.

Function:
    1. Get the number of comments, the number of likes, the user's personal
       information and comments, etc.

The module is written to run unchanged on Python 2.7 and Python 3: every
``print`` call takes a single pre-formatted string and integer division is
spelled ``//``.

@author: Alis
"""
import json
# NOTE(review): the scraped source read ``import them``, which names no known
# module and looks like a machine-translation artifact of ``import os``.
# Confirm against the original script.
import os
import re
import time

# The original sent {'user-agents': 'User-Agent:Mozilla/...'}: the key is not
# a real header name and the value carried the header name as a prefix, so no
# usable User-Agent reached the server.
headers = {
    'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) '
                   'Gecko/20100101 Firefox/4.0.1'),
}

# URL templates, kept exactly as the original built them.
REGION_URL = ('https://api.bilibili.com/x/web-interface/dynamic/region'
              '?&jsonp=jsonp&pn=%d&ps=50&rid=24&_=1525679623909')
REPLY_URL = ('https://api.bilibili.com/x/v2/reply'
             '?jsonp=jsonp&pn=%s&type=1&oid=%s&sort=0&_=1496477384198')

# Page size the original hard-coded when turning a comment count into pages.
COMMENTS_PER_PAGE = 20

# Seconds to wait for a response; the original had no timeout and could hang.
REQUEST_TIMEOUT = 10


def _fetch_text(url):
    """Return the response body of a GET on *url*, sent with ``headers``."""
    # Imported lazily so the pure parsing helper (printTXT) stays importable
    # and testable on machines without the third-party ``requests`` package.
    import requests
    return requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT).text


def getavID(pn):
    """Collect the ``aid`` of every archive on region pages 1..pn.

    Each archive's aid, title and attribute are printed as they are read.

    Args:
        pn: number of region listing pages to request (pages 1 to pn).

    Returns:
        A list of the ``aid`` values in the order they were received.
    """
    avID = []
    for page in range(1, pn + 1):
        data = json.loads(_fetch_text(REGION_URL % page))
        for ac in data['data']['archives']:
            avID.append(ac['aid'])
            # Two spaces after the colon reproduce the original output of
            # ``print 'aid: ', value``.
            print('aid:  %s' % ac['aid'])
            print('title:  %s' % ac['title'])
            print('attribute:  %s' % ac['attribute'])
        # NOTE(review): indentation was lost in the scraped source; the delay
        # is assumed to apply once per page request, not once per video.
        time.sleep(2)
    return avID


def _print_comment_page(oid, page):
    """Fetch and print one comment page of *oid*; return the page count."""
    text = _fetch_text(REPLY_URL % (page, oid))
    print("Printing page " + str(page) + " comment!")
    return printTXT(text)


def getHTMLText(i, num=2):
    """Print every comment page of the video whose id is *i*.

    Page 1 is always fetched first because its response carries the total
    comment count; the remaining pages are then fetched one per second.

    Args:
        i: video id, placed in the ``oid`` query parameter.
        num: retained only for backward compatibility and ignored. The
            original used it as the bound of a probing loop and then
            overwrote it with the page count returned for page 1, which
            meant ``num <= 1`` silently fetched nothing.
    """
    total_pages = _print_comment_page(i, 1)
    for page in range(2, total_pages + 1):
        _print_comment_page(i, page)
        time.sleep(1)


def _print_comment(comment):
    """Print the author's nickname and gender, then the comment message."""
    member = comment['member']
    print(u"User nickname: %s gender %s" % (member['uname'], member['sex']))
    print(u'comment information %s' % comment['content']['message'])


def printTXT(text):
    """Print the comments in one reply-API response and return the page count.

    Args:
        text: JSON text of a reply-API response.

    Returns:
        The number of comment pages as an ``int``, never less than 1. It is
        the ceiling of ``data.page.count / 20``; the original's
        ``count / 20 + 1`` reported one page too many whenever the count was
        an exact multiple of 20 and produced a float on Python 3.
    """
    # A missing or null ``data`` / ``replies`` / ``page`` value is treated as
    # "no comments"; the original raised TypeError on ``len(None)`` or when
    # iterating None.
    payload = json.loads(text).get('data') or {}
    for comment in payload.get('replies') or []:
        _print_comment(comment)
        for child in comment.get('replies') or []:
            _print_comment(child)
    count = (payload.get('page') or {}).get('count') or 0
    # -(-a // b) is integer ceiling division.
    return max(1, -(-count // COMMENTS_PER_PAGE))


def main():
    """Crawl the first region page and print the comments of each video."""
    # time.clock() was removed in Python 3.8; time.time() gives wall-clock
    # time on every supported version.
    begin = time.time()
    # An explicit loop is required: on Python 3 ``map`` is lazy, so the
    # original ``map(getHTMLText, avid)`` never called the function.
    for aid in getavID(1):
        getHTMLText(aid)
    end = time.time()
    print('cost time is:  %s s' % round(end - begin, 3))


if __name__ == "__main__":
    main()
# bilibili website crawler
# Guess you like
# Origin http://43.154.161.224:23101/article/api/json?id=325682827&siteId=291194637
# Recommended
# Ranking