Small Python web-crawler applications

1. Crawling Bilibili (B station) user information

  This script crawls the personal information of the Bilibili (B station) users with UIDs 1-10, including nickname, gender, avatar, number of followers, number of video plays, and so on.

import json
import os

import requests

# Maps the numeric VIP type code found under data['vip']['type'] in the
# account-info response to the label printed for the user.
vip_type = {0: 'normal user', 1: 'small Member', 2: 'Large members'}

# HTTP headers sent with the API and avatar requests. The header names must be
# the exact 'Referer' / 'User-Agent' tokens for the server to recognise them.
headers = {
    'Referer': 'https://space.bilibili.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36',
}

def download_face(url, mid, name):
    """Download a user's avatar and save it as a JPEG file.

    The image is written to ``B station user avatar/<mid>_<name>.jpg``
    relative to the current working directory; the directory is created
    if it does not exist yet.

    Args:
        url: Address of the avatar image.
        mid: The user's UID, used as the first part of the file name.
        name: The user's nickname, used as the second part of the file name.
    """
    print('downloading the user avatar ...')
    avatar_dir = 'B station user avatar'
    # exist_ok=True replaces the separate exists()/mkdir() pair, so an
    # already-present directory is never an error.
    os.makedirs(avatar_dir, exist_ok=True)
    # .content is the raw (binary) response body, i.e. the image bytes.
    img = requests.get(url, headers=headers).content
    # Build the path from the same directory name that was just created, so
    # the file always lands inside it.
    file_path = os.path.join(avatar_dir, '{}_{}.jpg'.format(mid, name))
    with open(file_path, 'wb') as f:
        f.write(img)


def get_user_ele_info(url):
    """Fetch a user's basic profile, download the avatar and print the details.

    Reads the ``data`` object of the JSON response and prints UID, nickname,
    gender, level, VIP type, signature and official certification title.

    Args:
        url: Account-info API URL for a single user.
    """
    # Send the shared headers here as well, consistent with the other
    # request functions in this file.
    json_data = requests.get(url, headers=headers).json()['data']
    mid = json_data['mid']
    name = json_data['name']
    sex = json_data['sex']
    level = json_data['level']
    sign = json_data['sign']
    official = json_data['official']['title']
    vip_num = json_data['vip']['type']
    face_url = json_data['face']
    download_face(face_url, mid, name)
    print('user the UID: {}'.format(mid))
    print('user name: {}'.format(name))
    print('Gender: {}'.format(sex))
    print('User Level: {}'.format(level))
    # .get() yields None for a VIP code that is not in the vip_type table.
    print('User Type: {}'.format(vip_type.get(vip_num)))
    print('user signature: {}'.format(sign))
    print('user official certification: {}'.format(official))


def get_user_follow(url):
    """Fetch and print how many accounts a user follows and how many fans they have.

    Args:
        url: Relation-stat API URL for a single user.
    """
    # The response is not plain JSON: the slice drops the first 6 characters
    # and the last one before parsing. Presumably this removes a JSONP
    # wrapper of the form ``__jp3(...)`` matching the callback name in the
    # URL -- verify against a live response.
    text_data = requests.get(url, headers=headers).text[6:-1]
    # json.loads turns the remaining string into a dict.
    data = json.loads(text_data)['data']
    following = data['following']
    follower = data['follower']
    print('user Follow number: {}'.format(following))
    print('user number of fans: {}'.format(follower))


def get_user_achieve(url):
    """Fetch and print a user's achievements: likes received and video plays.

    Args:
        url: Up-stat API URL for a single user.
    """
    # As in get_user_follow, the first 6 characters and the final character
    # are stripped before parsing; presumably a ``__jp4(...)`` JSONP wrapper
    # matching the callback name in the URL -- verify against a live response.
    text_data = requests.get(url, headers=headers).text[6:-1]
    data = json.loads(text_data)['data']
    video_views = data['archive']['view']
    likes = data['likes']
    print('Like the number of users eligible: {}'.format(likes))
    print('User Video Player Number: {}'.format(video_views))


def main():
    """Crawl and print profile, follow and achievement data for UIDs 1 to 10."""
    for i in range(1, 11):
        url_ele_info = 'https://api.bilibili.com/x/space/acc/info?mid={}&jsonp=json'.format(i)
        # The callback names (__jp3 / __jp4) are 5 characters long; together
        # with the opening parenthesis that matches the 6-character prefix the
        # follow/achieve functions slice off the response.
        url_follow = 'https://api.bilibili.com/x/relation/stat?vmid={}&jsonp=jsonp&callback=__jp3'.format(i)
        url_achieve = 'https://api.bilibili.com/x/space/upstat?mid={}&jsonp=jsonp&callback=__jp4'.format(i)
        get_user_ele_info(url_ele_info)
        get_user_follow(url_follow)
        get_user_achieve(url_achieve)
        # Visual separator between users.
        print('-' * 100)


# Run the crawler only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Run result:

profile picture:

 

2.

To be continued...

Guess you like

Origin www.cnblogs.com/wangyi0419/p/12612558.html