网易云课堂Excel课程数据爬取

import json
import requests
import pandas as pd
import time
# JSON body of the search request. "pageIndex" and "relativeOffset" are
# overwritten for every page by the download loop below.
payload = {
    "keyword": "Excel",
    "pageIndex": 1,
    "pageSize": 50,
    "relativeOffset": 0,
}

# Request headers: JSON request/response bodies and a desktop-browser
# User-Agent string.
headers = {
    'Accept': 'application/json',
    'content-type': 'application/json',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/61.0.3163.79 Safari/537.36'
    ),
}

# Course-search endpoint that the script POSTs to.
url = 'http://study.163.com/p/search/studycourse.json'

# Accumulator for the records collected from all pages.
fullinfo = []
# Download result pages 1 through 13, one request per page, and collect every
# record from each response into ``fullinfo``.
for i in range(1, 14):
    payload['pageIndex'] = i
    # Offset of the first record on page i, derived from the configured page
    # size instead of repeating the literal 50.
    payload['relativeOffset'] = payload['pageSize'] * (i - 1)
    time.sleep(1)  # wait one second before each request
    # The timeout keeps a stalled connection from hanging the script forever.
    r = requests.post(url, data=json.dumps(payload), headers=headers,
                      timeout=30)
    # Stop on an HTTP error status here instead of failing later with a less
    # informative JSON-decoding error or KeyError.
    r.raise_for_status()
    content = r.json()
    # Extend in place: concatenating with "+" copied the whole list per page.
    fullinfo.extend(content['result']['list'])
    print("第{}部分已加载".format(i))

# Build a table from the collected records and keep only the listed columns.
df2 = pd.DataFrame(fullinfo)
my_data = df2[[
    "productName",
    "discountPrice",
    "discountRate",
    "lectorName",
    "originalPrice",
    "description",
    "provider",
    "score",
    "scoreLevel",
]]

猜你喜欢

转载自blog.csdn.net/qq_42052864/article/details/80738647