Crawling JSON data with Python: an example

This tutorial walks through an example of crawling JSON data with Python.


This article takes crawling the AI flow conversion rate (AI流转率) data as its example.

The response returned by the crawl address is JSON, and the item marked with the red box in the screenshot below is the AI flow conversion rate value:

[Screenshot: JSON response with the AI flow conversion rate value highlighted]
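For orientation, the only part of the response the code below relies on is the path data -> interCrowdInfo -> [index] -> toCntPercent. Here is a hypothetical, heavily simplified sketch of such a response; all other keys and all values are made up for illustration:

import json

# Illustrative only: real responses contain more fields and real percentages
sample_text = '''
{
  "data": {
    "interCrowdInfo": [
      {"toCrowdId": 3312, "toCntPercent": 0.0},
      {"toCrowdId": 3313, "toCntPercent": 12.34}
    ]
  }
}
'''

sample = json.loads(sample_text)
print(sample['data']['interCrowdInfo'][1]['toCntPercent'])  # -> 12.34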

The implementation code is as follows:

import requests
import json
import csv

# Crawl target URL; the two {} placeholders are the begin/end dates filled in below
url = 'https://databank.yushanfang.com/api/ecapi?path=/databank/crowdFullLink/flowInfo&fromCrowdId=3312&beginTheDate=201810{}&endTheDate=201810{}&toCrowdIdList[0]=3312&toCrowdIdList[1]=3313&toCrowdIdList[2]=3314&toCrowdIdList[3]=3315'

# Request headers; the cookie carries the logged-in session
headers = {
    'Host': 'databank.yushanfang.com',
    'Referer': 'https://databank.yushanfang.com/',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    'Cookie': '_tb_token_=iNkDeJLdM3MgvKjhsfdW; bs_n_lang=zh_CN; cna=aaj1EViI7x0CATo9kTKvjzgS; ck2=072de851f1c02d5c7bac555f64c5c66d; c_token=c74594b486f8de731e2608cb9526a3f2; an=5YWo5qOJ5pe25Luj5a6Y5pa55peX6Iiw5bqXOnpmeA%3D%3D; lg=true; sg=\"=19\"; lvc=sAhojs49PcqHQQ%3D%3D; isg=BPT0Md7dE_ic5Ie3Oa85RxaMxbLK3UqJMMiN6o5VjH8C-ZRDtt7aRXb3fXGEAVAP',
}

rows = []
for n in range(20, 31):
    row = []
    row.append(n)
    for m in range(21, 32):
        if m < n + 1:
            row.append("")
        else:
            # Format the request URL with the current begin/end dates
            reqUrl = url.format(n, m)

            # Print the URL of this request
            print(reqUrl)

            # Send the request and read the response body
            response = requests.get(url=reqUrl, headers=headers, verify=False)
            text = response.text

            # Print the response content of this request
            print(text)

            # Parse the response body into a JSON object
            jsonobj = json.loads(text)

            # Pull the wanted value out of the JSON object
            toCntPercent = jsonobj['data']['interCrowdInfo'][1]['toCntPercent']

            # Append it to the row data
            row.append(str(toCntPercent) + "%")

    # Save the row
    rows.append(row)

# Header row for the export ('AI流转率' = AI flow conversion rate)
header = ['AI流转率', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31']

# Write the header and the crawled rows to a CSV file
# (newline='' keeps csv.writer from inserting blank lines on Windows)
with open('D:\\res\\pachong\\tmall.csv', 'w', newline='', encoding='gb18030') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(header)
    f_csv.writerows(rows)
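Note that requests.get(..., verify=False) skips TLS certificate verification, and current requests versions emit an InsecureRequestWarning for every such call. If the warnings clutter the printed output, they can be silenced through urllib3; a minimal sketch, where headers and reqUrl refer to the variables defined in the script above:

import requests
import urllib3

# Silence the InsecureRequestWarning raised for each request made with verify=False
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Optionally reuse one Session so the headers (and the cookie) are set only once
session = requests.Session()
session.headers.update(headers)               # headers dict from the script above
response = session.get(reqUrl, verify=False)  # reqUrl as formatted in the loop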
The same crawl can also be done with the standard library alone, using urllib.request instead of requests (here certificate verification is switched off globally through the ssl module):

import csv
import json
import ssl
import urllib.request

# Crawl target URL; the two {} placeholders are the begin/end dates filled in below
url = 'https://databank.yushanfang.com/api/ecapi?path=/databank/crowdFullLink/flowInfo&fromCrowdId=3312&beginTheDate=201810{}&endTheDate=201810{}&toCrowdIdList[0]=3312&toCrowdIdList[1]=3313&toCrowdIdList[2]=3314&toCrowdIdList[3]=3315'

# Do not verify TLS certificates
ssl._create_default_https_context = ssl._create_unverified_context

# Request headers; the cookie carries the logged-in session
headers = {
    'Host': 'databank.yushanfang.com',
    'Referer': 'https://databank.yushanfang.com/',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    'Cookie': '_tb_token_=iNkDeJLdM3MgvKjhsfdW; bs_n_lang=zh_CN; cna=aaj1EViI7x0CATo9kTKvjzgS; ck2=072de851f1c02d5c7bac555f64c5c66d; c_token=c74594b486f8de731e2608cb9526a3f2; an=5YWo5qOJ5pe25Luj5a6Y5pa55peX6Iiw5bqXOnpmeA%3D%3D; lg=true; sg=\"=19\"; lvc=sAhojs49PcqHQQ%3D%3D; isg=BPT0Md7dE_ic5Ie3Oa85RxaMxbLK3UqJMMiN6o5VjH8C-ZRDtt7aRXb3fXGEAVAP',
}

rows = []
n = 20
while n < 31:
    row = []
    row.append(n)

    m = 21
    while m < 32:
        if m < n + 1:
            row.append("")
        else:
            # Format the request URL with the current begin/end dates
            reqUrl = url.format(n, m)

            # Print the URL of this request
            print(reqUrl)

            # Send the request and read the response body
            request = urllib.request.Request(url=reqUrl, headers=headers)
            response = urllib.request.urlopen(request)
            text = response.read().decode('utf8')

            # Print the response content of this request
            print(text)

            # Parse the response body into a JSON object
            jsonobj = json.loads(text)

            # Pull the wanted value out of the JSON object
            toCntPercent = jsonobj['data']['interCrowdInfo'][1]['toCntPercent']

            # Append it to the row data
            row.append(str(toCntPercent) + "%")

        m = m + 1

    # Save the row
    rows.append(row)
    n = n + 1

# Header row for the export ('AI流转率' = AI flow conversion rate)
header = ['AI流转率', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31']

# Write the header and the crawled rows to a CSV file
# (newline='' keeps csv.writer from inserting blank lines on Windows)
with open('D:\\res\\pachong\\tmall.csv', 'w', newline='', encoding='gb18030') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(header)
    f_csv.writerows(rows)
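A note on the ssl line above: replacing ssl._create_default_https_context disables certificate checking for every HTTPS connection in the process. If that is too broad, a dedicated context can be passed to each urlopen call instead; a sketch, reusing the reqUrl and headers variables defined above:

import ssl
import urllib.request

# Build one context that skips hostname and certificate checks,
# instead of patching the module-wide default
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

request = urllib.request.Request(url=reqUrl, headers=headers)  # reqUrl/headers as defined above
response = urllib.request.urlopen(request, context=ctx)
text = response.read().decode('utf8')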

The exported content looks like this:

[Screenshot: contents of the exported tmall.csv file]
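To double-check the export without opening the file in Excel, it can be read back with the csv module; a small sketch using the same path and encoding as above:

import csv

with open('D:\\res\\pachong\\tmall.csv', 'r', newline='', encoding='gb18030') as f:
    for row in csv.reader(f):
        print(row)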
