pandas读取json文件

pandas读取json文件

import io
import json
import time

import pandas as pd
# NOTE: this import path is deprecated since pandas 1.0; pd.json_normalize is
# the supported spelling.
from pandas.io.json import json_normalize
 
# Load the raw JSON text once; the benchmarks below re-parse this same string.
# The context manager closes the file handle promptly, and the explicit
# encoding avoids depending on the platform default.
with open('AgriculturalDisease_train_annotations.json', encoding='utf-8') as f:
    data_str = f.read()
 
# Benchmark 1: json.loads + json_normalize, repeated 300 times.
# pd.json_normalize is the supported top-level spelling since pandas 1.0; the
# pandas.io.json import path is deprecated.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time, unlike time.time() which can jump if the system clock changes.
start_time = time.perf_counter()
for _ in range(300):
    data_list = json.loads(data_str)
    df = pd.json_normalize(data_list)
end_time = time.perf_counter()
print(end_time - start_time)  # the original author reported about 109 s
 
# Benchmark 2: json.loads + hand-built rows passed to pd.DataFrame,
# repeated 300 times.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time, unlike time.time() which can jump if the system clock changes.
start_time = time.perf_counter()
for _ in range(300):
    data_list = json.loads(data_str)
    # Pull out only the two fields of interest, in column order.
    data = [[d["disease_class"], d["image_id"]] for d in data_list]
    df = pd.DataFrame(data, columns=["disease_class", "image_id"])
end_time = time.perf_counter()
print(end_time - start_time)  # the original author reported about 22 s
 
# Benchmark 3: pd.read_json on the raw text, repeated 300 times.
# Passing a literal JSON string to read_json is deprecated in recent pandas;
# wrapping the text in StringIO is the supported form and is also accepted by
# older versions.
# perf_counter is a monotonic, high-resolution clock meant for measuring
# elapsed time.
start_time = time.perf_counter()
for _ in range(300):
    df = pd.read_json(io.StringIO(data_str), orient='records')
end_time = time.perf_counter()
print(end_time - start_time)  # the original author reported about 36 s
# Build the frame once with read_json. The text is wrapped in StringIO because
# passing a literal JSON string to read_json is deprecated in recent pandas.
df2 = pd.read_json(io.StringIO(data_str), orient='records')

# Build the frame once more by hand from the parsed records.
data_list = json.loads(data_str)
data = [[d["disease_class"], d["image_id"]] for d in data_list]
df = pd.DataFrame(data, columns=["disease_class", "image_id"])
# A bare `df.head(5)` only displays in a notebook/REPL; print it so the preview
# also appears when this runs as a script.
print(df.head(5))

三种方法的输出均如下：
   disease_class  image_id
0  1              62fd8bf4d53a1b94fbac16738406f10b.jpg
1  1              0bdec5cccbcade6b6e94087cb5509d98.jpg
2  1              8951e940341f77c8d361c1872c67b16d.jpg
3  1              7ed158da58c451f75fb790530d6f19cc.jpg
4  1              9b7399aa-1c3c-4137-ae4e-196cd23fe573___FREC_Sc...

猜你喜欢

转载自blog.csdn.net/wsp_1138886114/article/details/83302339