使用 pandas 读取 JSON 文件
import io
import json
import time

import pandas as pd
from pandas.io.json import json_normalize
# Load the raw annotation JSON text once; the code below parses this string.
# The context manager closes the file handle deterministically, and the
# explicit encoding avoids depending on the platform's default codec
# (assumes the file is UTF-8, as JSON interchange text is expected to be).
with open('AgriculturalDisease_train_annotations.json', encoding='utf-8') as f:
    data_str = f.read()
# Benchmark 1: json_normalize.
# Each of the 300 iterations re-parses the JSON text and flattens the parsed
# records into a DataFrame, so the parsing cost is part of the measurement.
# NOTE(review): pandas.io.json.json_normalize is deprecated since pandas 1.0 in
# favour of pd.json_normalize -- confirm which one the installed pandas offers.
start_time = time.perf_counter()  # interval timer; unaffected by clock changes
for i in range(300):
    data_list = json.loads(data_str)
    df = json_normalize(data_list)
end_time = time.perf_counter()
print(end_time - start_time)  # the original author reported 109 seconds
# Benchmark 2: build the DataFrame by hand.
# Each of the 300 iterations re-parses the JSON text, picks the two fields of
# interest out of every record, and hands the resulting rows to pd.DataFrame.
start_time = time.perf_counter()  # interval timer; unaffected by clock changes
for i in range(300):
    data_list = json.loads(data_str)
    data = [[d["disease_class"], d["image_id"]] for d in data_list]
    df = pd.DataFrame(data, columns=["disease_class", "image_id"])
end_time = time.perf_counter()
print(end_time - start_time)  # the original author reported 22 seconds
# Benchmark 3: pd.read_json.
# Each of the 300 iterations lets pandas parse the JSON text directly.
# The text is wrapped in io.StringIO because passing a literal JSON string to
# read_json is deprecated in recent pandas; a file-like object is accepted.
start_time = time.perf_counter()  # interval timer; unaffected by clock changes
for i in range(300):
    df = pd.read_json(io.StringIO(data_str), orient='records')
end_time = time.perf_counter()
print(end_time - start_time)  # the original author reported 36 seconds
# Variant A: let pandas parse the JSON text directly.
# The text is wrapped in io.StringIO because passing a literal JSON string to
# read_json is deprecated in recent pandas; a file-like object is accepted.
df2 = pd.read_json(io.StringIO(data_str), orient='records')
# Variant B: parse with the json module and assemble the DataFrame by hand.
columns = ["disease_class", "image_id"]
data_list = json.loads(data_str)
# One row per record, holding the two selected fields in column order.
data = [[d[key] for key in columns] for d in data_list]
df = pd.DataFrame(data, columns=columns)
# Bare expression: shows the first five rows when run interactively.
df.head(5)
三种代码的输出均如下:
   disease_class                                           image_id
0              1               62fd8bf4d53a1b94fbac16738406f10b.jpg
1              1               0bdec5cccbcade6b6e94087cb5509d98.jpg
2              1               8951e940341f77c8d361c1872c67b16d.jpg
3              1               7ed158da58c451f75fb790530d6f19cc.jpg
4              1  9b7399aa-1c3c-4137-ae4e-196cd23fe573___FREC_Sc...