Python读写mat文件:txt转mat,以及5.0版mat转7.3版mat
读取mat文件
最近帮博士师兄处理数据集。场景是:他已经下载好了开源数据集,但需要把它处理成自己实验所用的数据格式,具体有两个任务:
1.从6个txt文件(分别是训练集和测试集的图像特征、文字特征和对应的标签)中读取数据,并写入mat文件
2.从6个mat(分别是训练集和测试集的图像特征、文字特征和对应的标签)文件读取制作好的数据,并合并写入同一个mat文件(源mat是5.0版本的,而目标是7.3版本的)
txt → mat
import numpy as np
# array=numpy.loadtxt(r'E:\NUSWIDE\training_data_NUS_WIDE\feature\train_img.txt')
# print(array.size)
from numpy import *
import hdf5storage
import scipy.io as sio
def I_tr(path, n_cols=4096):
    """Load a whitespace-separated text file of floats into a NumPy matrix.

    Every non-blank line of the file becomes one row of the result. Values
    are split on any run of whitespace, so a trailing space or a line break
    at the end of a line does not produce a spurious extra column.

    The row/column counts of the matrix and of its transpose are printed to
    stdout, one number per line.

    Args:
        path: Path of the text file to read.
        n_cols: Number of values expected on every line. Defaults to 4096.

    Returns:
        A ``numpy.matrix`` of float64 with one row per non-blank line and
        ``n_cols`` columns.

    Raises:
        ValueError: If a line does not contain exactly ``n_cols`` values, or
            if a value cannot be parsed as a float.
    """
    # First pass: count the data lines so the matrix can be allocated once.
    with open(path) as handle:
        n_rows = sum(1 for line in handle if line.strip())

    to_mat = np.zeros((n_rows, n_cols))

    # Second pass: stream the file line by line instead of holding every
    # line in memory, and let the context manager close the handle.
    with open(path) as handle:
        row = 0
        for line_no, line in enumerate(handle, start=1):
            tokens = line.split()
            if not tokens:
                continue  # blank line: no data to store
            if len(tokens) != n_cols:
                raise ValueError(
                    "line %d of %s has %d values, expected %d"
                    % (line_no, path, len(tokens), n_cols)
                )
            to_mat[row, :] = [float(tok) for tok in tokens]
            row += 1

    # Callers receive a numpy.matrix, as before.
    to_mat = np.matrix(to_mat)
    print(len(to_mat))  # number of rows
    print(np.shape(to_mat)[1])  # number of columns
    print(len(to_mat.T))  # number of rows of the transpose
    print(np.shape(to_mat.T)[1])  # number of columns of the transpose
    return to_mat
def T_tr(path, n_cols=1000):
    """Load a whitespace-separated text file of integers into a NumPy matrix.

    Every non-blank line of the file becomes one row of the result. Each
    value is parsed with ``int`` and stored in a float64 matrix.

    The shapes of the transposed matrix and of the matrix are printed to
    stdout on one line.

    Args:
        path: Path of the text file to read.
        n_cols: Number of values expected on every line. Defaults to 1000.

    Returns:
        A ``numpy.matrix`` of float64 with one row per non-blank line and
        ``n_cols`` columns.

    Raises:
        ValueError: If a line does not contain exactly ``n_cols`` values, or
            if a value cannot be parsed as an integer.
    """
    # First pass: count the data lines so the matrix can be allocated once.
    with open(path) as handle:
        n_rows = sum(1 for line in handle if line.strip())

    to_mat = np.zeros((n_rows, n_cols))

    # Second pass: stream the file and let the context manager close it.
    with open(path) as handle:
        row = 0
        for line_no, line in enumerate(handle, start=1):
            tokens = line.split()  # splits on any whitespace, newline included
            if not tokens:
                continue  # blank line: no data to store
            if len(tokens) != n_cols:
                raise ValueError(
                    "line %d of %s has %d values, expected %d"
                    % (line_no, path, len(tokens), n_cols)
                )
            to_mat[row, :] = [int(tok) for tok in tokens]
            row += 1

    # Callers receive a numpy.matrix, as before.
    to_mat = np.matrix(to_mat)
    print(np.shape(to_mat.T), np.shape(to_mat))
    return to_mat
def L_tr(path, n_cols=10):
    """Load a whitespace-separated text file of integers into a NumPy matrix.

    Every non-blank line of the file becomes one row of the result. Each
    value is parsed with ``int`` and stored in a float64 matrix.

    The matrix itself, followed by the row/column counts of the matrix and
    of its transpose, is printed to stdout.

    Args:
        path: Path of the text file to read.
        n_cols: Number of values expected on every line. Defaults to 10.

    Returns:
        A ``numpy.matrix`` of float64 with one row per non-blank line and
        ``n_cols`` columns.

    Raises:
        ValueError: If a line does not contain exactly ``n_cols`` values, or
            if a value cannot be parsed as an integer.
    """
    # First pass: count the data lines so the matrix can be allocated once.
    with open(path) as handle:
        n_rows = sum(1 for line in handle if line.strip())

    to_mat = np.zeros((n_rows, n_cols))

    # Second pass: stream the file and let the context manager close it.
    with open(path) as handle:
        row = 0
        for line_no, line in enumerate(handle, start=1):
            tokens = line.split()  # splits on any whitespace, newline included
            if not tokens:
                continue  # blank line: no data to store
            if len(tokens) != n_cols:
                raise ValueError(
                    "line %d of %s has %d values, expected %d"
                    % (line_no, path, len(tokens), n_cols)
                )
            to_mat[row, :] = [int(tok) for tok in tokens]
            row += 1

    # Callers receive a numpy.matrix, as before.
    to_mat = np.matrix(to_mat)
    print(to_mat)
    print(len(to_mat))  # number of rows
    print(np.shape(to_mat)[1])  # number of columns
    print(len(to_mat.T))  # number of rows of the transpose
    print(np.shape(to_mat.T)[1])  # number of columns of the transpose
    return to_mat
def WriteMatlab(data_np, VarName, FileName):
    """Write one variable to a file through ``hdf5storage.write``.

    Args:
        data_np: The value to store.
        VarName: Name under which ``data_np`` is stored (the dictionary key).
        FileName: Path of the output file.
    """
    # hdf5storage.write takes a {name: value} mapping; here it has one entry.
    hdf5storage.write(
        {VarName: data_np},
        filename=FileName,
        matlab_compatible=True,
    )
if __name__ == '__main__':
    # Parse the six text files. The validation_* files are stored under the
    # test keys ('*_te') of the output.
    image_train = I_tr(r'E:\NUSWIDE\training_data_NUS_WIDE\feature\train_img.txt')
    image_test = I_tr(r'E:\NUSWIDE\training_data_NUS_WIDE\feature\validation_img.txt')
    text_train = T_tr(r'E:\NUSWIDE\training_data_NUS_WIDE\feature\train_txt.txt')
    text_test = T_tr(r'E:\NUSWIDE\training_data_NUS_WIDE\feature\validation_txt.txt')
    label_train = L_tr(r'E:\NUSWIDE\training_data_NUS_WIDE\list\train_label.txt')
    label_test = L_tr(r'E:\NUSWIDE\training_data_NUS_WIDE\list\validation_label.txt')

    # Variable name -> matrix; every entry becomes one variable in the file.
    database = {
        'I_tr': image_train,
        'T_tr': text_train,
        'L_tr': label_train,
        'I_te': image_test,
        'T_te': text_test,
        'L_te': label_test,
    }
    hdf5storage.write(database, filename='Database.mat', matlab_compatible=True)
    print('mat file saved success')
其中,matcontent是要存入mat文件的内容,本质上是一个字典:key是字符串(变量名),对应的value是矩阵。最重要的是,读取txt文件中的文本数据时,要把空格和换行符去掉。
5.0 mat → 7.3 mat
有两种场景:
1.一个5.0 mat文件转成一个7.3 mat文件
from numpy import *
import hdf5storage
from scipy import io
def get_m(path):
    """Load a MAT file with ``io.loadmat`` and rename its variables.

    The entries '__header__', '__version__' and '__globals__' are dropped.
    Each remaining variable is stored in the result under a new key taken
    from the rename table below. A variable whose name is not in the table
    is stored under 'L_te'.

    Args:
        path: Path of the MAT file to read.

    Returns:
        A dict mapping the new key names to the loaded values.

    Raises:
        ValueError: If one of the three metadata entries is missing from the
            loaded file (raised by ``list.remove``).
    """
    loaded = io.loadmat(path)

    names = list(loaded)
    for meta_key in ('__header__', '__version__', '__globals__'):
        names.remove(meta_key)

    # Source variable name -> key used in the output dictionary.
    renamed = {
        'VDatabase': 'I_tr',
        'VTest': 'I_te',
        'YDatabase': 'T_tr',
        'YTest': 'T_te',
        'XDatabase': 'Gist_tr',
        'XTest': 'Gist_te',
        'databaseL': 'L_tr',
    }

    # Anything not in the table falls back to 'L_te'; if several names fall
    # back, the last one wins.
    return {renamed.get(name, 'L_te'): loaded[name] for name in names}
if __name__ == '__main__':
    # Read the source file, rename its variables, then write the result out
    # through hdf5storage.
    source_path = r'E:\studyfile\pycharm\vgg19_feature\YZdataset_design\flickr-25k.mat'
    converted = get_m(source_path)
    hdf5storage.write(
        converted,
        filename='flickr_database.mat',
        matlab_compatible=True,
    )
    print('mat file saved success')
2.多个5.0mat文件转成一个7.3mat文件
from numpy import *
import hdf5storage
from scipy import io
def I_tr(path):
    """Return the value of the last data variable stored in a MAT file.

    The file is read with ``io.loadmat``. Entries whose names start with a
    double underscore (such as '__header__', '__version__' and
    '__globals__') are treated as loader metadata and ignored. If several
    data variables remain, the last one in the loaded dictionary is
    returned.

    Args:
        path: Path of the MAT file to read.

    Returns:
        The value loaded for the last non-metadata variable.

    Raises:
        ValueError: If the file contains no data variable.
    """
    contents = io.loadmat(path)
    # Filter instead of list.remove so a missing or extra metadata entry
    # neither raises nor gets mistaken for data.
    names = [name for name in contents if not name.startswith('__')]
    if not names:
        raise ValueError("no data variable found in MAT file: %s" % (path,))
    return contents[names[-1]]
if __name__ == '__main__':
    # Load the single variable held by each of the six source files.
    image_train = I_tr(r'E:\NUSWIDE\pascal\train_img.mat')
    image_test = I_tr(r'E:\NUSWIDE\pascal\test_img.mat')
    text_train = I_tr(r'E:\NUSWIDE\pascal\train_txt.mat')
    text_test = I_tr(r'E:\NUSWIDE\pascal\test_txt.mat')
    label_train = I_tr(r'E:\NUSWIDE\pascal\train_img_lab.mat')
    label_test = I_tr(r'E:\NUSWIDE\pascal\test_img_lab.mat')

    print(shape(text_train))

    # Variable name -> value; every entry becomes one variable in the file.
    merged = {
        'I_tr': image_train,
        'T_tr': text_train,
        'L_tr': label_train,
        'I_te': image_test,
        'T_te': text_test,
        'L_te': label_test,
    }
    hdf5storage.write(merged, filename='pascal_database.mat', matlab_compatible=True)
    print('mat file saved success')
其中,每个5.0 mat文件除了`__header__`、`__version__`、`__globals__`这三个元信息之外,只有一个key。