1.获取mhd和raw
import cv2
import os
import pydicom
import numpy
import SimpleITK
# Path declarations (absolute paths on the local machine).
# rootpath: directory whose sub-folders are listed by the driver loop at the end of this section.
rootpath="E:/DcmData/xlc/Fracture_data/Me/"
PathDicom = "E:/DcmData/xlc/Fracture_data/Me/3004291153/3307885/" # folder holding the DICOM files of one series; the driver loop below reassigns this name
SaveRawDicom = "E:/DcmData/xlc/Fracture_data/mhd_raw/" # output folder for the generated .mhd and .raw files
def getSubPaths(dir):
    """Return the paths of the immediate sub-directories of ``dir``.

    Args:
        dir: Directory to scan. The name shadows the ``dir`` builtin but is
            kept so that existing keyword callers keep working.

    Returns:
        A list of ``os.path.join(dir, name)`` strings, one per sub-directory,
        sorted by name so that callers picking ``result[0]`` get the same
        folder on every run. The list is empty when ``dir`` does not exist or
        is not a directory.
    """
    if not os.path.isdir(dir):
        return []
    # os.listdir() yields entries in arbitrary order, hence the explicit sort.
    candidates = (os.path.join(dir, name) for name in sorted(os.listdir(dir)))
    return [path for path in candidates if os.path.isdir(path)]
def _list_dicom_files(root):
    """Return the paths of all files below ``root`` whose name ends in ``.dcm`` (any letter case)."""
    found = []
    for dir_name, _subdirs, file_names in os.walk(root):
        for file_name in file_names:
            if file_name.lower().endswith(".dcm"):
                found.append(os.path.join(dir_name, file_name))
    return found


def get_mhd_raw(PathDicom, SaveRawDicom):
    """Stack the DICOM slices under ``PathDicom`` into one MetaImage volume.

    The slices are ordered by the z component of ``ImagePositionPatient`` and
    written with SimpleITK as ``<SeriesInstanceUID>.mhd`` (plus the raw data
    file SimpleITK emits next to it) inside ``SaveRawDicom``. All slices are
    assumed to share the Rows/Columns of the lowest slice.

    Args:
        PathDicom: Folder searched recursively for ``*.dcm`` files.
        SaveRawDicom: Output folder; created when it does not exist.

    Raises:
        ValueError: If no ``.dcm`` file is found under ``PathDicom``.
    """
    dicom_paths = _list_dicom_files(PathDicom)
    if not dicom_paths:
        raise ValueError("no .dcm files found under %r" % (PathDicom,))

    # Header-only pass: directory order says nothing about spatial order, so
    # sort the slices by their z position before stacking them.
    # Assumes the slices are stacked along the patient z axis, as the
    # origin/spacing handling below already does.
    located = []
    for path in dicom_paths:
        header = pydicom.dcmread(path, stop_before_pixels=True)
        located.append((float(header.ImagePositionPatient[2]), path))
    located.sort()
    slice_count = len(located)

    # The lowest slice is the reference for dimensions, spacing, origin and name.
    ref_ds = pydicom.dcmread(located[0][1])
    print(ref_ds.SOPInstanceUID)
    ref_pixels = ref_ds.pixel_array

    # Distance between slice centres; SliceThickness is only a fallback because
    # it differs from the real spacing for overlapping or gapped series.
    z_first = located[0][0]
    z_last = located[-1][0]
    if slice_count > 1 and z_last > z_first:
        z_spacing = (z_last - z_first) / (slice_count - 1)
    else:
        z_spacing = float(ref_ds.SliceThickness)

    # NOTE(review): DICOM defines PixelSpacing as (row spacing, column spacing)
    # while SimpleITK expects (x, y, z). The original pairing is kept because
    # csv_ch uses the same one; the two orders only differ for non-square
    # pixels - confirm before swapping.
    spacing = (
        float(ref_ds.PixelSpacing[0]),
        float(ref_ds.PixelSpacing[1]),
        z_spacing,
    )
    origin = tuple(float(value) for value in ref_ds.ImagePositionPatient)
    series_name = ref_ds.SeriesInstanceUID

    # Allocate directly in (z, y, x) order, the layout GetImageFromArray expects.
    volume = numpy.zeros(
        (slice_count, int(ref_ds.Rows), int(ref_ds.Columns)),
        dtype=ref_pixels.dtype,
    )
    volume[0] = ref_pixels
    for index, (_z, path) in enumerate(located[1:], start=1):
        volume[index] = pydicom.dcmread(path).pixel_array

    sitk_img = SimpleITK.GetImageFromArray(volume, isVector=False)
    sitk_img.SetSpacing(spacing)
    sitk_img.SetOrigin(origin)
    os.makedirs(SaveRawDicom, exist_ok=True)
    SimpleITK.WriteImage(sitk_img, os.path.join(SaveRawDicom, series_name + ".mhd"))
# Convert the first series folder found inside every sub-folder of rootpath.
list_classes = getSubPaths(rootpath)
for case_dir in list_classes:
    lc = getSubPaths(case_dir)
    if not lc:
        # Indexing lc[0] would raise IndexError and abort the whole batch.
        print("skipping %s: no series sub-folder found" % case_dir)
        continue
    PathDicom = lc[0]
    get_mhd_raw(PathDicom, SaveRawDicom)
2.根据csv(这里是dec文件,这是解码的锅,在pandas中功能与csv一致)获取转换后的数据csv
import pandas as pd
import os
import pydicom
#import csv
import numpy as np
# Root data directory: its sub-folders are scanned by the driver loop below and
# the per-series CSV files are written into it.
rootpath='E:/DcmData/xlc/Fracture_data/Me/'
# Disabled single-folder example:
#PathDicom = 'E:/DcmData/xlc/Fracture_data/Me/3004276169/3302845/'
#candidates = os.path.join(PathDicom,'RibFracture.dec')
def getSubPaths(dir):
    """Return the paths of the immediate sub-directories of ``dir``.

    Args:
        dir: Directory to scan. The name shadows the ``dir`` builtin but is
            kept so that existing keyword callers keep working.

    Returns:
        A list of ``os.path.join(dir, name)`` strings, one per sub-directory,
        sorted by name so that callers picking ``result[0]`` get the same
        folder on every run. The list is empty when ``dir`` does not exist or
        is not a directory.
    """
    if not os.path.isdir(dir):
        return []
    # os.listdir() yields entries in arbitrary order, hence the explicit sort.
    candidates = (os.path.join(dir, name) for name in sorted(os.listdir(dir)))
    return [path for path in candidates if os.path.isdir(path)]
def dcm_rename(dir):
    """Rename every DICOM file directly inside ``dir`` to ``<SOPInstanceUID>.DCM``.

    Only regular files whose name ends in ``.dcm`` (any letter case) are
    considered. A file is left untouched when it already has the target name
    or when a different file already occupies the target name.

    Args:
        dir: Folder containing the DICOM files. Nothing happens when it does
            not exist or is not a directory.

    Returns:
        None.
    """
    if not os.path.isdir(dir):
        return
    for name in os.listdir(dir):
        if not name.lower().endswith(".dcm"):
            continue
        source = os.path.join(dir, name)
        if not os.path.isfile(source):
            continue
        # Only the header is needed, so skip decoding the pixel data.
        header = pydicom.dcmread(source, stop_before_pixels=True)
        target = os.path.join(dir, header.SOPInstanceUID + ".DCM")
        if target == source:
            continue
        # samefile() keeps case-only renames possible on case-insensitive
        # file systems while refusing to overwrite a genuinely different file.
        if os.path.exists(target) and not os.path.samefile(source, target):
            print("not renaming %s: %s already exists" % (source, target))
            continue
        os.rename(source, target)
def _parse_coords(cell):
    """Turn a ';'-separated coordinate cell into a list of floats, ignoring empty tokens."""
    if pd.isna(cell):
        return []
    return [float(token) for token in str(cell).split(';') if token.strip()]


def csv_ch(PathDicom, rootpath):
    """Convert the annotations of one series into a world-coordinate CSV.

    Reads ``RibFracture.dec`` (parsed as CSV) from ``PathDicom``. The columns
    are addressed by position: 5 is the SOPInstanceUID of the annotated
    slice, 6 the class label, 9 and 10 the ';'-separated X and Y coordinates
    of the annotation outline. For every usable row the bounding box of the
    outline is scaled by the slice's PixelSpacing and shifted by its
    ImagePositionPatient; the result is written to
    ``<rootpath>/<SeriesInstanceUID>.csv``.

    Rows are skipped when the matching ``<SOPInstanceUID>.DCM`` file is
    missing, when the class label is unknown, or when no coordinates are
    present.

    Args:
        PathDicom: Series folder holding ``RibFracture.dec`` and the renamed
            ``<SOPInstanceUID>.DCM`` files.
        rootpath: Folder that receives the generated CSV.

    Returns:
        The path of the written CSV, or ``None`` when no row was usable (in
        which case no file is written).
    """
    # The class index written to the CSV is the position of the label here.
    class_labels = ('正常', '隐匿型', '无错位', '有错位', '有骨痂', '畸形愈合')
    class_of = {label: index for index, label in enumerate(class_labels)}
    columns = ['seriesuid', 'coordX', 'coordY', 'coordZ',
               'DistanceX_mm', 'DistanceY_mm', 'class']

    candidates = os.path.join(PathDicom, 'RibFracture.dec')
    candidatesList = pd.read_csv(candidates)

    rows = []
    # Plain tuples give true positional access, independent of column names.
    for record in candidatesList.itertuples(index=False, name=None):
        dicom_file = os.path.join(PathDicom, str(record[5]) + '.DCM')
        # An annotation whose slice cannot be found is unusable.
        if not os.path.isfile(dicom_file):
            continue

        label = record[6]
        if label not in class_of:
            # Keeping the row without a class would misalign the columns.
            print("skipping annotation with unknown class %r" % (label,))
            continue

        xs = _parse_coords(record[9])
        ys = _parse_coords(record[10])
        if not xs or not ys:
            continue

        # Only header fields are read, so skip decoding the pixel data.
        ds = pydicom.dcmread(dicom_file, stop_before_pixels=True)
        # NOTE(review): DICOM defines PixelSpacing as (row spacing, column
        # spacing), so PixelSpacing[0] is formally the spacing along Y. The
        # original pairing is kept because get_mhd_raw uses the same one;
        # the two only differ for non-square pixels - confirm before swapping.
        spacing_x = float(ds.PixelSpacing[0])
        spacing_y = float(ds.PixelSpacing[1])
        origin_x = float(ds.ImagePositionPatient[0])
        origin_y = float(ds.ImagePositionPatient[1])
        origin_z = float(ds.ImagePositionPatient[2])

        min_x = min(xs) * spacing_x + origin_x
        max_x = max(xs) * spacing_x + origin_x
        min_y = min(ys) * spacing_y + origin_y
        max_y = max(ys) * spacing_y + origin_y

        rows.append((
            ds.SeriesInstanceUID,
            min_x,
            min_y,
            origin_z,
            max_x - min_x,
            max_y - min_y,
            class_of[label],
        ))

    if not rows:
        return None

    # The file is named after the series of the last usable annotation.
    csv_name = os.path.join(rootpath, rows[-1][0] + '.csv')
    dataframe = pd.DataFrame(rows, columns=columns)
    dataframe.to_csv(csv_name, index=False, sep=',')
    return csv_name
# For every sub-folder of rootpath: rename the DICOM files of its first series
# folder to <SOPInstanceUID>.DCM, then write that series' annotation CSV.
csv_path = os.path.join(rootpath, 'candidates.csv')  # NOTE(review): not used in this section
list_classes = getSubPaths(rootpath)
for case_dir in list_classes:
    lc = getSubPaths(case_dir)
    if not lc:
        # Indexing lc[0] would raise IndexError and abort the whole batch.
        print("skipping %s: no series sub-folder found" % case_dir)
        continue
    PathDicom = lc[0]
    dcm_rename(PathDicom)
    csv_ch(PathDicom, rootpath)
3.将这些csv合并
import pandas as pd
import os
import glob
# Combine every per-series CSV in the data folder into one annotations.csv
# (written to the current working directory), then delete the inputs.
annotation_columns = ['seriesuid', 'coordX', 'coordY', 'coordZ',
                      'DistanceX_mm', 'DistanceY_mm', 'class']
output_path = 'annotations.csv'

# Sorted so that the row order of the result is reproducible.
csv_files = sorted(glob.glob('E:/DcmData/xlc/Fracture_data/Me/*.csv'))

# Header-only files contribute no rows and are left out of the concat.
frames = []
for csv_file in csv_files:
    frame = pd.read_csv(csv_file)
    if not frame.empty:
        frames.append(frame)

if frames:
    # Row-wise concatenation; drop_duplicates() keeps the behaviour of the
    # former outer merge, where a row present in two files appeared once.
    df = pd.concat(frames, ignore_index=True, sort=False).drop_duplicates()
else:
    df = pd.DataFrame(columns=annotation_columns)

df_to_save = pd.DataFrame(df, columns=annotation_columns)
df_to_save.to_csv(output_path, index=False)

# Remove the inputs only after the merged file is safely on disk, and never
# remove the merged file itself if it lives in the scanned folder.
for csv_file in csv_files:
    if os.path.exists(csv_file) and not os.path.samefile(csv_file, output_path):
        os.remove(csv_file)