方法:使用在ImageNet上预训练好的InceptionV3网络提取瓶颈层特征,其后连接两个全连接分支,一个进行定位学习(坐标回归),一个进行分类学习。
数据集:实验室自有数据集,无法公开,现描述如下:图片为JPEG格式的CT图;另有一个txt文档,每行记录图片名、对应的肿瘤类型以及肿瘤标记坐标,坐标值共4个,分别对应肿瘤的左下角和右上角,形成方框。
代码很详细,可以作为学习参考,如果有自己的项目,下载一下InceptionV3网络,更改一下路径,根据自己的数据集格式设置数据的输入就好:
import glob
import os.path
import random

import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

# Number of nodes in the Inception-v3 bottleneck layer.
BOTTLENECK_TENSOR_SIZE = 2048
# Name of the tensor that holds the bottleneck output.
BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'
# Name of the tensor that receives the raw JPEG bytes.
JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0'
# Directory holding the downloaded Inception-v3 model.
MODEL_DIR = 'F:/dataSpace/InceptionV3/'
# Model file name inside MODEL_DIR.
MODLE_FILE = 'tensorflow_inception_graph.pb'
# Directory where computed bottleneck vectors are cached as text files.
CACHE_DIR = 'F:/pythonTest/Medical_Image/tmp/bottleneck'
# Directory holding the input images.
INPUT_DATA = 'F:/pythonTest/Medical_Image/JPEGImages'
# Label file: each line holds image name, tumor type and box coordinates.
LABEL_FILE = 'output.txt'
# Percentage of the data used for validation.
VALIDATION_PERCENTAFE = 10
# Percentage of the data used for testing.
TEST_PERCENTAGE = 10
# Training hyper-parameters.
LEARNING_RATE_Label = 0.8
LEARNING_RATE_Coordinate = 0.99
STEPS = 100000
BATCH = 50
n_classes = 2


def calvar(var):
    """Attach mean/stddev/max/min scalar summaries and a histogram to *var*."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)


def create_image_list(testing_percentage, validation_percentage):
    """Read LABEL_FILE and randomly split its images per tumor type.

    Each non-blank line of LABEL_FILE is whitespace separated:
    ``<image name> <tumor type> <coord> <coord> ...``.

    Args:
        testing_percentage: chance (out of 100) that an image goes to testing.
        validation_percentage: chance (out of 100) that an image goes to
            validation.

    Returns:
        A dict keyed by tumor type ('Benign', 'Malignancy'). Each value is a
        dict with the keys 'situation', 'training', 'testing' and
        'validation'; the three split lists hold ``[image_name, coordinates]``
        entries, e.g.
        ``{'Benign': {'situation': 'Benign',
                      'training': [['B_000001_2.jpg', [319.0, 455.0, 418.0, 546.0]]],
                      ...}}``.
    """
    label_names = ['Benign', 'Malignancy']

    # Parse the label file once instead of once per tumor type.
    records = []
    with open(LABEL_FILE) as label_file:
        for line in label_file:
            fields = line.split()
            if len(fields) < 2:
                # Blank or truncated line: nothing usable on it.
                continue
            coordinates = [float(value) for value in fields[2:]]
            records.append((fields[0], fields[1], coordinates))

    result = {}
    for situation in label_names:
        training_images = []
        validation_images = []
        testing_images = []
        for base_name, cur_situation, coordinates in records:
            if cur_situation != situation:
                continue
            entry = [base_name, coordinates]
            chance = np.random.randint(100)
            # The three splits must be mutually exclusive, hence elif: a plain
            # second `if` would also put every validation image in testing.
            if chance < validation_percentage:
                validation_images.append(entry)
            elif chance < (validation_percentage + testing_percentage):
                testing_images.append(entry)
            else:
                training_images.append(entry)
        result[situation] = {
            'situation': situation,
            'training': training_images,
            'testing': testing_images,
            'validation': validation_images,
        }
    return result


def get_image_path(image_lists, image_dir, label_name, index, category):
    """Return the path of one image plus its wrapped index.

    Args:
        image_lists: the dict produced by create_image_list.
        image_dir: root directory to join the image name onto (the image
            directory and the bottleneck cache directory differ).
        label_name: tumor type key in image_lists.
        index: requested image number; wrapped with modulo so any
            non-negative integer is valid.
        category: 'training', 'testing' or 'validation'.

    Returns:
        ``(full_path, mod_index)`` where mod_index is the wrapped index.

    Raises:
        ValueError: if the requested split holds no images.
    """
    category_list = image_lists[label_name][category]
    if not category_list:
        raise ValueError(
            'no images for label %r in category %r' % (label_name, category))
    mod_index = index % len(category_list)
    base_name = category_list[mod_index][0]
    full_path = os.path.join(image_dir, base_name)
    return full_path, mod_index


def get_bottleneck_path(image_lists, label_name, index, category):
    """Return ``(CACHE_DIR/<image name>.txt, mod_index)`` for one image."""
    full_path, mod_index = get_image_path(
        image_lists, CACHE_DIR, label_name, index, category)
    return (full_path + '.txt', mod_index)


def run_bottleneck_on_image(sess, image_data, image_data_tensor, bottleneck_tensor):
    """Run the loaded graph on one image and return its bottleneck vector.

    The raw image bytes are fed into image_data_tensor and the value of
    bottleneck_tensor, squeezed of size-1 dimensions, is returned.
    """
    bottleneck_values = sess.run(bottleneck_tensor, {image_data_tensor: image_data})
    return np.squeeze(bottleneck_values)


def get_or_create_bottleneck(sess, image_lists, label_name, index, category,
                             jpeg_data_tensor, bottleneck_tensor):
    """Return the bottleneck vector of one image, using the on-disk cache.

    If the cache file exists it is parsed; otherwise the vector is computed
    with the Inception graph and written to the cache as comma-separated text.

    Returns:
        ``(bottleneck_values, mod_index)``. bottleneck_values is a numpy array
        when freshly computed and a list of floats when read from the cache.
    """
    label_lists = image_lists[label_name]
    # A per-class directory is created here, but get_bottleneck_path stores the
    # files directly under CACHE_DIR; makedirs also creates CACHE_DIR itself.
    sub_dir_path = os.path.join(CACHE_DIR, label_lists['situation'])
    if not os.path.exists(sub_dir_path):
        os.makedirs(sub_dir_path)

    bottleneck_path, mod_index = get_bottleneck_path(
        image_lists, label_name, index, category)
    if not os.path.exists(bottleneck_path):
        image_path, mod_index = get_image_path(
            image_lists, INPUT_DATA, label_name, index, category)
        # Context manager so the image file handle is always released.
        with gfile.FastGFile(image_path, 'rb') as image_file:
            image_data = image_file.read()
        bottleneck_values = run_bottleneck_on_image(
            sess, image_data, jpeg_data_tensor, bottleneck_tensor)
        bottleneck_string = ','.join(str(x) for x in bottleneck_values)
        with open(bottleneck_path, 'w') as bottleneck_file:
            bottleneck_file.write(bottleneck_string)
    else:
        with open(bottleneck_path, 'r') as bottleneck_file:
            bottleneck_string = bottleneck_file.read()
        bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
    return bottleneck_values, mod_index


def _build_targets(n_classes, label_index, coordinates):
    """Build the one-hot label and the flattened per-class coordinate target.

    The coordinate target has ``n_classes * 4`` entries; only the four slots
    belonging to label_index are filled, the rest stay zero.
    """
    ground_truth = np.zeros(n_classes, dtype=np.float32)
    ground_truth[label_index] = 1.0
    coordinate_truth = np.zeros([n_classes, 4])
    coordinate_truth[label_index] = coordinates
    return ground_truth, coordinate_truth.reshape([n_classes * 4])


def get_random_chached_bottlenecks(sess, n_classes, image_lists, how_many, category,
                                   jpeg_data_tensor, bottleneck_tensor):
    """Draw a random batch of how_many samples from the given split.

    Returns:
        ``(bottlenecks, ground_truths, coordinate_truths)``: a list of
        bottleneck vectors, a list of one-hot labels, and a numpy array of
        shape ``(how_many, n_classes * 4)`` with the box coordinates.
    """
    bottlenecks = []
    ground_truths = []
    coordinate_truths = []
    label_names = list(image_lists.keys())
    for _ in range(how_many):
        # Pick a random class and a random image number; the number is wrapped
        # to the size of the split inside get_image_path.
        label_index = random.randrange(n_classes)
        label_name = label_names[label_index]
        image_index = random.randrange(65536)
        bottleneck, mod_index = get_or_create_bottleneck(
            sess, image_lists, label_name, image_index, category,
            jpeg_data_tensor, bottleneck_tensor)
        ground_truth, coordinate_truth = _build_targets(
            n_classes, label_index,
            image_lists[label_name][category][mod_index][1])
        bottlenecks.append(bottleneck)
        ground_truths.append(ground_truth)
        coordinate_truths.append(coordinate_truth)
    coordinate_truths = np.array(coordinate_truths)
    return bottlenecks, ground_truths, coordinate_truths


def get_test_bottlnecks(sess, image_lists, n_classes, jpeg_data_tensor, bottleneck_tensor):
    """Collect every test sample and log the image names to test_image.txt.

    Returns:
        ``(bottlenecks, ground_truths, coordinate_truths)`` with the same
        layout as get_random_chached_bottlenecks, covering all test images of
        all classes.
    """
    bottlenecks = []
    ground_truths = []
    coordinate_truths = []
    category = 'testing'
    # Open the log once, outside the class loop: reopening it in 'w' mode per
    # class would discard the names written for every earlier class.
    with open('test_image.txt', 'w') as name_log:
        for label_index, label_name in enumerate(image_lists.keys()):
            test_images = image_lists[label_name][category]
            for index in range(len(test_images)):
                bottleneck, mod_index = get_or_create_bottleneck(
                    sess, image_lists, label_name, index, category,
                    jpeg_data_tensor, bottleneck_tensor)
                ground_truth, coordinate_truth = _build_targets(
                    n_classes, label_index, test_images[mod_index][1])
                coordinate_truths.append(coordinate_truth)
                bottlenecks.append(bottleneck)
                ground_truths.append(ground_truth)
                name_log.write(test_images[mod_index][0] + '\n')
    coordinate_truths = np.array(coordinate_truths)
    return bottlenecks, ground_truths, coordinate_truths


def main(argv=None):
    """Build the two heads on top of Inception-v3, train them and test.

    One fully connected head classifies the tumor type, the other regresses
    ``n_classes * 4`` box coordinates. Summaries go to 'Inception_log' and
    checkpoints to 'ckpt_dir'.
    """
    print('开始计算....')
    logdir = 'Inception_log'
    if not os.path.exists(logdir):
        os.makedirs(logdir)

    # Read and split all images.
    image_lists = create_image_list(TEST_PERCENTAGE, VALIDATION_PERCENTAFE)

    with gfile.FastGFile(os.path.join(MODEL_DIR, MODLE_FILE), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import the graph once and grab the two tensors we need in the same call.
    bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(
        graph_def, name='',
        return_elements=[BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME])
    print('graph loaded')

    # Inputs of the new heads: bottleneck vectors plus both kinds of targets.
    bottleneck_input = tf.placeholder(
        tf.float32, [None, BOTTLENECK_TENSOR_SIZE], name='BottleneckInputPlaceholder')
    label_ground_truth_input = tf.placeholder(
        tf.float32, [None, n_classes], name='LabelGroundTruthInput')
    coordinates_ground_truth_input = tf.placeholder(
        tf.float32, [None, n_classes * 4], name='CoordinateGroundTruthInput')

    # Classification head.
    with tf.name_scope('classify_training_ops'):
        print('分类层正在计算')
        with tf.name_scope('class_layer1'):
            weights_cl1 = tf.Variable(
                tf.truncated_normal([BOTTLENECK_TENSOR_SIZE, 1000], stddev=0.001),
                name='wights')
            biases_cl1 = tf.Variable(tf.zeros([1000]), name='biases')
            logits1 = tf.matmul(bottleneck_input, weights_cl1) + biases_cl1
        with tf.name_scope('class_layer2'):
            weights_cl2 = tf.Variable(
                tf.truncated_normal([1000, n_classes], stddev=0.001), name='wights')
            biases_cl2 = tf.Variable(tf.zeros([n_classes]), name='biases')
            logits = tf.matmul(logits1, weights_cl2) + biases_cl2
        final_tensor = tf.nn.softmax(logits, name='class_label')
        # Cross-entropy classification loss.
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits, labels=label_ground_truth_input)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        tf.summary.scalar('cross_entropy_mean_loss', cross_entropy_mean)
        global_step = tf.Variable(0, trainable=False)
        learning_rate1 = tf.train.exponential_decay(
            LEARNING_RATE_Label, global_step=global_step, decay_steps=100,
            decay_rate=0.95, staircase=True)
        train_step = tf.train.AdamOptimizer(learning_rate=learning_rate1).minimize(
            cross_entropy_mean, global_step=global_step)

    # Classification accuracy.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(
            tf.argmax(final_tensor, 1), tf.argmax(label_ground_truth_input, 1))
        evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Localisation head.
    with tf.name_scope('Positioning_Fc_op'):
        print('定位层正在计算...')
        with tf.name_scope('layer1'):
            weights1 = tf.Variable(
                tf.truncated_normal([BOTTLENECK_TENSOR_SIZE, 1000], stddev=0.001),
                name='weights')
            biases1 = tf.Variable(tf.zeros([1000]), name='biases')
            score = tf.matmul(bottleneck_input, weights1) + biases1
            score1 = tf.nn.relu(score, name='final_layer1')
        with tf.name_scope('layer2'):
            # n_classes * 4 outputs: one box per class.
            weights2 = tf.Variable(
                tf.truncated_normal([1000, n_classes * 4], stddev=0.001),
                name='weights')
            biases2 = tf.Variable(tf.zeros([n_classes * 4]), name='biases')
            score2 = tf.matmul(score1, weights2) + biases2
            final_coordinates = tf.nn.relu(score2, name='coordinates')

        # Only the four coordinates of each sample's true class contribute to
        # the loss. A Python `if tf.argmax(...) == 0` cannot express this: it
        # is evaluated once at graph-build time on tensor objects, not per
        # sample. Expanding the one-hot label to a [batch, n_classes * 4] mask
        # does the selection inside the graph and works for any batch size.
        coordinate_mask = tf.reshape(
            tf.tile(tf.expand_dims(label_ground_truth_input, 2), [1, 1, 4]),
            [-1, n_classes * 4])
        squared_error = tf.square(coordinates_ground_truth_input - final_coordinates)
        # Mean over the selected coordinates; guard against an all-zero mask.
        loss = (tf.reduce_sum(coordinate_mask * squared_error)
                / tf.maximum(tf.reduce_sum(coordinate_mask), 1.0))
        tf.summary.scalar('coordinate_loss', loss)
        learning_rate2 = tf.train.exponential_decay(
            LEARNING_RATE_Coordinate, global_step=global_step, decay_steps=200,
            decay_rate=0.99, staircase=True)
        train_step_coordinate = tf.train.AdadeltaOptimizer(
            learning_rate=learning_rate2).minimize(loss=loss)

    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        ckptdir = 'ckpt_dir'
        if not os.path.exists(ckptdir):
            os.makedirs(ckptdir)
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(logdir, sess.graph)
        for i in range(STEPS):
            (train_bottlenecks, train_groud_truth,
             train_coordinates_ground_truth_input) = get_random_chached_bottlenecks(
                 sess, n_classes, image_lists, BATCH, 'training',
                 jpeg_data_tensor, bottleneck_tensor)
            summary, _, _ = sess.run(
                [merged, train_step, train_step_coordinate],
                feed_dict={
                    bottleneck_input: train_bottlenecks,
                    label_ground_truth_input: train_groud_truth,
                    coordinates_ground_truth_input: train_coordinates_ground_truth_input,
                })
            if i % 10 == 0 or i + 1 == STEPS:
                (validation_bottlenecks, validation_groud_truth,
                 validation_coordinate_truth) = get_random_chached_bottlenecks(
                     sess, n_classes, image_lists, BATCH, 'validation',
                     jpeg_data_tensor, bottleneck_tensor)
                validation_accuracy = sess.run(
                    evaluation_step,
                    feed_dict={
                        bottleneck_input: validation_bottlenecks,
                        label_ground_truth_input: validation_groud_truth,
                        coordinates_ground_truth_input: validation_coordinate_truth,
                    })
                print('Step%d:验证集正确率为%.3f' % (i, validation_accuracy * 100))
                print('Step%d:learning_rate1为%.3f' % (
                    sess.run(global_step), sess.run(learning_rate1)))
                print('Step%d:learning_rate2为%.3f' % (
                    sess.run(global_step), sess.run(learning_rate2)))
            if i % 1000 == 0:
                print('训练了%d步,保存了模型,' % i)
                saver.save(sess, ckptdir + '/model.ckpt', global_step=global_step)
            writer.add_summary(summary, i)
            # Stop early once the localisation learning rate has decayed away.
            if sess.run(learning_rate2) < 1e-3:
                break
        writer.close()

        test_bottlenecks, test_groud_truth, test_gound_coordinate = get_test_bottlnecks(
            sess, image_lists, n_classes, jpeg_data_tensor, bottleneck_tensor)
        test_accuracy = sess.run(
            evaluation_step,
            feed_dict={
                bottleneck_input: test_bottlenecks,
                label_ground_truth_input: test_groud_truth,
                coordinates_ground_truth_input: test_gound_coordinate,
            })
        # Report the accuracy measured on the test set, not the last
        # validation accuracy.
        print('最终测试集正确率为%.3f' % (test_accuracy * 100))


if __name__ == '__main__':
    tf.app.run()
网络结构如下:
使用GPU为GTX1050,跑了10万次,损失值曲线如下: