Function definitions for extracting features from DNA sequences with a CNN and an RNN (LSTM)

# -*- coding: utf-8 -*-
# Read the data from .npy files; each data file has a matching label file.
import os
import numpy as np
import tensorflow as tf
import time
import math

batch_size = 128
path = os.getcwd()  # the .npy files are expected to live in the current working directory
keep_prob = tf.placeholder(tf.float32)  # keep probability, presumably for dropout in the later layers

# Load the examples and labels and convert them to suitable types.
def initial_data(all_data, all_label):  # all_data and all_label are .npy file names
    Data_Examples = np.load(path + '/' + all_data)
    Data_Labels = np.load(path + '/' + all_label)
    Data_Labels = tf.one_hot(Data_Labels, 2, 1, 0)  # binary labels -> one-hot
    Data_Labels = tf.to_float(Data_Labels, name='Data_Labels')
    return Data_Examples, Data_Labels
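
For reference, the loader might be invoked like this; the .npy file names below are placeholders, not names from the original post:

# Hypothetical usage -- the file names are placeholders:
train_enh, train_enh_label = initial_data('train_enhancer.npy', 'train_enhancer_label.npy')
train_pro, train_pro_label = initial_data('train_promoter.npy', 'train_promoter_label.npy')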

    
def get_batch_data(t_enh, t_enh_label, t_pro, t_pro_label):
    t_enh = tf.cast(t_enh, tf.float32)
    t_enh_label = tf.cast(t_enh_label, tf.float32)
    t_pro = tf.cast(t_pro, tf.float32)
    t_pro_label = tf.cast(t_pro_label, tf.float32)

    # Slice the four tensors in lockstep and assemble mini-batches from a queue.
    input_queue = tf.train.slice_input_producer([t_enh, t_enh_label, t_pro, t_pro_label], shuffle=False)
    t_enh_b, t_enh_label_b, t_pro_b, t_pro_label_b = tf.train.batch(
        input_queue, batch_size=batch_size, num_threads=2, capacity=20000)
    return t_enh_b, t_enh_label_b, t_pro_b, t_pro_label_b
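
Because tf.train.slice_input_producer and tf.train.batch fill queues with background threads, no batches will flow until the queue runners are started inside a session. A minimal sketch of that session-side plumbing, assuming the rest of the graph is already built (this boilerplate is not in the original post):

# Minimal session-side sketch (assumed boilerplate, not from the original post):
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # ... run training steps that consume the batch tensors here ...
    coord.request_stop()
    coord.join(threads)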

# Placeholders for the training batches #
train_enh_batch_holder = tf.placeholder(tf.float32, shape=(None, 10001, 4, 1), name='x-input')   # enhancer: one-hot DNA, length 10001 x 4 bases
train_pro_batch_holder = tf.placeholder(tf.float32, shape=(None, 10001, 4, 1), name='x2-input')  # promoter: same encoding
train_enh_label_holder = tf.placeholder(tf.float32, shape=(None, 2), name='y-input')             # one-hot labels

def cnn_1(train_batch_holder, filter_size_1, filter_number_1, pool_1_size):
    # First convolution: the filters span all 4 base channels, so the width collapses to 1.
    filters_1 = tf.Variable(tf.random_normal([filter_size_1, 4, 1, filter_number_1], dtype=tf.float32, seed=1))
    biases_1 = tf.Variable(tf.random_normal([filter_number_1], dtype=tf.float32, seed=1))
    conv_1 = tf.nn.conv2d(train_batch_holder, filters_1, strides=[1, 1, 1, 1], padding='VALID')
    bias_1 = tf.nn.bias_add(conv_1, biases_1)
    actived_conv_1 = tf.nn.relu(bias_1)
    # Max-pool along the sequence dimension only.
    pool_1 = tf.nn.max_pool(actived_conv_1, ksize=[1, pool_1_size, 1, 1], strides=[1, pool_1_size, 1, 1], padding="VALID")
    return pool_1
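
To make the shapes concrete: with illustrative hyperparameters (not values from the post), the first block maps the (None, 10001, 4, 1) input as follows:

# Hypothetical shape trace through cnn_1:
p1 = cnn_1(train_enh_batch_holder, filter_size_1=8, filter_number_1=16, pool_1_size=4)
print(p1.get_shape())  # (?, 2498, 1, 16): 10001 - 8 + 1 = 9994 positions, max-pooled by 4 -> 2498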
    
def cnn_2(pool_1, filter_size_2, filter_number_1, filter_number_2, pool_2_size):
    # Second convolution: the width is already 1, so the filters only slide along the sequence.
    filters_2 = tf.Variable(tf.random_normal([filter_size_2, 1, filter_number_1, filter_number_2], dtype=tf.float32, seed=1))
    biases_2 = tf.Variable(tf.random_normal([filter_number_2], dtype=tf.float32, seed=1))
    conv_2 = tf.nn.conv2d(pool_1, filters_2, strides=[1, 1, 1, 1], padding='VALID')
    bias_2 = tf.nn.bias_add(conv_2, biases_2)
    actived_conv_2 = tf.nn.relu(bias_2)
    pool_2 = tf.nn.max_pool(actived_conv_2, ksize=[1, pool_2_size, 1, 1], strides=[1, pool_2_size, 1, 1], padding="VALID")
    return pool_2
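
Stacking the second block onto the first shrinks the length further (same illustrative values as above):

# Continuing the hypothetical shape trace through cnn_2:
p2 = cnn_2(p1, filter_size_2=8, filter_number_1=16, filter_number_2=32, pool_2_size=4)
print(p2.get_shape())  # (?, 622, 1, 32): 2498 - 8 + 1 = 2491, max-pooled by 4 -> 622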
    
def cat_cnn(pool_2_enh, pool_2_pro):  # concatenate the enhancer and promoter features for the RNN
    # Join the two feature maps along the sequence (length) dimension.
    pool_cat = tf.concat([pool_2_enh, pool_2_pro], 1)
    pool_cat_2_ndim = pool_cat.get_shape()[1].value  # concatenated sequence length
    pool_cat_4_ndim = pool_cat.get_shape()[3].value  # number of feature channels
    # Drop the width dimension (it is 1 after the convolutions): (batch, length, 1, channels) -> (batch, length, channels)
    pool_cat_3ndim = tf.reshape(pool_cat, [-1, pool_cat_2_ndim, pool_cat_4_ndim])
    x = pool_cat_3ndim
    return x, pool_cat_2_ndim, pool_cat_4_ndim
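
Building the promoter branch the same way and concatenating, the trace continues (still illustrative values):

# Promoter branch with the same hypothetical hyperparameters, then concatenation:
p1_pro = cnn_1(train_pro_batch_holder, filter_size_1=8, filter_number_1=16, pool_1_size=4)
p2_pro = cnn_2(p1_pro, filter_size_2=8, filter_number_1=16, filter_number_2=32, pool_2_size=4)
x, n_len, n_chan = cat_cnn(p2, p2_pro)
print(x.get_shape())  # (?, 1244, 32): two (?, 622, 1, 32) maps joined along the length axis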

def rnn_1(x, pool_cat_2_ndim, pool_cat_4_ndim, filter_size_1, filter_size_2):
    n_input = pool_cat_2_ndim   # per-step input size: the concatenated sequence length
    n_step = pool_cat_4_ndim    # number of time steps: one per feature channel
    n_hidden = 128
    n_classes = 2
    weights = {'in': tf.Variable(tf.random_normal([n_input, n_hidden])),
               'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))}
    biases = {'in': tf.Variable(tf.constant(0.1, shape=[n_hidden])),
              'out': tf.Variable(tf.constant(0.1, shape=[n_classes]))}
    # (batch, length, channels) -> (batch, channels, length) so each channel becomes one time step;
    # without this transpose the reshape below would scramble the length and channel dimensions.
    x = tf.transpose(x, [0, 2, 1])
    X = tf.reshape(x, [-1, n_input])
    X_in = tf.matmul(X, weights['in']) + biases['in']  # project each step to the hidden size
    X_in = tf.reshape(X_in, [-1, n_step, n_hidden])
    # Define the LSTM cell; the scope name keys on the filter sizes so each
    # (filter_size_1, filter_size_2) configuration gets its own variables.
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)
    with tf.variable_scope("lstm_cell" + str(filter_size_1) + '_' + str(filter_size_2), reuse=None):
        init_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)
        outputs, final_state = tf.nn.dynamic_rnn(lstm_cell, X_in, initial_state=init_state, time_major=False)
    # final_state is an LSTMStateTuple (c, h); use the hidden state h for classification.
    results = tf.matmul(final_state[1], weights['out']) + biases['out']  # shape (batch, n_classes)
    dim = results.get_shape()[1].value
    return results, dim
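
Finally, the concatenated features feed the LSTM head; with the trace above, the classifier output looks like this (the filter sizes are only used to name the variable scope):

# Continuing the hypothetical trace into the LSTM head:
logits, dim = rnn_1(x, n_len, n_chan, filter_size_1=8, filter_size_2=8)
print(logits.get_shape())  # (128, 2): one two-class score vector per example in the batch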

# The above covers loading the .npy data and feeding it through the CNN and RNN. The remaining steps (wiring in the fully connected layer, computing accuracy, and so on) are routine and are omitted here.

Reposted from blog.csdn.net/super_he_pi/article/details/82720630