基于 TensorFlow 的对数几率回归

import tensorflow as tf
import os

# Model parameters used by the functions below: a 5x1 weight matrix
# initialised to zeros and a scalar bias initialised to 0.
W = tf.Variable(initial_value=tf.zeros(shape=[5, 1]), name="weights")
b = tf.Variable(initial_value=0., name="bias")


def combine_inputs(X):
    """Return the linear part of the model, ``X @ W + b``.

    The result is the raw (pre-sigmoid) score used by both ``inference``
    and ``loss``.
    """
    weighted_sum = tf.matmul(X, W)
    return weighted_sum + b


def inference(X):
    """Return the model output on ``X``: the sigmoid of the linear score."""
    linear_score = combine_inputs(X)
    return tf.sigmoid(linear_score)


def loss(X, Y):
    """Compute the mean sigmoid cross-entropy loss of the model.

    Args:
        X: Training inputs, passed to ``combine_inputs`` to obtain the logits.
        Y: Expected outputs (the target labels) for ``X``.

    Returns:
        A scalar tensor: the cross-entropy averaged over all elements.
    """
    # The un-squashed model output is the *logits*; the expected outputs are
    # the *labels*. Passing them the other way round yields a wrong loss and
    # wrong gradients.
    per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=Y, logits=combine_inputs(X))
    return tf.reduce_mean(per_example_loss)


def read_csv(batch_size, file_name, record_defaults):
    """Build the ops that read shuffled batches of rows from a CSV file.

    Args:
        batch_size: Number of rows per returned batch.
        file_name: CSV path, resolved relative to the current working
            directory.
        record_defaults: One default per column, handed to ``tf.decode_csv``;
            per the note below it also fixes each column's data type.

    Returns:
        The result of ``tf.train.shuffle_batch`` over the decoded columns.
    """
    csv_path = os.path.join(os.getcwd(), file_name)
    filename_queue = tf.train.string_input_producer([csv_path])

    # The first line of the file is skipped (header row); the record key
    # returned by the reader is not needed.
    line_reader = tf.TextLineReader(skip_header_lines=1)
    _, line = line_reader.read(filename_queue)

    # decode_csv turns the string tensor holding one text line into a tuple
    # of column tensors, using the given defaults, which also set the data
    # type of each column.
    columns = tf.decode_csv(line, record_defaults=record_defaults)

    # The batching op is what actually reads the file, loading "batch_size"
    # rows into a single tensor per column.
    return tf.train.shuffle_batch(
        columns,
        batch_size=batch_size,
        capacity=batch_size * 50,
        min_after_dequeue=batch_size,
    )


def inputs(batch_size=100):
    """Read a batch of training data X and its expected output Y.

    Rows are read from ``titanic/train.csv`` via ``read_csv``.

    Args:
        batch_size: Number of rows per batch. Defaults to 100. The same value
            is used for reading and for reshaping the labels, so the two can
            no longer drift apart.

    Returns:
        A ``(features, survived)`` pair. ``features`` has one example per row
        and five columns (first class, second class, third class, is female,
        age); ``survived`` is reshaped to ``[batch_size, 1]``.
    """
    # One default per CSV column, in file order; the defaults also set the
    # decoded type of each column.
    record_defaults = [[0.0], [0.0], [0], [""], [""], [0.0],
                       [0.0], [0.0], [""], [0.0], [""], [""]]
    # All twelve columns are named to document the file layout, although
    # only survived, pclass, sex and age are used below.
    (passenger_id, survived, pclass, name, sex, age,
     sibsp, parch, ticket, fare, cabin, embarked) = read_csv(
        batch_size, "titanic/train.csv", record_defaults)

    # Convert categorical data into 0/1 float indicators.
    is_first_class = tf.to_float(tf.equal(pclass, [1]))
    is_second_class = tf.to_float(tf.equal(pclass, [2]))
    is_third_class = tf.to_float(tf.equal(pclass, [3]))

    gender = tf.to_float(tf.equal(sex, ["female"]))

    # Pack all the features into a single matrix, then transpose so that
    # there is one example per row and one feature per column.
    features = tf.transpose(
        tf.stack([is_first_class, is_second_class, is_third_class, gender, age]))
    survived = tf.reshape(survived, [batch_size, 1])
    return features, survived


def train(total_loss):
    """Return the op that adjusts the model parameters to reduce ``total_loss``.

    Uses plain gradient descent with a fixed learning rate of 0.01.
    """
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    return optimizer.minimize(total_loss)


def evaluate(sess, X, Y):
    """Evaluate the trained model and print its accuracy.

    An output above 0.5 counts as a prediction of 1, otherwise 0; the printed
    value is the fraction of predictions equal to ``Y``.
    """
    probabilities = inference(X)
    predicted = tf.cast(probabilities > 0.5, tf.float32)
    is_correct = tf.cast(tf.equal(predicted, Y), tf.float32)
    accuracy = tf.reduce_mean(is_correct)
    print(sess.run(accuracy))


# Launch the data-flow graph in a session and run the whole pipeline:
# build inputs, loss and training op, train, then evaluate.
import time

with tf.Session() as sess:
    X, Y = inputs()

    total_loss = loss(X, Y)
    train_op = train(total_loss)

    # Initialise only after the graph is fully built, so that any variable
    # created during graph construction is covered as well.
    sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Actual number of training iterations.
        training_steps = 1000
        for step in range(training_steps):
            sess.run([train_op])
            # For debugging and learning purposes, watch how the loss
            # decreases during training.
            if step % 10 == 0:
                print("loss:", sess.run([total_loss]))
        evaluate(sess, X, Y)

        # NOTE(review): pause kept from the original script; its purpose is
        # not evident from the code - confirm whether it is still needed.
        time.sleep(5)
    finally:
        # Always stop and join the queue-runner threads, even if training
        # raised. The session itself is closed by the enclosing `with`.
        coord.request_stop()
        coord.join(threads)
数据来源：https://www.kaggle.com/c/titanic/data
基于 TensorFlow 的对数几率回归

猜你喜欢