从原始文本到tensorflow1.4代码实现(一)线性回归(Linear Regression,LR)入门

从原始文本到tensorflow1.4代码实现(一)线性回归(Linear Regression,LR)

前言:

       线性回归算是比较容易理解和实现的机器学习算法了,初中x与y的函数关系: y=ax+b就是她的一个特例,只是在这里,a,b参数都是标量,x与y也是标量,现在,我们常用的就是使用矩阵W、Y或者张量W、Y来拓展它,从某种意义上来说,标量也是一种张量,只是特殊情况罢了。文绉绉的说,线性回归的目标,便是从一组包含各种特征和类别的数据集中,找到特征与结果之间的线性关系(假设数据集的特征与类别存在线性关系)。

参考:(1)把数字标签转化成onehot标签:https://blog.csdn.net/a_yangfh/article/details/77911126

           (2)使用numpy随机打散训练数据:https://blog.csdn.net/kaizhongwang/article/details/78864090

           (3)存储和读取数据(numpy、pickle):https://blog.csdn.net/cfyzcc/article/details/51804466

           (4)tensorflow实现线性回归:https://blog.csdn.net/monkey131499/article/details/52145254

代码实现:

              要跑的话直接到后面运行总代码就好

             tensorflow: 1.4

             python:  3.6

1.首先是数据处理:

        数据从这里下载:机器学习的UCI数据集:https://archive.ics.uci.edu/ml/machine-learning-databases/iris/

        Iris花的数据集,这里有75个数据,有3种花,每个花有4个特征。

        具体数据如下:

                             

代码(数据处理,将txt读取,转化为数字):

import tensorflow as tf
import numpy as np
import pickle

filename = 'E:/workspace/dataSet/Classic/sentence/iris.data.txt'
###将iris数据标签转化为number形式
def readChangeIris(filename):
    file = open(filename, 'r', encoding='gbk')
    iris_XY = []
    label_dict = {}
    while file.readline():
        text = file.readline()
        if '' is not text:
            texts = text.strip().split(',')
            mid_itis_trainX = []
            for i in range(len(texts)-1):
                wordNumber = float(texts[i]) #注意,是float而不是int
                mid_itis_trainX.append(wordNumber)

            irisY = texts[len(texts)-1].strip()
            if irisY not in label_dict : #构建字典, 存储标签对应的类别数值
                label_dict[irisY] = len(label_dict) + 1

            irisLabel = label_dict[irisY]
            mid_itis_trainX.append(irisLabel)
            iris_XY.append(mid_itis_trainX)

    file.close()
    return iris_XY, label_dict

2.数据存储(numpy,pickle):

###写成文件形式
def writePickle():
    iris_XY, label_dict = readChangeIris(filename)
    iris_XY_numpy = np.array(iris_XY)    #list转化为numpy,好切片
    print('readchange ok!')
    irisNumber = open('irisNumber.pkl', 'wb')
    pickle.dump(iris_XY_numpy, irisNumber)
    irisNumber.close()

def loadPickle():
    pkl_file = open('irisNumber.pkl', 'rb')
    irisXY = pickle.load(pkl_file)
    # 注意切片
    iris_X = irisXY[:,0:4]
    iris_yy = irisXY[:, 4]
    iris_Y = []
    for y in iris_yy:
        if y == 1:
            iris_Y.append([1, 0, 0])
        if y == 2:
            iris_Y.append([0, 1, 0])
        if y == 3:
            iris_Y.append([0, 0, 1])

    iris_Y_numpy = np.array(iris_Y)
    return iris_X, iris_Y_numpy

3.数据读取与打散:

writePickle()
iris_X, iris_Y = loadPickle()
# onehotLabels = digit2onehot(iris_yy, 3)
indices = np.random.permutation(iris_X.shape[0])
# indices = numpy.random.permutation(len(data_x))
rand_data_x = iris_X[indices]
rand_data_y = iris_Y[indices]
iris_train_X = rand_data_x[1:75, :]
iris_train_Y = rand_data_y[1:75, :]
iris_test_X = rand_data_x[55:75, :]
iris_test_Y = rand_data_y[55:75, :]

4.线性回归:

in_size = 4
out_size = 3
x = tf.placeholder(tf.float32, [None, in_size])
y = tf.placeholder(tf.float32, [None, out_size])
W=tf.Variable(tf.random_normal([in_size, out_size]), name='W')
b=tf.Variable(tf.zeros([1,out_size]), name='b') + 0.2  #不设为0
WXB = tf.matmul(x, W) + b
pred = tf.nn.softmax(WXB)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred),reduction_indices=[1])) #loss

train_step=tf.train.GradientDescentOptimizer(0.008).minimize(cross_entropy)
init=tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    for i in range(1600):
        sess.run([train_step],feed_dict={x:iris_train_X,y:iris_train_Y})
        if i % 100 == 0:
            print("ready go!")
            y_pre = sess.run(pred, feed_dict={x:iris_test_X})#需要run才嫩
            correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(iris_test_Y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            result = sess.run(accuracy, feed_dict={x: iris_test_X, y: iris_test_Y})
            cross_entropy_loss = sess.run(cross_entropy, feed_dict={x: iris_train_X, y: iris_train_Y})
            print('预测结果:',[y_pre,'\n',iris_test_Y],'损失函数:',cross_entropy_loss,'\t\t','准确率:  ',result)
ccd = 0

5.总代码:

import tensorflow as tf
import numpy as np
import pickle


filename = 'E:/workspace/dataSet/Classic/sentence/iris.data.txt'
###将iris数据标签转化为number形式
def readChangeIris(filename):
    file = open(filename, 'r', encoding='gbk')
    iris_XY = []
    label_dict = {}
    while file.readline():
        text = file.readline()
        if '' is not text:
            texts = text.strip().split(',')
            mid_itis_trainX = []
            for i in range(len(texts)-1):
                wordNumber = float(texts[i]) #注意,是float而不是int
                mid_itis_trainX.append(wordNumber)

            irisY = texts[len(texts)-1].strip()
            if irisY not in label_dict : #构建字典, 存储标签对应的类别数值
                label_dict[irisY] = len(label_dict) + 1

            irisLabel = label_dict[irisY]
            mid_itis_trainX.append(irisLabel)
            iris_XY.append(mid_itis_trainX)

    file.close()
    return iris_XY, label_dict
#测试
iris_XY, label_dict = readChangeIris(filename)

###写成文件形式
def writePickle():
    iris_XY, label_dict = readChangeIris(filename)
    iris_XY_numpy = np.array(iris_XY)    #list转化为numpy,好切片
    print('readchange ok!')
    irisNumber = open('irisNumber.pkl', 'wb')
    pickle.dump(iris_XY_numpy, irisNumber)
    irisNumber.close()

def loadPickle():
    pkl_file = open('irisNumber.pkl', 'rb')
    irisXY = pickle.load(pkl_file)
    # 注意切片
    iris_X = irisXY[:,0:4]
    iris_yy = irisXY[:, 4]
    iris_Y = []
    for y in iris_yy:
        if y == 1:
            iris_Y.append([1, 0, 0])
        if y == 2:
            iris_Y.append([0, 1, 0])
        if y == 3:
            iris_Y.append([0, 0, 1])

    iris_Y_numpy = np.array(iris_Y)
    return iris_X, iris_Y_numpy

#读写数据
writePickle()
iris_X, iris_Y = loadPickle()
# onehotLabels = digit2onehot(iris_yy, 3)
indices = np.random.permutation(iris_X.shape[0])
# indices = numpy.random.permutation(len(data_x))
rand_data_x = iris_X[indices]
rand_data_y = iris_Y[indices]
iris_train_X = rand_data_x[1:75, :]
iris_train_Y = rand_data_y[1:75, :]
iris_test_X = rand_data_x[55:75, :]
iris_test_Y = rand_data_y[55:75, :]

in_size = 4
out_size = 3
x = tf.placeholder(tf.float32, [None, in_size])
y = tf.placeholder(tf.float32, [None, out_size])
W=tf.Variable(tf.random_normal([in_size, out_size]), name='W')
b=tf.Variable(tf.zeros([1,out_size]), name='b') + 0.2 #不设置为0
WXB = tf.matmul(x, W) + b
pred = tf.nn.softmax(WXB)
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred),reduction_indices=[1])) #loss

train_step=tf.train.GradientDescentOptimizer(0.008).minimize(cross_entropy)
init=tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init)
    for i in range(1600):
        sess.run([train_step],feed_dict={x:iris_train_X,y:iris_train_Y})
        if i % 100 == 0:
            print("ready go!")
            y_pre = sess.run(pred, feed_dict={x:iris_test_X})#需要run才嫩
            correct_prediction = tf.equal(tf.argmax(y_pre, 1), tf.argmax(iris_test_Y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            result = sess.run(accuracy, feed_dict={x: iris_test_X, y: iris_test_Y})
            cross_entropy_loss = sess.run(cross_entropy, feed_dict={x: iris_train_X, y: iris_train_Y})
            print('预测结果:',[y_pre,'\n',iris_test_Y],'损失函数:',cross_entropy_loss,'\t\t','准确率:  ',result)

6.不动手不知道,好多坑,尤其是数据处理,各种格式,各种数据类型

希望对你有帮助!

猜你喜欢

转载自blog.csdn.net/rensihui/article/details/80458628