Python Machine Learning: Building a Simple Linear Regression Model (1)

Building a simple linear regression model


First, the data. Save the following to a plain-text file named liner_regressor.txt, one x y pair per line, separated by a space:

1.1 39343
1.3 46205
1.5 37731
2 43525
2.2 39891
2.9 56642
3 60150
3.2 54445
3.2 64445
3.7 57189
3.9 63218
4 55794
4 56957
4.1 57081
4.5 61111
4.9 67938
5.1 66029
5.3 83088
5.9 81363
6 93940
6.8 91738
7.1 98273
7.9 101302
8.2 113812
8.7 109431
9 105582
9.5 116969
9.6 112635
10.3 122391
10.5 121872

1. Reading the data

# Read the file: the first column is x, the second is y, separated by whitespace
filename = "liner_regressor.txt"
X = []
Y = []
with open(filename, 'r') as f:
    for line in f:
        # split() with no argument splits on any whitespace;
        # pass an explicit separator if the columns use a different delimiter
        xt, yt = [float(i) for i in line.split()]
        X.append(xt)
        Y.append(yt)
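
As an aside, if the file really is just two whitespace-separated numeric columns as above, the same lists can be built in a single call with NumPy's loadtxt (a minimal alternative sketch, not part of the original code):

import numpy as np

# Load the two whitespace-separated columns directly; column 0 is x, column 1 is y
data = np.loadtxt("liner_regressor.txt")
X = data[:, 0].tolist()
Y = data[:, 1].tolist()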

2. Splitting the data into training and test sets

For how np.reshape works, see: https://blog.csdn.net/DocStorm/article/details/58593682
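
As a quick standalone illustration of what the reshape call below does (toy values, chosen only for the example):

import numpy as np

a = np.array([1.1, 1.3, 1.5])    # shape (3,): a one-dimensional array
col = a.reshape((3, 1))          # shape (3, 1): a column vector
# reshape((-1, 1)) does the same thing and infers the number of rows automatically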

import numpy as np

num_training = int(0.8 * len(X))
num_test = len(X) - num_training

# Training data
# Slice out the first num_training samples and reshape them into a column vector
# https://blog.csdn.net/DocStorm/article/details/58593682
X_train = np.array(X[:num_training]).reshape((num_training, 1))
Y_train = np.array(Y[:num_training])

print(X_train)
# Test data
X_test = np.array(X[num_training:]).reshape((num_test, 1))
Y_test = np.array(Y[num_training:])

X_train is then a column vector:

[[ 1.1]
 [ 1.3]
 [ 1.5]
 [ 2. ]
 [ 2.2]
 [ 2.9]
 [ 3. ]
 [ 3.2]
 [ 3.2]
 [ 3.7]
 [ 3.9]
 [ 4. ]
 [ 4. ]
 [ 4.1]
 [ 4.5]
 [ 4.9]
 [ 5.1]
 [ 5.3]
 [ 5.9]
 [ 6. ]
 [ 6.8]
 [ 7.1]
 [ 7.9]
 [ 8.2]]

Y_train is a one-dimensional array:

[  39343.   46205.   37731.   43525.   39891.   56642.   60150.   54445.
   64445.   57189.   63218.   55794.   56957.   57081.   61111.   67938.
   66029.   83088.   81363.   93940.   91738.   98273.  101302.  113812.]
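
For reference, scikit-learn also ships a helper that performs this kind of split; a minimal sketch that reproduces the same 80/20, in-order split used above (shuffle=False keeps the original ordering instead of shuffling the samples):

from sklearn.model_selection import train_test_split
import numpy as np

X_col = np.array(X).reshape((len(X), 1))
X_train, X_test, Y_train, Y_test = train_test_split(
    X_col, np.array(Y), test_size=0.2, shuffle=False)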

3. Creating the regressor object

from sklearn import linear_model

# Create the linear regression object
linear_regressor = linear_model.LinearRegression()

# Train the model using the training set
linear_regressor.fit(X_train, Y_train)
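
Once fitted, the slope and intercept of the learned line y = a*x + b can be read directly from the regressor's standard attributes (a small addition to the original code):

print(linear_regressor.coef_)       # slope a, as an array of shape (1,)
print(linear_regressor.intercept_)  # intercept b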

4. Visualizing the fit with matplotlib

The matplotlib plotting functions used here are explained at:

https://blog.csdn.net/qiu931110/article/details/68130199

import matplotlib.pyplot as plt

# Draw the fitted line
# First compute the model's predictions for every x in the data set
X_total = np.array(X).reshape((len(X), 1))
y_train_pred = linear_regressor.predict(X_total)

plt.figure()
# Scatter plot of the raw data
plt.scatter(X, Y, color="orange")
# Plot the fitted line
plt.plot(X_total, y_train_pred, color="blue", linewidth=4)
plt.title("train data")
plt.show()
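
Note that the test set built in step 2 is never used above. As a minimal sketch of the natural next step (an addition, not part of the original post), the held-out samples could be scored with scikit-learn's metrics module:

from sklearn import metrics

# Predict on the held-out test inputs and compare with the true test targets
y_test_pred = linear_regressor.predict(X_test)
print("Mean squared error:", metrics.mean_squared_error(Y_test, y_test_pred))
print("R2 score:", metrics.r2_score(Y_test, y_test_pred))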

5. Full code:

# -*- coding: UTF-8 -*-
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt

# Read the file: the first column is x, the second is y, separated by whitespace
filename = "liner_regressor.txt"
X = []
Y = []
with open(filename, 'r') as f:
    for line in f:
        xt, yt = [float(i) for i in line.split()]
        X.append(xt)
        Y.append(yt)

num_training = int(0.8 * len(X))
num_test = len(X) - num_training

# Training data
# Slice out the first num_training samples and reshape them into a column vector
# https://blog.csdn.net/DocStorm/article/details/58593682
X_train = np.array(X[:num_training]).reshape((num_training, 1))
Y_train = np.array(Y[:num_training])

print(X_train)
print(Y_train)
# Test data
X_test = np.array(X[num_training:]).reshape((num_test, 1))
Y_test = np.array(Y[num_training:])

# Create the linear regression object
linear_regressor = linear_model.LinearRegression()

# Train the model using the training set
linear_regressor.fit(X_train, Y_train)

# Draw the fitted line
# First compute the model's predictions for every x in the data set
X_total = np.array(X).reshape((len(X), 1))
y_train_pred = linear_regressor.predict(X_total)

plt.figure()
# Scatter plot of the raw data
plt.scatter(X, Y, color="orange")
# Plot the fitted line
plt.plot(X_total, y_train_pred, color="blue", linewidth=4)
plt.title("train data")
plt.show()


Reposted from blog.csdn.net/dieju8330/article/details/82817355