机器学习1:单变量线性回归

1 手写代码的单变量线性回归

用简单代码说清楚最小二乘法原理、代价函数、梯度下降等基本概念。


import numpy as np
import matplotlib.pyplot as plt

# Prepare train data: 100 evenly spaced inputs in [-1, 1] and targets drawn
# around the line y = 2x + 10 with Gaussian noise scaled by 0.33.
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10

# Hyper-parameters of the batch gradient descent below.
learning_rate = 0.001
n_iterations = 60000

# Parameters of the model y = w * x + b, both starting from zero.
w = 0.0
b = 0.0
for _ in range(n_iterations):
    # Cost is J = sum((y - w*x - b)^2).  The two steps below are the
    # negated partial derivatives, so adding them moves downhill:
    #   -dJ/dw = 2 * sum(residual * x),   -dJ/db = 2 * sum(residual)
    residual = train_Y - (train_X * w + b)
    step_w = 2 * np.dot(train_X, residual)
    # ndarray.sum() reduces in native code; the builtin sum() would walk the
    # array element by element in Python on every iteration.
    step_b = 2 * residual.sum()
    w += learning_rate * step_w
    b += learning_rate * step_b
print(w, b)
# Draw the noisy samples as "+" markers and overlay the fitted line w*x + b.
fitted_Y = train_X.dot(w) + b
plt.plot(train_X, train_Y, "+")
plt.plot(train_X, fitted_Y)
plt.show()

  

2 基于sklearn的单变量线性回归

import matplotlib.pyplot as plt
import numpy as np
# `datasets` and `cross_val_predict` are used below; without these imports
# the script stops with a NameError.
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_predict

lr = linear_model.LinearRegression()
# NOTE: load_boston was removed in scikit-learn 1.2, so this line requires an
# older scikit-learn release (or swapping in another regression dataset).
boston = datasets.load_boston()
y = boston.target

# cross_val_predict returns an array of the same size as `y` where each entry
# is a prediction obtained by cross validation (20 folds here):
predicted = cross_val_predict(lr, boston.data, y, cv=20)
# Measured and predicted values side by side, one row per sample; handy for
# inspection via the commented-out print.
W = np.column_stack((y, predicted))
#print( W )

# Scatter predicted against measured values; the dashed diagonal marks
# where predicted == measured.
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

(略去运算结果) 

3 基于tensorflow的单变量线性回归

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Fit y = w*x + b by gradient descent using TensorFlow's graph-and-session
# API (tf.placeholder / tf.Session), feeding one sample per optimizer step.

# Prepare train data: 100 evenly spaced inputs in [-1, 1] and targets drawn
# around the line y = 2x + 10 with Gaussian noise scaled by 0.33.
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10

# Define the model: X and Y are fed at run time, w and b are the trainable
# parameters (both start at 0.0).
X = tf.placeholder("float")
Y = tf.placeholder("float")
w = tf.Variable(0.0, name="weight")
b = tf.Variable(0.0, name="bias")
# Squared error between the fed target and the model output X*w + b.
loss = tf.square( Y - X*w - b )
# Plain gradient descent on `loss` with a learning rate of 0.001.
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

# Create session to run
with tf.Session() as sess:
    # NOTE(review): tf.initialize_all_variables is the older spelling of
    # tf.global_variables_initializer -- confirm it exists in the installed
    # TensorFlow version.
    sess.run(tf.initialize_all_variables())
    # `epoch` is only used for the progress message; it always equals i + 1.
    epoch = 1
    for i in range(100):
        # One optimizer step per (x, y) pair, so each epoch performs
        # len(train_X) updates.
        for (x, y) in zip(train_X, train_Y):
            # NOTE(review): w and b are fetched in the same run() call as
            # train_op; presumably the values may be from before or after
            # that step's update -- confirm if exact values matter.
            _, w_value, b_value = sess.run([train_op, w, b],feed_dict={X: x,Y: y})
        # Report the parameter values fetched on the epoch's last step.
        print("Epoch: {}, w: {}, b: {}".format(epoch, w_value, b_value))
        epoch += 1


# Draw the samples as "+" markers and overlay the line given by the last
# fetched w_value and b_value.
plt.plot(train_X,train_Y,"+")
plt.plot(train_X,train_X.dot(w_value)+b_value)
plt.show()

运算结果:

4 基于pandas的单变量线性回归

# 一、导入所需库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Matplotlib configuration for the Chinese-language plots below.
plt.rcParams['font.sans-serif'] = ['SimHei']  # lets Chinese labels display properly
plt.rcParams['axes.unicode_minus'] = False  # lets the minus sign display properly

# 2. Build the matrices

data = pd.read_csv("linear_regression.csv")
# Prepend a constant column of ones so the first weight acts as the intercept.
data.insert(0, 'ones', 1)
n = data.shape[1]

# Every column except the last is a feature; the last column is the target.
x = np.matrix(data.iloc[:, :n - 1])
y = np.matrix(data.iloc[:, n - 1:])
# Initial weights: a 1x2 row vector of zeros.
w = np.array([[0, 0]])
 
# 三、构造目标函数、损失函数、代价函数
 
# Not strictly needed (the cost function already contains this expression);
# kept here as a refresher of the theory.
def obiect_function(x,w):
    """Evaluate the linear hypothesis ``x * w.T``.

    With ``np.matrix`` inputs the ``*`` operator is the matrix product.

    :param x: feature matrix
    :param w: weight matrix
    :return: value of the objective (hypothesis) function
    """
    weights_t = w.T
    prediction = x * weights_t
    return prediction
 
# Not strictly needed (the cost function already contains this expression);
# kept here as a refresher of the theory.
def loss_function(x,y,w):
    """Return the signed error ``x * w.T - y`` of the linear model.

    :param x: feature matrix holding one sample
    :param y: actual value that corresponds to ``x``
    :param w: weight matrix
    :return: loss value of that sample (prediction minus actual value)
    """
    prediction = x * w.T
    difference = prediction - y
    return difference
 
# Cost function -- the quantity that gradient descent works on.
def cost_function(x,y,w):
    """Return the sum of squared errors divided by twice the sample count.

    :param x: feature matrix
    :param y: matrix of actual values
    :param w: weight matrix
    :return: ``sum((x * w.T - y) ** 2) / (2 * m)``, where ``m`` is the
        number of rows (samples) in ``x``
    """
    sample_count = x.shape[0]
    residual = x * w.T - y
    squared_error = np.power(residual, 2)
    return np.sum(squared_error) / (2 * sample_count)
# 4. Gradient descent
def gradient_descent(x,y,w,alpha,iters):
    """Run batch gradient descent on the linear-regression cost.

    ``x`` and ``y`` are expected to be ``np.matrix`` objects, so that ``*``
    means matrix multiplication (the same convention ``cost_function`` uses).

    :param x: feature matrix, one row per sample
    :param y: matrix of actual values, one row per sample
    :param w: initial weights as a single row (one column per feature);
        the caller's object is not modified
    :param alpha: step size
    :param iters: number of iterations
    :return: tuple ``(w, cost)`` -- the weights after the last iteration as
        a float ndarray with the same shape as the initial ``w``, and an
        array holding the cost after each iteration
    """
    x_len = x.shape[0]
    # Work on a float copy: integer initial weights are not truncated and a
    # fresh array is produced each step, so no buffer is shared with the caller.
    w = np.array(w, dtype=float)
    cost = np.zeros(iters)
    for i in range(iters):
        error = x*w.T-y
        # All partial derivatives at once: entry j of error.T * x equals
        # sum(error * x[:, j]).  This replaces the per-column builtin sum(),
        # which produced a 1x1 matrix that had to be implicitly converted
        # to a scalar on assignment.
        gradient = np.asarray(error.T * x)
        w = w - gradient * (alpha / x_len)
        cost[i] = cost_function(x,y,w)
    return w,cost
 
# Step size and number of iterations for the run below.
alpha = 0.01
iters = 1000
w, cost = gradient_descent(x, y, w, alpha, iters)


# 5. Plot predicted vs. actual profit (disabled)
# plt.figure(figsize=(12,8))
# plt.scatter(data["人口"],data['收益'],label = '实际值')
# plt.xlabel("人口")
# plt.ylabel("收益")
# plt.title("人口收益预测模型图")
#
# c = np.linspace(min(data["人口"]),max(data['人口']),100)
# f = [w[0,0]+w[0,1]* i for i in c]
#
# plt.plot(c,f,label = "预测模型",color = "r")
# plt.legend()
# plt.show()


# 6. Plot the cost recorded after each iteration against the iteration number
iteration_numbers = range(1, iters + 1)
plt.plot(iteration_numbers, cost, label='迭代次数和代价函数关系')
plt.title('迭代次数和代价函数关系图')
plt.xlabel("迭代次数")
plt.ylabel('代价函数')
plt.legend()
plt.show()
 
 

猜你喜欢

转载自blog.csdn.net/gongdiwudu/article/details/119204681