1 手写代码的单变量线性回归
用简单的代码讲清楚最小二乘法原理、代价函数、梯度下降等基本概念。
import numpy as np
import matplotlib.pyplot as plt
# Prepare synthetic training data: y = 2x + 10 plus Gaussian noise scaled by 0.33.
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10

# Batch gradient descent on the summed squared error
#     J(w, b) = sum((train_Y - w * train_X - b) ** 2)
# starting from w = b = 0.
w = 0
b = 0
learning_rate = 0.001  # step size applied to the summed (not averaged) gradient
n_iters = 60000
for i in range(n_iters):
    # Twice the residual of every sample, i.e. the negative derivative of J
    # with respect to each prediction.
    det = 2 * (train_Y - train_X * w - b)
    # Negative partial derivatives of J with respect to w and b.
    pasw = np.dot(train_X.T, det)
    pasb = np.sum(det)  # vectorized; the builtin sum() would iterate in Python
    # Adding the negative gradient moves (w, b) downhill.
    w = w + learning_rate * pasw
    b = b + learning_rate * pasb
print(w, b)

# Plot the noisy samples and the fitted line.
plt.plot(train_X, train_Y, "+")
plt.plot(train_X, train_X * w + b)
plt.show()
2 基于sklearn的线性回归(注:示例使用了波士顿房价数据集的全部特征,实际为多变量线性回归)
from sklearn import linear_model
import matplotlib.pyplot as plt
import numpy as np
# Names used below that the import lines of this section do not provide.
from sklearn import datasets
from sklearn.model_selection import cross_val_predict

lr = linear_model.LinearRegression()
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2; on newer releases a different regression dataset must be substituted.
boston = datasets.load_boston()
y = boston.target
# cross_val_predict returns an array of the same size as `y` where each entry
# is a prediction obtained by cross validation:
predicted = cross_val_predict(lr, boston.data, y, cv=20)
# Measured and predicted values side by side, kept for manual inspection.
W = np.column_stack((y, predicted))
# print(W)
fig, ax = plt.subplots()
ax.scatter(y, predicted)
# Dashed diagonal: where the points would lie if every prediction were exact.
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()
(略去运算结果)
3 基于tensorflow的单变量线性回归
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
# Prepare synthetic training data: y = 2x + 10 plus Gaussian noise scaled by 0.33.
train_X = np.linspace(-1, 1, 100)
train_Y = 2 * train_X + np.random.randn(*train_X.shape) * 0.33 + 10

# Define the model with the TensorFlow 1.x graph API.
# NOTE(review): under TensorFlow 2.x these calls live in tf.compat.v1 and
# need v2 behaviour disabled -- confirm the installed version.
X = tf.placeholder("float")
Y = tf.placeholder("float")
w = tf.Variable(0.0, name="weight")
b = tf.Variable(0.0, name="bias")
# Squared error of a single (x, y) sample.
loss = tf.square(Y - X * w - b)
train_op = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

# Create session to run
with tf.Session() as sess:
    # Replacement for the deprecated tf.initialize_all_variables().
    sess.run(tf.global_variables_initializer())
    for epoch in range(1, 101):
        # One gradient step per sample.
        for (x, y) in zip(train_X, train_Y):
            sess.run(train_op, feed_dict={X: x, Y: y})
        # Read the parameters in their own run() call: the evaluation order of
        # fetches inside a single call is undefined, so fetching them together
        # with train_op could return pre-update values.
        w_value, b_value = sess.run([w, b])
        print("Epoch: {}, w: {}, b: {}".format(epoch, w_value, b_value))

# Plot the noisy samples and the fitted line.
plt.plot(train_X, train_Y, "+")
plt.plot(train_X, train_X * w_value + b_value)
plt.show()
运算结果:
4 基于pandas的单变量线性回归代码
# 一、导入所需库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif']=['SimHei']  # font used so Chinese labels render correctly
plt.rcParams['axes.unicode_minus']=False  # keeps minus signs rendering correctly
# 2. Build the matrices
data = pd.read_csv("linear_regression.csv")
# Prepend a constant column of ones so the first weight acts as the intercept.
data.insert(0, 'ones', 1)
n = data.shape[1]
# Every column except the last is a feature (including the ones column);
# the last column is the target.
x = data.iloc[:, :n - 1]
y = data.iloc[:, n - 1:]
# np.matrix makes `*` a matrix product, which the functions below rely on.
x = np.matrix(x)
y = np.matrix(y)
# One zero-initialised float weight per feature column, as a (1, n-1) row
# vector, instead of a hard-coded pair of integers.
w = np.zeros((1, n - 1))
# 三、构造目标函数、损失函数、代价函数
# Not strictly needed (the cost function already contains it); kept as a
# refresher of the theory.
def obiect_function(x,w):
    '''
    Evaluate the linear hypothesis for every sample.

    :param x: feature matrix, one sample per row (m x n)
    :param w: weight row vector (1 x n)
    :return: the predictions x . w^T, one row per sample (m x 1)
    '''
    # np.dot is a true matrix product for np.matrix and plain ndarray alike,
    # whereas `*` is only a matrix product for np.matrix.
    return np.dot(x, w.T)
# Not strictly needed (the cost function already contains it); kept as a
# refresher of the theory.
def loss_function(x,y,w):
    '''
    Return the signed prediction error (prediction minus actual value).

    Note that this is the raw residual; it is not squared here.

    :param x: feature matrix, one sample per row (m x n); a single sample is 1 x n
    :param y: actual values matching the rows of x, as a column (m x 1)
    :param w: weight row vector (1 x n)
    :return: x . w^T - y, one row per sample (m x 1)
    '''
    # np.dot is a true matrix product for np.matrix and plain ndarray alike,
    # whereas `*` is only a matrix product for np.matrix.
    return np.dot(x, w.T) - y
# Cost function: the quantity that gradient descent minimises.
def cost_function(x,y,w):
    '''
    Compute the halved mean squared error of the linear model.

    :param x: feature matrix, one sample per row (m x n)
    :param y: actual values as a column (m x 1)
    :param w: weight row vector (1 x n)
    :return: sum((x . w^T - y)^2) / (2 * m), where m is the number of samples
    '''
    # Number of samples.
    m = x.shape[0]
    # np.dot is a true matrix product for np.matrix and plain ndarray alike,
    # whereas `*` is only a matrix product for np.matrix.
    residual = np.dot(x, w.T) - y
    return np.sum(np.power(residual, 2)) / (2 * m)
# 四、梯度下降
def gradient_descent(x,y,w,alpha,iters):
    '''
    Fit the weights by batch gradient descent on cost_function.

    :param x: feature matrix, one sample per row (m x n)
    :param y: actual values as a column (m x 1)
    :param w: initial weight row vector (1 x n); it is not modified
    :param alpha: step size
    :param iters: number of iterations
    :return: tuple (w, cost): the weights after the last iteration as a
             (1 x n) ndarray, and an array of length iters holding the cost
             measured after each iteration
    '''
    x_len = x.shape[0]
    # Work on a float ndarray copy so the caller's array is left untouched and
    # integer initial weights do not matter.
    w = np.array(w, dtype=float)
    cost = np.zeros(iters)
    for i in range(iters):
        # Residual of every sample under the current weights.
        error = np.dot(x, w.T) - y
        # Gradient for all weights at once, (1/m) * error^T . x. This replaces
        # the per-column loop, which stored a 1x1 matrix into a scalar slot
        # (deprecated by NumPy), and updates every weight simultaneously.
        gradient = np.asarray(np.dot(error.T, x)) / x_len
        w = w - alpha * gradient
        cost[i] = cost_function(x, y, w)
    return w, cost
# Step size and iteration count for the gradient-descent run.
alpha, iters = 0.01, 1000
# Rebinds w to the fitted weights; cost receives the per-iteration cost values.
w, cost = gradient_descent(x, y, w, alpha, iters)
# 五、绘制预测收益和实际收益图
# plt.figure(figsize=(12,8))
# plt.scatter(data["人口"],data['收益'],label = '实际值')
# plt.xlabel("人口")
# plt.ylabel("收益")
# plt.title("人口收益预测模型图")
#
# c = np.linspace(min(data["人口"]),max(data['人口']),100)
# f = [w[0,0]+w[0,1]* i for i in c]
#
# plt.plot(c,f,label = "预测模型",color = "r")
# plt.legend()
# plt.show()
# 六、绘制迭代次数和代价函数关系图
# Draw the recorded cost against the iteration number (x axis runs 1..iters).
ax = plt.gca()
ax.plot(range(1, iters + 1), cost, label='迭代次数和代价函数关系')
ax.set_xlabel("迭代次数")
ax.set_ylabel('代价函数')
ax.set_title('迭代次数和代价函数关系图')
ax.legend()
plt.show()