Simple Linear Regression in Python

Least Squares Implementation

Complete code

from matplotlib import pyplot as plt
from pandas import DataFrame

# Create the data set
examDict = {'x': [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
                     2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
            'y': [10, 22, 13, 43, 20, 22, 33, 50, 62,
                   48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]}

# Convert to a DataFrame
examDf = DataFrame(examDict)
x = examDf.x
y = examDf.y
# Draw the scatter plot
plt.scatter(x, y, color='b', label="Exam Data")

# Add axis labels
plt.xlabel("Hours")
plt.ylabel("Score")
# Show the figure
plt.show()

# Loss function: mean squared error over the M samples
def cost(w, b, x, y):
    total_cost = 0
    M = len(x)
    for i in range(M):
        total_cost += (y[i] - w * x[i] - b) ** 2

    return total_cost / M
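
# Quick sanity check on toy values (an addition, not in the original post):
# with w = 0, b = 0, x = [1, 2], y = [1, 2], the squared errors are 1 and 4,
# so the mean cost should be (1 + 4) / 2 = 2.5
assert cost(0, 0, [1, 2], [1, 2]) == 2.5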

# Compute the mean of a sequence
def average(data):
    total = 0
    num = len(data)
    for i in range(num):
        total += data[i]
    return total / num

# Solve for w and b in closed form (ordinary least squares)
# Line: y = w * x + b
#   w = sum(y_i * (x_i - x_bar)) / (sum(x_i^2) - M * x_bar^2)
#   b = mean(y_i - w * x_i)
def fit(x, y):
    M = len(x)
    x_bar = average(x)
    sum_yx = 0
    sum_x2 = 0
    sum_delta = 0
    for i in range(M):
        sum_yx += y[i] * (x[i] - x_bar)
        sum_x2 += x[i] ** 2

    w = sum_yx / (sum_x2 - M * (x_bar ** 2))

    for i in range(M):
        sum_delta += (y[i] - w * x[i])

    b = sum_delta / M

    return w, b
    

w, b = fit(x, y)
print("w is:", w)
print("b is:", b)
cost_value = cost(w, b, x, y)
print("cost is:", cost_value)

plt.scatter(x, y)
# Predicted y from the fitted line
pred_y = w * x + b
plt.plot(x, pred_y, color='r')
plt.show()
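
The loops in fit can also be written in vectorized form. Below is a minimal sketch (an addition, assuming NumPy is installed) that recomputes the same closed-form solution and cross-checks it against np.polyfit:

import numpy as np

# Vectorized version of the same closed-form least squares (a sketch)
x_arr = np.asarray(x)
y_arr = np.asarray(y)
x_bar = x_arr.mean()
w_vec = np.sum(y_arr * (x_arr - x_bar)) / (np.sum(x_arr ** 2) - len(x_arr) * x_bar ** 2)
b_vec = np.mean(y_arr - w_vec * x_arr)
print("vectorized w, b:", w_vec, b_vec)
# np.polyfit with degree 1 returns [slope, intercept] and should agree
print("np.polyfit:", np.polyfit(x_arr, y_arr, 1))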

[Figure: initial scatter plot]
[Figure: fitted line over the data]

Gradient Descent Implementation

from matplotlib import pyplot as plt
from pandas import DataFrame

# Create the data set
examDict = {'x': [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
                     2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
            'y': [10, 22, 13, 43, 20, 22, 33, 50, 62,
                   48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]}

# Convert to a DataFrame
examDf = DataFrame(examDict)
x = examDf.x
y = examDf.y
# Draw the scatter plot
plt.scatter(x, y, color='b', label="Exam Data")

# Add axis labels
plt.xlabel("Hours")
plt.ylabel("Score")
# Show the figure
plt.show()

# Loss function: mean squared error over the M samples
def cost(w, b, x, y):
    total_cost = 0
    M = len(x)
    for i in range(M):
        total_cost += (y[i] - w * x[i] - b) ** 2

    return total_cost / M

# Hyperparameters: a learning rate that is too large can diverge,
# one that is too small converges slowly
alpha = 0.001
initial_w = 0
initial_b = 0
num_iter = 1000

# Gradient descent: start from the initial parameters and take num_iter
# steps, recording the loss at each iteration
def grad_desc(x, y, initial_w, initial_b, alpha, num_iter):
    w = initial_w
    b = initial_b
    cost_list = []
    for i in range(num_iter):
        cost_list.append(cost(w, b, x, y))
        w, b = step_grad_desc(w, b, alpha, x, y)
    return [w, b, cost_list]

# One gradient step. For the mean squared error loss,
#   dL/dw = (2/M) * sum((w*x_i + b - y_i) * x_i)
#   dL/db = (2/M) * sum(w*x_i + b - y_i)
def step_grad_desc(current_w, current_b, alpha, x, y):
    sum_grad_w = 0
    sum_grad_b = 0
    M = len(x)
    for i in range(M):
        sum_grad_w += (current_w * x[i] + current_b - y[i]) * x[i]
        sum_grad_b += current_w * x[i] + current_b - y[i]

    grad_w = 2 / M * sum_grad_w
    grad_b = 2 / M * sum_grad_b

    update_w = current_w - alpha * grad_w
    update_b = current_b - alpha * grad_b

    return update_w, update_b

w, b, cost_list = grad_desc(x, y, initial_w, initial_b, alpha, num_iter)
print("w is:", w)
print("b is:", b)
print("cost is:", cost(w, b, x, y))
# Plot the loss over iterations
plt.plot(cost_list)
plt.show()
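
The listing above only plots the loss curve. To reproduce the fitted-line figure referenced below, a few extra lines like the following should work (an addition, reusing the w and b learned by gradient descent):

plt.scatter(x, y)
# Predicted y from the line learned by gradient descent
pred_y = w * x + b
plt.plot(x, pred_y, color='r')
plt.show()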

[Figure: loss curve over iterations]
[Figure: fitted line]

Implementation with sklearn

from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt
from pandas import DataFrame

# Create the data set
examDict = {'x': [0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75,
                     2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50],
            'y': [10, 22, 13, 43, 20, 22, 33, 50, 62,
                   48, 55, 75, 62, 73, 81, 76, 64, 82, 90, 93]}

# Convert to a DataFrame
examDf = DataFrame(examDict)
x = examDf.x
y = examDf.y
# Draw the scatter plot
plt.scatter(x, y, color='b', label="Exam Data")

# Add axis labels
plt.xlabel("Hours")
plt.ylabel("Score")
# Show the scatter plot
plt.show()

# Loss function: mean squared error over the M samples
def cost(w, b, x, y):
    total_cost = 0
    M = len(x)
    for i in range(M):
        total_cost += (y[i] - w * x[i] - b) ** 2

    return total_cost / M

# Fit with the library; sklearn expects 2D inputs, hence the reshape
lr = LinearRegression()
lr.fit(x.values.reshape(-1, 1), y.values.reshape(-1, 1))

w = lr.coef_[0][0]
b = lr.intercept_[0]
print("w is:", w)
print("b is:", b)
cost_value = cost(w, b, x, y)
print("cost is:", cost_value)
plt.scatter(x, y)
# Predicted y from the fitted line
pred_y = w * x + b
plt.plot(x, pred_y, color='r')
plt.show()
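
For completeness, the fitted sklearn model can also be queried directly; predict and score are standard LinearRegression methods, where score returns the R² of the fit. A short sketch, not part of the original post:

# Predict the score for a new study time and report R^2 on the training data
print("prediction at x = 3.0:", lr.predict([[3.0]])[0][0])
print("R^2:", lr.score(x.values.reshape(-1, 1), y.values.reshape(-1, 1)))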

[Figure: fitted line]

Source: blog.csdn.net/weixin_43424932/article/details/105347122