sklearn学习记录(1)-最小二乘法的官方案例解读

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接: https://blog.csdn.net/weixin_44112790/article/details/97610506

一元线性回归

这里只取数据集中的一个特征(索引为 2 的那一列),直接用最小二乘法进行拟合。

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset
diabetes = datasets.load_diabetes()


# Use only one feature (column index 2); np.newaxis keeps the result as a
# 2-D single-column array instead of a flat vector
diabetes_X = diabetes.data[:, np.newaxis, 2]

# Split the data into training/testing sets: the last 20 samples are held out
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]

# Split the targets into training/testing sets with the same boundary
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]

# Create linear regression object
regr = linear_model.LinearRegression()

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# The coefficient of determination (R^2): 1 is perfect prediction.
# r2_score is not the explained variance score, so label it accurately.
print('Coefficient of determination: %.2f'
      % r2_score(diabetes_y_test, diabetes_y_pred))

# Plot outputs: held-out points in black, fitted line in blue
plt.scatter(diabetes_X_test, diabetes_y_test,  color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

# Hide the tick marks on both axes
plt.xticks(())
plt.yticks(())

plt.show()

最后绘制的回归直线
在这里插入图片描述

二元线性回归

这个案例演示了如何绘制二元线性回归结果的图像。

# Load the data; only the first two feature columns are used
diabetes = datasets.load_diabetes()
indices = (0, 1)
# Hold out the last 20 samples as the test set and train on the rest
X_train, X_test = diabetes.data[:-20, indices], diabetes.data[-20:, indices]
y_train, y_test = diabetes.target[:-20], diabetes.target[-20:]
# Build the linear regression object and train it in one step
# (fit() returns the estimator itself)
ols = linear_model.LinearRegression().fit(X_train, y_train)


# #############################################################################
# Plot the figure
def plot_figs(fig_num, elev, azim, X_train, clf, y=None):
    """Draw the training points and the fitted regression plane in 3D.

    :param fig_num: figure number; each call draws into its own figure
                    rather than a subplot
    :param elev:    float, elevation angle of the 3D view
    :param azim:    float, azimuth angle of the 3D view
    :param X_train: training samples; columns 0 and 1 are plotted as
                    X_1 and X_2
    :param clf:     fitted regressor object exposing ``predict``
    :param y:       target values for the vertical axis; when omitted the
                    module-level ``y_train`` is used, as before
    :return: None
    """
    # Importing Axes3D registers the '3d' projection on older matplotlib
    # releases; newer releases register it automatically.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    if y is None:
        y = y_train

    # Use a dedicated figure and clear anything drawn in it earlier
    fig = plt.figure(fig_num, figsize=(4, 3))
    plt.clf()
    # Create the 3D axes through the figure so they are always attached to
    # it; constructing Axes3D(fig) directly no longer adds the axes to the
    # figure on current matplotlib.
    ax = fig.add_subplot(111, projection='3d', elev=elev, azim=azim)
    # Scatter the training samples
    ax.scatter(X_train[:, 0], X_train[:, 1], y, c='k', marker='+')
    # Draw the fitted plane from predictions at the four corners of the
    # rectangle [-0.1, 0.15] x [-0.1, 0.15]
    ax.plot_surface(np.array([[-.1, -.1], [.15, .15]]),
                    np.array([[-.1, .15], [-.1, .15]]),
                    clf.predict(np.array([[-.1, -.1, .15, .15],
                                          [-.1, .15, -.1, .15]]).T
                                ).reshape((2, 2)),
                    alpha=.5)
    # Label the axes and hide the tick labels
    ax.set_xlabel('X_1')
    ax.set_ylabel('X_2')
    ax.set_zlabel('Y')
    # xaxis/yaxis/zaxis replace the deprecated w_xaxis/w_yaxis/w_zaxis
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])
    ax.zaxis.set_ticklabels([])

# Generate the three different figures from different views:
# one (elevation, azimuth) pair per figure, numbered from 1
for fig_num, (elev, azim) in enumerate(
        [(43.5, -110), (-.5, 0), (-.5, 90)], start=1):
    plot_figs(fig_num, elev, azim, X_train, ols)

plt.show()

从不同的视角绘制出了最小二乘拟合的平面和数据集。
在这里插入图片描述

与岭回归对比

import numpy as np
import matplotlib.pyplot as plt

from sklearn import linear_model
# Training data: just the two points (0.5, 0.5) and (1, 1)
X_train = np.array([[.5], [1.]])
y_train = [.5, 1]
X_test = np.array([[0], [2]])
# Fix the random seed so the noise draws are reproducible
np.random.seed(0)
# Keep the models in a dict so a single loop handles both of them
classifiers = {
    'ols': linear_model.LinearRegression(),
    'ridge': linear_model.Ridge(alpha=.1),
}
# For each model: train, predict and plot in its own figure
for name, clf in classifiers.items():
    fig, ax = plt.subplots(figsize=(4, 3))

    for _ in range(6):
        # Perturb the training inputs with random noise
        noise = np.random.normal(size=(2, 1))
        this_X = X_train + .1 * noise
        # Fit on the noisy inputs and draw the resulting line in gray
        clf.fit(this_X, y_train)
        ax.plot(X_test, clf.predict(X_test), color='gray')
        # Mark the noisy training points
        ax.scatter(this_X, y_train, s=3, c='gray', marker='o', zorder=10)

    # Fit on the unperturbed data and draw that line in blue
    clf.fit(X_train, y_train)
    ax.plot(X_test, clf.predict(X_test), linewidth=2, color='blue')
    # Mark the original training points
    ax.scatter(X_train, y_train, s=30, c='red', marker='+', zorder=10)
    # Title, axis limits and axis labels
    ax.set(title=name, xlim=(0, 2), ylim=(0, 1.6), xlabel='X', ylabel='y')

    fig.tight_layout()

plt.show()

面对噪声,岭回归并没有那么敏感,斜率较为稳定(右图);而普通最小二乘法求得的线性回归斜率却很不稳定。
在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/weixin_44112790/article/details/97610506