笔记——实现多元线性回归模型LinearRegression & KNN Regressor

在这里插入图片描述

在这里插入图片描述


算法有待我再琢磨琢磨


导入boston房价数据

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
boston=datasets.load_boston()
X=boston.data
y=boston.target
X=X[y<50.0]
y=y[y<50.0]
X.shape
(490, 13)

导入自己写的LinearRegression.py

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=666)
from LinearRegression import LinearRegression
reg=LinearRegression()
reg.fit_normal(X_train,y_train)
LinearRegression()
reg.coef_# 系数
array([-1.15625837e-01,  3.13179564e-02, -4.35662825e-02, -9.73281610e-02,
       -1.09500653e+01,  3.49898935e+00, -1.41780625e-02, -1.06249020e+00,
        2.46031503e-01, -1.23291876e-02, -8.79440522e-01,  8.31653623e-03,
       -3.98593455e-01])
reg.interception_#截距
32.59756158867594
reg.score(X_test,y_test)
0.8009390227581046

scikit-learn中的回归问题

from sklearn.linear_model import LinearRegression
lin_reg=LinearRegression()
lin_reg.fit(X_train,y_train)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
lin_reg.coef_
array([-1.15625837e-01,  3.13179564e-02, -4.35662825e-02, -9.73281610e-02,
       -1.09500653e+01,  3.49898935e+00, -1.41780625e-02, -1.06249020e+00,
        2.46031503e-01, -1.23291876e-02, -8.79440522e-01,  8.31653623e-03,
       -3.98593455e-01])
lin_reg.intercept_
32.59756158869959
lin_reg.score(X_test,y_test)
0.8009390227581041

KNN Regressor使用KNN算法解决线性回归问题

from sklearn.neighbors import KNeighborsRegressor
knn_reg=KNeighborsRegressor()
knn_reg.fit(X_train,y_train)
knn_reg.score(X_test,y_test)
0.602674505080953

网格搜索,寻找超参数

from sklearn.model_selection import GridSearchCV
param_grid=[
    {
        "weights":["uniform"],
        "n_neighbors":[i for i in range(1,11)]
    },
    {
        "weights":["distance"],
        "n_neighbors":[i for i in range(1,11)],
        "p":[i for i in range(1,6)]
    }
]
knn_reg=KNeighborsRegressor()
grid_search=GridSearchCV(knn_reg,param_grid,n_jobs=-1,verbose=1)
grid_search.fit(X_train,y_train)
Fitting 5 folds for each of 60 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 235 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    0.6s finished





GridSearchCV(cv=None, error_score=nan,
             estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30,
                                           metric='minkowski',
                                           metric_params=None, n_jobs=None,
                                           n_neighbors=5, p=2,
                                           weights='uniform'),
             iid='deprecated', n_jobs=-1,
             param_grid=[{'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                          'weights': ['uniform']},
                         {'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
                          'p': [1, 2, 3, 4, 5], 'weights': ['distance']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=1)
grid_search.best_params_
{'n_neighbors': 6, 'p': 1, 'weights': 'distance'}
grid_search.best_score_
0.6243135119018297
grid_search.best_estimator_.score(X_test,y_test)
0.7353138117643773

LinearRegression.py

import numpy as np
from metrics import r2_score

class LinearRegression:
    """Multivariate linear regression solved with the closed-form normal equation.

    Attributes:
        coef_: coefficient for each feature (set after fitting).
        interception_: intercept term (name kept for compatibility with
            existing callers that read ``reg.interception_``).
        _theta: full parameter vector [intercept, coef...].
    """

    def __init__(self):
        # Bug fix: was misspelled ``__int__``, so the initializer never ran
        # and the attributes below did not exist on a fresh instance.
        self.coef_ = None
        self.interception_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Fit the model via the normal equation: theta = (X^T X)^-1 X^T y.

        X_train: 2-D array of shape (n_samples, n_features).
        y_train: 1-D array of shape (n_samples,).
        Returns self, so calls can be chained.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "必须相等"

        # Prepend a column of ones so the intercept is theta[0].
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]

        return self

    def predict(self, X_predict):
        """Predict targets for X_predict (shape (n_samples, n_features))."""
        assert self.interception_ is not None and self.coef_ is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == len(self.coef_), \
            "X_predict的特征数量必须等于X_train"
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Return the R^2 score of the predictions on the test set."""
        y_predict = self.predict(X_test)
        return r2_score(y_test, y_predict)

    def __repr__(self):
        return "LinearRegression()"

猜你喜欢

转载自blog.csdn.net/chairon/article/details/107345876