最近在看慕课网BOBO老师的机器学习入门课程。之前看过很多相关课程,大多侧重数学原理推导和sklearn的使用。顺便推荐一波:看过的课程中,个人觉得邹博老师讲得蛮好(也看过吴恩达的斯坦福课程,数学推导多看几遍也能看懂,但过一会儿就忘了,哎)。文章的话可以参考 https://www.cnblogs.com/pinard/category/894692.html 这位博主的博客,写得清楚明了,有不明白的地方提问,博主一般都会讲清楚。
BOBO老师的课程里讲了算法的简单实现,我也跟着手动敲了一遍。算法实现里还是涉及不少python语法知识的(敲了一遍也还是懵),不过这样做有助于理解算法,初学者也可以看一下。
一元线性回归的简单实现,采用最小二乘法求解:
import numpy as np
class SimpleLinearRegression1:
    """Single-feature linear regression fitted by ordinary least squares.

    After ``fit`` the model is ``y = a_ * x + b_``, where ``a_`` is the slope
    and ``b_`` is the intercept.
    """

    def __init__(self):
        # Slope and intercept; both stay None until fit() has been called.
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Fit the model to the training data and return ``self``.

        Args:
            x_train: 1-D NumPy array of feature values.
            y_train: Target values, same length as ``x_train``.

        Returns:
            The fitted estimator, so calls can be chained.

        Raises:
            AssertionError: If ``x_train`` is not 1-D or the lengths differ.
        """
        assert x_train.ndim == 1  # the input data must be one-dimensional
        assert len(x_train) == len(y_train)  # x and y must have equal length

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Closed-form least-squares slope:
        #   a = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean) ** 2)
        # computed with dot products instead of a per-element Python loop.
        x_centered = x_train - x_mean
        y_centered = np.asarray(y_train) - y_mean
        # NOTE: if every x value is identical the denominator is zero and the
        # resulting slope is not finite.
        self.a_ = x_centered.dot(y_centered) / x_centered.dot(x_centered)
        self.b_ = y_mean - self.a_ * x_mean
        return self

    def predict(self, x_predict):
        """Return the vector of predictions for the 1-D array ``x_predict``.

        Raises:
            AssertionError: If ``x_predict`` is not 1-D or the model has not
                been fitted yet.
        """
        assert x_predict.ndim == 1
        assert self.a_ is not None and self.b_ is not None
        # Broadcasting applies the line equation to every element at once.
        return self.a_ * x_predict + self.b_

    def _predict(self, x_single):
        """Return the prediction for a single value ``x_single``."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression1()"
多元线性回归的实现,采用正规方程(矩阵运算)求解:
import numpy as np
class LinearRegression:
    """Multiple linear regression solved in closed form (normal equation).

    Attributes set by ``fit_normal``:
        interception_: The intercept term (first entry of ``_theta``).
        coef_: The per-feature coefficients (remaining entries of ``_theta``).
    """

    def __init__(self):
        """Initialise an unfitted Linear Regression model."""
        self.coef_ = None
        self.interception_ = None
        self._theta = None

    def fit_normal(self, X_train, y_train):
        """Fit the model to ``X_train`` / ``y_train`` via the normal equation.

        Args:
            X_train: 2-D NumPy array with one row per sample.
            y_train: NumPy array of targets with one entry per sample.

        Returns:
            The fitted estimator, so calls can be chained.

        Raises:
            AssertionError: If the number of samples differs between
                ``X_train`` and ``y_train``.
        """
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must be equal to the size of y_train"

        # Prepend a column of ones so the intercept is learned as theta[0].
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        # Normal equation: theta = (X_b^T X_b)^-1 X_b^T y
        self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train)

        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Return the vector of predictions for the samples in ``X_predict``.

        Raises:
            AssertionError: If the model has not been fitted, or if the number
                of columns in ``X_predict`` differs from the training data.
        """
        assert self.interception_ is not None and self.coef_ is not None, \
            """must fit before predict!"""
        assert X_predict.shape[1] == len(self.coef_), \
            """the feature number of X_predict must be equal to X_train"""

        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """Return the R^2 score of the model on ``X_test`` / ``y_test``."""
        y_test = np.asarray(y_test)
        y_predict = self.predict(X_test)
        # R^2 = 1 - SS_res / SS_tot, computed inline so the class does not
        # depend on an r2_score helper defined elsewhere.
        residual_ss = np.sum((y_test - y_predict) ** 2)
        total_ss = np.sum((y_test - np.mean(y_test)) ** 2)
        return 1 - residual_ss / total_ss

    def __repr__(self):
        return "LinearRegression()"