Andrew Ng Machine Learning Exercise 1 (Python): Linear Regression

Andrew Ng's Machine Learning course: NetEase Cloud Classroom (网易云课堂)

This post is adapted from Dr. Huang Haiguang ([email protected]). Thanks to Mr. Andrew Ng, Dr. Huang Haiguang, and all the seniors working on the front lines of AI. Study hard, starting from this moment.

Note: this post exists only for my own study; it will be deleted immediately if it infringes any rights.

For more authoritative and rigorous course materials and notes, see: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the data (local path from the original author's machine)
path = 'F:/yanwucao/test/ExpData/MeachineLearning/AndrewNg/ex1/ex1data1.txt'
data1 = pd.read_csv(path, header=None, names=['population', 'profit'])

# data1.plot(kind='scatter', x='population', y='profit')
# plt.show()


def computeCost(X, y, theta):
    # Squared-error cost J(theta) over all training examples
    inner = np.power(((X * theta.T) - y), 2)
    return np.sum(inner) / (2 * len(X))
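
# computeCost implements the least-squares objective from the course notes:
#   J(theta) = 1/(2m) * sum_{i=1}^{m} (h_theta(x^(i)) - y^(i))^2,
# where h_theta(x) = theta^T x and m = len(X) is the number of training examples.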


cols = data1.shape[1]
X = data1.iloc[:, 0:cols-1]     # X: all rows, every column except the last
y = data1.iloc[:, cols-1:cols]  # y: all rows, the last column only
# Prepend a column of ones so the intercept theta_0 is learned like any other weight
w = pd.Series(np.ones(X.shape[0]))
X.insert(0, 'Ones', w)
# print(X.head())
# print(y.head())

# The cost function expects numpy matrices, so convert X and y before using them
X = np.matrix(X.values)
y = np.matrix(y.values)
theta = np.matrix(np.array([0, 0]))
# print(computeCost(X, y, theta))
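# With theta = [0, 0], the cost on ex1data1.txt should come out to about 32.07
# (the dataset has 97 training examples), a useful sanity check before
# running gradient descent.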


def gradientdescent(X, y, theta, alpha=0.01, iters=1000):
    # Batch gradient descent; records the cost after every iteration
    cost = np.zeros(iters)
    temp = np.matrix(np.zeros(theta.shape))
    for i in range(iters):
        errors = X * theta.T - y    # residuals under the current theta
        for j in range(theta.shape[1]):
            temp[0, j] = theta[0, j] - alpha * (np.sum(np.multiply(errors, X[:, j])) / len(X))

        theta = temp.copy()   # commit all parameter updates simultaneously
        cost[i] = computeCost(X, y, theta)

    return theta, cost
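
# Each sweep above applies the batch update rule from the lectures,
#   theta_j := theta_j - alpha * (1/m) * sum_i (h_theta(x^(i)) - y^(i)) * x_j^(i),
# with every theta_j updated simultaneously. The inner j-loop can also be
# collapsed into one vectorized line (a sketch, equivalent for this matrix setup):
#   theta = theta - (alpha / len(X)) * (X * theta.T - y).T * X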


g, cost = gradientdescent(X, y, theta)
# print(g)
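# With alpha=0.01 and 1000 iterations, g should come out near [[-3.24, 1.13]]:
# g[0, 0] is the intercept and g[0, 1] the slope of the fitted line.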

# Plot the fitted regression line against the training data
x = np.linspace(data1.population.min(), data1.population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, f, 'r', label='prediction')
ax.scatter(data1.population, data1.profit, marker='x', label='training data')
ax.legend(loc=2)
ax.set_xlabel('population')
ax.set_ylabel('profit')
ax.set_title('predicted profit vs population size')
plt.show()

fig1, ax1 = plt.subplots(figsize=(12, 8))
ax1.plot(np.arange(cost.size), cost, 'blue', label='error')
ax1.set_xlabel('Iterations')
ax1.set_ylabel('Cost')
ax1.set_title('Error vs. Training Epoch')
plt.show()


# Multivariate linear regression
path1 = 'F:/yanwucao/test/ExpData/MeachineLearning/AndrewNg/ex1/ex1data2.txt'
data2 = pd.read_csv(path1, header=None, names=['size', 'bedrooms', 'price'])

# Feature normalization: pandas mean()/std() both operate column-wise
data2 = (data2 - data2.mean()) / data2.std()
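# Each column is standardized as x_j := (x_j - mean_j) / std_j, putting the
# features on comparable scales so a single learning rate suits them all.
# Note that the target column price is normalized too, so the learned theta
# (and the costs below) are in normalized units.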
# print(data2.head())
col1 = pd.Series(np.ones(data2.shape[0]))
data2.insert(0, 'ones', col1)
cols = data2.shape[1]
X1 = np.matrix(data2.iloc[:, 0:cols-1].values)
y1 = np.matrix(data2.iloc[:, cols-1:cols].values)
theta1 = np.matrix([0, 0, 0])

g1, cost1 = gradientdescent(X1, y1, theta1)
# print(cost1[-1])
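# cost1[-1] is the cost after the last iteration; it should have fallen
# steadily from cost1[0] if alpha=0.01 suits the normalized data.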

fig2, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(cost1.size), cost1, 'r', label='multierror')
ax.set_xlabel('iterations')
ax.set_ylabel('cost')
ax.set_title('multi variable linear regression')
plt.show()


######################################################################################
# Fit the same model directly with scikit-learn's LinearRegression
from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(X, y)
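
# The exact least-squares fit can be inspected via the standard LinearRegression
# attributes intercept_ and coef_; they should land close to the
# gradient-descent result g above:
# print(model.intercept_, model.coef_)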

x = np.array(X[:, 1].A1)
f = model.predict(X).flatten()  # sklearn's fitted line, evaluated on the training inputs
fig3, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data1.population, data1.profit, label='Training Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()


Reposted from blog.csdn.net/yanwucao/article/details/80297578