这是吴恩达机器学习课程的课后习题解答
课程视频地址：https://www.bilibili.com/video/BV164411b7dx/
笔记以及课后习题： https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes

对于笔者的课后习题解答，会从零基础的角度将习题中的Python模块以及使用到的函数逐一分析。

可以联系博主取得Jupyter Notebook版本，更利于操作哦！

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

1.数据导入

导入txt文件数据

# Load the comma-separated exercise data from a text file.
path = "ex1data1.txt"
data = pd.read_csv(path,header=None, names=['renkou','shouyi'] ) # the file has no header row, so the two columns are named explicitly
data.tail(5)  # show the last 5 rows (data.head() shows the first rows)

data.describe() # summary statistics: count, mean, std, min, 25%, 50%, 75%, max

kind类型

line : line plot (default)#折线图
bar : vertical bar plot#条形图
barh : horizontal bar plot#横向条形图
hist : histogram#直方图
box : boxplot#箱线图
kde : Kernel Density Estimation plot#核密度估计图，即在直方图的基础上绘制核概率密度曲线
pie : pie plot#饼图
scatter : scatter plot#散点图需要传入columns方向的索引

# Scatter plot of the raw data: 'renkou' on the x-axis, 'shouyi' on the y-axis.
data.plot(kind='scatter', x='renkou', y='shouyi', figsize=(12,8))
plt.show()

2.计算代价函数CostFunction

首先，我们将创建一个以参数θ为自变量的代价函数

\[J\left( \theta \right)=\frac{1}{2m}\sum\limits_{i=1}^{m}{{{\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)}^{2}}} \]

其中：$${{h}_{\theta }}\left( x \right)={{\theta }^{T}}X={{\theta }_{0}}{{x}_{0}}+{{\theta }_{1}}{{x}_{1}}+{{\theta }_{2}}{{x}_{2}}+...+{{\theta }_{n}}{{x}_{n}}$$

def CostComputer(X, y, theta):
    """Return the squared-error cost J(theta) = sum((X * theta.T - y)^2) / (2 * m).

    X is the feature matrix (one row per sample), y the target column and
    theta the 1 x n parameter row. The `*` operator is used as a matrix
    product, which is what it means for np.matrix operands.
    """
    m = len(X)  # number of samples (rows of X)
    residual = X * theta.T - y  # prediction minus target, one entry per sample
    return np.sum(np.power(residual, 2)) / (2 * m)

data.insert(0,'Ones',1) # insert a constant column named 'Ones' at position 0 (the x_0 = 1 term of the hypothesis)
data.head()

cols = data.shape[1] # shape[0] is the row count, shape[1] the column count
X = data.iloc[:,0:cols-1] # every column except the last one forms the feature matrix X
y = data.iloc[:,cols-1:cols] # the last column is the target y (the slice keeps it two-dimensional)

X.head()
#X.values # would give the underlying ndarray

y.head()

将X,y转成矩阵，初始化theta={0,0} $\theta_0$和$\theta_1$

# Convert the DataFrames to np.matrix so that `*` means matrix multiplication.
X = np.matrix(X.values)
y = np.matrix(y.values)
# After the conversion, type(X) is numpy.matrix.

# Start from theta_0 = theta_1 = 0, as the surrounding text describes
# (the previous value [1, 1] contradicted both the text and the comment below).
theta = np.matrix(np.array([0,0]))
theta # displays matrix([[0, 0]]): both parameters start at zero

X.shape, y.shape, theta.shape  # display the three shapes to check that X * theta.T - y is well-formed

$inner[i] ={{{\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)}^{2}}}$
$np.sum(inner)=\sum\limits_{i=1}^{m}{{{\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)}^{2}}}$

def CostComputer_1(X, y, theta):
    """Compute the linear-regression cost: half the mean squared error.

    X: feature matrix, y: target column, theta: 1 x n parameter row.
    `X * theta.T` is a matrix product for np.matrix operands.
    """
    squared_errors = np.power(X * theta.T - y, 2)  # squared error of each sample
    total = np.sum(squared_errors)
    return total / (len(X) * 2)
CostComputer_1(X, y, theta)  # cost on the training data for the current theta

3. 计算批量梯度下降Batch Gradient Descent

\[{{\theta }_{j}}:={{\theta }_{j}}-\alpha \frac{\partial }{\partial {{\theta }_{j}}}J\left( \theta \right) \]

\[{{\theta }_{j}}:={{\theta }_{j}}-\alpha \frac{\partial }{\partial {{\theta }_{j}}}\frac{1}{2m}\sum\limits_{i=1}^{m}{{{\left( {{h}_{\theta }}\left( {{x}^{(i)}} \right)-{{y}^{(i)}} \right)}^{2}}} \]

函数使用:numpy.ravel()

# numpy.ravel() flattens a multi-dimensional array to one dimension.
# It returns a view of the original data when possible rather than a copy.
theta = np.array([[1, 2], [3, 4]])
tt = np.ravel(theta)
len(tt)  # number of elements after flattening

函数使用:np.multiply()

# np.multiply() multiplies arrays/matrices element by element; the result has
# the same shape as the operands: C_ij = A_ij * B_ij
A = np.array([[1, 2], [3, 4]])
B = np.array([[0, 1], [2, 3]])
print('A:',A)
print('B:',B)
np.multiply(A, B)

def gradientDescent(X, y , theta, alpha, iters, cost_func=None):
    """Run batch gradient descent for linear regression.

    Parameters:
        X: feature matrix, one row per sample (np.matrix, so `*` is a
            matrix product).
        y: target column (np.matrix with one row per sample).
        theta: initial parameters as a 1 x n np.matrix; it is not modified.
        alpha: learning rate.
        iters: number of iterations to run.
        cost_func: optional callable (X, y, theta) -> cost used to record
            the training cost; defaults to CostComputer_1.

    Returns:
        (theta, cost): the parameters after the last iteration and an
        array of length `iters` holding the cost after each iteration.
        With iters == 0 the input theta is returned unchanged.
    """
    if cost_func is None:
        cost_func = CostComputer_1
    m = len(X)  # number of training samples
    cost = np.zeros(iters)  # one recorded cost per iteration

    for it in range(iters):
        error = (X * theta.T) - y  # residual of every sample, m x 1
        # error.T * X is the 1 x n row of sum_i(error_i * x_ij), so all
        # parameters are updated simultaneously from the same residuals and a
        # fresh theta is created instead of writing into a shared buffer.
        theta = theta - (alpha / m) * (error.T * X)

        cost[it] = cost_func(X, y, theta)
    return theta,cost

alpha = 0.01  # learning rate
iters = 100  # number of gradient-descent iterations

theta = np.matrix(np.array([0,0]))  # start from theta_0 = theta_1 = 0 as a 1 x 2 matrix
print(theta)
print(theta.ravel().shape[1])  # number of parameters (ravel() of an np.matrix is still 1 x n)
print('Parameters:',int(theta.ravel().shape[1]))
g,cost = gradientDescent(X, y, theta, alpha, iters)  # g: fitted parameters, cost: cost after each iteration
g

CostComputer_1(X, y, g)  # training cost at the fitted parameters

函数使用:numpy.linspace()

# numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)
# Returns evenly spaced numbers over the given interval:
# `num` samples covering [start, stop] (stop is included because endpoint=True by default).
np.linspace(0,100,num=11) # 11 values; the spacing is (stop - start) / (num - 1) = 10

函数使用:plt.subplots()

# plt.subplots() is a function that returns a tuple holding a Figure and the Axes,
# so `fig, ax = plt.subplots()` unpacks that tuple into two variables.
fig,ax = plt.subplots(nrows=1,ncols=1,figsize=(8,8)) # fig is the figure; with nrows=ncols=1 (the default) ax is a single Axes, larger grids give an array of Axes
# figsize is (width, height) in inches
#ax.set_xticks([...]) # set the tick positions of an Axes
#ax.set_yticks([...]) # note: ticks, axis labels, title and legend must be set on each Axes separately
# NOTE(review): the names x and y are reused here, so running this demo
# overwrites the target matrix y built earlier in the file.
x = np.linspace(1,10,num=11)
y = x ** (1/2)
ax.plot(x,y,color='blue')
ax.set_xlabel('xlabel')
ax.set_ylabel('ylabel')
ax.set_title('test') 
#ax[0].legend(loc='') # with several subplots, index the Axes array as ax[i]
plt.show()

# Plot the line fitted by gradient descent on top of the training data.
x = np.linspace(data.renkou.min(), data.renkou.max(), 100)
f = g[0, 0] + (g[0, 1] * x)  # hypothesis theta_0 + theta_1 * x evaluated on the grid
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x,f,'r',label='yuce')
ax.scatter(data.renkou,data.shouyi,label='xunlianji')
ax.legend(loc=2)  # without a legend the labels passed above are never shown
ax.set_xlabel('renkou')
ax.set_ylabel('shouyi')
ax.set_title('yucemoxing')
plt.show()

3.多变量线性回归

特征值：房子的大小，卧室的数量
目标：房子的价格

导入数据

path = 'ex1data2.txt'
data = pd.read_csv(path,header=None,names=['size','bedrooms','price'])  # no header row; name the three columns explicitly
data.head()

data.shape,data.describe()  # dimensions plus summary statistics

data.shape,data.describe()  # NOTE(review): same cell as the one above, repeated

特征归一化

# data.mean() is the per-column mean
# data.std() is the per-column standard deviation

data2 = (data - data.mean()) / data.std()  # rescale every column (features and target) by its own mean and std
data2.shape,data2.describe()

data2.insert(0,'ones',1)  # constant column for the intercept term
data2.head()

# Build the training set
cols = data2.shape[1]
X2 = data2.iloc[:,0:cols-1]  # all columns except the last: 'ones', 'size', 'bedrooms'
y2 = data2.iloc[:,cols-1:cols]  # the last column: 'price'
X2.head(),y2.head()

# Convert to matrices and initialise theta
X2 = np.matrix(X2.values)
y2 = np.matrix(y2.values)
theta = np.matrix(np.array([0,0,0]))  # one parameter per column of X2

# Run the gradient descent routine defined earlier (alpha and iters are reused from the single-variable run)
g2,c2 = gradientDescent(X2, y2, theta, alpha, iters)

g2

训练过程可视化

np.arange(iters),c2  # iteration indices and the cost recorded after each iteration

# x-axis: iteration index 0 .. iters-1; y-axis: c2, the cost after each iteration
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(np.arange(iters), c2, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()

4.scikit-learn的线性回归函数

path = 'ex1data1.txt'
data = pd.read_csv(path,header=None,names=['renkou','shouyi'])  # reload the single-feature data set
cols = data.shape[1] # shape[0] is the row count, shape[1] the column count
X = data.iloc[:,0:cols-1] # all columns except the last form X (no 'Ones' column is added here)
y = data.iloc[:,cols-1:cols] # the last column is the target y

from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(X, y)  # fitted on the DataFrames
X = np.matrix(X.values)  # converted to np.matrix only after fitting
y = np.matrix(y.values)

# .A1 flattens a matrix into a one-dimensional ndarray

# Recent scikit-learn releases reject np.matrix input, so predict() is given a
# plain ndarray view of X.
model.predict(np.asarray(X)).flatten() # flatten() collapses the result to one dimension
# flatten() returns a copy, so modifying the result leaves the original array unchanged.

f为预测函数

x = np.array(X[:, 0].A1)  # the single feature column as a one-dimensional array
f = model.predict(np.asarray(X)).flatten()  # predictions as a 1-D array; np.asarray avoids handing np.matrix to scikit-learn

图形化

# Plot the scikit-learn predictions over the training data.
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.renkou, data.shouyi, label='Training Data')  # label typo fixed
ax.legend(loc=2)  # loc=2 places the legend in the upper-left corner
ax.set_xlabel('renkou')  # was misspelled 'renkoi'; matches the column name
ax.set_ylabel('shouyi')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()

5. normal equation（正规方程）

$\theta ={{\left( {{X}^{T}}X \right)}^{-1}}{{X}^{T}}y$

函数介绍np.linalg

numpy.linalg模块包含线性代数的函数。使用这个模块，可以计算逆矩阵、求特征值、解线性方程组以及求解行列式等。

# np.matrix is used instead of np.mat, which was removed in NumPy 2.0.
A = np.matrix("0 1 2;1 0 3;4 -3 8") # build a matrix from a MATLAB-style string
inv = np.linalg.inv(A) # compute the inverse with inv()
A,inv

# Solve the linear system B x = b
B = np.matrix("1 -2 1;0 2 -8;-4 5 9")
b = np.array([0,8,-9])
xx = np.linalg.solve(B,b)
B,b,xx

# Eigenvalues of a matrix
C = np.matrix("3 -2;1 0")
c0 = np.linalg.eigvals(C)
C,c0,np.linalg.eigvals(A)

def normalEqu(X,y):
    """Return the least-squares parameters from the normal equation.

    Solves (X^T X) theta = X^T y for theta, which is the closed form
    theta = (X^T X)^-1 X^T y. X has one row per sample and y is the
    matching target column; ndarray and np.matrix inputs both work.
    A singular X^T X raises numpy.linalg.LinAlgError.
    """
    # Solving the linear system avoids forming the explicit inverse, which is
    # both slower and less accurate numerically.
    return np.linalg.solve(X.T@X, X.T@y)
normalEqu(X,y)  # NOTE(review): X here is the single 'renkou' column rebuilt for the scikit-learn section (no 'Ones' column), so the result has no intercept term; confirm this is intended

【机器学习】ex1-线性回归