開発環境

Anaconda 4.9.2 / Python 3.6.12

課題

あなたがレストランのオーナーで、いくつかの都市の人口と利益のデータ（ex1data1.txt）を持っているとします。線形回帰を使って、次にどの都市へ出店するかを検討します。

プログラム分解

生データのインポート

コード

import pandas as pd
import seaborn as sns
sns.set(context="notebook", style="whitegrid", palette="dark")
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
# Load the population/profit data. Column names are supplied explicitly, so the
# first line of the file is read as data rather than as a header.
# The raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences (\A, \l, \e), which Python warns about.
data = pd.read_csv(r'E:\Ana3\linear\ex1data1.txt', names=['population', 'profit'])
print(data.head())  # preview the first five rows
# Scatter plot of profit against population with no fitted regression line.
# Keyword arguments and `height` replace the positional form and the renamed
# `size` parameter, which newer seaborn releases reject.
sns.lmplot(x='population', y='profit', data=data, height=6, fit_reg=False)
plt.show()

結果
ここに画像の説明を挿入

コスト関数を計算する

コード

def computeCost(X, y, theta):
    """Return the squared-error cost J(theta) of a linear model.

    Computes ``sum((X @ theta.T - y) ** 2) / (2 * m)`` with ``m = len(X)``.

    Args:
        X: Design matrix of shape (m, n); ``np.matrix`` or 2-D array-like.
        y: Targets with m elements, e.g. shape (m, 1) or (m,).
        theta: Parameters with n elements, e.g. shape (1, n) or (n,).

    Returns:
        The scalar cost.
    """
    # `*` is a matrix product only for np.matrix; with plain ndarrays it
    # broadcasts element-wise and yields a wrong cost. Converting to ndarrays
    # with explicit column/row shapes and using `@` is correct for both.
    features = np.asarray(X)
    targets = np.asarray(y).reshape(-1, 1)
    params = np.asarray(theta).reshape(1, -1)
    residual = features @ params.T - targets
    return np.sum(np.square(residual)) / (2 * len(features))
# Prepend a constant column of ones so the intercept is handled by the same
# vectorised product as the slope when computing cost and gradient.
data.insert(0, 'Ones', 1)
cols = data.shape[1]
X = data.iloc[:, :cols - 1]  # every row, all columns except the last
y = data.iloc[:, cols - 1:]  # every row, last column only (kept 2-D)
# Preview the first five rows of each frame.
print(X.head())
print(y.head())
# Convert both frames to np.matrix, the form the cost code operates on.
X, y = np.matrix(X.values), np.matrix(y.values)
theta = np.matrix([0, 0])  # initial parameters, a 1 x 2 row of zeros
print(computeCost(X, y, theta))  # cost at the initial (all-zero) theta

結果
① X と y の最初の5行を確認します。
ここに画像の説明を挿入
② コスト関数の初期値を出力します（θ の初期値は 0 です）。

バッチ勾配降下法のアルゴリズム

コード

def gradientDescent(X, y, theta, alpha, iters):
    """Fit linear-regression parameters by batch gradient descent.

    Each iteration applies the simultaneous update
    ``theta := theta - (alpha / m) * (X * theta.T - y).T * X`` with
    ``m = len(X)``, then records the cost of the updated parameters.

    Args:
        X: ``np.matrix`` design matrix, shape (m, n).
        y: ``np.matrix`` of targets, shape (m, 1).
        theta: ``np.matrix`` of initial parameters, shape (1, n). It is not
            modified; a new matrix is produced on every update.
        alpha: Learning rate.
        iters: Number of iterations to run.

    Returns:
        A tuple ``(theta, cost)``: the parameters after the last iteration and
        an array of length ``iters`` holding the cost after each update.
    """
    m = len(X)
    cost = np.zeros(iters)
    for i in range(iters):
        error = (X * theta.T) - y
        # error.T * X is the (1, n) row of sum(error * X[:, j]) for every j, so
        # all parameters are updated at once from the same error vector. This
        # replaces the per-parameter loop and its shared temp buffer.
        theta = theta - (alpha / m) * (error.T * X)
        cost[i] = computeCost(X, y, theta)  # cost after this update
    return theta, cost
# Hyperparameters: learning rate and number of gradient-descent iterations.
alpha, iters = 0.01, 1000
# Train on the whole data set: g receives the fitted parameters and cost the
# per-iteration cost history.
g, cost = gradientDescent(X=X, y=y, theta=theta, alpha=alpha, iters=iters)
print(computeCost(X=X, y=y, theta=g))  # cost at the fitted parameters

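中心となる式
$$\theta_j := \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}, \qquad h_\theta(x) = \theta_0 + \theta_1 x$$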

結果
ここに画像の説明を挿入

フィッティングをグラフィカルに表示

コード

# Evaluate the fitted line g[0, 0] + g[0, 1] * x at 100 evenly spaced
# populations spanning the observed range.
x = np.linspace(data.population.min(), data.population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, f, 'r', label='Prediction')
# Legend label corrected from the misspelt 'Traning Data'.
ax.scatter(data.population, data.profit, label='Training Data')
ax.legend(loc='upper left')  # same position as the numeric code 2
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()

結果
ここに画像の説明を挿入

コスト反復曲線をプロットします

コード

# Plot the cost recorded at each iteration against the iteration index.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')
ax.set(xlabel='Iterations', ylabel='Cost', title='Error vs. Training Epoch')
plt.show()

結果
ここに画像の説明を挿入

完全なプログラム

import pandas as pd
import seaborn as sns
sns.set(context="notebook", style="whitegrid", palette="dark")
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
# Load the population/profit data. Column names are supplied explicitly, so the
# first line of the file is read as data rather than as a header.
# The raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences (\A, \l, \e), which Python warns about.
data = pd.read_csv(r'E:\Ana3\linear\ex1data1.txt', names=['population', 'profit'])
print(data.head())  # preview the first five rows
# Scatter plot of profit against population with no fitted regression line.
# Keyword arguments and `height` replace the positional form and the renamed
# `size` parameter, which newer seaborn releases reject.
sns.lmplot(x='population', y='profit', data=data, height=6, fit_reg=False)
plt.show()
def computeCost(X, y, theta):
    """Return the squared-error cost J(theta) of a linear model.

    Computes ``sum((X @ theta.T - y) ** 2) / (2 * m)`` with ``m = len(X)``.

    Args:
        X: Design matrix of shape (m, n); ``np.matrix`` or 2-D array-like.
        y: Targets with m elements, e.g. shape (m, 1) or (m,).
        theta: Parameters with n elements, e.g. shape (1, n) or (n,).

    Returns:
        The scalar cost.
    """
    # `*` is a matrix product only for np.matrix; with plain ndarrays it
    # broadcasts element-wise and yields a wrong cost. Converting to ndarrays
    # with explicit column/row shapes and using `@` is correct for both.
    features = np.asarray(X)
    targets = np.asarray(y).reshape(-1, 1)
    params = np.asarray(theta).reshape(1, -1)
    residual = features @ params.T - targets
    return np.sum(np.square(residual)) / (2 * len(features))
# Prepend a constant column of ones so the intercept is handled by the same
# vectorised product as the slope when computing cost and gradient.
data.insert(0, 'Ones', 1)
cols = data.shape[1]
X = data.iloc[:, :cols - 1]  # every row, all columns except the last
y = data.iloc[:, cols - 1:]  # every row, last column only (kept 2-D)
# Preview the first five rows of each frame.
print(X.head())
print(y.head())
# Convert both frames to np.matrix, the form the cost code operates on.
# Author's note on shapes: X is (97, 2) and y is (97, 1) for this data set.
X, y = np.matrix(X.values), np.matrix(y.values)
theta = np.matrix([0, 0])  # initial parameters, shape (1, 2), all zeros
print(computeCost(X, y, theta))  # cost at the initial (all-zero) theta
def gradientDescent(X, y, theta, alpha, iters):
    """Fit linear-regression parameters by batch gradient descent.

    Each iteration applies the simultaneous update
    ``theta := theta - (alpha / m) * (X * theta.T - y).T * X`` with
    ``m = len(X)``, then records the cost of the updated parameters.

    Args:
        X: ``np.matrix`` design matrix, shape (m, n).
        y: ``np.matrix`` of targets, shape (m, 1).
        theta: ``np.matrix`` of initial parameters, shape (1, n). It is not
            modified; a new matrix is produced on every update.
        alpha: Learning rate.
        iters: Number of iterations to run.

    Returns:
        A tuple ``(theta, cost)``: the parameters after the last iteration and
        an array of length ``iters`` holding the cost after each update.
    """
    m = len(X)
    cost = np.zeros(iters)
    for i in range(iters):
        error = (X * theta.T) - y
        # error.T * X is the (1, n) row of sum(error * X[:, j]) for every j, so
        # all parameters are updated at once from the same error vector. This
        # replaces the per-parameter loop and its shared temp buffer.
        theta = theta - (alpha / m) * (error.T * X)
        cost[i] = computeCost(X, y, theta)  # cost after this update
    return theta, cost
# Hyperparameters: learning rate and number of gradient-descent iterations.
alpha = 0.01
iters = 1000
# Train on the whole data set: g receives the fitted parameters and cost the
# per-iteration cost history.
g, cost = gradientDescent(X, y, theta, alpha, iters)
print(computeCost(X, y, g))  # cost at the fitted parameters

# Evaluate the fitted line g[0, 0] + g[0, 1] * x at 100 evenly spaced
# populations spanning the observed range, and draw it over the data.
x = np.linspace(data.population.min(), data.population.max(), 100)
f = g[0, 0] + (g[0, 1] * x)
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(x, f, 'r', label='Prediction')
# Legend label corrected from the misspelt 'Traning Data'.
ax.scatter(data.population, data.profit, label='Training Data')
ax.legend(loc='upper left')  # same position as the numeric code 2
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
# Plot the cost recorded at each iteration against the iteration index.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
plt.show()

Andrew Ng（呉恩達）機械学習 課題（1）単変量線形回帰／バッチ勾配降下法（人口と利益のデータ）

単変量線形回帰／バッチ勾配降下法（人口と利益のデータ）

開発環境

課題

プログラム分解

生データのインポート

コスト関数を計算する

バッチ勾配降下法のアルゴリズム

フィッティングをグラフィカルに表示

コスト反復曲線をプロットします

完全なプログラム

おすすめ