In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Custom standardization function (this assumes the input data is a matrix)
def regularize(xMat):
    inMat = xMat.copy()                  # work on a copy so the original xMat is not modified
    inMeans = np.mean(inMat, axis=0)     # column means
    inVar = np.std(inMat, axis=0)        # column standard deviations
    inMat = (inMat - inMeans) / inVar    # standardize: zero mean, unit variance
    return inMat
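A quick check, not part of the original workflow, that the standardized columns come out with mean 0 and standard deviation 1 (the matrix `demo` below is made up purely for illustration):
In [ ]:
demo = np.mat(np.random.rand(5, 3))   # illustrative random matrix, not the abalone data
std_demo = regularize(demo)
print(np.mean(std_demo, axis=0))      # column means are ~0
print(np.std(std_demo, axis=0))       # column standard deviations are 1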
In [ ]:
w = w - a * X.T * (X*w - y) / m    # batch update rule: learning rate a, m samples
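This update rule comes from the mean-squared-error cost J(w) = ||Xw - y||^2 / (2m): its gradient is X.T * (Xw - y) / m, and each step moves w by the learning rate times the negative gradient. Below is a minimal numeric check of that gradient on made-up data (the names are illustrative only, not from this notebook):
In [ ]:
# Illustrative check that X.T @ (X @ w - y) / m is the gradient of J(w) = ||Xw - y||^2 / (2m)
X = np.random.randn(20, 3)
y = np.random.randn(20, 1)
w = np.random.randn(3, 1)
m = X.shape[0]
cost = lambda v: np.sum((X @ v - y) ** 2) / (2 * m)
grad = X.T @ (X @ w - y) / m            # analytic gradient
num = np.zeros_like(w)                  # central-difference gradient for comparison
h = 1e-6
for i in range(w.size):
    e = np.zeros_like(w)
    e[i] = h
    num[i] = (cost(w + e) - cost(w - e)) / (2 * h)
print(np.allclose(grad, num))           # True: the analytic and numeric gradients agree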
In [4]:
# Batch gradient descent: parameters are the dataset, the learning rate (step size) eps and the maximum number of iterations numIt
def gradDescent_0(dataSet, eps=0.01, numIt=50000):
    xMat = np.mat(dataSet.iloc[:, :-1].values)    # features as a matrix
    yMat = np.mat(dataSet.iloc[:, -1].values).T   # target as a column matrix
    xMat = regularize(xMat)                       # standardize X and y separately
    yMat = regularize(yMat)
    m, n = xMat.shape
    weights = np.zeros((n, 1))                    # initial coefficients (weights)
    for k in range(numIt):                        # iterate
        grad = xMat.T * (xMat * weights - yMat) / m   # compute the gradient
        weights = weights - eps * grad                # update the weights
    return weights
In [7]:
aba = pd.read_table('abalone.txt', header=None)   # dataset from UCI: physical attributes of abalone; the target field is the abalone's age
aba.head()
Out[7]:
In [8]:
aba.shape
Out[8]:
In [12]:
aba.tail()
Out[12]:
In [13]:
aba.info()
In [29]:
aba[1].isnull().value_counts()
Out[29]:
In [30]:
# Run batch gradient descent on the abalone data
gradDescent_0(aba, eps=0.01, numIt=5000)
Out[30]:
In [31]:
# Compare with the ordinary least-squares (closed-form) solution
xMat = np.mat(aba.iloc[:, :-1].values)
yMat = np.mat(aba.iloc[:, -1].values).T
xMat = regularize(xMat)
yMat = regularize(yMat)
xTx = xMat.T * xMat
ws = xTx.I * (xMat.T * yMat)
ws
Out[31]:
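As a rough convergence check (not in the original notebook), the gradient-descent weights can be compared directly with this closed-form solution; the eps and numIt values below simply mirror the earlier call:
In [ ]:
ws_gd = gradDescent_0(aba, eps=0.01, numIt=5000)   # batch gradient-descent fit
print(np.abs(ws_gd - ws).max())                    # a small difference suggests the iteration has converged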
In [32]:
aba[0].mean()
Out[32]:
In [34]:
weights = np.zeros((9,1))
weights
Out[34]:
SGD: stochastic gradient descent
Stochastic gradient descent works on the same principle as batch gradient descent; the difference is that the gradient is not computed from all m samples, but from a single randomly selected sample at each step.
Note: because each gradient comes from a randomly chosen sample, the estimate is noisy, so it is best to increase the number of iterations considerably (here on the order of several hundred thousand).
Also: because the output of stochastic gradient descent carries a random factor, it is less suited to finding an exact solution of the linear regression problem.
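To see why a single sample per step is enough: each single-sample gradient is a noisy but unbiased estimate of the full batch gradient, so averaged over many random picks the two agree. A small illustrative check on made-up data (not the abalone set):
In [ ]:
X = np.random.randn(100, 3)
y = np.random.randn(100, 1)
w = np.zeros((3, 1))
m = X.shape[0]
batch_grad = X.T @ (X @ w - y) / m                 # gradient over all m samples
idx = np.random.randint(0, m, size=20000)          # many random single-sample picks
single = [X[i:i+1].T @ (X[i:i+1] @ w - y[i:i+1]) for i in idx]
print(np.abs(np.mean(single, axis=0) - batch_grad).max())   # small: the two agree on average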
In [39]:
# The basic procedure is the same; the difference is that we sample the dataset with replacement
def gradDescent_1(dataSet, eps=0.01, numIt=500000):
    dataSet = dataSet.sample(numIt, replace=True)   # sample() draws numIt random rows; replace=True means sampling with replacement
    dataSet.index = range(dataSet.shape[0])         # reset the index so rows can be addressed by position
    xMat = np.mat(dataSet.iloc[:, :-1].values)
    yMat = np.mat(dataSet.iloc[:, -1].values).T
    xMat = regularize(xMat)
    yMat = regularize(yMat)
    m, n = xMat.shape
    weights = np.zeros((n, 1))
    for i in range(m):                              # iterate over the sampled rows
        grad = xMat[i].T * (xMat[i] * weights - yMat[i])   # gradient from a single sample each time
        weights = weights - eps * grad
    return weights
In [42]:
import time
%time gradDescent_1(aba)   # the coefficients differ from run to run; each result is just one point in the solution space, and the final SSE may not differ by much
Out[42]:
In [ ]:
def sseCal(dataSet, regres):   # parameters: the dataset and the regression method
    n = dataSet.shape[0]
    y = dataSet.iloc[:, -1].values
    ws = regres(dataSet)
    yhat = dataSet.iloc[:, :-1].values * ws   # predictions from the fitted weights
    yhat = yhat.reshape([n, ])
    sse = np.power(yhat - y, 2).sum()         # sum of squared errors
    return sse
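A usage example (illustrative only; refitting at these iteration counts takes a little while):
In [ ]:
sseCal(aba, gradDescent_1)   # total squared error of the stochastic-gradient fit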
In [45]:
%%time
n = aba.shape[0]
y = aba.iloc[:, -1].values
ws = gradDescent_1(aba)
yhat = aba.iloc[:, :-1].values * ws
yhat = yhat.reshape([n, ])
rss = np.power(yhat - y, 2).sum()
rss
Wall time: 46.9 s
In [46]:
rss
Out[46]:
In [48]:
# Wrap the R**2 calculation
def rSquare(dataSet, regres):   # parameters: the dataset and the regression method
    sse = sseCal(dataSet, regres)
    y = dataSet.iloc[:, -1].values
    sst = np.power(y - y.mean(), 2).sum()
    return 1 - sse / sst
In [49]:
rSquare(aba, gradDescent_1)
Out[49]: