A - Python gradient descent algorithm practice

In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
 
In [2]:
# Define a normalization function (this function assumes the input data is a matrix)
def regularize(xMat):
    inMat = xMat.copy()                # work on a copy so the operation does not affect xMat
    inMeans = np.mean(inMat, axis=0)   # column means
    inVar = np.std(inMat, axis=0)      # column standard deviations
    inMat = (inMat - inMeans) / inVar  # normalize
    return inMat
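
As a quick illustration (my own check, not part of the original notebook), applying regularize to a small matrix should give columns with mean roughly 0 and standard deviation roughly 1:

In [ ]:
# Sanity check on a small example matrix: each column of the result should have
# mean ~0 and standard deviation ~1.
demo = np.mat([[1.0, 10.0],
               [2.0, 20.0],
               [3.0, 30.0]])
demo_n = regularize(demo)
demo_n.mean(axis=0), demo_n.std(axis=0)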

BGD batch gradient descent

In [ ]:
w = w - a * X.T * (X*w - y) / m
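
For reference, this update follows from the usual mean squared error cost (a standard derivation, stated here for completeness rather than taken from the original notebook):

$$J(w) = \frac{1}{2m}\lVert Xw - y\rVert^2,\qquad \nabla J(w) = \frac{1}{m}X^{T}(Xw - y),\qquad w \leftarrow w - a\,\nabla J(w)$$

where a is the learning rate (step size) and m is the number of samples.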
In [4]:
# Parameters: the data set, the learning rate eps (step size), and the maximum number of iterations numIt
def gradDescent_0(dataSet, eps=0.01, numIt=50000):
    xMat = np.mat(dataSet.iloc[:, :-1].values)        # put x and y into matrices
    yMat = np.mat(dataSet.iloc[:, -1].values).T
    xMat = regularize(xMat)                           # normalize X and y separately
    yMat = regularize(yMat)
    m, n = xMat.shape
    weights = np.zeros((n, 1))                        # initial coefficients (weights)
    for k in range(numIt):                            # iterate
        grad = xMat.T * (xMat * weights - yMat) / m   # compute the gradient
        weights = weights - eps * grad                # update the weights
    return weights
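
As a quick check of the implementation (my own example, using only the NumPy and pandas imports above), fitting synthetic data with known coefficients should recover weights close to the truth; note that because gradDescent_0 normalizes both X and y, the returned weights are on the standardized scale rather than the raw one.

In [ ]:
# Sanity check on synthetic data with a known linear relationship.
rng = np.random.RandomState(0)
X_demo = rng.randn(200, 3)
y_demo = X_demo.dot(np.array([1.5, -2.0, 0.5])) + rng.randn(200) * 0.1
demo = pd.DataFrame(np.column_stack([X_demo, y_demo]))
gradDescent_0(demo, eps=0.01, numIt=5000)  # weights on the standardized scale, close to the true direction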
 

 

In [7]:
aba = pd.read_table('abalone.txt', header=None)  # data set from UCI recording physical measurements of abalone; the last column (age/rings) is the regression target
aba.head()

 

Out[7]:
  0 1 2 3 4 5 6 7 8
0 1 0.455 0.365 0.095 0.5140 0.2245 0.1010 0.150 15
1 1 0.350 0.265 0.090 0.2255 0.0995 0.0485 0.070 7
2 -1 0.530 0.420 0.135 0.6770 0.2565 0.1415 0.210 9
3 1 0.440 0.365 0.125 0.5160 0.2155 0.1140 0.155 10
4 0 0.330 0.255 0.080 0.2050 0.0895 0.0395 0.055 7
In [8]:
aba.shape
Out[8]:
(4177, 9)
In [12]:
aba.tail()
 
Out[12]:
  0 1 2 3 4 5 6 7 8
4172 -1 0.565 0.450 0.165 0.8870 0.3700 0.2390 0.2490 11
4173 1 0.590 0.440 0.135 0.9660 0.4390 0.2145 0.2605 10
4174 1 0.600 0.475 0.205 1.1760 0.5255 0.2875 0.3080 9
4175 -1 0.625 0.485 0.150 1.0945 0.5310 0.2610 0.2960 10
4176 1 0.710 0.555 0.195 1.9485 0.9455 0.3765 0.4950 12
In [13]:
aba.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 9 columns):
0    4177 non-null int64
1    4177 non-null float64
2    4177 non-null float64
3    4177 non-null float64
4    4177 non-null float64
5    4177 non-null float64
6    4177 non-null float64
7    4177 non-null float64
8    4177 non-null int64
dtypes: float64(7), int64(2)
memory usage: 293.8 KB
In [29]:
aba[1].isnull().value_counts()

Out[29]:

False    4177
Name: 1, dtype: int64
In [30]:
# Run batch gradient descent on the abalone data
gradDescent_0(aba, eps=0.01, numIt=5000)
Out[30]:
matrix([[ 0.01501141],
        [ 0.0304397 ],
        [ 0.31206232],
        [ 0.15730289],
        [ 0.38951011],
        [-0.94609003],
        [-0.10003637],
        [ 0.74573878]])
In [31]:
# Compare with the closed-form least squares solution
xMat = np.mat(aba.iloc[:, :-1].values)
yMat = np.mat(aba.iloc[:, -1].values).T
xMat = regularize(xMat)
yMat = regularize(yMat)
xTx = xMat.T * xMat
ws = xTx.I * (xMat.T * yMat)
ws
Out[31]:
matrix([[ 0.0162406 ],
        [-0.05874764],
        [ 0.41308287],
        [ 0.15391644],
        [ 1.4069792 ],
        [-1.39621019],
        [-0.3318546 ],
        [ 0.37046383]])
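As a side note (my own addition, not in the original notebook), the same closed-form solution can be obtained without explicitly inverting X^T X by solving the normal equations, which is generally the more numerically stable choice:

In [ ]:
# Solve the normal equations (X^T X) w = X^T y directly instead of forming the inverse
ws_solve = np.linalg.solve(np.asarray(xMat.T * xMat), np.asarray(xMat.T * yMat))
ws_solve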
In [32]:
aba[0].mean()
Out[32]:
0.052908786210198705
In [34]:
weights = np.zeros((9,1))
weights

Out[34]:

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])
 

SGD stochastic gradient descent method

 

Stochastic gradient descent works on the same principle as batch gradient descent; the difference is that each gradient step does not use all m samples, but only a single randomly selected sample j.

 

Note: because each step uses only a randomly chosen sample, the updates are noisy, so it is best to increase the number of iterations to roughly the order of 10^6.

 

Also: because the output of stochastic gradient descent contains a random component, it is less suitable when an exact solution to the linear regression is required.
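
In formula form (paralleling the batch update above, stated here for completeness), at each step a single random index $j$ is picked and the weights are updated with that one sample's gradient:

$$w \leftarrow w - a\, x_j^{T}(x_j w - y_j)$$

where $x_j$ is the j-th row of X and $y_j$ is the corresponding target value.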

In [39]:
# The basic procedure is the same as before, except that we first sample the data set with replacement
def gradDescent_1(dataSet, eps=0.01, numIt=500000):
    dataSet = dataSet.sample(numIt, replace=True)  # draw numIt random rows; replace=True means sampling with replacement
    dataSet.index = range(dataSet.shape[0])        # reset the index so it is sequential again
    xMat = np.mat(dataSet.iloc[:, :-1].values)
    yMat = np.mat(dataSet.iloc[:, -1].values).T
    xMat = regularize(xMat)
    yMat = regularize(yMat)
    m, n = xMat.shape
    weights = np.zeros((n, 1))
    for i in range(m):                                    # iterate over the sampled rows
        grad = xMat[i].T * (xMat[i] * weights - yMat[i])  # gradient from a single sample each time
        weights = weights - eps * grad
    return weights
In [42]:
import time
%time gradDescent_1(aba)  # the coefficients differ between runs because of the random sampling, but the final SSE is usually not very different
Wall time: 46.6 s
Out[42]:
matrix([[ 0.05499166],
        [-0.11769103],
        [ 0.34716992],
        [ 0.40666376],
        [ 1.41184507],
        [-1.25264229],
        [-0.33662608],
        [ 0.28914221]])
In [ ]:
def sseCal(dataSet, regres):  # parameters: the data set and the regression method
    n = dataSet.shape[0]
    y = dataSet.iloc[:, -1].values
    ws = regres(dataSet)
    yhat = dataSet.iloc[:, :-1].values * ws
    yhat = yhat.reshape([n,])
    rss = np.power(yhat - y, 2).sum()
    return rss
In [45]:
%%time
n=aba.shape[0]
y=aba.iloc[:,-1].values
ws=gradDescent_1(aba)
yhat=aba.iloc[:,:-1].values * ws
yhat=yhat.reshape([n,])
rss = np.power(yhat - y, 2).sum()
rss

 
Wall time: 46.9 s
In [46]:
rss
Out[46]:
379691.65605548245
In [48]:
# Wrap the R**2 (coefficient of determination) calculation
def rSquare(dataSet, regres):  # parameters: the data set and the regression method
    sse = sseCal(dataSet, regres)
    y = dataSet.iloc[:, -1].values
    sst = np.power(y - y.mean(), 2).sum()
    return 1 - sse / sst
In [49]:
rSquare(aba, gradDescent_1)
Out[49]:
-7.374175454585444
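
The R**2 here comes out negative most likely because sseCal multiplies the weights (which were fitted on normalized data inside gradDescent_1) by the raw, un-normalized features, so the predictions and the raw target are on different scales. A rough way to check this (my own sketch, not in the original notebook) is to score the same weights on the normalized data instead:

In [ ]:
# Score the SGD weights on the normalized features and target instead of the raw ones.
xStd = regularize(np.mat(aba.iloc[:, :-1].values))
yStd = np.asarray(regularize(np.mat(aba.iloc[:, -1].values).T)).ravel()
ws_sgd = gradDescent_1(aba)
yhat_std = np.asarray(xStd * ws_sgd).ravel()
1 - np.power(yhat_std - yStd, 2).sum() / np.power(yStd - yStd.mean(), 2).sum()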
 
