Chapter 3
3.1 Prediction of Shepherd Dog Weight by Linear Regression
3.1.1 Data entry
train_x = [27, 29, 34, 40, 42, 47, 48, 49, 50, 52, 52, 52, 54]
train_y = [6, 7.5, 9, 10.7, 12.8, 15.1, 16, 18.5, 19.4, 18.4, 19.7, 21.8, 21.7]
print_shape(train_x)
print_shape(train_y)
3.1.2 Defining a linear regression model
model = linear_regressor()
3.1.3 Training the linear regression model
model.train(train_x, train_y)
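The train() call hides the fitting step. For a single feature, ordinary least squares has a closed-form solution; below is a minimal sketch in plain Python, as an illustration of the standard method rather than the library's actual implementation.

# Minimal ordinary-least-squares sketch for one feature:
# k = sum((x - mean_x)(y - mean_y)) / sum((x - mean_x)^2), b = mean_y - k * mean_x
def fit_line(xs, ys):
    mean_x = sum(xs) / len(xs)
    mean_y = sum(ys) / len(ys)
    k = sum((x - mean_x) * (y - mean_y) for x, y in zip(xs, ys)) \
        / sum((x - mean_x) ** 2 for x in xs)
    b = mean_y - k * mean_x
    return k, b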
3.1.4 Model Visualization
model.show()
3.1.5 Model prediction - predict() function
x = 40
pred_y = model.predict(x)
print(pred_y)
3.1.6 Parameters in the model
weights = model.get_weights()
print(weights)
k = weights[0]
b = weights[1]
print("k=", k)
print("b=", b)
3.1.7 Define a linear function
def linear_function(x, k, b):
    y = k * x + b
    return y
3.1.8 Prediction using the linear function
x = 40
pred_y = linear_function(x, k, b)
print(pred_y)
3.1.9 Model analysis - the impact of outliers on the fit and results
train_y[5] = 30  # replace one training label with an outlier value
model = linear_regressor()
model.train(train_x, train_y)
model.show()
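Because squared error penalizes large deviations heavily, even a single outlier can pull the fitted line visibly away from the trend of the remaining points.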
3.2 Prediction of Shepherd Dog Weight by Polynomial Regression
3.2.1 Data entry
train_x = [27, 29, 34, 40, 42, 47, 48, 49, 50, 52, 52, 52, 54]
train_y = [6, 7.5, 9, 10.7, 12.8, 15.1, 16, 18.5, 19.4, 18.4, 19.7, 21.8, 21.7]
3.2.2 Defining a polynomial regression model
model = poly_regressor(2)  # the argument sets the polynomial degree
3.2.3 Training a polynomial regression model
model.train(train_x, train_y)
3.2.4 Model Visualization
model.show()
3.2.5 Model Prediction
x = 40
pred_y = model.predict(x)
print(pred_y)
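Here poly_regressor(2) fits a quadratic of the form y = a2*x**2 + a1*x + a0. For readers without the teaching library, NumPy offers the same least-squares fit; a sketch, assuming NumPy is installed:

import numpy as np

# Fit a degree-2 polynomial; coefficients come back from the highest
# power down: [a2, a1, a0].
coeffs = np.polyfit(train_x, train_y, 2)
print(np.polyval(coeffs, 40))  # predict the weight at x = 40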
3.2.6 The influence of the polynomial degree on the fitting effect
model = poly_regressor(3)
model.train(train_x, train_y)
model.show()

model = poly_regressor(10)
model.train(train_x, train_y)
model.show()

model = poly_regressor(30)
model.train(train_x, train_y)
model.show()
3.2.7 The influence of outlier data on the fitting effect
train_y[5] = 30  # re-introduce the outlier
model = poly_regressor(2)
model.train(train_x, train_y)
model.show()

model = poly_regressor(30)
model.train(train_x, train_y)
model.show()
3.3 Linear regression model evaluation and test set
3.3.1 Training a linear model
train_x = [27, 29, 34, 40, 42, 47, 48, 49, 50, 52, 52, 52, 54]
train_y = [6, 7.5, 9, 10.7, 12.8, 15.1, 16, 18.5, 19.4, 18.4, 19.7, 21.8, 21.7]
model = linear_regressor()
model.train(train_x, train_y)
model.show()
3.3.2 Define the error function
def mse_error(pred, y):
    error = 0
    for i in range(len(pred)):
        error = error + (y[i] - pred[i]) ** 2
    error = error / len(pred)
    return error
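In formula form, this function computes the mean squared error

$$\mathrm{MSE} = \frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2,$$

where $\hat{y}_i$ is the $i$-th prediction and $n$ is the number of samples.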
# A quick check that the function runs on two equal-length lists.
# (Comparing train_x against train_y is not a meaningful model error;
# the real fitting error is computed in 3.3.3.)
print(mse_error(train_x, train_y))
3.3.3 Calculation of fitting error
pred_y = model.predict(train_x)
error = mse_error(pred_y, train_y)
print(error)
3.3.4 Wrapping the error calculation in a function
def compute_error(model, x, y):
    pred = model.predict(x)
    error = mse_error(pred, y)
    return error
3.3.5 Model Comparison
model2 = poly_regressor(3)
model2.train(train_x, train_y)
model2.show()
print(compute_error(model2, train_x, train_y))

model3 = poly_regressor(30)
model3.train(train_x, train_y)
model3.show()
print(compute_error(model3, train_x, train_y))
3.3.6 Training set, test set, and the overfitting problem
test_x = [23, 31, 32, 38, 40, 45, 49, 50, 50, 51, 51, 53, 55]
test_y = [6.3, 7.2, 9.1, 10.5, 12.9, 15.5, 15.9, 18.3, 19.7, 18.9, 19.3, 21.3, 22.1]
print("Linear regression error:", compute_error(model, test_x, test_y))
print("Degree-3 polynomial error:", compute_error(model2, test_x, test_y))
print("Degree-30 polynomial error:", compute_error(model3, test_x, test_y))
3.4 Prediction of Gender by Linear Classification
3.4.1 Reading data
train_x = [60, 56, 60, 55, 60, 57, 65, 60, 62, 59, 43, 52, 41, 45, 43, 50, 46, 52, 56, 56]
train_y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
3.4.2 Defining a linear classification model
model = linear_classifier()
3.4.3 Training a linear classification model
model.train(train_x, train_y)
3.4.4 Model Visualization
model.show()
3.4.5 Model application - predict() function
x = 60
pred_y = model.predict(x)
print(pred_y)
3.4.6 Parameters in the model
weights = model.get_weights()
print(weights)
k = weights[0]
b = weights[1]
print("k=", k)
print("b=", b)
3.4.7 Define a linear classification function
def decision_function(x, k, b):
    if k * x + b > 0:
        return 1
    else:
        return -1

pred = decision_function(3, 2, -5.5)
print(pred)
pred = decision_function(3, 2, -6.5)
print(pred)
3.4.8 Prediction using the linear classification function
k, b = model.get_weights()
x = 60
pred_y = decision_function(x, k, b)
print(pred_y)
3.4.9 Accuracy calculation
def accuracy(pred, y):
    right = 0
    total = 0
    for i in range(len(pred)):
        if pred[i] == y[i]:
            right += 1
        total += 1
    acc = right / total
    return acc

pred_y = model.predict(train_x)
acc = accuracy(pred_y, train_y)
print(acc)
3.4.10 Comparison of Linear Classification and Linear Regression
model2 = linear_regressor()
model2.train(train_x, train_y)
model2.show()
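A regression line fitted to the ±1 labels outputs continuous values, not class labels. To use model2 as a classifier, its output would have to be thresholded at zero, for example (assuming model2.predict returns a list of values, as the regressor did in 3.3.3):

# Convert continuous regression outputs into +1 / -1 class labels.
pred_y = [1 if v > 0 else -1 for v in model2.predict(train_x)]
print(accuracy(pred_y, train_y))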
3.5 Using height and weight to predict gender
3.5.1 Read data
train_x_m = [[163, 60], [164, 56], [165, 60], [168, 55], [169, 60], [170, 57], [170, 65], [171, 60], [170, 62], [169, 59], [153, 43], [158, 52], [156, 41], [158, 45], [159, 43], [160, 50], [159, 46], [158, 52], [157, 56], [158, 55], [167, 53], [168, 52], [163, 65], [171, 52], [169, 52], [170, 57], [170, 60], [168, 52], [166, 60], [165, 51], [153, 43], [158, 55], [156, 41], [156, 57], [159, 43], [163, 41], [162, 56], [155, 52], [152, 56], [153, 55]]
train_x_s = [60, 56, 60, 55, 60, 57, 65, 60, 62, 59, 43, 52, 41, 45, 43, 50, 46, 52, 56, 55, 53, 52, 65, 52, 52, 57, 60, 52, 60, 51, 43, 55, 41, 57, 43, 41, 56, 52, 56, 55]
train_y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
test_x_m = [[166, 58], [162, 56], [178, 66], [153, 50], [140, 60], [160, 55]]
test_x_s = [58, 56, 66, 50, 60, 55]
test_y = [1, 1, 1, -1, -1, -1]
3.5.2 Data Visualization
visualize_data2D(train_x_s, train_y)
visualize_data3D(train_x_m, train_y)
3.5.3 Multivariate Classification Training and Validation
model = linear_classifier()
model.train(train_x_m, train_y)
pred_y = model.predict(test_x_m)
acc = accuracy(pred_y, test_y)
print(acc)
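With two features the decision boundary is a line in the height-weight plane, k1*x1 + k2*x2 + b = 0, rather than a single threshold. A sketch of the corresponding decision rule (the weight layout [k1, k2, b] is an assumption; the text does not show get_weights() for multivariate models):

# Hypothetical two-feature decision rule; the [k1, k2, b] layout is
# an assumption, not confirmed by the library.
def decision_function2(x, k1, k2, b):
    if k1 * x[0] + k2 * x[1] + b > 0:
        return 1
    else:
        return -1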
3.5.4 Multivariate vs. univariate classifiers
model2 = linear_classifier()
model2.train(train_x_s, train_y)
pred_y = model2.predict(test_x_s)
acc = accuracy(pred_y, test_y)
print(acc)
3.6 Classification of iris flowers using perceptrons
3.6.1 Loading the dataset
iris = data.get('iris-simple')
3.6.2 Displaying the dataset
fig() + plot(iris)
3.6.3 Create a classifier
blc = binary_linear_classifier()
3.6.4 Training a classifier
blc.train(iris, alg=Perceptron())
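The Perceptron algorithm itself is simple: whenever a training point is misclassified, nudge the weights toward it. A minimal sketch for two features, showing the idea behind alg=Perceptron() rather than the library's actual implementation:

# X is a list of two-feature points, y a list of +1/-1 labels.
def perceptron_train(X, y, lr=0.1, epochs=100):
    w = [0.0, 0.0]
    b = 0.0
    for _ in range(epochs):
        for xi, yi in zip(X, y):
            # a point is misclassified if its signed score has the wrong sign
            if yi * (w[0] * xi[0] + w[1] * xi[1] + b) <= 0:
                w[0] += lr * yi * xi[0]
                w[1] += lr * yi * xi[1]
                b += lr * yi
    return w, b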
3.6.5 Setting the learning rate of the perceptron
blc1 = binary_linear_classifier()
blc1.train(iris, alg=Perceptron(lr=0.4))   # larger learning rate

blc2 = binary_linear_classifier()
blc2.train(iris, alg=Perceptron(lr=0.05))  # smaller learning rate

blc3 = binary_linear_classifier()
blc3.train(iris, alg=Perceptron(w=[1, 1], b=1))  # custom initial weights and bias
3.6.6 Comparing training results
fig() + plot(iris) + plot(blc) + plot(blc1) + plot(blc2)
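The learning rate controls the size of each weight update: a large value such as 0.4 moves the boundary in big jumps and can overshoot, while a small value such as 0.05 converges more smoothly but needs more passes over the data.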
3.7 Using a support vector machine for iris classification
3.7.1 Loading the dataset
iris = data.get('iris-simple')
fig() + plot(iris)
3.7.2 Create a classifier
blc = binary_linear_classifier()
3.7.3 Training a classifier with a support vector machine
blc.train(iris, alg=SVM())
fig() + plot(iris) + plot(blc)
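Unlike the perceptron, which stops at the first boundary that separates the classes, the SVM picks the separating line with the largest margin to the nearest training points, which usually generalizes better.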
3.8 Testing and applying the classifier
3.8.1 Loading the dataset
iris = data.get('iris-simple')
fig() + plot(iris)
3.8.2 Splitting the data
iris_train, iris_test = iris.split(7, 3)  # split in a 7:3 ratio (training : test)
fig() + plot(iris_train)
3.8.3 Creating the classifiers
blc1 = binary_linear_classifier()
blc2 = binary_linear_classifier()
blc1.train(iris_train, alg=Perceptron(lr=0.2))
blc2.train(iris_train, alg=SVM())
fig() + plot(iris_train) + plot(blc1) + plot(blc2)
3.8.4 Testing the classifier
acc1 = blc1.accuracy(iris_test)
acc2 = blc2.accuracy(iris_test)
print('Perceptron Accuracy:', acc1)
print('SVM Accuracy:', acc2)
3.8.5 Classifier application
point = [2, 0.7]
fig() + plot(iris) + plot(blc1) + plot(blc2) + plot([point])
label1 = blc1.predict(point)
label2 = blc2.predict(point)
print('Perceptron Prediction:', label1)
print('SVM Prediction:', label2)
3.9 Understanding the K-means algorithm
3.9.1 Getting the dataset
iris = data.get('iris')
feature, label = iris[0]
print("Feature:", feature)
3.9.2 Dataset feature selection
def select_features(feature):
    # keep only the third and fourth features
    # (petal length and petal width in the standard iris dataset)
    return feature[2:4]

iris2 = iris.map(select_features, on_field=0)
fig() + plot(iris2, type='scatter')
3.9.3 Create K-means clustering model
model = KMeans(K=3)
3.9.4 Training the model and observing the results
model.train(iris2.field(0))
fig() + plot(model, iris2, type='cluster_statistics')
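K-means alternates between two steps: assign every point to its nearest centroid, then move each centroid to the mean of its assigned points. A minimal sketch in plain Python, illustrating the algorithm rather than the library's KMeans class:

import random

def kmeans(points, k, iters=20):
    # start from k randomly chosen points as initial centroids
    centroids = random.sample(points, k)
    for _ in range(iters):
        # assignment step: each point joins its nearest centroid's cluster
        clusters = [[] for _ in range(k)]
        for p in points:
            dists = [sum((a - c) ** 2 for a, c in zip(p, cen)) for cen in centroids]
            clusters[dists.index(min(dists))].append(p)
        # update step: move each centroid to the mean of its cluster
        for j, cl in enumerate(clusters):
            if cl:
                centroids[j] = [sum(vals) / len(cl) for vals in zip(*cl)]
    return centroids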
3.9.5 Repeat and compare
model2 = KMeans(K=3)  # a second model, this time trained on all four iris features
model2.train(iris.field(0))
fig() + plot(model2, iris, type='cluster_statistics')