SVM model application (3) Improve the prediction effect of SVM model through data normalization

import numpy as np
from sklearn import svm
from sklearn.linear_model import LogisticRegression

my_matrix=np.loadtxt("E:\\pima-indians-diabetes.txt",delimiter=",",skiprows=0) 

lenth_x=len(my_matrix[0])

data_y=my_matrix[:,lenth_x-1]

data_x=my_matrix[:,0:lenth_x-1]
print(data_x[0:2],len(data_x[0]),len(data_x))
data_shape=data_x.shape
data_rows=data_shape[0]
data_cols=data_shape[1]

data_col_max=data_x.max(axis=0)#获取二维数组列向最大值
data_col_min=data_x.min(axis=0)#获取二维数组列向最小值
for i in xrange(0, data_rows, 1):#将输入数组归一化
    for j in xrange(0, data_cols, 1):
        data_x[i][j] = \
            (data_x[i][j] - data_col_min[j]) / \
            (data_col_max[j] - data_col_min[j])
print(data_x[0:2])
(array([[   6.   ,  148.   ,   72.   ,   35.   ,    0.   ,   33.6  ,
           0.627,   50.   ],
       [   1.   ,   85.   ,   66.   ,   29.   ,    0.   ,   26.6  ,
           0.351,   31.   ]]), 8, 768)
[[ 0.35294118  0.74371859  0.59016393  0.35353535  0.          0.50074516
   0.23441503  0.48333333]
 [ 0.05882353  0.42713568  0.54098361  0.29292929  0.          0.39642325
   0.11656704  0.16666667]]
n_train=int(len(data_y)*0.7)#选择70%的数据作为训练集,30%的数据作为测试集

X_train=data_x[:n_train]
y_train=data_y[:n_train]
X_test=data_x[n_train:]
y_test=data_y[n_train:]

clf1=svm.SVC()#模型1选择SVM经典模型
clf1.fit(X_train,y_train)
clf2=LogisticRegression()#模型2选择逻辑回归
clf2.fit(X_train,y_train)


y_predictions1=clf1.predict(X_test)
y_predictions2=clf2.predict(X_test)

print(y_predictions1)
print(y_predictions2)
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.
  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  1.  0.  0.
  0.  0.  1.  1.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.
  1.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  1.  0.  0.  0.  0.  0.  1.  0.  0.  0.  1.  0.  0.  1.  1.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
  0.  0.  0.  0.  0.  1.  0.  0.  1.  1.  0.  0.  0.  1.  0.  0.  0.  0.
  1.  1.  0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.  0.  0.]
[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  1.  0.  1.  0.  1.
  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  1.  0.  0.
  0.  0.  1.  1.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  1.  1.  1.
  1.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  1.  1.  0.  0.  0.  0.
  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.  0.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  1.  1.  0.  0.  0.  0.  1.  0.  0.  0.  1.  0.  0.  1.  1.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
  0.  0.  0.  0.  0.  1.  0.  0.  1.  1.  0.  0.  0.  1.  0.  0.  0.  0.
  1.  1.  0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.  0.  0.]
k,h=0,0
for i in range(len(y_test)):
    if y_predictions1[i]==y_test[i]:
        k+=1
for i in range(len(y_test)):
    if y_predictions2[i]==y_test[i]:
        h+=1 
print(k,h)
(177, 181)
accuracy_svm=float(k)/float(len(y_test))
accuracy_LogR=float(h)/float(len(y_test))
print"The accuracy of SVM is %f, and the accuracy of LogisticRegression is %f"%(accuracy_svm,accuracy_LogR)
The accuracy of SVM is 0.766234, and the accuracy of LogisticRegression is 0.783550

Through the actual prediction effect test, the normalization of the data improves the prediction effect of the SVM model, which is basically close to the logistic regression model. In the future, the prediction effect will be improved by selecting the hyperparameters of the SVM model.

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325763620&siteId=291194637
svm