SVM模型应用（二）通过皮马印第安人糖尿病预测问题对逻辑回归和SVC模型的比较

import numpy as np
from sklearn import svm
from sklearn.linear_model import LogisticRegression

# Read the comma-separated dataset into one 2-D float array.
my_matrix = np.loadtxt("E:\\pima-indians-diabetes.txt", delimiter=",")

# Number of columns per row; the final column is used as the label below.
lenth_x = len(my_matrix[0])
last_col = lenth_x - 1

# Every column before the last is a feature; the last column is the target.
data_x, data_y = my_matrix[:, :last_col], my_matrix[:, last_col]

# Sanity check: first feature row, feature count, and sample count.
print(data_x[0], len(data_x[0]), len(data_x))

(array([   6.   ,  148.   ,   72.   ,   35.   ,    0.   ,   33.6  ,
          0.627,   50.   ]), 8, 768)

# Sequential hold-out split: the leading 70% of the rows are used for
# training and the remaining rows for testing (no shuffling is applied).
n_train = int(len(data_y) * 0.7)

X_train, X_test = data_x[:n_train], data_x[n_train:]
y_train, y_test = data_y[:n_train], data_y[n_train:]

# Build both classifiers with their library-default hyperparameters:
# clf1 is the support vector classifier, clf2 the logistic regression.
clf1 = svm.SVC()
clf2 = LogisticRegression()

# Train each model on the same training rows, SVC first.
for model in (clf1, clf2):
    model.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

# Predict the held-out rows with each fitted model
# (y_predictions1 comes from clf1, y_predictions2 from clf2).
y_predictions1, y_predictions2 = (
    model.predict(X_test) for model in (clf1, clf2)
)

# Show the predictions produced by clf1.
print(y_predictions1)

[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]

# Show the predictions that clf2 (the logistic regression) made for X_test.
print(y_predictions2)

[ 0.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  0.  0.  0.  0.
  0.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  1.  0.  1.  0.  1.
  0.  0.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  1.  1.  0.  1.  0.  0.
  0.  0.  1.  1.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  1.  1.  1.
  1.  0.  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  1.  0.  0.  0.  0.  0.
  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  1.  0.  0.  1.  0.  0.
  1.  0.  0.  1.  1.  0.  0.  0.  0.  1.  0.  0.  0.  1.  0.  0.  1.  1.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.
  0.  0.  0.  0.  0.  1.  0.  0.  1.  1.  0.  1.  0.  1.  1.  1.  0.  0.
  1.  1.  0.  0.  0.  0.  1.  0.  1.  0.  0.  0.  0.  0.  0.]

# Show the true labels of the test rows, for comparison with the predictions.
print(y_test)

[ 0.  0.  1.  1.  1.  1.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.
  0.  0.  0.  0.  1.  0.  1.  1.  0.  0.  0.  1.  0.  1.  0.  1.  0.  1.
  0.  1.  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  1.  1.  0.  1.  0.  0.
  0.  0.  1.  1.  0.  1.  0.  0.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  0.  1.  0.  0.  0.  1.  0.  0.
  0.  1.  1.  1.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  1.  0.  1.  1.
  1.  1.  0.  1.  1.  0.  0.  0.  0.  0.  0.  0.  1.  1.  0.  1.  0.  0.
  1.  0.  1.  0.  0.  0.  0.  0.  1.  0.  1.  0.  1.  0.  1.  1.  0.  0.
  0.  0.  1.  1.  0.  0.  0.  1.  0.  1.  1.  0.  0.  1.  0.  0.  1.  1.
  0.  0.  1.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  1.  1.  1.  0.  0.
  0.  0.  0.  0.  1.  1.  0.  0.  1.  0.  0.  1.  0.  1.  1.  1.  0.  0.
  1.  1.  1.  0.  1.  0.  1.  0.  1.  0.  0.  0.  0.  1.  0.]

# Count how many test labels each model predicted correctly:
# k is the number of hits for y_predictions1, h for y_predictions2.
# Comparing the arrays element-wise yields a boolean mask, and counting its
# True entries replaces the two hand-written index loops.
k = int(np.count_nonzero(y_predictions1 == y_test))
h = int(np.count_nonzero(y_predictions2 == y_test))
print(k,h)

(152, 183)

在 231 个测试样本中，SVC 预测正确 152 个（准确率约 65.8%），逻辑回归预测正确 183 个（准确率约 79.2%）。由于 SVC 直接使用了默认参数，既没有调整参数，也没有对特征做标准化，导致预测结果全部为 0；后面将对 SVM 模型进行深入的学习，调整参数，以改善模型的效果。

SVM模型应用（二）通过皮马印第安人糖尿病预测问题对逻辑回归和SVC模型的比较

猜你喜欢