【応用例】sklearnによる機械学習:乳がん識別・更新版(ロジスティック回帰とSGDClassifierの比較)

コード:

# Compare LogisticRegression and SGDClassifier on scikit-learn's bundled
# breast cancer dataset: split the data, standardize the features, time each
# fit, then print test accuracy and a per-class classification report.
import time

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.metrics import classification_report as crt
from sklearn.preprocessing import StandardScaler

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `train_test_split` now lives in `sklearn.model_selection`. The
# fallback keeps the script runnable on very old installations.
try:
    from sklearn.model_selection import train_test_split as tsplit
except ImportError:
    from sklearn.cross_validation import train_test_split as tsplit

breast_cancer = load_breast_cancer()

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, Y_train, Y_test = tsplit(
    breast_cancer.data,
    breast_cancer.target,
    test_size=0.2,
    random_state=1,
)

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test data reaches the models.
sts = StandardScaler()
X_train_sts = sts.fit_transform(X_train)
X_test_sts = sts.transform(X_test)
print(X_train_sts.shape, X_test_sts.shape)

lr = LogisticRegression()
# SGD shuffles the training data, so seed it to get the same score and
# report on every run (the original left this unseeded).
sgdc = SGDClassifier(random_state=1)

# Time each fit with perf_counter(): it is monotonic and high-resolution,
# unlike time.time(), which can jump when the system clock is adjusted.
ts1 = time.perf_counter()
lr.fit(X_train_sts, Y_train)
te1 = time.perf_counter()
print(te1 - ts1)

ts2 = time.perf_counter()
sgdc.fit(X_train_sts, Y_train)
te2 = time.perf_counter()
print(te2 - ts2)

# Accuracy of each classifier on the held-out test split.
score1 = lr.score(X_test_sts, Y_test)
score2 = sgdc.score(X_test_sts, Y_test)
print(score1, score2)

# Per-class precision / recall / F1 for logistic regression.
lr_pred = lr.predict(X_test_sts)
lr_report = crt(Y_test, lr_pred, target_names=["0", "1"])
print(lr_report)

# Same report for the SGD classifier (the original stored these predictions
# under the misleading name `lr_pre2`).
sgdc_pred = sgdc.predict(X_test_sts)
sgdc_report = crt(Y_test, sgdc_pred, target_names=["0", "1"])
print(sgdc_report)
复制代码

出力:

(455, 30) (114, 30)

0.004028797149658203

0.0019457340240478516
0.9824561403508771 0.9736842105263158
             precision    recall  f1-score   support

          0       1.00      0.95      0.98        42
          1       0.97      1.00      0.99        72

avg / total       0.98      0.98      0.98       114

             precision    recall  f1-score   support

          0       1.00      0.93      0.96        42
          1       0.96      1.00      0.98        72

avg / total       0.97      0.97      0.97       114
复制代码

 

おすすめ

転載: juejin.im/post/7000406362307952671