# SkLearn习题

题目

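(原题图片缺失,此处原为图片占位文字。根据下方代码推测,题目大意为:用 `make_classification` 生成一个二分类数据集,做 10 折交叉验证,在每一折上分别训练 GaussianNB、SVC 和随机森林分类器,用 Accuracy、F1-score 和 ROC AUC 进行评估,最后对结果进行比较分析。)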

代码

from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# KFold now lives in sklearn.model_selection.
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC


def _make_classifiers():
    """Return fresh, unfitted ``(label, estimator)`` pairs to compare.

    A new list is built on every call so that each fold trains its own
    estimator instances, exactly as the original per-fold constructors did.
    """
    return [
        ("GaussianNB:", GaussianNB()),
        ("SVC:", SVC(C=1e-01, kernel='rbf', gamma=0.1)),
        ("Random Forest:", RandomForestClassifier(n_estimators=6)),
    ]


def _positive_class_scores(clf, X):
    """Return continuous scores for the positive class (label 1).

    ROC AUC needs a ranking of the samples, not thresholded labels. Use the
    estimator's ``decision_function`` when it has one (SVC); otherwise fall
    back to the probability of the second class from ``predict_proba``
    (GaussianNB, RandomForestClassifier).
    """
    if hasattr(clf, "decision_function"):
        return clf.decision_function(X)
    return clf.predict_proba(X)[:, 1]


# Synthetic binary classification problem: 1000 samples, 10 features.
dataset = datasets.make_classification(
    n_samples=1000, n_features=10, n_informative=2,
    n_redundant=2, n_repeated=0, n_classes=2)
X, y = dataset

# 10-fold cross-validation with shuffling; no random_state is set, so the
# folds (and therefore the printed numbers) differ from run to run.
kf = KFold(n_splits=10, shuffle=True)
for case, (train_index, test_index) in enumerate(kf.split(X), start=1):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Fold number
    print("Case:%d" % case)

    for label, clf in _make_classifiers():
        print(label)
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        print("Accuracy=", metrics.accuracy_score(y_test, pred))
        print("F1_score=", metrics.f1_score(y_test, pred))
        # Score the ROC curve on continuous outputs; passing hard labels
        # would reduce the curve to a single operating point.
        scores = _positive_class_scores(clf, X_test)
        print("AUC ROC=", metrics.roc_auc_score(y_test, scores))

结果

Case:1
GaussianNB:
Accuracy= 0.92
F1_score= 0.9333333333333333
AUC ROC= 0.9136006614303431
SVC:
Accuracy= 0.94
F1_score= 0.9464285714285715
AUC ROC= 0.9491525423728814
Random Forest:
Accuracy= 0.92
F1_score= 0.9310344827586206
AUC ROC= 0.9210417527904092
Traceback (most recent call last):
  File ".\homework.py", line 49, in <module>
    count += 1
NameError: name 'count' is not defined
PS F:\CODE\Python> python .\homework.py
F:\Python\lib\site-packages\sklearn\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
Case:1
GaussianNB:
Accuracy= 0.82
F1_score= 0.823529411764706
AUC ROC= 0.8382594417077175
SVC:
Accuracy= 0.85
F1_score= 0.854368932038835
AUC ROC= 0.8674055829228243
Random Forest:
Accuracy= 0.85
F1_score= 0.8598130841121495
AUC ROC= 0.8608374384236455
Case:2
GaussianNB:
Accuracy= 0.83
F1_score= 0.8
AUC ROC= 0.8251733986128111
SVC:
Accuracy= 0.86
F1_score= 0.8333333333333333
AUC ROC= 0.8543451652386782
Random Forest:
Accuracy= 0.91
F1_score= 0.891566265060241
AUC ROC= 0.9039167686658507
Case:3
GaussianNB:
Accuracy= 0.86
F1_score= 0.851063829787234
AUC ROC= 0.858974358974359
SVC:
Accuracy= 0.89
F1_score= 0.8791208791208791
AUC ROC= 0.8878205128205129
Random Forest:
Accuracy= 0.89
F1_score= 0.8817204301075269
AUC ROC= 0.8886217948717948
Case:4
GaussianNB:
Accuracy= 0.9
F1_score= 0.8913043478260869
AUC ROC= 0.8982371794871794
SVC:
Accuracy= 0.91
F1_score= 0.9010989010989011
AUC ROC= 0.9078525641025641
Random Forest:
Accuracy= 0.89
F1_score= 0.8791208791208791
AUC ROC= 0.8878205128205129
Case:5
GaussianNB:
Accuracy= 0.87
F1_score= 0.8686868686868686
AUC ROC= 0.8737454837414693
SVC:
Accuracy= 0.89
F1_score= 0.8910891089108911
AUC ROC= 0.8926134082697712
Random Forest:
Accuracy= 0.92
F1_score= 0.9259259259259259
AUC ROC= 0.918506623845845
Case:6
GaussianNB:
Accuracy= 0.82
F1_score= 0.830188679245283
AUC ROC= 0.8246753246753247
SVC:
Accuracy= 0.86
F1_score= 0.8679245283018867
AUC ROC= 0.8652597402597402
Random Forest:
Accuracy= 0.87
F1_score= 0.8807339449541285
AUC ROC= 0.8717532467532467
Case:7
GaussianNB:
Accuracy= 0.88
F1_score= 0.8695652173913043
AUC ROC= 0.8787515006002401
SVC:
Accuracy= 0.89
F1_score= 0.8817204301075268
AUC ROC= 0.8889555822328932
Random Forest:
Accuracy= 0.88
F1_score= 0.875
AUC ROC= 0.8795518207282913
Case:8
GaussianNB:
Accuracy= 0.91
F1_score= 0.8941176470588235
AUC ROC= 0.9037842190016104
SVC:
Accuracy= 0.91
F1_score= 0.8965517241379309
AUC ROC= 0.9053945249597424
Random Forest:
Accuracy= 0.91
F1_score= 0.898876404494382
AUC ROC= 0.9070048309178743
Case:9
GaussianNB:
Accuracy= 0.86
F1_score= 0.8409090909090908
AUC ROC= 0.8565656565656564
SVC:
Accuracy= 0.88
F1_score= 0.8636363636363636
AUC ROC= 0.8767676767676769
Random Forest:
Accuracy= 0.82
F1_score= 0.8125
AUC ROC= 0.8242424242424242
Case:10
GaussianNB:
Accuracy= 0.84
F1_score= 0.84
AUC ROC= 0.8484848484848486
SVC:
Accuracy= 0.85
F1_score= 0.8514851485148516
AUC ROC= 0.8575757575757577
Random Forest:
Accuracy= 0.88
F1_score= 0.8846153846153846
AUC ROC= 0.8848484848484849

分析

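以上面完整的一次 10 折运行结果为例,三种算法的平均准确率分别约为:随机森林 0.882、SVC 0.879、GaussianNB 0.859。随机森林的平均表现略好,但与 SVC 的差距很小,而且在各折之间两者互有胜负(例如第 2 折随机森林更好,第 4 折 SVC 更好);GaussianNB 整体稍逊。因此只能说在这个数据集和这组参数下随机森林略占优势。另外,由于数据集和划分都是随机生成的,每次运行的结果会有所不同。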

猜你喜欢

转载自blog.csdn.net/qq_39178023/article/details/80724887