Scikit-Learn

Scikit-Learn Assignment

这里写图片描述
这里写图片描述

Assignment

from sklearn import datasets
from sklearn import metrics
from sklearn import model_selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

try:
    # Legacy module: removed in scikit-learn 0.20 in favour of model_selection.
    from sklearn import cross_validation
except ImportError:
    cross_validation = None

def DataAnalysis():
    """Compare three classifiers on a synthetic dataset with 10-fold CV.

    Steps:
      1. Generate a binary classification dataset (1000 samples,
         10 features) with ``datasets.make_classification``.
      2. Split it with shuffled 10-fold cross validation.
      3. In every fold, train GaussianNB, SVC (RBF kernel) and
         RandomForestClassifier on the training part.
      4. For every fold and classifier, print the predictions next to the
         true labels, followed by accuracy, weighted F1-score and ROC AUC
         measured on the held-out part.

    Everything is written to standard output; nothing is returned.
    """

    # Create a classification dataset (n_samples >= 1000, n_features >= 10).
    # Two classes are used so that roc_auc_score accepts the label vectors.
    X, y = datasets.make_classification(
        n_samples=1000, n_features=10, n_informative=2,
        n_redundant=2, n_repeated=0, n_classes=2)

    print ("dataset information")
    # data examples (features)
    print (X)
    # data target labels (classes)
    print (y)

    # Split the dataset using 10-fold cross validation
    kf = model_selection.KFold(n_splits=10, shuffle=True)
    for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]

        print ("\nsplit the dataset")
        print ("fold %d" % fold)
        print (X_train)
        print (y_train)
        print (X_test)
        print (y_test)

        # Fresh estimators per fold so no state leaks between folds.
        classifiers = (
            ("GaussianNB", GaussianNB()),
            ("SVC", SVC(C=1e-02, kernel='rbf', gamma=0.1)),
            ("RandomForestClassifier",
             RandomForestClassifier(n_estimators=100)),
        )

        for name, clf in classifiers:
            clf.fit(X_train, y_train)
            pred = clf.predict(X_test)
            print ("\n" + name)
            print (pred)
            print (y_test)

            # Performance evaluation, done per classifier so that every
            # model is scored (not only the one trained last).
            acc = metrics.accuracy_score(y_test, pred)
            print ("\nAccuracy")
            print (acc)
            f1 = metrics.f1_score(y_test, pred, average="weighted")
            print ("\nF1-score")
            print (f1)
            # NOTE: computed from hard 0/1 predictions, not from scores.
            auc = metrics.roc_auc_score(y_test, pred)
            print ("\nAUC ROC")
            print (auc)

if __name__ == "__main__":
    # Run the analysis only when executed as a script, not on import.
    DataAnalysis()

Result of the Assignment
Note: only part of the results is shown below.

dataset information

这里写图片描述
这里写图片描述

split the dataset

这里写图片描述

Algorithm and Evaluation

这里写图片描述

猜你喜欢

转载自blog.csdn.net/abyssalseaa/article/details/80687671