Training several common classification models on the iris dataset

Train several common classification algorithms on the iris data and use K-fold cross-validation to evaluate each one.

K-fold cross-validation: sklearn.model_selection.KFold(n_splits=k, shuffle=False, random_state=None)

Idea: split the dataset into n_splits disjoint subsets. Each subset in turn serves as the validation set while the remaining n_splits-1 subsets form the training set, so training and testing are performed n_splits times and produce n_splits results (see the sketch after the parameter list).

Parameters:
n_splits: the number of equal parts (folds) to divide the data into
shuffle: whether to shuffle the data before dividing
① If False, every division gives the same result, equivalent to fixing random_state to an integer
② If True, the divisions differ from run to run: the data are reshuffled and sampled randomly
random_state: random number seed (only takes effect when shuffle=True)
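
A minimal sketch of these splits on toy data (k=3, shuffle=False; the printed arrays are row indices, not data values):

import numpy as np
from sklearn.model_selection import KFold

X_demo = np.arange(6).reshape(6, 1)  # 6 toy samples
kf_demo = KFold(n_splits=3, shuffle=False)

# each iteration yields disjoint index arrays: 4 train rows, 2 test rows
for fold, (train_idx, test_idx) in enumerate(kf_demo.split(X_demo), start=1):
    print('fold {}: train={}, test={}'.format(fold, train_idx, test_idx))
# fold 1: train=[2 3 4 5], test=[0 1]
# fold 2: train=[0 1 4 5], test=[2 3]
# fold 3: train=[0 1 2 3], test=[4 5]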

Dataset: iris (loaded in this example from a local file)
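
If iris.csv is not at hand, one hedged alternative is to build an equivalent DataFrame from scikit-learn's bundled copy of the data (note that load_iris encodes the class as integers 0-2 rather than species names):

from sklearn.datasets import load_iris
import pandas as pd

iris = load_iris()
data = pd.DataFrame(iris.data, columns=["sepal_length", "sepal_width", "petal_length", "petal_width"])
data["category"] = iris.target  # integer class labels 0/1/2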

Code


import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold


from sklearn import tree
from sklearn import naive_bayes
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier


# Read the data
data = pd.read_csv('iris.csv', header=None)
data.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "category"]

X = data.iloc[:, 0:4]  # the four feature columns
Y = data.iloc[:, 4]    # the class label


k = 10
kf = KFold(n_splits=k, shuffle=True)  # 10-fold CV; shuffle so folds are drawn randomly

def eval_model(model_name, model):
    accuracies = []
    for i, (train_index, test_index) in enumerate(kf.split(X), start=1):  # split into folds
        # KFold yields positional row indices, so index with iloc rather than loc
        x_train, x_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = Y.iloc[train_index], Y.iloc[test_index]

        model.fit(x_train, y_train)        # train
        y_predict = model.predict(x_test)  # predict

        accuracy = accuracy_score(y_pred=y_predict, y_true=y_test)  # fold accuracy
        accuracies.append(accuracy)
        print('Fold {}: {}'.format(i, accuracy))

    print(model_name + " mean accuracy: ", np.mean(accuracies))
    
    
# Factories (lambdas) so each evaluation gets a fresh, untrained model
models = {
    'decision tree': lambda: tree.DecisionTreeClassifier(),
    'random forest': lambda: RandomForestClassifier(n_estimators=100),
    'naive bayes': lambda: naive_bayes.GaussianNB(),
    'svm': lambda: svm.SVC(gamma='scale'),
    'GBDT': lambda: GradientBoostingClassifier(),
    'MLP': lambda: MLPClassifier(max_iter=1000),
}


for name, m in models.items():
    eval_model(name, m())
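
As a cross-check, scikit-learn can compute the same kind of per-fold accuracies in one call with cross_val_score; this is a shorthand sketch reusing the kf splitter defined above, not a different evaluation scheme:

from sklearn.model_selection import cross_val_score

scores = cross_val_score(tree.DecisionTreeClassifier(), X, Y, cv=kf, scoring='accuracy')
print('decision tree mean accuracy:', scores.mean())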



Source: blog.csdn.net/d1240673769/article/details/88817833