Comparing machine-learning models on the iris dataset

# coding: utf-8
"""
在iris上 对比机器学习效率
"""

import lightgbm as lgb
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import naive_bayes
from sklearn.ensemble import RandomForestClassifier
# accuracy metric
from sklearn.metrics import accuracy_score
# train/test split and cross-validation
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.neural_network import MLPClassifier
# support vector classifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Allow CJK characters to render in matplotlib figures.
mpl.rcParams['font.sans-serif'] = [u'SimHei']
mpl.rcParams['axes.unicode_minus'] = False


def load_data():
    """Load the comma-separated iris data and return a train/test split."""
    raw_data = np.loadtxt("./iris.txt", dtype='str', encoding='utf-8')
    x, y = list(), list()
    for index in range(len(raw_data)):
        # The first four fields are features; the last is the class label.
        x.append([float(i) for i in raw_data[index].split(",")[:-1]])
        y.append(raw_data[index].split(",")[-1])
    x = np.array(x)
    # Encode the string labels as integer codes (0, 1, 2).
    y = pd.Categorical(y).codes
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=33)
    return x_train, x_test, y_train, y_test
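

# The same split can also come straight from scikit-learn's bundled copy of
# iris, avoiding the dependency on a local iris.txt file. A minimal sketch;
# load_data_sklearn is a hypothetical alternative, not part of the original post.
def load_data_sklearn():
    from sklearn.datasets import load_iris
    iris = load_iris()
    # iris.target is already integer-coded, so no label encoding is needed.
    return train_test_split(iris.data, iris.target, test_size=0.3, random_state=33)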


if __name__ == '__main__':
    x_train, x_test, y_train, y_test = load_data()

    # Random forest with 10 trees.
    forest = RandomForestClassifier(n_estimators=10)
    clf1 = forest.fit(x_train, y_train)

    # Support vector classifier with a polynomial kernel.
    svm = SVC(C=1, kernel='poly')
    clf2 = svm.fit(x_train, y_train)

    # XGBoost with default hyperparameters.
    model = XGBClassifier()
    clf3 = model.fit(x_train, y_train)

    # LightGBM. Iris has 3 classes, so num_class must be 3 (the original 50
    # was a bug). The deprecated verbose_eval argument is dropped; use the
    # log_evaluation callback if training logs are wanted.
    lgb_train = lgb.Dataset(x_train, y_train)
    params = {'max_depth': 6, 'min_data_in_leaf': 20, 'num_leaves': 35, 'learning_rate': 0.1,
              'lambda_l1': 0.1, 'lambda_l2': 0.2, 'objective': 'multiclass', 'num_class': 3,
              'verbose': -1}
    clf4 = lgb.train(params, lgb_train, num_boost_round=1000)

    # Multinomial naive Bayes; this works because the iris features are
    # non-negative, although GaussianNB is the usual choice for continuous data.
    nb = naive_bayes.MultinomialNB(alpha=1.0, fit_prior=True, class_prior=None)
    clf5 = nb.fit(x_train, y_train)

    # Multi-layer perceptron.
    mlp = MLPClassifier(activation='relu', solver='adam', alpha=0.0001)
    clf6 = mlp.fit(x_train, y_train)

    x_plot = ["forest", "svm", "xg", "lgb", "nb", "mlp"]
    clf1_accuracy = accuracy_score(y_test, clf1.predict(x_test))
    clf2_accuracy = accuracy_score(y_test, clf2.predict(x_test))
    clf3_accuracy = accuracy_score(y_test, clf3.predict(x_test))
    # The LightGBM Booster predicts per-class probabilities; argmax recovers labels.
    result = clf4.predict(x_test, num_iteration=clf4.best_iteration)
    clf4_accuracy = accuracy_score(y_test, np.argmax(result, axis=1))
    clf5_accuracy = accuracy_score(y_test, clf5.predict(x_test))
    clf6_accuracy = accuracy_score(y_test, clf6.predict(x_test))
    print(clf5.predict(x_test))

    y_plot = [clf1_accuracy, clf2_accuracy, clf3_accuracy, clf4_accuracy, clf5_accuracy, clf6_accuracy]
    print(clf1_accuracy, clf2_accuracy, clf3_accuracy, clf4_accuracy, clf5_accuracy, clf6_accuracy)
    plt.bar(x_plot, y_plot, label=u'test-set accuracy')
    plt.legend()
    plt.show()

    # 5-fold cross-validation of the MLP. Note that this re-fits the model
    # on the held-out test split only; cross-validating on the full dataset
    # would give a more reliable estimate.
    scores = cross_val_score(clf6, x_test, y_test, cv=5)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
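
The script above compares only test-set accuracy, although the point of the post is to compare efficiency, so training and prediction time are worth measuring too. Below is a minimal sketch using time.perf_counter; the models dict and the timing loop are illustrative additions that reuse load_data() from the script, not part of the original post.

import time
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

x_train, x_test, y_train, y_test = load_data()
models = {"forest": RandomForestClassifier(n_estimators=10),
          "svm": SVC(C=1, kernel='poly'),
          "nb": MultinomialNB(),
          "mlp": MLPClassifier()}
for name, clf in models.items():
    # Time the fit and predict calls separately for each model.
    start = time.perf_counter()
    clf.fit(x_train, y_train)
    fit_time = time.perf_counter() - start
    start = time.perf_counter()
    clf.predict(x_test)
    predict_time = time.perf_counter() - start
    print("%s: fit %.4f s, predict %.4f s" % (name, fit_time, predict_time))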

Reposted from www.cnblogs.com/xiennnnn/p/12155350.html