import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from pandas.plotting import scatter_matrix
from sklearn.datasets import load_iris
# Part 4: feature engineering.
# Columns 0-3 of the raw value array are taken as the features and column 4
# as the target.
# NOTE(review): assumes iris_d is a DataFrame defined earlier in the file
# whose fifth column holds the class label -- confirm.
array = iris_d.values
X, Y = array[:, :4], array[:, 4]

# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state keeps the split reproducible between runs.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=7,
)
# Part 5: machine-learning models and evaluation.
# K-nearest neighbours (KNN) with scikit-learn's default settings.
model = KNeighborsClassifier()
model.fit(x_train, y_train)
predictions = model.predict(x_test)

# Model evaluation: accuracy on the held-out test split. (This is a single
# hold-out score, not cross-validation.) The print call used to be fused into
# the trailing comment of the previous line and therefore never ran.
print(accuracy_score(y_test, predictions))
# Recorded output: 0.9
# Machine-learning model: support vector machine (SVM) with scikit-learn's
# default settings.
model = SVC()
model.fit(x_train, y_train)
predictions = model.predict(x_test)

# Model evaluation: accuracy on the held-out test split. (This is a single
# hold-out score, not cross-validation.) The print call used to be fused into
# the trailing comment of the previous line and therefore never ran.
print(accuracy_score(y_test, predictions))
# Recorded output: 0.8666666666666667
# Machine-learning model: random forest (RF).
# The forest is seeded with the same value as the train/test split so that
# repeated runs of the script report the same accuracy.
model = RandomForestClassifier(random_state=7)
model.fit(x_train, y_train)
predictions = model.predict(x_test)

# Model evaluation: accuracy on the held-out test split. (This is a single
# hold-out score, not cross-validation.) The print call used to be fused into
# the trailing comment of the previous line and therefore never ran.
print(accuracy_score(y_test, predictions))
# Recorded output of the original, unseeded run: 0.8666666666666667
# Machine-learning model: logistic regression (LR).
# max_iter is raised above the library default because the recorded run with
# default settings stopped at the iteration limit before lbfgs converged
# (see the warning quoted below). Scaling the features is the alternative
# remedy suggested by that warning.
model = LogisticRegression(max_iter=1000)
model.fit(x_train, y_train)
predictions = model.predict(x_test)

# Model evaluation: accuracy on the held-out test split. (This is a single
# hold-out score, not cross-validation.) The print call used to be fused into
# the trailing comment of the previous line and therefore never ran.
print(accuracy_score(y_test, predictions))
# Recorded output of the original run (default max_iter): 0.8666666666666667
# That run also emitted the following warning from
# sklearn/linear_model/_logistic.py:765 (Python 3.7 install):
#   ConvergenceWarning: lbfgs failed to converge (status=1):
#   STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
#   Increase the number of iterations (max_iter) or scale the data as shown in:
#       https://scikit-learn.org/stable/modules/preprocessing.html
#   Please also refer to the documentation for alternative solver options:
#       https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression