机器学习过程

机器学习过程
1.加载数据集
from sklearn import datasets#加载sklearn的数据集
datasets.load_boston()#加载波士顿房价的数据集(注意:该函数已在scikit-learn 1.2中移除)
datasets.load_iris()#加载鸢尾花数据集
datasets.load_digits()#加载手写识别数字的数据集
设data为上述load_*函数的返回值(例如 data = load_iris()),则data.data为样本数据,data.target为类别(标签)数据
2.数据分割
from sklearn.model_selection import train_test_split#引入分割数据方法
X_train,X_test,y_train,y_test = train_test_split(data.data,data.target,test_size=0.2,random_state=0)#按8:2划分训练集与测试集
3.选择模型
(1).knn

from sklearn import neighbors
clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
(2).决策树

from sklearn import tree
clf = tree.DecisionTreeClassifier()
(3).k-means

from sklearn.cluster import KMeans
kmeans = KMeans(init='k-means++', n_clusters=n_digits, n_init=10)
(4).朴素贝叶斯

from sklearn.naive_bayes import GaussianNB
gnb = GaussianNB()
(5).线性回归

from sklearn import linear_model
reg = linear_model.LinearRegression()
4.模型的训练
model.fit(X_train,y_train)
5.模型的预测
model.predict(X_test)
6.模型的评测
(1).精确率(查准率) precision_score
(2).召回率 recall_score
(3).F1得分 f1_score
(4).准确率 accuracy_score
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score
(5).交叉验证
from sklearn.model_selection import cross_val_score
scores = cross_val_score(svc,x,y,cv=10,scoring='accuracy')
7.模型的保存
import joblib#sklearn.externals.joblib已在scikit-learn 0.23中移除,请直接使用joblib包
joblib.dump(svc, 'D:/filename.pkl')
svc1 = joblib.load('D:/filename.pkl')
print(svc1.score(X_test, y_test))

猜你喜欢

转载自blog.csdn.net/m0_43432638/article/details/84979352
今日推荐