Machine learning - beyond simple classification (decision trees, naive Bayes, random forests, SVM): a self-study guide with source code

Straight to the code:

  1 # -- coding: gbk --
  2 from sklearn.datasets import load_breast_cancer
  3 from sklearn.tree import DecisionTreeClassifier
  4 from sklearn.model_selection import  train_test_split
  5 from sklearn.tree import export_graphviz
  6 import pandas as pd
  7 import graphviz
  8 import mglearn
  9 from sklearn.ensemble import RandomForestClassifier
 10 from sklearn.datasets import make_moons
 11 from sklearn.ensemble import GradientBoostingClassifier
 12 from sklearn.svm import SVC
 13 from pylab import *
 14 def 决策树():
 15     cancer = load_breast_cancer()
 16     X_train, X_test, y_train, y_test = train_test_split(
 17         cancer.data, cancer.target, stratify=cancer.target, random_state=42)
 18     tree = DecisionTreeClassifier(random_state=0)
 19     print(X_train)
 20     print(y_train.shape)
 21     tree.fit(X_train, y_train)
 22     y_predict=tree.predict(X_test)
 23     print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
 24     print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))
 25     '''
 26     export_graphviz(tree, out_file="tree.dot", class_names=["malignant", "benign"], feature_names=cancer.feature_names,
 27                     impurity=False, filled=True)
 28 
 29     Open with ( "tree.dot") AS F:
 30          dot_graph reached, f.read = ()
 31 is      graphviz.Source (dot_graph)
 32      '' ' 
33 is      Print ( " feature key: \ n-{} " .format (tree.feature_importances_ ))
 34 is  
def random_forest():
    """Fit a five-tree random forest on the two-moons data and plot it.

    Prints the predicted labels, the true test labels and the test accuracy,
    then draws the decision partition of every individual tree plus the
    decision boundary of the whole forest in a 2x3 grid of axes.
    """
    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=42)

    # A forest made of five trees.
    forest = RandomForestClassifier(n_estimators=5, random_state=2)
    forest.fit(X_train, y_train)
    y_pred = forest.predict(X_test)
    print(y_pred)
    print(y_test)
    print(np.mean(y_test == y_pred))

    fig, axes = plt.subplots(2, 3, figsize=(20, 10))
    # Five trees fill the first five axes; both calls must be inside the loop
    # so that every tree (not only the last one) gets its own panel.
    for i, (ax, tree) in enumerate(zip(axes.ravel(), forest.estimators_)):
        ax.set_title("Tree {}".format(i))
        mglearn.plots.plot_tree_partition(X_train, y_train, tree, ax=ax)

    # The sixth (bottom-right) panel shows the combined forest.
    mglearn.plots.plot_2d_separator(
        forest, X_train, fill=True, ax=axes[-1, -1], alpha=.4)
    axes[-1, -1].set_title("Random Forest")
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
    # Display the figure, as the SVM demo in this file does.
    plt.show()

 52 def 梯度提升树():
 53     cancer = load_breast_cancer()
 54     X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)
 55     #gbrt = GradientBoostingClassifier(random_state=0, max_depth=1)
 56     gbrt = GradientBoostingClassifier(random_state=0,  learning_rate=0.01)
 57 is      gbrt.fit (X_train, y_train)
 58      y_pred = gbrt.predict (X_test)
 59      Print (y_pred)
 60      Print (np.mean (y_pred == android.permission.FACTOR.))
 61 is  
def svm_easy_vector():
    """Fit an RBF-kernel SVM on mglearn's handcrafted dataset and plot it.

    Draws the decision boundary, the training points and, highlighted with a
    thicker marker edge, the support vectors.

    Parameter notes:
        gamma controls the width of the Gaussian kernel; it determines how far
        apart two points may be while still counting as "close".
        C is the regularization parameter, similar to the one used in linear
        models; it limits the importance of each point (more precisely, each
        point's ``dual_coef_``).
    """
    X, y = mglearn.tools.make_handcrafted_dataset()
    svm = SVC(kernel='rbf', C=10, gamma=0.1).fit(X, y)
    mglearn.plots.plot_2d_separator(svm, X, eps=.5)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
    # Plot the support vectors.
    sv = svm.support_vectors_
    # The sign of each dual coefficient is used as the marker class.
    sv_labels = svm.dual_coef_.ravel() > 0
    mglearn.discrete_scatter(sv[:, 0], sv[:, 1], sv_labels, s=15, markeredgewidth=3)
    plt.xlabel("Feature 0")
    plt.ylabel("Feature 1")
    plt.show()

 80 def preprocessing Vector Data ():
 81      Cancer = load_breast_cancer ()
 82      X_train, X_test, y_train, android.permission.FACTOR. = train_test_split (cancer.data, cancer.target, random_state = 0)
 83      SVC = the SVC ()
 84      svc.fit (X_train , y_train)
 85      y_pred = svc.predict (X_test)
 86      Print (np.mean (y_pred == android.permission.FACTOR.))
 87  
88      '' ' pretreatment - zoom ' '' 
89      min_on_training = X_train.min (Axis = 0)
 90      = range_on_training (X_train - min_on_training) .max (Axis = 0)
 91 is  
92     X_train_scaled = (X_train - min_on_training) / range_on_training
 93     X_test_scaled = (X_test - min_on_training) / range_on_training
 94     print("Minimum for each feature\n{}".format(X_train_scaled.min(axis=0)))
 95     print("Maximum for each feature\n {}".format(X_train_scaled.max(axis=0)))
 96 
 97     '''变换'''
 98     X_test_scaled = (X_test - min_on_training) / range_on_training
 99     svc = SVC()
100     svc.fit (X_train_scaled, y_train)
 101      y_pred = svc.predict (X_test_scaled)
 102      Print (np.mean (y_pred == android.permission.FACTOR.))
 103  IF  the __name__ == ' __main__ ' :
 104      preprocessing Vector data ()

 

Guess you like

Origin www.cnblogs.com/smartisn/p/12578560.html