Directly to the code:
# -*- coding: utf-8 -*-
"""Small scikit-learn demos: decision tree, random forest, gradient boosting, SVM.

Each function is a self-contained experiment that loads or generates a
dataset, fits one model and prints (or plots) the outcome.  Running the file
as a script executes only the SVM preprocessing demo.
"""
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import export_graphviz
import pandas as pd
import graphviz
import mglearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_moons
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from pylab import *
# Bind ``np`` and ``plt`` explicitly instead of relying on the star import
# above to leak them into this namespace.
import numpy as np
import matplotlib.pyplot as plt


def 决策树():
    """Fit a decision tree on the breast-cancer data and print its scores.

    Prints the training matrix, the shape of the training labels, the
    accuracy on the training and test splits, and the feature importances
    of the fitted tree.
    """
    cancer = load_breast_cancer()
    X_train, X_test, y_train, y_test = train_test_split(
        cancer.data, cancer.target, stratify=cancer.target, random_state=42)
    tree = DecisionTreeClassifier(random_state=0)
    print(X_train)
    print(y_train.shape)
    tree.fit(X_train, y_train)
    print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
    print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))
    # Optional Graphviz export of the fitted tree (disabled in the original):
    #
    # export_graphviz(tree, out_file="tree.dot",
    #                 class_names=["malignant", "benign"],
    #                 feature_names=cancer.feature_names,
    #                 impurity=False, filled=True)
    # with open("tree.dot") as f:
    #     dot_graph = f.read()
    # graphviz.Source(dot_graph)
    print("Feature importances:\n{}".format(tree.feature_importances_))


def random_forest():
    """Fit a five-tree random forest on ``make_moons`` data and plot it.

    Prints the predictions, the true test labels and the test accuracy, then
    draws the partition of every individual tree plus the decision boundary
    of the whole forest on a 2x3 grid of axes.
    """
    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=42)

    # A random forest made of five trees.
    forest = RandomForestClassifier(n_estimators=5, random_state=2)
    forest.fit(X_train, y_train)
    y_pred = forest.predict(X_test)
    print(y_pred)
    print(y_test)
    print(np.mean(y_test == y_pred))

    _, axes = plt.subplots(2, 3, figsize=(20, 10))
    # zip() stops after the five estimators, which leaves the sixth (last)
    # axis free for the combined forest plot below.
    for i, (ax, tree) in enumerate(zip(axes.ravel(), forest.estimators_)):
        ax.set_title("Tree {}".format(i))
        mglearn.plots.plot_tree_partition(X_train, y_train, tree, ax=ax)
    mglearn.plots.plot_2d_separator(
        forest, X_train, fill=True, ax=axes[-1, -1], alpha=.4)
    axes[-1, -1].set_title("Random Forest")
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
    # NOTE(review): the original never calls plt.show() here, so when run as
    # a plain script the figure is built but not displayed.


def 梯度提升树():
    """Fit gradient boosting on the breast-cancer data and print the accuracy.

    Prints the predicted test labels followed by the fraction of test
    samples that were classified correctly.
    """
    cancer = load_breast_cancer()
    X_train, X_test, y_train, y_test = train_test_split(
        cancer.data, cancer.target, random_state=0)
    # Alternative configuration tried in the original: limit the tree depth
    # instead of lowering the learning rate.
    # gbrt = GradientBoostingClassifier(random_state=0, max_depth=1)
    gbrt = GradientBoostingClassifier(random_state=0, learning_rate=0.01)
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_test)
    print(y_pred)
    print(np.mean(y_pred == y_test))


def svm_support_vectors():
    """Fit an RBF-kernel SVM on mglearn's handcrafted data and plot it.

    Draws the decision boundary, the data points and, with a thicker marker
    edge, the support vectors; then shows the figure.
    """
    X, y = mglearn.tools.make_handcrafted_dataset()
    # gamma controls the width of the Gaussian kernel: it sets the scale of
    # what it means for two points to be "close" to each other.
    # C is the regularization parameter, similar to the one used in linear
    # models: it limits the importance of each point (more precisely, of
    # each point's dual_coef_).
    svm = SVC(kernel='rbf', C=10, gamma=0.1).fit(X, y)
    mglearn.plots.plot_2d_separator(svm, X, eps=.5)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
    # Plot the support vectors; their class is read from the sign of the
    # dual coefficients.
    sv = svm.support_vectors_
    sv_labels = svm.dual_coef_.ravel() > 0
    mglearn.discrete_scatter(sv[:, 0], sv[:, 1], sv_labels, s=15, markeredgewidth=3)
    plt.xlabel("Feature 0")
    plt.ylabel("Feature 1")
    plt.show()


def svm_preprocessing():
    """Compare SVC accuracy on raw versus min-max scaled breast-cancer data.

    Prints the test accuracy of an SVC trained on the raw features, the
    per-feature minimum and maximum of the scaled training data, and the
    test accuracy of an SVC trained on the scaled features.
    """
    cancer = load_breast_cancer()
    X_train, X_test, y_train, y_test = train_test_split(
        cancer.data, cancer.target, random_state=0)

    # Baseline: SVC on the unscaled features.
    svc = SVC()
    svc.fit(X_train, y_train)
    y_pred = svc.predict(X_test)
    print(np.mean(y_pred == y_test))

    # Preprocessing - scaling: shift by the per-feature training minimum and
    # divide by the per-feature training range.
    min_on_training = X_train.min(axis=0)
    range_on_training = (X_train - min_on_training).max(axis=0)

    X_train_scaled = (X_train - min_on_training) / range_on_training
    # Transform the test set with the statistics computed on the training
    # set (the original computed this identical expression twice).
    X_test_scaled = (X_test - min_on_training) / range_on_training
    print("Minimum for each feature\n{}".format(X_train_scaled.min(axis=0)))
    print("Maximum for each feature\n {}".format(X_train_scaled.max(axis=0)))

    # Refit on the scaled features.
    svc = SVC()
    svc.fit(X_train_scaled, y_train)
    y_pred = svc.predict(X_test_scaled)
    print(np.mean(y_pred == y_test))


if __name__ == '__main__':
    svm_preprocessing()