python常用包学习(五)——sklearn

本文主要是对sklearn的一些常用方法做一些简单的介绍,这个包中的内容主要包括一些机器学习的算法,需要结合机器学习的原理进行理解。

sklearn是一套封装程度较高的算法集合:
包括分类、回归、无监督学习、决策树、数据降维、数据预处理等常见的机器学习方法。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# (The original `# _*_ UTF-8 _*_` line does not match the PEP 263 pattern
# and was silently ignored by the interpreter.)
#
# Study notes: common scikit-learn usage — bundled datasets, basic
# estimators, preprocessing and cross-validation.  The example snippets
# are kept commented out, so importing this file has no side effects
# beyond the two imports below.

import sklearn
# NOTE: the private module path sklearn.neighbors.classification was
# deprecated in 0.22 and later removed; import from the public package.
from sklearn.neighbors import KNeighborsClassifier

# --- 1. Built-in example datasets ------------------------------------------

# Iris dataset:
# from sklearn.datasets import load_iris
# loaded_data = load_iris()
# data_x = loaded_data.data      # feature matrix
# data_y = loaded_data.target    # class labels
# print(data_x)
# Boston housing prices (NOTE: load_boston was removed in scikit-learn 1.2;
# use fetch_california_housing or fetch_openml("house_prices") instead):
# from sklearn import datasets
# loaded_data = datasets.load_boston()
# data_x = loaded_data.data
# data_y = loaded_data.target
# print(data_x)

# --- 2. Common algorithms --------------------------------------------------

# k-nearest-neighbours classification:
# from sklearn.model_selection import train_test_split
# from sklearn import datasets
# from sklearn.neighbors import KNeighborsClassifier
# iris = datasets.load_iris()
# iris_X = iris.data
# iris_Y = iris.target
# X_train, X_test, Y_train, Y_test = train_test_split(iris_X, iris_Y, test_size=0.3)
# knn = KNeighborsClassifier()
# knn.fit(X_train, Y_train)
# print(knn.predict(X_test))
# print(Y_test)
# Y = knn.predict(X_test)
# Linear regression:
# import matplotlib.pyplot as plt
# from sklearn import datasets
# from sklearn.linear_model import LinearRegression
# loaded_data = datasets.load_boston()   # see the load_boston removal note above
# data_x = loaded_data.data
# data_y = loaded_data.target
# model = LinearRegression()
# model.fit(data_x, data_y)
# print(data_x)
# First 4 rows, all columns:
# print(data_x[:4, :])
# print(model.predict(data_x[:4, :]))
# For a 1-D array, take the first 4 elements:
# print(data_y[:4])
# Normalization / standardization:
# from sklearn import preprocessing
# import numpy as np
# a = np.array([[1, 2, 3], [2, 4, 6], [3, 6, 9]])
# print(preprocessing.scale(a))
# `a` above is a 2-D array, not a 1-D vector; a 1-D vector can be reshaped
# before doing matrix operations:
# a = np.random.randn(5)
# print(type(a))
# SVM on a synthetic classification problem:
# from sklearn import preprocessing
# import numpy as np
# from sklearn.model_selection import train_test_split
# from sklearn.datasets import make_classification  # samples_generator module was removed
# from sklearn.svm import SVC
# import matplotlib.pyplot as plt
# x, y = make_classification(n_samples=300,
#                            n_features=2,
#                            n_redundant=0,
#                            n_informative=2,
#                            random_state=22,
#                            n_clusters_per_class=1,
#                            scale=100)
# plt.scatter(x[:, 0], x[:, 1], c=y)  # was plt.plot.scatter — pyplot has no .plot.scatter
# plt.show()
# Random forest:
# from sklearn.ensemble import RandomForestClassifier
# X = [[0, 0], [1, 1]]
# Y = [0, 1]
# clf = RandomForestClassifier(n_estimators=10)
# clf = clf.fit(X, Y)
# print(clf)
# ------
# from sklearn.model_selection import cross_val_score
# from sklearn.datasets import make_blobs
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.ensemble import ExtraTreesClassifier
# from sklearn.tree import DecisionTreeClassifier
# X, y = make_blobs(n_samples=10000, n_features=10, centers=100)
# clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2)  # min_impurity_split was removed
# scores = cross_val_score(clf, X, y)
# print(scores.mean())

猜你喜欢

转载自blog.csdn.net/livan1234/article/details/81516257