sklearn学习笔记之决策树分类和线性回归

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_37195257/article/details/79856488
decision tree:
# -*- coding: utf-8 -*-
import sklearn
from sklearn import tree
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets
import pandas as pd
import numpy


def getData_1():
    """Load the iris dataset into a single pandas DataFrame.

    Returns:
        A DataFrame built from ``datasets.load_iris()``: the feature matrix
        under the columns ``SepalLengthCm``, ``SepalWidthCm``,
        ``PetalLengthCm`` and ``PetalWidthCm``, plus a ``target`` column
        holding the class label of each sample.
    """
    feature_columns = [
        'SepalLengthCm',
        'SepalWidthCm',
        'PetalLengthCm',
        'PetalWidthCm',
    ]
    bunch = datasets.load_iris()

    # bunch.data: one row per sample, one column per measurement.
    frame = pd.DataFrame(bunch.data, columns=feature_columns)
    # bunch.target: one class label per sample, aligned with the rows above.
    frame['target'] = bunch.target
    return frame

df = getData_1()

# Train on all four feature columns. The earlier ``df.iloc[:, 0:3]`` slice
# stopped one column short and silently dropped PetalWidthCm.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, 0:4], df['target'], test_size=0.3, random_state=42)

# Single-argument print() calls behave identically on Python 2 and Python 3,
# so each split is printed on its own.
print(X_train)
print(X_test)
print(y_train)
print(y_test)

# Both models are scikit-learn decision trees (an optimized CART
# implementation); they differ only in the split criterion.
model = tree.DecisionTreeClassifier(criterion='gini')      # Gini impurity
model.fit(X_train, y_train)

# Entropy / information-gain criterion. The "c4.5" label printed below is the
# author's nickname for this setting, not a separate algorithm.
model2 = tree.DecisionTreeClassifier(criterion='entropy')
model2.fit(X_train, y_train)

# score() on a classifier returns the mean accuracy on the given test data.
print('cart树:{:.3f}'.format(model.score(X_test, y_test)))
print('c4.5树:{:.3f}'.format(model2.score(X_test, y_test)))
结果:输出两个决策树模型在测试集上的准确率

LinearRegression:

# -*- coding: utf-8 -*-
import sklearn
# make_classification is exported publicly from sklearn.datasets; the private
# sklearn.datasets.samples_generator module was removed in scikit-learn 0.24.
from sklearn.datasets import make_classification
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


# Synthetic two-class dataset: 2400 samples, 5 features (2 informative,
# 2 redundant), generated with a fixed seed.
X, y = make_classification(n_samples=2400, n_features=5, n_informative=2,
    n_redundant=2, n_classes=2, n_clusters_per_class=2, scale=1.0,
    random_state=20)

# Hold out 30% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# ``normalize`` is no longer passed: it was removed from LinearRegression in
# scikit-learn 1.2, and False was its default, so the fit is unchanged.
model = LinearRegression(fit_intercept=True, copy_X=True, n_jobs=1)


# The regression is fitted directly on the class labels as a numeric target.
model.fit(X_train, y_train)
print('FINISH')
# score() on a regressor returns R^2 (on a classifier it would be accuracy).
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))

# Single-argument print() calls behave identically on Python 2 and Python 3.
print(X_train)
print(y_train)
print(X_test)
print(y_test)

猜你喜欢

转载自blog.csdn.net/qq_37195257/article/details/79856488