Getting started with sklearn: trying out the common machine learning models turns out to be this simple

This article mainly follows Teacher Wang's post "贷款或者是不贷" (to lend or not to lend), which is an easy introduction and highly recommended.

The data source is here (click to download it as a CSV file); it looks roughly like this:
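A quick way to see this for yourself is to load the file with pandas and look at the first few rows. A minimal sketch, assuming the CSV is saved to E:\loans.csv (the same path used in the main code below) and that the label column is named safe_loans:

import pandas as pd

data = pd.read_csv(r'E:\loans.csv')
print(data.head())                         # first few rows: the feature columns plus safe_loans
print(data['safe_loans'].value_counts())   # how many samples fall into each label class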

The code is as follows:

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from collections import defaultdict
from sklearn.model_selection import train_test_split  # cross_validation was renamed to model_selection in sklearn 0.18
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.linear_model import LinearRegression as LR
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.ensemble import AdaBoostClassifier as ADA
from sklearn.ensemble import BaggingClassifier as BC
from sklearn.ensemble import GradientBoostingClassifier as GDBC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.naive_bayes import BernoulliNB as BLNB
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
# Build the training and test sets
data = pd.read_csv(r'E:\loans.csv')
X = data.drop('safe_loans', axis=1)  # features
y = data.safe_loans                  # label
# One LabelEncoder per column, created on demand, so categorical strings become integer codes
d = defaultdict(LabelEncoder)
X_trans = X.apply(lambda x: d[x.name].fit_transform(x))
x_train, x_test, y_train, y_test = train_test_split(X_trans, y, test_size=0.2, random_state=1)
# Fit a classifier on the training set and return its score on the test set
def func(clf):
    clf.fit(x_train, y_train)
    score = clf.score(x_test, y_test)
    return score
# Decision tree
print('Decision tree score: {}'.format(func(DTC())))
# Linear regression (a regressor, so its score() is R^2 rather than classification accuracy)
print('Linear regression score: {}'.format(func(LR())))
# KNN
print('KNN score: {}'.format(func(KNN())))
# Random forest
print('Random forest score: {}'.format(func(RFC(n_estimators=20))))
# AdaBoost
print('AdaBoost score: {}'.format(func(ADA(n_estimators=20))))
# GBDT
print('GBDT score: {}'.format(func(GDBC(n_estimators=20))))
# Bagging
print('Bagging score: {}'.format(func(BC(n_estimators=20))))
# Bernoulli naive Bayes
print('Bernoulli naive Bayes score: {}'.format(func(BLNB())))
# Gaussian naive Bayes
print('Gaussian naive Bayes score: {}'.format(func(GNB())))
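Note that the accuracy_score import above is never actually used; for classifiers, clf.score already reports accuracy. If you also want a per-class breakdown, here is a minimal sketch reusing the random forest and the train/test variables from the code above:

from sklearn.metrics import confusion_matrix

clf = RFC(n_estimators=20)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(accuracy_score(y_test, y_pred))    # same number that clf.score(x_test, y_test) returns
print(confusion_matrix(y_test, y_pred))  # rows are true classes, columns are predicted classes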

Run results:

The scores are indeed a bit low. You can also see that the ensemble methods do somewhat better; I'll optimize this when I find the time.
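One straightforward way to do that optimization is a grid search over a few hyperparameters with cross-validation. A minimal sketch, again using the random forest and the variables defined above (the parameter grid here is only an illustrative guess, not tuned values):

from sklearn.model_selection import GridSearchCV

param_grid = {'n_estimators': [20, 50, 100], 'max_depth': [None, 5, 10]}
search = GridSearchCV(RFC(random_state=1), param_grid, cv=5)
search.fit(x_train, y_train)
print(search.best_params_)           # best combination found by cross-validation
print(search.score(x_test, y_test))  # test-set score of the refitted best model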


Reposted from blog.csdn.net/lbship/article/details/81701650