# LogisticRegression_model (Python)

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import preprocessing
from sklearn.model_selection import train_test_split,cross_val_score,GridSearchCV
from sklearn.feature_selection import SelectFromModel,RFE
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score

# End-to-end demo: standardize the breast-cancer features, keep 10 of them
# via recursive feature elimination (RFE), tune a logistic regression with a
# grid search, and report accuracy / classification report / confusion matrix
# on the training and test splits.
dataset = datasets.load_breast_cancer()
featurenames = dataset.feature_names  # kept for reference; not used below
X, y = dataset.data, dataset.target

# Split FIRST. Everything that learns from data below (scaler, RFE, grid
# search) is fitted on the training split only, so the test split stays a
# genuinely held-out set and its metrics are not inflated by leakage.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

scaler = preprocessing.StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)  # reuse training mean/std; do not refit

# RFE feature selection.
# n_features_to_select must be passed by keyword: recent scikit-learn
# releases reject it as a positional argument.
selector = RFE(LogisticRegression(), n_features_to_select=10)
selector.fit(x_train, y_train)
# Column indices of the retained features, in ascending column order
# (support_ is a boolean mask, so this is not a ranking).
index = np.flatnonzero(selector.support_).tolist()
print('特征选择优先级顺序为:', index)

x_train = x_train[:, index]
x_test = x_test[:, index]

# Search for the best hyperparameters.
params = {
    'C': [0.01, 0.1, 1.0, 10, 100, 1000],
    'penalty': ['l1', 'l2'],
}
# liblinear supports both the l1 and l2 penalties in the grid; the default
# lbfgs solver only supports l2, which would make every l1 candidate fail.
# NOTE: the scaler and RFE above were fitted on the whole training split, so
# the cross-validation scores inside the search are slightly optimistic; the
# held-out test metrics below are unaffected.
model = GridSearchCV(LogisticRegression(solver='liblinear'), params)
model.fit(x_train, y_train)
print('最佳模型为:\n', model.best_estimator_)
print('最佳参数为:\n', model.best_params_)

# Use the tuned model (already refitted on the full training split by
# GridSearchCV) instead of a hard-coded C/penalty that ignores the search.
clf = model.best_estimator_

train = clf.predict(x_train)
print('---------训练集---------')
print('accuracy:', round(accuracy_score(y_train, train), 4))
print(classification_report(y_train, train, target_names=dataset.target_names))
print(pd.DataFrame(confusion_matrix(y_train, train)))

test = clf.predict(x_test)
print('---------测试集---------')
print('accuracy:', round(accuracy_score(y_test, test), 4))
print(classification_report(y_test, test, target_names=dataset.target_names))
print(pd.DataFrame(confusion_matrix(y_test, test)))

# 猜你喜欢 (You may also like)

# 转载自 (reposted from) blog.csdn.net/qinlan1994/article/details/82291267