机器学习:特征工程补充

非原创,代码来自葁sir

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.datasets import load_iris
# Load the iris dataset and keep its feature matrix, labels and feature names.
iris = load_iris()
data = iris.data
target = iris.target
feature_names = iris.feature_names
# Plan: rescale the features first, then select features by their spread.
data.std(axis=0) # every feature has a non-zero spread (note: this is the standard deviation, not the variance)
array([0.82530129, 0.43441097, 1.75940407, 0.75969263])
from sklearn.preprocessing import StandardScaler,MinMaxScaler,Normalizer
# 1. Normalizer: transform the data and keep only the per-column standard
# deviation of the result (X ends up holding the std vector, not the matrix).
X = Normalizer().fit_transform(data).std(axis=0)
array([0.04421945, 0.10527109, 0.15945216, 0.07771647])
# 2. StandardScaler: per-column standard deviation after standardization.
ss_X = StandardScaler().fit_transform(data).std(axis=0)
# 3. MinMaxScaler: per-column standard deviation after min-max scaling.
mm_X = MinMaxScaler().fit_transform(data).std(axis=0)
# Compare the per-feature standard deviation of the raw data with the one
# obtained after each of the three transforms.
result = {}
result['True'] = data.std(axis=0)
result['Normalizer'] = X
result['Standar'] = ss_X
result['MinMax'] = mm_X

sns.set()
plt.figure(figsize=(12, 12))
# The x positions of the bars are the same in every panel, so compute them once.
bar_positions = np.arange(len(feature_names))
loc = 1
for key, v in result.items():
    # One panel per entry on a 2x2 grid; subplot indices start at 1.
    ax = plt.subplot(2, 2, loc)
    ax.set_title(key)
    plt.bar(bar_positions, v, color=sns.color_palette('husl'))
    plt.xticks(bar_positions, feature_names, rotation=10)
    loc += 1

(图:原始数据与 Normalizer、StandardScaler、MinMaxScaler 三种处理后,各特征标准差的柱状图对比,共四个子图)

# Keep exploring the data distribution (for a classification problem we can
# compare how the same feature is distributed within each class).
# First list the distinct class labels.
np.unique(target)
array([0, 1, 2])
0 == target # boolean mask used to filter rows (True where the label equals 0)
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False])
# Wrap the feature matrix in a DataFrame so a column can be picked by feature name.
iris_df = DataFrame(data=data,columns=feature_names)
# Class names; the plotting loop indexes this array with the integer label.
target_name = iris.target_names
target_name
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
import warnings
# Silence all library warnings from here on. Note that this filter is global:
# it also hides warnings raised by later cells.
warnings.filterwarnings('ignore')
# Draw one figure per feature, overlaying the distribution of each class.
for col_name in feature_names:
    # Read a single column. It gets its own name so that the module-level
    # `data` feature matrix is not overwritten by a one-column Series.
    col_data = iris_df[col_name]
    # Plot the column separately for every class label.
    for i in np.unique(target):
        # Boolean mask selecting the rows that belong to class i.
        condition = (i == target)
        col_data_i = col_data[condition]
        # NOTE(review): sns.distplot is deprecated in newer seaborn releases;
        # histplot / kdeplot are the documented replacements.
        sns.distplot(col_data_i,label = target_name[i])
    plt.legend()
    plt.show()

(图:四个特征各一张分布图,每张图中按 setosa、versicolor、virginica 三个类别分别绘制该特征的分布)

使用sklearn的方法进行方差选择

# Work on the raw iris feature matrix and display it.
data = iris.data
data
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [4.9, 3.6, 1.4, 0.1],
       [4.4, 3. , 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [4.5, 2.3, 1.3, 0.3],
       [4.4, 3.2, 1.3, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [5.1, 3.8, 1.9, 0.4],
       [4.8, 3. , 1.4, 0.3],
       [5.1, 3.8, 1.6, 0.2],
       [4.6, 3.2, 1.4, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.3, 1.4, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3. , 4.5, 1.5],
       [6. , 3.4, 4.5, 1.6],
       [6.7, 3.1, 4.7, 1.5],
       [6.3, 2.3, 4.4, 1.3],
       [5.6, 3. , 4.1, 1.3],
       [5.5, 2.5, 4. , 1.3],
       [5.5, 2.6, 4.4, 1.2],
       [6.1, 3. , 4.6, 1.4],
       [5.8, 2.6, 4. , 1.2],
       [5. , 2.3, 3.3, 1. ],
       [5.6, 2.7, 4.2, 1.3],
       [5.7, 3. , 4.2, 1.2],
       [5.7, 2.9, 4.2, 1.3],
       [6.2, 2.9, 4.3, 1.3],
       [5.1, 2.5, 3. , 1.1],
       [5.7, 2.8, 4.1, 1.3],
       [6.3, 3.3, 6. , 2.5],
       [5.8, 2.7, 5.1, 1.9],
       [7.1, 3. , 5.9, 2.1],
       [6.3, 2.9, 5.6, 1.8],
       [6.5, 3. , 5.8, 2.2],
       [7.6, 3. , 6.6, 2.1],
       [4.9, 2.5, 4.5, 1.7],
       [7.3, 2.9, 6.3, 1.8],
       [6.7, 2.5, 5.8, 1.8],
       [7.2, 3.6, 6.1, 2.5],
       [6.5, 3.2, 5.1, 2. ],
       [6.4, 2.7, 5.3, 1.9],
       [6.8, 3. , 5.5, 2.1],
       [5.7, 2.5, 5. , 2. ],
       [5.8, 2.8, 5.1, 2.4],
       [6.4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8],
       [7.7, 3.8, 6.7, 2.2],
       [7.7, 2.6, 6.9, 2.3],
       [6. , 2.2, 5. , 1.5],
       [6.9, 3.2, 5.7, 2.3],
       [5.6, 2.8, 4.9, 2. ],
       [7.7, 2.8, 6.7, 2. ],
       [6.3, 2.7, 4.9, 1.8],
       [6.7, 3.3, 5.7, 2.1],
       [7.2, 3.2, 6. , 1.8],
       [6.2, 2.8, 4.8, 1.8],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2.8, 5.6, 2.1],
       [7.2, 3. , 5.8, 1.6],
       [7.4, 2.8, 6.1, 1.9],
       [7.9, 3.8, 6.4, 2. ],
       [6.4, 2.8, 5.6, 2.2],
       [6.3, 2.8, 5.1, 1.5],
       [6.1, 2.6, 5.6, 1.4],
       [7.7, 3. , 6.1, 2.3],
       [6.3, 3.4, 5.6, 2.4],
       [6.4, 3.1, 5.5, 1.8],
       [6. , 3. , 4.8, 1.8],
       [6.9, 3.1, 5.4, 2.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.9, 3.1, 5.1, 2.3],
       [5.8, 2.7, 5.1, 1.9],
       [6.8, 3.2, 5.9, 2.3],
       [6.7, 3.3, 5.7, 2.5],
       [6.7, 3. , 5.2, 2.3],
       [6.3, 2.5, 5. , 1.9],
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])
# Per-feature variance of the raw data; these are the values the variance threshold is compared against.
data.var(axis=0)
array([0.68112222, 0.18871289, 3.09550267, 0.57713289])
from sklearn.feature_selection import VarianceThreshold
# Variance-threshold selection: the return value is the data after feature selection.
# `threshold` is the variance cut-off; features whose variance is below it are filtered out.
VarianceThreshold(threshold=0.5).fit_transform(data)
array([[5.1, 1.4, 0.2],
       [4.9, 1.4, 0.2],
       [4.7, 1.3, 0.2],
       [4.6, 1.5, 0.2],
       [5. , 1.4, 0.2],
       [5.4, 1.7, 0.4],
       [4.6, 1.4, 0.3],
       [5. , 1.5, 0.2],
       [4.4, 1.4, 0.2],
       [4.9, 1.5, 0.1],
       [5.4, 1.5, 0.2],
       [4.8, 1.6, 0.2],
       [4.8, 1.4, 0.1],
       [4.3, 1.1, 0.1],
       [5.8, 1.2, 0.2],
       [5.7, 1.5, 0.4],
       [5.4, 1.3, 0.4],
       [5.1, 1.4, 0.3],
       [5.7, 1.7, 0.3],
       [5.1, 1.5, 0.3],
       [5.4, 1.7, 0.2],
       [5.1, 1.5, 0.4],
       [4.6, 1. , 0.2],
       [5.1, 1.7, 0.5],
       [4.8, 1.9, 0.2],
       [5. , 1.6, 0.2],
       [5. , 1.6, 0.4],
       [5.2, 1.5, 0.2],
       [5.2, 1.4, 0.2],
       [4.7, 1.6, 0.2],
       [4.8, 1.6, 0.2],
       [5.4, 1.5, 0.4],
       [5.2, 1.5, 0.1],
       [5.5, 1.4, 0.2],
       [4.9, 1.5, 0.2],
       [5. , 1.2, 0.2],
       [5.5, 1.3, 0.2],
       [4.9, 1.4, 0.1],
       [4.4, 1.3, 0.2],
       [5.1, 1.5, 0.2],
       [5. , 1.3, 0.3],
       [4.5, 1.3, 0.3],
       [4.4, 1.3, 0.2],
       [5. , 1.6, 0.6],
       [5.1, 1.9, 0.4],
       [4.8, 1.4, 0.3],
       [5.1, 1.6, 0.2],
       [4.6, 1.4, 0.2],
       [5.3, 1.5, 0.2],
       [5. , 1.4, 0.2],
       [7. , 4.7, 1.4],
       [6.4, 4.5, 1.5],
       [6.9, 4.9, 1.5],
       [5.5, 4. , 1.3],
       [6.5, 4.6, 1.5],
       [5.7, 4.5, 1.3],
       [6.3, 4.7, 1.6],
       [4.9, 3.3, 1. ],
       [6.6, 4.6, 1.3],
       [5.2, 3.9, 1.4],
       [5. , 3.5, 1. ],
       [5.9, 4.2, 1.5],
       [6. , 4. , 1. ],
       [6.1, 4.7, 1.4],
       [5.6, 3.6, 1.3],
       [6.7, 4.4, 1.4],
       [5.6, 4.5, 1.5],
       [5.8, 4.1, 1. ],
       [6.2, 4.5, 1.5],
       [5.6, 3.9, 1.1],
       [5.9, 4.8, 1.8],
       [6.1, 4. , 1.3],
       [6.3, 4.9, 1.5],
       [6.1, 4.7, 1.2],
       [6.4, 4.3, 1.3],
       [6.6, 4.4, 1.4],
       [6.8, 4.8, 1.4],
       [6.7, 5. , 1.7],
       [6. , 4.5, 1.5],
       [5.7, 3.5, 1. ],
       [5.5, 3.8, 1.1],
       [5.5, 3.7, 1. ],
       [5.8, 3.9, 1.2],
       [6. , 5.1, 1.6],
       [5.4, 4.5, 1.5],
       [6. , 4.5, 1.6],
       [6.7, 4.7, 1.5],
       [6.3, 4.4, 1.3],
       [5.6, 4.1, 1.3],
       [5.5, 4. , 1.3],
       [5.5, 4.4, 1.2],
       [6.1, 4.6, 1.4],
       [5.8, 4. , 1.2],
       [5. , 3.3, 1. ],
       [5.6, 4.2, 1.3],
       [5.7, 4.2, 1.2],
       [5.7, 4.2, 1.3],
       [6.2, 4.3, 1.3],
       [5.1, 3. , 1.1],
       [5.7, 4.1, 1.3],
       [6.3, 6. , 2.5],
       [5.8, 5.1, 1.9],
       [7.1, 5.9, 2.1],
       [6.3, 5.6, 1.8],
       [6.5, 5.8, 2.2],
       [7.6, 6.6, 2.1],
       [4.9, 4.5, 1.7],
       [7.3, 6.3, 1.8],
       [6.7, 5.8, 1.8],
       [7.2, 6.1, 2.5],
       [6.5, 5.1, 2. ],
       [6.4, 5.3, 1.9],
       [6.8, 5.5, 2.1],
       [5.7, 5. , 2. ],
       [5.8, 5.1, 2.4],
       [6.4, 5.3, 2.3],
       [6.5, 5.5, 1.8],
       [7.7, 6.7, 2.2],
       [7.7, 6.9, 2.3],
       [6. , 5. , 1.5],
       [6.9, 5.7, 2.3],
       [5.6, 4.9, 2. ],
       [7.7, 6.7, 2. ],
       [6.3, 4.9, 1.8],
       [6.7, 5.7, 2.1],
       [7.2, 6. , 1.8],
       [6.2, 4.8, 1.8],
       [6.1, 4.9, 1.8],
       [6.4, 5.6, 2.1],
       [7.2, 5.8, 1.6],
       [7.4, 6.1, 1.9],
       [7.9, 6.4, 2. ],
       [6.4, 5.6, 2.2],
       [6.3, 5.1, 1.5],
       [6.1, 5.6, 1.4],
       [7.7, 6.1, 2.3],
       [6.3, 5.6, 2.4],
       [6.4, 5.5, 1.8],
       [6. , 4.8, 1.8],
       [6.9, 5.4, 2.1],
       [6.7, 5.6, 2.4],
       [6.9, 5.1, 2.3],
       [5.8, 5.1, 1.9],
       [6.8, 5.9, 2.3],
       [6.7, 5.7, 2.5],
       [6.7, 5.2, 2.3],
       [6.3, 5. , 1.9],
       [6.5, 5.2, 2. ],
       [6.2, 5.4, 2.3],
       [5.9, 5.1, 1.8]])
# Apply Normalizer to the raw data, then inspect the per-column variance of the result.
X = Normalizer().fit_transform(data)
X.var(axis=0)
array([0.00195536, 0.011082  , 0.02542499, 0.00603985])
# Same selection on the normalized data with a 0.01 cut-off; the printed result keeps
# the two columns whose variance is above 0.01.
VarianceThreshold(threshold=0.01).fit_transform(X)
array([[0.55160877, 0.22064351],
       [0.50702013, 0.23660939],
       [0.54831188, 0.2227517 ],
       [0.53915082, 0.26087943],
       [0.5694948 , 0.2214702 ],
       [0.5663486 , 0.2468699 ],
       [0.57660257, 0.23742459],
       [0.54548574, 0.24065548],
       [0.5315065 , 0.25658935],
       [0.51752994, 0.25041771],
       [0.55070744, 0.22325977],
       [0.55745196, 0.26233033],
       [0.51442011, 0.24006272],
       [0.55989251, 0.20529392],
       [0.55945424, 0.16783627],
       [0.59732787, 0.2036345 ],
       [0.57365349, 0.19121783],
       [0.55126656, 0.22050662],
       [0.53788547, 0.24063297],
       [0.58091482, 0.22930848],
       [0.51462016, 0.25731008],
       [0.57017622, 0.23115252],
       [0.60712493, 0.16864581],
       [0.52151512, 0.26865931],
       [0.54974742, 0.30721179],
       [0.4958847 , 0.26447184],
       [0.5424918 , 0.25529026],
       [0.54278246, 0.23262105],
       [0.5336001 , 0.21971769],
       [0.54144043, 0.27072022],
       [0.52213419, 0.26948861],
       [0.51771314, 0.22840286],
       [0.60379053, 0.22089897],
       [0.59462414, 0.19820805],
       [0.51731371, 0.25031309],
       [0.52807869, 0.19802951],
       [0.52627116, 0.19547215],
       [0.5769053 , 0.22435206],
       [0.54690282, 0.23699122],
       [0.53853046, 0.23758697],
       [0.56023311, 0.20808658],
       [0.44003527, 0.24871559],
       [0.57170209, 0.23225397],
       [0.55222635, 0.25244633],
       [0.57144472, 0.28572236],
       [0.51381615, 0.23978087],
       [0.57915795, 0.24385598],
       [0.55370283, 0.24224499],
       [0.55735281, 0.22595384],
       [0.5361072 , 0.22743942],
       [0.35063361, 0.51499312],
       [0.37274878, 0.52417798],
       [0.33928954, 0.53629637],
       [0.31524601, 0.54825394],
       [0.32659863, 0.5365549 ],
       [0.35482858, 0.57026022],
       [0.38046824, 0.54187901],
       [0.37183615, 0.51127471],
       [0.33526572, 0.53180079],
       [0.37623583, 0.54345175],
       [0.30769231, 0.53846154],
       [0.37588201, 0.52623481],
       [0.28927343, 0.52595168],
       [0.34743622, 0.56308629],
       [0.3931142 , 0.48800383],
       [0.35601624, 0.50531337],
       [0.37838513, 0.5675777 ],
       [0.35228714, 0.53495455],
       [0.27125375, 0.55483721],
       [0.34011245, 0.53057542],
       [0.37889063, 0.56833595],
       [0.35349703, 0.50499576],
       [0.29421947, 0.57667016],
       [0.33811099, 0.56754345],
       [0.34773582, 0.51560829],
       [0.34902603, 0.51190484],
       [0.31486523, 0.53976896],
       [0.33173989, 0.55289982],
       [0.35452959, 0.55013212],
       [0.35883409, 0.48304589],
       [0.33391355, 0.52869645],
       [0.33706004, 0.51963422],
       [0.35581802, 0.51395936],
       [0.31850786, 0.60162596],
       [0.38518561, 0.57777841],
       [0.40530797, 0.53643702],
       [0.34913098, 0.52932761],
       [0.28304611, 0.54147951],
       [0.39103094, 0.53440896],
       [0.33960997, 0.54337595],
       [0.34195729, 0.57869695],
       [0.36029701, 0.55245541],
       [0.34186859, 0.52595168],
       [0.35413965, 0.5081134 ],
       [0.35458851, 0.55158213],
       [0.38547167, 0.53966034],
       [0.37367287, 0.5411814 ],
       [0.3542121 , 0.52521104],
       [0.38361791, 0.4603415 ],
       [0.36505526, 0.5345452 ],
       [0.34250725, 0.62274045],
       [0.32145135, 0.60718588],
       [0.30207636, 0.59408351],
       [0.31889319, 0.61579374],
       [0.31670318, 0.61229281],
       [0.28008043, 0.61617694],
       [0.34211284, 0.61580312],
       [0.28351098, 0.61590317],
       [0.26647062, 0.61821183],
       [0.34599394, 0.58626751],
       [0.3523084 , 0.56149152],
       [0.30196356, 0.59274328],
       [0.31640359, 0.58007326],
       [0.30375079, 0.60750157],
       [0.32715549, 0.59589036],
       [0.34794944, 0.57629125],
       [0.3258945 , 0.59747324],
       [0.34199555, 0.60299216],
       [0.2383917 , 0.63265489],
       [0.26661281, 0.60593821],
       [0.32722984, 0.58287815],
       [0.34153961, 0.59769433],
       [0.25995106, 0.62202576],
       [0.31338199, 0.56873028],
       [0.3427843 , 0.59208198],
       [0.31790868, 0.59607878],
       [0.32870733, 0.56349829],
       [0.35002236, 0.57170319],
       [0.30447376, 0.60894751],
       [0.30454106, 0.58877939],
       [0.27533141, 0.59982915],
       [0.34430405, 0.5798805 ],
       [0.30370264, 0.60740528],
       [0.32162669, 0.58582004],
       [0.29574111, 0.63698085],
       [0.28501714, 0.57953485],
       [0.36168166, 0.59571097],
       [0.338117  , 0.59988499],
       [0.35533453, 0.56853524],
       [0.32534391, 0.56672811],
       [0.32386689, 0.58504986],
       [0.32948905, 0.54206264],
       [0.32145135, 0.60718588],
       [0.32561648, 0.60035539],
       [0.33943145, 0.58629069],
       [0.32308533, 0.56001458],
       [0.28954508, 0.57909015],
       [0.3307103 , 0.57323119],
       [0.36998072, 0.58761643],
       [0.35097923, 0.5966647 ]])

Wrapper(包装法):根据目标函数(通常是预测效果评分),每次选择若干特征,或者排除若干特征。

from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
# Recursive feature elimination (RFE): returns the data after feature selection.
# estimator: the model object used as the base model
# n_features_to_select: how many features to keep
RFE(estimator=LogisticRegression(),n_features_to_select=2).fit_transform(iris.data, iris.target)
array([[1.4, 0.2],
       [1.4, 0.2],
       [1.3, 0.2],
       [1.5, 0.2],
       [1.4, 0.2],
       [1.7, 0.4],
       [1.4, 0.3],
       [1.5, 0.2],
       [1.4, 0.2],
       [1.5, 0.1],
       [1.5, 0.2],
       [1.6, 0.2],
       [1.4, 0.1],
       [1.1, 0.1],
       [1.2, 0.2],
       [1.5, 0.4],
       [1.3, 0.4],
       [1.4, 0.3],
       [1.7, 0.3],
       [1.5, 0.3],
       [1.7, 0.2],
       [1.5, 0.4],
       [1. , 0.2],
       [1.7, 0.5],
       [1.9, 0.2],
       [1.6, 0.2],
       [1.6, 0.4],
       [1.5, 0.2],
       [1.4, 0.2],
       [1.6, 0.2],
       [1.6, 0.2],
       [1.5, 0.4],
       [1.5, 0.1],
       [1.4, 0.2],
       [1.5, 0.2],
       [1.2, 0.2],
       [1.3, 0.2],
       [1.4, 0.1],
       [1.3, 0.2],
       [1.5, 0.2],
       [1.3, 0.3],
       [1.3, 0.3],
       [1.3, 0.2],
       [1.6, 0.6],
       [1.9, 0.4],
       [1.4, 0.3],
       [1.6, 0.2],
       [1.4, 0.2],
       [1.5, 0.2],
       [1.4, 0.2],
       [4.7, 1.4],
       [4.5, 1.5],
       [4.9, 1.5],
       [4. , 1.3],
       [4.6, 1.5],
       [4.5, 1.3],
       [4.7, 1.6],
       [3.3, 1. ],
       [4.6, 1.3],
       [3.9, 1.4],
       [3.5, 1. ],
       [4.2, 1.5],
       [4. , 1. ],
       [4.7, 1.4],
       [3.6, 1.3],
       [4.4, 1.4],
       [4.5, 1.5],
       [4.1, 1. ],
       [4.5, 1.5],
       [3.9, 1.1],
       [4.8, 1.8],
       [4. , 1.3],
       [4.9, 1.5],
       [4.7, 1.2],
       [4.3, 1.3],
       [4.4, 1.4],
       [4.8, 1.4],
       [5. , 1.7],
       [4.5, 1.5],
       [3.5, 1. ],
       [3.8, 1.1],
       [3.7, 1. ],
       [3.9, 1.2],
       [5.1, 1.6],
       [4.5, 1.5],
       [4.5, 1.6],
       [4.7, 1.5],
       [4.4, 1.3],
       [4.1, 1.3],
       [4. , 1.3],
       [4.4, 1.2],
       [4.6, 1.4],
       [4. , 1.2],
       [3.3, 1. ],
       [4.2, 1.3],
       [4.2, 1.2],
       [4.2, 1.3],
       [4.3, 1.3],
       [3. , 1.1],
       [4.1, 1.3],
       [6. , 2.5],
       [5.1, 1.9],
       [5.9, 2.1],
       [5.6, 1.8],
       [5.8, 2.2],
       [6.6, 2.1],
       [4.5, 1.7],
       [6.3, 1.8],
       [5.8, 1.8],
       [6.1, 2.5],
       [5.1, 2. ],
       [5.3, 1.9],
       [5.5, 2.1],
       [5. , 2. ],
       [5.1, 2.4],
       [5.3, 2.3],
       [5.5, 1.8],
       [6.7, 2.2],
       [6.9, 2.3],
       [5. , 1.5],
       [5.7, 2.3],
       [4.9, 2. ],
       [6.7, 2. ],
       [4.9, 1.8],
       [5.7, 2.1],
       [6. , 1.8],
       [4.8, 1.8],
       [4.9, 1.8],
       [5.6, 2.1],
       [5.8, 1.6],
       [6.1, 1.9],
       [6.4, 2. ],
       [5.6, 2.2],
       [5.1, 1.5],
       [5.6, 1.4],
       [6.1, 2.3],
       [5.6, 2.4],
       [5.5, 1.8],
       [4.8, 1.8],
       [5.4, 2.1],
       [5.6, 2.4],
       [5.1, 2.3],
       [5.1, 1.9],
       [5.9, 2.3],
       [5.7, 2.5],
       [5.2, 2.3],
       [5. , 1.9],
       [5.2, 2. ],
       [5.4, 2.3],
       [5.1, 1.8]])
# Grid-search C and max_iter for a logistic regression, using cv=10.
from sklearn.model_selection import GridSearchCV
lr = LogisticRegression()
search_space = {'C': [0.01, 0.1, 1, 5], 'max_iter': [50, 100, 200]}
gscv = GridSearchCV(lr, param_grid=search_space, cv=10)
gscv.fit(iris.data, iris.target)
GridSearchCV(cv=10, estimator=LogisticRegression(),
             param_grid={'C': [0.01, 0.1, 1, 5], 'max_iter': [50, 100, 200]})
# Parameter combination that scored best in the grid search.
gscv.best_params_
{'C': 5, 'max_iter': 50}
# Fit `lr` on the full data set. NOTE(review): the printed repr shows default
# parameters, i.e. this is not the tuned model from the grid search.
lr.fit(iris.data,iris.target)
LogisticRegression()
from sklearn.ensemble import GradientBoostingClassifier
# Fit a gradient-boosting classifier with default settings on the full iris data.
gbdt = GradientBoostingClassifier()
gbdt.fit(iris.data,iris.target)
GradientBoostingClassifier()
# Per-feature importance scores of the fitted model (the printed values are largest for the last two features).
gbdt.feature_importances_
array([0.00653328, 0.00992017, 0.20269654, 0.78085   ])
# RFE again, now keeping 3 features and using the parameters reported by the
# grid search above (C=5, max_iter=50), copied here by hand.
RFE(estimator=LogisticRegression(C=5,max_iter=50),n_features_to_select=3).fit_transform(iris.data,iris.target)
array([[3.5, 1.4, 0.2],
       [3. , 1.4, 0.2],
       [3.2, 1.3, 0.2],
       [3.1, 1.5, 0.2],
       [3.6, 1.4, 0.2],
       [3.9, 1.7, 0.4],
       [3.4, 1.4, 0.3],
       [3.4, 1.5, 0.2],
       [2.9, 1.4, 0.2],
       [3.1, 1.5, 0.1],
       [3.7, 1.5, 0.2],
       [3.4, 1.6, 0.2],
       [3. , 1.4, 0.1],
       [3. , 1.1, 0.1],
       [4. , 1.2, 0.2],
       [4.4, 1.5, 0.4],
       [3.9, 1.3, 0.4],
       [3.5, 1.4, 0.3],
       [3.8, 1.7, 0.3],
       [3.8, 1.5, 0.3],
       [3.4, 1.7, 0.2],
       [3.7, 1.5, 0.4],
       [3.6, 1. , 0.2],
       [3.3, 1.7, 0.5],
       [3.4, 1.9, 0.2],
       [3. , 1.6, 0.2],
       [3.4, 1.6, 0.4],
       [3.5, 1.5, 0.2],
       [3.4, 1.4, 0.2],
       [3.2, 1.6, 0.2],
       [3.1, 1.6, 0.2],
       [3.4, 1.5, 0.4],
       [4.1, 1.5, 0.1],
       [4.2, 1.4, 0.2],
       [3.1, 1.5, 0.2],
       [3.2, 1.2, 0.2],
       [3.5, 1.3, 0.2],
       [3.6, 1.4, 0.1],
       [3. , 1.3, 0.2],
       [3.4, 1.5, 0.2],
       [3.5, 1.3, 0.3],
       [2.3, 1.3, 0.3],
       [3.2, 1.3, 0.2],
       [3.5, 1.6, 0.6],
       [3.8, 1.9, 0.4],
       [3. , 1.4, 0.3],
       [3.8, 1.6, 0.2],
       [3.2, 1.4, 0.2],
       [3.7, 1.5, 0.2],
       [3.3, 1.4, 0.2],
       [3.2, 4.7, 1.4],
       [3.2, 4.5, 1.5],
       [3.1, 4.9, 1.5],
       [2.3, 4. , 1.3],
       [2.8, 4.6, 1.5],
       [2.8, 4.5, 1.3],
       [3.3, 4.7, 1.6],
       [2.4, 3.3, 1. ],
       [2.9, 4.6, 1.3],
       [2.7, 3.9, 1.4],
       [2. , 3.5, 1. ],
       [3. , 4.2, 1.5],
       [2.2, 4. , 1. ],
       [2.9, 4.7, 1.4],
       [2.9, 3.6, 1.3],
       [3.1, 4.4, 1.4],
       [3. , 4.5, 1.5],
       [2.7, 4.1, 1. ],
       [2.2, 4.5, 1.5],
       [2.5, 3.9, 1.1],
       [3.2, 4.8, 1.8],
       [2.8, 4. , 1.3],
       [2.5, 4.9, 1.5],
       [2.8, 4.7, 1.2],
       [2.9, 4.3, 1.3],
       [3. , 4.4, 1.4],
       [2.8, 4.8, 1.4],
       [3. , 5. , 1.7],
       [2.9, 4.5, 1.5],
       [2.6, 3.5, 1. ],
       [2.4, 3.8, 1.1],
       [2.4, 3.7, 1. ],
       [2.7, 3.9, 1.2],
       [2.7, 5.1, 1.6],
       [3. , 4.5, 1.5],
       [3.4, 4.5, 1.6],
       [3.1, 4.7, 1.5],
       [2.3, 4.4, 1.3],
       [3. , 4.1, 1.3],
       [2.5, 4. , 1.3],
       [2.6, 4.4, 1.2],
       [3. , 4.6, 1.4],
       [2.6, 4. , 1.2],
       [2.3, 3.3, 1. ],
       [2.7, 4.2, 1.3],
       [3. , 4.2, 1.2],
       [2.9, 4.2, 1.3],
       [2.9, 4.3, 1.3],
       [2.5, 3. , 1.1],
       [2.8, 4.1, 1.3],
       [3.3, 6. , 2.5],
       [2.7, 5.1, 1.9],
       [3. , 5.9, 2.1],
       [2.9, 5.6, 1.8],
       [3. , 5.8, 2.2],
       [3. , 6.6, 2.1],
       [2.5, 4.5, 1.7],
       [2.9, 6.3, 1.8],
       [2.5, 5.8, 1.8],
       [3.6, 6.1, 2.5],
       [3.2, 5.1, 2. ],
       [2.7, 5.3, 1.9],
       [3. , 5.5, 2.1],
       [2.5, 5. , 2. ],
       [2.8, 5.1, 2.4],
       [3.2, 5.3, 2.3],
       [3. , 5.5, 1.8],
       [3.8, 6.7, 2.2],
       [2.6, 6.9, 2.3],
       [2.2, 5. , 1.5],
       [3.2, 5.7, 2.3],
       [2.8, 4.9, 2. ],
       [2.8, 6.7, 2. ],
       [2.7, 4.9, 1.8],
       [3.3, 5.7, 2.1],
       [3.2, 6. , 1.8],
       [2.8, 4.8, 1.8],
       [3. , 4.9, 1.8],
       [2.8, 5.6, 2.1],
       [3. , 5.8, 1.6],
       [2.8, 6.1, 1.9],
       [3.8, 6.4, 2. ],
       [2.8, 5.6, 2.2],
       [2.8, 5.1, 1.5],
       [2.6, 5.6, 1.4],
       [3. , 6.1, 2.3],
       [3.4, 5.6, 2.4],
       [3.1, 5.5, 1.8],
       [3. , 4.8, 1.8],
       [3.1, 5.4, 2.1],
       [3.1, 5.6, 2.4],
       [3.1, 5.1, 2.3],
       [2.7, 5.1, 1.9],
       [3.2, 5.9, 2.3],
       [3.3, 5.7, 2.5],
       [3. , 5.2, 2.3],
       [2.5, 5. , 1.9],
       [3. , 5.2, 2. ],
       [3.4, 5.4, 2.3],
       [3. , 5.1, 1.8]])
# Display the full feature matrix (presumably to compare it with the selected columns printed above).
iris.data
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [4.9, 3.6, 1.4, 0.1],
       [4.4, 3. , 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [4.5, 2.3, 1.3, 0.3],
       [4.4, 3.2, 1.3, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [5.1, 3.8, 1.9, 0.4],
       [4.8, 3. , 1.4, 0.3],
       [5.1, 3.8, 1.6, 0.2],
       [4.6, 3.2, 1.4, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.3, 1.4, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3. , 4.5, 1.5],
       [6. , 3.4, 4.5, 1.6],
       [6.7, 3.1, 4.7, 1.5],
       [6.3, 2.3, 4.4, 1.3],
       [5.6, 3. , 4.1, 1.3],
       [5.5, 2.5, 4. , 1.3],
       [5.5, 2.6, 4.4, 1.2],
       [6.1, 3. , 4.6, 1.4],
       [5.8, 2.6, 4. , 1.2],
       [5. , 2.3, 3.3, 1. ],
       [5.6, 2.7, 4.2, 1.3],
       [5.7, 3. , 4.2, 1.2],
       [5.7, 2.9, 4.2, 1.3],
       [6.2, 2.9, 4.3, 1.3],
       [5.1, 2.5, 3. , 1.1],
       [5.7, 2.8, 4.1, 1.3],
       [6.3, 3.3, 6. , 2.5],
       [5.8, 2.7, 5.1, 1.9],
       [7.1, 3. , 5.9, 2.1],
       [6.3, 2.9, 5.6, 1.8],
       [6.5, 3. , 5.8, 2.2],
       [7.6, 3. , 6.6, 2.1],
       [4.9, 2.5, 4.5, 1.7],
       [7.3, 2.9, 6.3, 1.8],
       [6.7, 2.5, 5.8, 1.8],
       [7.2, 3.6, 6.1, 2.5],
       [6.5, 3.2, 5.1, 2. ],
       [6.4, 2.7, 5.3, 1.9],
       [6.8, 3. , 5.5, 2.1],
       [5.7, 2.5, 5. , 2. ],
       [5.8, 2.8, 5.1, 2.4],
       [6.4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8],
       [7.7, 3.8, 6.7, 2.2],
       [7.7, 2.6, 6.9, 2.3],
       [6. , 2.2, 5. , 1.5],
       [6.9, 3.2, 5.7, 2.3],
       [5.6, 2.8, 4.9, 2. ],
       [7.7, 2.8, 6.7, 2. ],
       [6.3, 2.7, 4.9, 1.8],
       [6.7, 3.3, 5.7, 2.1],
       [7.2, 3.2, 6. , 1.8],
       [6.2, 2.8, 4.8, 1.8],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2.8, 5.6, 2.1],
       [7.2, 3. , 5.8, 1.6],
       [7.4, 2.8, 6.1, 1.9],
       [7.9, 3.8, 6.4, 2. ],
       [6.4, 2.8, 5.6, 2.2],
       [6.3, 2.8, 5.1, 1.5],
       [6.1, 2.6, 5.6, 1.4],
       [7.7, 3. , 6.1, 2.3],
       [6.3, 3.4, 5.6, 2.4],
       [6.4, 3.1, 5.5, 1.8],
       [6. , 3. , 4.8, 1.8],
       [6.9, 3.1, 5.4, 2.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.9, 3.1, 5.1, 2.3],
       [5.8, 2.7, 5.1, 1.9],
       [6.8, 3.2, 5.9, 2.3],
       [6.7, 3.3, 5.7, 2.5],
       [6.7, 3. , 5.2, 2.3],
       [6.3, 2.5, 5. , 1.9],
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])
# Same 3-feature RFE, but with a gradient-boosting classifier as the base model.
RFE(estimator=GradientBoostingClassifier(),n_features_to_select=3).fit_transform(iris.data,iris.target)
array([[3.5, 1.4, 0.2],
       [3. , 1.4, 0.2],
       [3.2, 1.3, 0.2],
       [3.1, 1.5, 0.2],
       [3.6, 1.4, 0.2],
       [3.9, 1.7, 0.4],
       [3.4, 1.4, 0.3],
       [3.4, 1.5, 0.2],
       [2.9, 1.4, 0.2],
       [3.1, 1.5, 0.1],
       [3.7, 1.5, 0.2],
       [3.4, 1.6, 0.2],
       [3. , 1.4, 0.1],
       [3. , 1.1, 0.1],
       [4. , 1.2, 0.2],
       [4.4, 1.5, 0.4],
       [3.9, 1.3, 0.4],
       [3.5, 1.4, 0.3],
       [3.8, 1.7, 0.3],
       [3.8, 1.5, 0.3],
       [3.4, 1.7, 0.2],
       [3.7, 1.5, 0.4],
       [3.6, 1. , 0.2],
       [3.3, 1.7, 0.5],
       [3.4, 1.9, 0.2],
       [3. , 1.6, 0.2],
       [3.4, 1.6, 0.4],
       [3.5, 1.5, 0.2],
       [3.4, 1.4, 0.2],
       [3.2, 1.6, 0.2],
       [3.1, 1.6, 0.2],
       [3.4, 1.5, 0.4],
       [4.1, 1.5, 0.1],
       [4.2, 1.4, 0.2],
       [3.1, 1.5, 0.2],
       [3.2, 1.2, 0.2],
       [3.5, 1.3, 0.2],
       [3.6, 1.4, 0.1],
       [3. , 1.3, 0.2],
       [3.4, 1.5, 0.2],
       [3.5, 1.3, 0.3],
       [2.3, 1.3, 0.3],
       [3.2, 1.3, 0.2],
       [3.5, 1.6, 0.6],
       [3.8, 1.9, 0.4],
       [3. , 1.4, 0.3],
       [3.8, 1.6, 0.2],
       [3.2, 1.4, 0.2],
       [3.7, 1.5, 0.2],
       [3.3, 1.4, 0.2],
       [3.2, 4.7, 1.4],
       [3.2, 4.5, 1.5],
       [3.1, 4.9, 1.5],
       [2.3, 4. , 1.3],
       [2.8, 4.6, 1.5],
       [2.8, 4.5, 1.3],
       [3.3, 4.7, 1.6],
       [2.4, 3.3, 1. ],
       [2.9, 4.6, 1.3],
       [2.7, 3.9, 1.4],
       [2. , 3.5, 1. ],
       [3. , 4.2, 1.5],
       [2.2, 4. , 1. ],
       [2.9, 4.7, 1.4],
       [2.9, 3.6, 1.3],
       [3.1, 4.4, 1.4],
       [3. , 4.5, 1.5],
       [2.7, 4.1, 1. ],
       [2.2, 4.5, 1.5],
       [2.5, 3.9, 1.1],
       [3.2, 4.8, 1.8],
       [2.8, 4. , 1.3],
       [2.5, 4.9, 1.5],
       [2.8, 4.7, 1.2],
       [2.9, 4.3, 1.3],
       [3. , 4.4, 1.4],
       [2.8, 4.8, 1.4],
       [3. , 5. , 1.7],
       [2.9, 4.5, 1.5],
       [2.6, 3.5, 1. ],
       [2.4, 3.8, 1.1],
       [2.4, 3.7, 1. ],
       [2.7, 3.9, 1.2],
       [2.7, 5.1, 1.6],
       [3. , 4.5, 1.5],
       [3.4, 4.5, 1.6],
       [3.1, 4.7, 1.5],
       [2.3, 4.4, 1.3],
       [3. , 4.1, 1.3],
       [2.5, 4. , 1.3],
       [2.6, 4.4, 1.2],
       [3. , 4.6, 1.4],
       [2.6, 4. , 1.2],
       [2.3, 3.3, 1. ],
       [2.7, 4.2, 1.3],
       [3. , 4.2, 1.2],
       [2.9, 4.2, 1.3],
       [2.9, 4.3, 1.3],
       [2.5, 3. , 1.1],
       [2.8, 4.1, 1.3],
       [3.3, 6. , 2.5],
       [2.7, 5.1, 1.9],
       [3. , 5.9, 2.1],
       [2.9, 5.6, 1.8],
       [3. , 5.8, 2.2],
       [3. , 6.6, 2.1],
       [2.5, 4.5, 1.7],
       [2.9, 6.3, 1.8],
       [2.5, 5.8, 1.8],
       [3.6, 6.1, 2.5],
       [3.2, 5.1, 2. ],
       [2.7, 5.3, 1.9],
       [3. , 5.5, 2.1],
       [2.5, 5. , 2. ],
       [2.8, 5.1, 2.4],
       [3.2, 5.3, 2.3],
       [3. , 5.5, 1.8],
       [3.8, 6.7, 2.2],
       [2.6, 6.9, 2.3],
       [2.2, 5. , 1.5],
       [3.2, 5.7, 2.3],
       [2.8, 4.9, 2. ],
       [2.8, 6.7, 2. ],
       [2.7, 4.9, 1.8],
       [3.3, 5.7, 2.1],
       [3.2, 6. , 1.8],
       [2.8, 4.8, 1.8],
       [3. , 4.9, 1.8],
       [2.8, 5.6, 2.1],
       [3. , 5.8, 1.6],
       [2.8, 6.1, 1.9],
       [3.8, 6.4, 2. ],
       [2.8, 5.6, 2.2],
       [2.8, 5.1, 1.5],
       [2.6, 5.6, 1.4],
       [3. , 6.1, 2.3],
       [3.4, 5.6, 2.4],
       [3.1, 5.5, 1.8],
       [3. , 4.8, 1.8],
       [3.1, 5.4, 2.1],
       [3.1, 5.6, 2.4],
       [3.1, 5.1, 2.3],
       [2.7, 5.1, 1.9],
       [3.2, 5.9, 2.3],
       [3.3, 5.7, 2.5],
       [3. , 5.2, 2.3],
       [2.5, 5. , 1.9],
       [3. , 5.2, 2. ],
       [3.4, 5.4, 2.3],
       [3. , 5.1, 1.8]])

只要算法具有 coef_ 或 feature_importances_ 这两个属性之一,就可以作为基模型来做特征选择。

基于惩罚项的特征选择法:Embedded

from sklearn.feature_selection import SelectFromModel
# Embedded selection: an L1-penalised logistic regression acts as the base model.
# The number of features finally kept is governed by C; C is not set here,
# and the output below still contains all four columns.
l1_estimator = LogisticRegression(penalty='l1', solver='liblinear')
l1_selector = SelectFromModel(estimator=l1_estimator)
l1_selector.fit_transform(iris.data, iris.target)
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [4.9, 3.6, 1.4, 0.1],
       [4.4, 3. , 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [4.5, 2.3, 1.3, 0.3],
       [4.4, 3.2, 1.3, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [5.1, 3.8, 1.9, 0.4],
       [4.8, 3. , 1.4, 0.3],
       [5.1, 3.8, 1.6, 0.2],
       [4.6, 3.2, 1.4, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.3, 1.4, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3. , 4.5, 1.5],
       [6. , 3.4, 4.5, 1.6],
       [6.7, 3.1, 4.7, 1.5],
       [6.3, 2.3, 4.4, 1.3],
       [5.6, 3. , 4.1, 1.3],
       [5.5, 2.5, 4. , 1.3],
       [5.5, 2.6, 4.4, 1.2],
       [6.1, 3. , 4.6, 1.4],
       [5.8, 2.6, 4. , 1.2],
       [5. , 2.3, 3.3, 1. ],
       [5.6, 2.7, 4.2, 1.3],
       [5.7, 3. , 4.2, 1.2],
       [5.7, 2.9, 4.2, 1.3],
       [6.2, 2.9, 4.3, 1.3],
       [5.1, 2.5, 3. , 1.1],
       [5.7, 2.8, 4.1, 1.3],
       [6.3, 3.3, 6. , 2.5],
       [5.8, 2.7, 5.1, 1.9],
       [7.1, 3. , 5.9, 2.1],
       [6.3, 2.9, 5.6, 1.8],
       [6.5, 3. , 5.8, 2.2],
       [7.6, 3. , 6.6, 2.1],
       [4.9, 2.5, 4.5, 1.7],
       [7.3, 2.9, 6.3, 1.8],
       [6.7, 2.5, 5.8, 1.8],
       [7.2, 3.6, 6.1, 2.5],
       [6.5, 3.2, 5.1, 2. ],
       [6.4, 2.7, 5.3, 1.9],
       [6.8, 3. , 5.5, 2.1],
       [5.7, 2.5, 5. , 2. ],
       [5.8, 2.8, 5.1, 2.4],
       [6.4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8],
       [7.7, 3.8, 6.7, 2.2],
       [7.7, 2.6, 6.9, 2.3],
       [6. , 2.2, 5. , 1.5],
       [6.9, 3.2, 5.7, 2.3],
       [5.6, 2.8, 4.9, 2. ],
       [7.7, 2.8, 6.7, 2. ],
       [6.3, 2.7, 4.9, 1.8],
       [6.7, 3.3, 5.7, 2.1],
       [7.2, 3.2, 6. , 1.8],
       [6.2, 2.8, 4.8, 1.8],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2.8, 5.6, 2.1],
       [7.2, 3. , 5.8, 1.6],
       [7.4, 2.8, 6.1, 1.9],
       [7.9, 3.8, 6.4, 2. ],
       [6.4, 2.8, 5.6, 2.2],
       [6.3, 2.8, 5.1, 1.5],
       [6.1, 2.6, 5.6, 1.4],
       [7.7, 3. , 6.1, 2.3],
       [6.3, 3.4, 5.6, 2.4],
       [6.4, 3.1, 5.5, 1.8],
       [6. , 3. , 4.8, 1.8],
       [6.9, 3.1, 5.4, 2.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.9, 3.1, 5.1, 2.3],
       [5.8, 2.7, 5.1, 1.9],
       [6.8, 3.2, 5.9, 2.3],
       [6.7, 3.3, 5.7, 2.5],
       [6.7, 3. , 5.2, 2.3],
       [6.3, 2.5, 5. , 1.9],
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])
# Repeat the selection with a LogisticRegression built without any arguments;
# the output below keeps only two columns.
default_estimator = LogisticRegression()
default_selector = SelectFromModel(estimator=default_estimator)
default_selector.fit_transform(iris.data, iris.target)
array([[1.4, 0.2],
       [1.4, 0.2],
       [1.3, 0.2],
       [1.5, 0.2],
       [1.4, 0.2],
       [1.7, 0.4],
       [1.4, 0.3],
       [1.5, 0.2],
       [1.4, 0.2],
       [1.5, 0.1],
       [1.5, 0.2],
       [1.6, 0.2],
       [1.4, 0.1],
       [1.1, 0.1],
       [1.2, 0.2],
       [1.5, 0.4],
       [1.3, 0.4],
       [1.4, 0.3],
       [1.7, 0.3],
       [1.5, 0.3],
       [1.7, 0.2],
       [1.5, 0.4],
       [1. , 0.2],
       [1.7, 0.5],
       [1.9, 0.2],
       [1.6, 0.2],
       [1.6, 0.4],
       [1.5, 0.2],
       [1.4, 0.2],
       [1.6, 0.2],
       [1.6, 0.2],
       [1.5, 0.4],
       [1.5, 0.1],
       [1.4, 0.2],
       [1.5, 0.2],
       [1.2, 0.2],
       [1.3, 0.2],
       [1.4, 0.1],
       [1.3, 0.2],
       [1.5, 0.2],
       [1.3, 0.3],
       [1.3, 0.3],
       [1.3, 0.2],
       [1.6, 0.6],
       [1.9, 0.4],
       [1.4, 0.3],
       [1.6, 0.2],
       [1.4, 0.2],
       [1.5, 0.2],
       [1.4, 0.2],
       [4.7, 1.4],
       [4.5, 1.5],
       [4.9, 1.5],
       [4. , 1.3],
       [4.6, 1.5],
       [4.5, 1.3],
       [4.7, 1.6],
       [3.3, 1. ],
       [4.6, 1.3],
       [3.9, 1.4],
       [3.5, 1. ],
       [4.2, 1.5],
       [4. , 1. ],
       [4.7, 1.4],
       [3.6, 1.3],
       [4.4, 1.4],
       [4.5, 1.5],
       [4.1, 1. ],
       [4.5, 1.5],
       [3.9, 1.1],
       [4.8, 1.8],
       [4. , 1.3],
       [4.9, 1.5],
       [4.7, 1.2],
       [4.3, 1.3],
       [4.4, 1.4],
       [4.8, 1.4],
       [5. , 1.7],
       [4.5, 1.5],
       [3.5, 1. ],
       [3.8, 1.1],
       [3.7, 1. ],
       [3.9, 1.2],
       [5.1, 1.6],
       [4.5, 1.5],
       [4.5, 1.6],
       [4.7, 1.5],
       [4.4, 1.3],
       [4.1, 1.3],
       [4. , 1.3],
       [4.4, 1.2],
       [4.6, 1.4],
       [4. , 1.2],
       [3.3, 1. ],
       [4.2, 1.3],
       [4.2, 1.2],
       [4.2, 1.3],
       [4.3, 1.3],
       [3. , 1.1],
       [4.1, 1.3],
       [6. , 2.5],
       [5.1, 1.9],
       [5.9, 2.1],
       [5.6, 1.8],
       [5.8, 2.2],
       [6.6, 2.1],
       [4.5, 1.7],
       [6.3, 1.8],
       [5.8, 1.8],
       [6.1, 2.5],
       [5.1, 2. ],
       [5.3, 1.9],
       [5.5, 2.1],
       [5. , 2. ],
       [5.1, 2.4],
       [5.3, 2.3],
       [5.5, 1.8],
       [6.7, 2.2],
       [6.9, 2.3],
       [5. , 1.5],
       [5.7, 2.3],
       [4.9, 2. ],
       [6.7, 2. ],
       [4.9, 1.8],
       [5.7, 2.1],
       [6. , 1.8],
       [4.8, 1.8],
       [4.9, 1.8],
       [5.6, 2.1],
       [5.8, 1.6],
       [6.1, 1.9],
       [6.4, 2. ],
       [5.6, 2.2],
       [5.1, 1.5],
       [5.6, 1.4],
       [6.1, 2.3],
       [5.6, 2.4],
       [5.5, 1.8],
       [4.8, 1.8],
       [5.4, 2.1],
       [5.6, 2.4],
       [5.1, 2.3],
       [5.1, 1.9],
       [5.9, 2.3],
       [5.7, 2.5],
       [5.2, 2.3],
       [5. , 1.9],
       [5.2, 2. ],
       [5.4, 2.3],
       [5.1, 1.8]])

特征相关性选择

# Append the class label as a 'cate' column so that it takes part in the
# correlation matrices computed below, then display the frame.
iris_df['cate'] = iris.target
iris_df
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) cate
0 5.1 3.5 1.4 0.2 0
1 4.9 3.0 1.4 0.2 0
2 4.7 3.2 1.3 0.2 0
3 4.6 3.1 1.5 0.2 0
4 5.0 3.6 1.4 0.2 0
... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 2
146 6.3 2.5 5.0 1.9 2
147 6.5 3.0 5.2 2.0 2
148 6.2 3.4 5.4 2.3 2
149 5.9 3.0 5.1 1.8 2

150 rows × 5 columns

# Values accepted by the `method` argument used below:
# pearson  : standard correlation coefficient, for continuous values
# kendall  : Kendall Tau correlation coefficient, for discrete values
# spearman : Spearman rank correlation; values are replaced by their ranks
#            before correlating, e.g. 0 1 5 100 -> 0 1 2 3

iris_df.corr(method='pearson')
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) cate
sepal length (cm) 1.000000 -0.117570 0.871754 0.817941 0.782561
sepal width (cm) -0.117570 1.000000 -0.428440 -0.366126 -0.426658
petal length (cm) 0.871754 -0.428440 1.000000 0.962865 0.949035
petal width (cm) 0.817941 -0.366126 0.962865 1.000000 0.956547
cate 0.782561 -0.426658 0.949035 0.956547 1.000000
# Draw the Pearson correlation matrix as an annotated heatmap; the colour
# scale is pinned to [-1, 1].
pearson_matrix = iris_df.corr(method='pearson')
sns.heatmap(
    pearson_matrix,
    cmap=plt.cm.coolwarm,
    vmin=-1,
    vmax=1,
    annot=True,
)
<AxesSubplot:>

请添加图片描述

# Correlation of every feature with the target: take the 'cate' row of the
# Pearson matrix and drop its last entry ('cate' against itself).
cate_row = iris_df.corr(method='pearson').loc['cate']
feature_vs_target = cate_row[:-1]
feature_vs_target.plot(kind='bar')
# Fix the y axis to the full correlation range.
plt.ylim([-1, 1])
(-1.0, 1.0)

请添加图片描述

# 特征选择原则:
# 可以采用多种不同的特征选择方案对特征重要性进行评估
# 优先选择被多种方案公认为比较重要的特征
# 只被个别方案认为比较重要的特征,可以先保留,再通过实验尝试后决定取舍

猜你喜欢

转载自blog.csdn.net/qq_33489955/article/details/124337843
今日推荐