ML之FE:特征工程中常用的一些处理手段(缺失值填充、异常值检测等)及其对应的底层代码的实现

ML之FE:特征工程中常用的一些处理手段(缺失值填充、异常值检测等)及其对应的底层代码的实现

目录

特征工程中常用的一些处理手段(缺失值填充、异常值检测等)及其对应的底层代码的实现

缺失值填充

fillna(self, value=None, method=None, axis=None,  inplace=False, limit=None, downcast=None, **kwargs)


特征工程中常用的一些处理手段(缺失值填充、异常值检测等)及其对应的底层代码的实现

缺失值填充

import pandas as pd

# Load the sample data and show the raw column before any filling.
df = pd.read_csv('test01.csv')
print(df['feature01'])
# Fill missing entries with the sentinel -1 first: astype(int) cannot convert
# NaN, so the cast is only safe once no missing values remain.
df['feature02'] = df['feature01'].fillna(-1).astype(int)
print(df['feature02'])

fillna(self, value=None, method=None, axis=None,  inplace=False, limit=None, downcast=None, **kwargs)

@Appender(_shared_docs['fillna'] % _shared_doc_kwargs)
def fillna(self, value=None, method=None, axis=None, inplace=False,
           limit=None, downcast=None, **kwargs):
    # DataFrame-level entry point: it has no logic of its own and forwards
    # every argument, unchanged and by keyword, to the parent class's fillna.
    # No inline docstring on purpose -- presumably the Appender decorator
    # supplies it from _shared_docs (TODO confirm against the decorator).
    parent = super(DataFrame, self)
    return parent.fillna(value=value, method=method, axis=axis,
                         inplace=inplace, limit=limit, downcast=downcast,
                         **kwargs)


df.fillna()
@Appender(_shared_docs['fillna'] % _shared_doc_kwargs)
def fillna(self, value=None, method=None, axis=None, inplace=False,
           limit=None, downcast=None):
    # Generic fillna implementation. Exactly one of `value` / `method` must
    # be given:
    #   * method only -> fill through self._data.interpolate (or per-chunk /
    #                    via transpose for the special cases below)
    #   * value only  -> fill with a scalar, dict/Series or DataFrame
    # With inplace=True the object is updated through _update_inplace;
    # otherwise a new object built from the filled data is returned.
    # No inline docstring on purpose -- presumably the Appender decorator
    # supplies it from _shared_docs (TODO confirm against the decorator).
    inplace = validate_bool_kwarg(inplace, 'inplace')
    if isinstance(value, (list, tuple)):
        raise TypeError('"value" parameter must be a scalar or dict, but '
                        'you passed a "{0}"'.format(type(value).__name__))
    self._consolidate_inplace()

    # set the default here, so functions examining the signature
    # can detect if something was set (e.g. in groupby) (GH9221)
    if axis is None:
        axis = 0
    axis = self._get_axis_number(axis)
    method = missing.clean_fill_method(method)
    # Imported locally rather than at module level (presumably to avoid a
    # circular import -- TODO confirm).
    from pandas import DataFrame

    if value is None:
        # ---- fill by method ----
        if method is None:
            raise ValueError('must specify a fill method or value')
        if self._is_mixed_type and axis == 1:
            if inplace:
                raise NotImplementedError()
            # Transpose, fill along the other axis, transpose back.
            result = self.T.fillna(method=method, limit=limit).T

            # need to downcast here because of all of the transposes
            result._data = result._data.downcast()

            return result

        # > 3d
        if self.ndim > 3:
            raise NotImplementedError('Cannot fillna with a method for > '
                                      '3dims')

        # 3d
        elif self.ndim == 3:
            # fill in 2d chunks
            result = dict([(col, s.fillna(method=method, value=value))
                           for (col, s) in self.iteritems()])
            new_obj = self._constructor.from_dict(result).__finalize__(self)
            new_data = new_obj._data

        else:
            # 2d or less
            method = missing.clean_fill_method(method)
            new_data = self._data.interpolate(method=method, axis=axis,
                                              limit=limit, inplace=inplace,
                                              coerce=True,
                                              downcast=downcast)
    else:
        # ---- fill by value ----
        if method is not None:
            raise ValueError('cannot specify both a fill method and value')

        # Nothing to fill along an empty axis.
        if len(self._get_axis(axis)) == 0:
            return self

        if self.ndim == 1:
            if isinstance(value, (dict, ABCSeries)):
                from pandas import Series
                value = Series(value)
            elif not is_list_like(value):
                # Scalar fill value: usable as-is.
                pass
            else:
                raise ValueError("invalid fill value with a %s" %
                                 type(value))

            new_data = self._data.fillna(value=value, limit=limit,
                                         inplace=inplace,
                                         downcast=downcast)

        elif isinstance(value, (dict, ABCSeries)):
            if axis == 1:
                raise NotImplementedError('Currently only can fill '
                                          'with dict/Series column '
                                          'by column')

            # Fill column by column; keys that are not in the object are
            # skipped silently.
            result = self if inplace else self.copy()
            for k, v in compat.iteritems(value):
                if k not in result:
                    continue
                obj = result[k]
                obj.fillna(v, limit=limit, inplace=True, downcast=downcast)

            # NOTE(review): this branch returns `result` even when
            # inplace=True, unlike the final inplace branch below.
            return result
        elif not is_list_like(value):
            new_data = self._data.fillna(value=value, limit=limit,
                                         inplace=inplace,
                                         downcast=downcast)
        elif isinstance(value, DataFrame) and self.ndim == 2:
            # Keep non-null cells, take the rest from `value`.
            new_data = self.where(self.notnull(), value)
        else:
            raise ValueError("invalid fill value with a %s" % type(value))

    if inplace:
        self._update_inplace(new_data)
    else:
        return self._constructor(new_data).__finalize__(self)



Enter: apply completion.
  + Ctrl: remove arguments and replace current word (no Pop-up focus).
  + Shift: remove arguments (requires Pop-up focus).
发布了1664 篇原创文章 · 获赞 7398 · 访问量 1342万+

猜你喜欢

转载自blog.csdn.net/qq_41185868/article/details/105375109