def函数变量变化

版权声明:本文为博主原创文章,未经博主允许不得转载。https://blog.csdn.net/yukyin https://blog.csdn.net/yukyin/article/details/82945520

介绍下本人。中山大学,医学生+计科学生的集合体,机器学习爱好者。
一、我们容易搞混但是可以理解的def和return变量变化

#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME  :2018/10/3 16:39
# @Author:Yolanda
# @File  :ceshi1.py

#反击啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm

# Directory that holds the train/test dumps read below.
path = '/home/lab/cyy/ceshi35/kdxf/data'
train, test = (pd.read_table(path + name) for name in ('/train.txt', '/test.txt'))
# Stack train on top of test and renumber the rows from 0.
data = pd.concat((train, test), axis=0, ignore_index=True)
# print(data)
#full变,data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        full: DataFrame to enrich. The local name is rebound to each merge
            result; nothing is assigned into the object the caller passed.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
full=feature_count(data)
print(full)
print(data)
print(data.keys())
exit(0)
#full为空,data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        full: DataFrame to analyse. Only the local name is rebound to the
            merge results; the caller's object is not assigned into.

    Returns:
        None. The enriched frame is printed but never returned, so
        ``x = feature_count(...)`` binds ``x`` to ``None``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data)
print(full)#none
print(data)
print(data.keys())
exit(0)
# data becomes None: feature_count has no return, so data=feature_count(data) rebinds data to None
#统计特征
count_feature_list = []


def feature_count(full):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        full: DataFrame to analyse. Only the local name is rebound to the
            merge results; the caller's object is not assigned into.

    Returns:
        None. Assigning the call result back to a variable therefore
        replaces that variable with ``None``.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
data=feature_count(data)#这样写相当于没有return出一个值,也就是用空值替换了原data,原data变为空
print(data)
print(data)
print(data.keys())#nonetype没有keys
exit(0)
#full为空,data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        full: DataFrame to analyse. Only the local name is rebound to the
            merge results; the caller's object is not assigned into.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data)
print(full)#none
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Build and print ``cnt_<column>`` frequency columns, then return the global ``data``.

    Args:
        full: DataFrame to analyse. Only the local name is rebound to the
            merge results.

    Returns:
        The module-level ``data`` (not the local ``full``), so the
        ``cnt_*`` columns built here are discarded. This mismatch is kept
        deliberately: it is what this experiment demonstrates.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    # `data` is not a local name here; it resolves to the module-level frame.
    return data
data=feature_count(data)#这样写就很混乱了,return的应该是def内定义的变量名
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        full: DataFrame to enrich. The local name is rebound to each merge
            result; nothing is assigned into the object the caller passed.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns. A caller
        that ignores the return value keeps its original frame.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
feature_count(data)
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        full: DataFrame to analyse. Only the local name is rebound to the
            merge results; the caller's object is not assigned into.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
feature_count(data)
print(data)
print(data.keys())
exit(0)
#data变(因为下面data=feature_count(data)赋值了data)
#统计特征
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        data: DataFrame to enrich. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns. The
        caller must assign it to observe the change.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
data=feature_count(data)
print(data)
print(data.keys())
exit(0)
#full空。data不变
#统计特征
count_feature_list = []


def feature_count(data):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        data: DataFrame to analyse. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
full=feature_count(data)
print(full)
print(data)
print(data.keys())
exit(0)
#data变——错了,data不变!
#统计特征
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        data: DataFrame to enrich. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns. If the
        caller discards it, the caller's frame stays as it was.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
feature_count(data)
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(data):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        data: DataFrame to analyse. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
feature_count(data)
print(data)
print(data.keys())
exit(0)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#full变,data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        full: DataFrame to enrich. The local name is rebound to each merge
            result; nothing is assigned into the object the caller passed.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
#full空,data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        full: DataFrame to analyse. Only the local name is rebound to the
            merge results; the caller's object is not assigned into.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        full: DataFrame to enrich. The local name is rebound to each merge
            result; nothing is assigned into the object the caller passed.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns. A caller
        that ignores the return value keeps its original frame.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
    return full
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(full):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        full: DataFrame to analyse. Only the local name is rebound to the
            merge results; the caller's object is not assigned into.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = full.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = full.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        full = full.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(full)
    print(full.keys())
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
#data变
#统计特征
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        data: DataFrame to enrich. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns. The
        caller must assign it to observe the change.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
data=feature_count(data.copy())
print(data)
print(data.keys())
exit(0)

#full为空,data不变
#统计特征
count_feature_list = []


def feature_count(data):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        data: DataFrame to analyse. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
full=feature_count(data.copy())
print(full)
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(data):
    """Add a ``cnt_<column>`` frequency column for every column with more than 3 distinct values.

    Args:
        data: DataFrame to enrich. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        The merged DataFrame carrying the extra ``cnt_*`` columns. If the
        caller discards it, the caller's frame stays as it was.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
    return data
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)
#data不变
#统计特征
count_feature_list = []


def feature_count(data):
    """Build and print ``cnt_<column>`` frequency columns, returning nothing.

    Args:
        data: DataFrame to analyse. This parameter shadows any module-level
            ``data``; rebinding it inside the function does not change the
            caller's variable.

    Returns:
        None. The enriched frame is printed but never returned.

    Side effects:
        Appends each new column name to the module-level
        ``count_feature_list`` and prints the intermediate tables.
    """
    data_cate = data.nunique().reset_index().rename(columns={0: 'data_nunique'})
    print(data_cate)
    nunique2features = data_cate[data_cate['data_nunique'] > 3]['index']
    print(nunique2features)
    for i in nunique2features:
        # Build the name once with str() so non-string column labels work too.
        new_feature = 'cnt_' + str(i)
        temp = data.groupby(i).size().reset_index().rename(columns={0: new_feature})
        # Left join keeps every original row.
        data = data.merge(temp, how='left')
        count_feature_list.append(new_feature)
    print(count_feature_list)
    print(data)
    print(data.keys())
feature_count(data.copy())
print(data)
print(data.keys())
exit(0)

二、让我们看一个有趣的事情。我们容易搞混但是难以理解的def和return变量变化

#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME  :2018/10/3 8:20
# @Author:Yolanda
# @File  :ceshi.py

#反击啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm

# Directory that holds the train/test dumps read below.
path = '/home/lab/cyy/ceshi35/kdxf/data'
train, test = (pd.read_table(path + name) for name in ('/train.txt', '/test.txt'))
# Stack train on top of test and renumber the rows from 0.
data = pd.concat((train, test), axis=0, ignore_index=True)
# print(data)
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into the frame that was passed in,
    so the caller's object is modified; that same object is returned.
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
#变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into the frame that was passed in,
    so the caller's object is modified. Nothing is returned (implicit
    ``None``).
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
full=temp(data)
print(full)
print(data)
exit(0)
#变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into the frame that was passed in,
    so the caller's object is modified even when the return value (the
    same object) is ignored.
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
    return full
temp(data)
print(data)
exit(0)
#变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into the frame that was passed in,
    so the caller's object is modified. Nothing is returned (implicit
    ``None``).
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
temp(data)
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#变
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into the
    frame that was passed in, and that same object is returned.
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
    return data
data=temp(data)
print(data)
exit(0)
#第一个变了,第二个是none
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into the
    frame that was passed in. Nothing is returned, so assigning the call
    result to a variable binds it to ``None``.
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
data=temp(data)
print(data)
exit(0)
#变
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into the
    frame that was passed in, so the change is visible to the caller even
    when the returned object is ignored.
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
    return data
temp(data)
print(data)
exit(0)
#变
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into the
    frame that was passed in. Nothing is returned (implicit ``None``).
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
temp(data)
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#不变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into whichever frame is passed in
    (a copy, in this experiment), and that same object is returned.
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
    return full
full=temp(data.copy())
print(full)
print(data)
exit(0)
#不变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into whichever frame is passed in
    (a copy, in this experiment). Nothing is returned (implicit ``None``).
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
full=temp(data.copy())
print(full)
print(data)
exit(0)
#不变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into whichever frame is passed in
    (a copy, in this experiment), and that same object is returned.
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
    return full
temp(data.copy())
print(data)
exit(0)
#不变
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    The filled column is assigned back into whichever frame is passed in
    (a copy, in this experiment). Nothing is returned (implicit ``None``).
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
temp(data.copy())
print(data)
exit(0)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#变
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into
    whichever frame is passed in (a copy, in this experiment), and that
    same object is returned.
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
    return data
data=temp(data.copy())
print(data)
exit(0)
#第一个变,第二个none
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into
    whichever frame is passed in (a copy, in this experiment). Nothing is
    returned, so assigning the call result binds ``None``.
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
data=temp(data.copy())
print(data)
exit(0)
#不变
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into
    whichever frame is passed in (a copy, in this experiment), and that
    same object is returned.
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
    return data
temp(data.copy())
print(data)
exit(0)
#不变
def temp(data):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``data``.

    ``data`` is the parameter; the filled column is assigned back into
    whichever frame is passed in (a copy, in this experiment). Nothing is
    returned (implicit ``None``).
    """
    filled_tags = data['user_tags'].fillna('-1')
    data['user_tags'] = filled_tags
    print(data)
temp(data.copy())
print(data)
exit(0)

三、让我们接着看一个更有趣的事情。我们容易搞混但是更加无法理解的def和return变量变化

#!/usr/bin/env python
# -*- coding:utf8 -*-
# @TIME  :2018/10/3 8:20
# @Author:Yolanda
# @File  :ceshi.py

#反击啊!少女
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from tqdm import tqdm

# Directory that holds the train/test dumps read below.
path = '/home/lab/cyy/ceshi35/kdxf/data'
train, test = (pd.read_table(path + name) for name in ('/train.txt', '/test.txt'))
# Stack train on top of test and renumber the rows from 0.
data = pd.concat((train, test), axis=0, ignore_index=True)
# print(data)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#full变,data不变(类似count)
def temp(full):
    """Return a frame with every missing value replaced by the string ``'-1'``.

    The ``fillna`` result is bound to a local name only, so the frame the
    caller passed in is never assigned into.
    """
    filled = full.fillna('-1')
    print(filled)
    return filled
full=temp(data)
print(full)
print(data)
exit(0)
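# full changes and data changes too: full['user_tags']=... is item assignment on the very object data refers to, so it is mutated in place (pass data.copy() to keep data intact). This is Python name binding vs. in-place mutation, not pandas internals.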
def temp(full):
    """Replace missing ``user_tags`` entries with ``'-1'`` directly on ``full``.

    Unlike rebinding ``full`` to a new object, assigning into
    ``full['user_tags']`` writes to the frame that was passed in, so the
    caller's object is modified; that same object is returned.
    """
    filled_tags = full['user_tags'].fillna('-1')
    full['user_tags'] = filled_tags
    print(full)
    return full
full=temp(data)
print(full)
print(data)
exit(0)
#full变(变成user_tags一列),data不变(类似count)
def temp(full):
    """Return only the ``user_tags`` column with missing values set to ``'-1'``.

    The local name is rebound to the filled column, so the result is a
    single column rather than the whole frame, and the frame the caller
    passed in is never assigned into.
    """
    filled_tags = full['user_tags'].fillna('-1')
    print(filled_tags)
    return filled_tags
full=temp(data)
print(full)
print(data)
exit(0)

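我猜可能涉及到pandas底层,有懂的大佬来指点小白一下~~(补充:其实这主要由Python的赋值语义决定,而不是pandas底层——`full=...`只是让函数内的局部名字指向一个新对象,函数外的data不受影响;而`full['user_tags']=...`是对传入的同一个DataFrame对象做原地修改,所以data也会跟着变,想保留原data就传入data.copy()。)总结到此,理解基础上多加练习。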

猜你喜欢

转载自blog.csdn.net/yukyin/article/details/82945520