腾讯广告算法赛Top4—SDD特征处理1

用户(uid)与广告(aid)出现次数统计特征

import os

import pandas as pd

# Build two occurrence-count features over the combined train + test rows --
# how many rows share each row's `uid` and how many share its `aid` -- and
# write them as a two-column CSV aligned row-for-row with train followed by
# test1 and test2.

OUTPUT_PATH = './SDD_data/sdd_uid_count.csv'

train = pd.read_csv('train.csv')
test1 = pd.read_csv('test1.csv')
test2 = pd.read_csv('test2.csv')

# Give every test row the placeholder label -1 so that, once stacked with the
# training rows, test rows carry a non-null label and are included in the
# counts computed below.
test = pd.concat([test1, test2])
test['label'] = -1
all_data = pd.concat([train, test])
print(all_data.head())

# Spot checks: number of rows for one particular aid and one particular uid.
aid_627 = all_data[all_data.aid == 627]
print(len(aid_627))
u_17557009 = all_data[all_data.uid == 17557009]
print(len(u_17557009))

# Rows with a non-null label per uid / per aid. The aggregates are named
# explicitly so the merges below never depend on pandas' automatic
# `_x` / `_y` suffixes, which would pick the wrong columns if the input
# already contained a column called `count`.
uid_count = (
    all_data.groupby('uid')['label'].count().rename('uid_count').reset_index()
)
aid_count = (
    all_data.groupby('aid')['label'].count().rename('aid_count').reset_index()
)

# Left merges keep every row of all_data in its original order.
all_data = pd.merge(all_data, uid_count, how='left', on='uid')
all_data = pd.merge(all_data, aid_count, how='left', on='aid')

print(all_data.head())
all_data = all_data[['uid_count', 'aid_count']]
print(all_data.head())

# Create the output directory first; to_csv does not create missing parent
# directories and would otherwise fail after all the work above.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
all_data.to_csv(OUTPUT_PATH, header=True, index=False)

猜你喜欢

转载自blog.csdn.net/ML_SDD/article/details/81702015
今日推荐