# User count statistics features: per-uid and per-aid row counts over train + test.
import os

import pandas as pd
# Build occurrence-count features: for every row of the combined train + test
# data, how many rows share its `uid` and how many share its `aid`. The result
# is written as a two-column CSV (uid_count, aid_count) in the same row order
# as the concatenated input (train, then test1, then test2).

# Ids inspected by the sanity-check prints below.
SAMPLE_AID = 627
SAMPLE_UID = 17557009
OUTPUT_PATH = './SDD_data/sdd_uid_count.csv'

train = pd.read_csv('train.csv')
test1 = pd.read_csv('test1.csv')
test2 = pd.read_csv('test2.csv')

test = pd.concat([test1, test2])
# Placeholder label for test rows. It also guarantees a non-null `label`, so
# the count() aggregations below include the test rows.
test['label'] = -1
all_data = pd.concat([train, test])
print(all_data.head())

# Sanity checks: number of rows for one sample aid and one sample uid.
aid_627 = all_data[all_data.aid == SAMPLE_AID]
print(len(aid_627))
u_17557009 = all_data[all_data.uid == SAMPLE_UID]
print(len(u_17557009))

# NOTE: count() only counts rows whose `label` is non-null.
# The aggregated columns are named explicitly instead of relying on the
# automatic `count_x` / `count_y` merge suffixes, which would silently select
# the wrong columns if the input already contained a column called `count`.
uid_count = (
    all_data.groupby('uid')['label'].count().rename('uid_count').reset_index()
)
aid_count = (
    all_data.groupby('aid')['label'].count().rename('aid_count').reset_index()
)

# Left merges keep every input row, in its original order.
all_data = pd.merge(all_data, uid_count, how='left', on='uid')
all_data = pd.merge(all_data, aid_count, how='left', on='aid')
print(all_data.head())

all_data = all_data[['uid_count', 'aid_count']]
print(all_data.head())

# Create the output directory up front so the write cannot fail on a missing
# folder after all the work above has been done.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
all_data.to_csv(OUTPUT_PATH, header=True, index=False)