def get_launch_feature(data, day_start, day_end):
    """Build the launch-log features for one user's rows.

    Meant to be passed to ``groupby('user_id').apply``, in which case
    ``data`` is the sub-frame holding a single user's launch records.

    Args:
        data: DataFrame with at least the ``user_id`` and ``day`` columns.
            It must contain at least one row.
        day_start: Start of the observation window. Not used by the
            computation; kept so the signature matches the keyword
            arguments supplied by callers.
        day_end: End of the observation window; the recency feature is
            measured back from this value.

    Returns:
        pd.Series with three entries: ``user_id`` (taken from the first
        row), ``登录次数`` (number of rows in ``data``) and
        ``最后一次登录时间差`` (``day_end`` minus the largest ``day``).
    """
    return pd.Series({
        # Square brackets select the first value. Calling ``.iloc(0)``
        # would return the indexer object itself instead of the user id.
        'user_id': data['user_id'].iloc[0],
        '登录次数': data.shape[0],
        '最后一次登录时间差': day_end - data['day'].max(),
    })
def read_launch_log(day_start, day_end):
    """Load the app launch log and compute per-user launch features.

    Reads the tab-separated file ``app_launch_log.csv`` from the location
    prefixed by ``INPUT_FILE`` (column names ``user_id`` and ``day`` are
    supplied explicitly), keeps the rows whose ``day`` lies in the
    inclusive range ``[day_start, day_end]``, and applies
    ``get_launch_feature`` to each user's rows.

    Args:
        day_start: First day (inclusive) of the window to keep.
        day_end: Last day (inclusive) of the window to keep; also forwarded
            to ``get_launch_feature``.

    Returns:
        The result of the per-user ``groupby(...).apply``. Its shape is
        also printed, as before.
    """
    launch_log = pd.read_csv(
        INPUT_FILE + 'app_launch_log.csv',
        sep='\t',
        names=['user_id', 'day'],
    )

    # Restrict the log to the requested window (both ends inclusive).
    in_window = (launch_log['day'] >= day_start) & (launch_log['day'] <= day_end)
    launch_log = launch_log[in_window]

    # One feature row per user.
    per_user = launch_log.groupby('user_id')
    data = per_user.apply(get_launch_feature, day_start=day_start, day_end=day_end)
    print(data.shape)

    # Hand the computed features back so the work is not thrown away.
    return data
pandas中apply函数的使用
猜你喜欢
转载自blog.csdn.net/qq_25987491/article/details/81261546
今日推荐
周排行