Using pandas to calculate each category's share of the total value when a column holds several categories per row

For example, I have a box office data:

Type                Box office
Drama, disaster     2913118
War, history        3094524
Drama, comedy       3099961
Drama               3176119

Given this box office data, I want to know what share of the total box office each film type accounts for, and then, for every row, the combined share of the types listed in that row. The result looks like this:
Insert picture description here

import pandas as pd


def split_and_sum(dataframe: pd.DataFrame, column: str, sum_column: str):
    """Return, for each row, the combined share of the types listed in it.

    Each cell of ``column`` holds a comma-separated list of type labels.
    The value in ``sum_column`` is credited once to every label of the row,
    the per-label totals are normalised so that they sum to 1, and each row
    then receives the sum of the shares of its own labels.

    Labels are stripped of surrounding whitespace and empty labels (for
    example the one produced by a trailing comma) are ignored, so
    ``'a, b'``, ``'a,b'`` and ``'a,b,'`` all refer to the same two labels.

    Args:
        dataframe: Input data; it is not modified.
        column: Name of the column with the comma-separated type labels.
        sum_column: Name of the numeric column to distribute over the labels.

    Returns:
        A float ``pd.Series`` aligned with ``dataframe.index``. Rows without
        any label get ``0.0``.

    Raises:
        ZeroDivisionError: If labels exist but their totals add up to zero.
    """

    def split_labels(cell):
        """Split one cell into its cleaned, non-empty labels."""
        stripped = (label.strip() for label in cell.split(','))
        return [label for label in stripped if label]

    # Parse every row once; the result is reused for both passes below.
    row_labels = [split_labels(cell) for cell in dataframe[column].tolist()]

    # First pass: accumulate the total amount credited to each label.
    # ``tolist()`` yields plain Python numbers, so large sums cannot overflow
    # a fixed-width integer dtype.
    totals = {}
    for labels, amount in zip(row_labels, dataframe[sum_column].tolist()):
        for label in labels:
            totals[label] = totals.get(label, 0) + amount

    # Normalise the totals into shares of the grand total.
    grand_total = sum(totals.values())
    shares = {
        label: float(total) / float(grand_total)
        for label, total in totals.items()
    }

    # Second pass: each row receives the summed share of its own labels.
    row_values = [
        sum((shares[label] for label in labels), 0.0)
        for labels in row_labels
    ]
    return pd.Series(row_values, index=dataframe.index, dtype=float)


if __name__ == '__main__':
    # Demo data: one row per film group, with a comma-separated list of
    # type labels and the box office figure for that row.
    df = pd.DataFrame({
        'type': ['剧情, 灾难', '战争, 历史', '剧情, 喜剧', '剧情,'],
        'box_office': [2913118, 3094524, 3099961, 3176119],
    })

    # Store the value computed for each row in a new column.
    type_share = split_and_sum(df, 'type', 'box_office')
    df['type_value'] = type_share

Guess you like

Origin blog.csdn.net/weixin_35757704/article/details/114977031