当多个变量被存储为列名和列值时进行清理

import pandas as pd
# Load the sensors dataset from the CSV file into a DataFrame.
sensors = pd.read_csv('data/sensors.csv')
# Bare expression: echoes the frame when run in an interactive/notebook session.
sensors
  Group Property 2012 2013 2014 2015 2016
0 A Pressure 928 873 814 973 870
1 A Temperature 1026 1038 1009 1036 1042
2 A Flow 819 806 861 882 856
3 B Pressure 817 877 914 806 942
4 B Temperature 1008 1041 1009 1002 1013
5 B Flow 887 899 837 824 873
# Tidy the data with melt: Group and Property stay as identifier columns,
# the remaining column labels are gathered into a new 'Year' column, and the
# readings go into melt's default 'value' column. Only the first six rows
# are shown.
(
    pd.melt(
        sensors,
        id_vars=['Group', 'Property'],
        var_name='Year',
    )
    .head(6)
)
  Group Property Year value
0 A Pressure 2012 928
1 A Temperature 2012 1026
2 A Flow 2012 819
3 B Pressure 2012 817
4 B Temperature 2012 1008
5 B Flow 2012 887
# Use pivot_table to turn the values of the Property column into new column
# names: melt first gathers the year columns into 'Year'/'value', then
# pivot_table spreads each Property into its own column keyed by
# (Group, Year). Note that pivot_table aggregates with its default aggfunc.
# reset_index turns Group and Year back into ordinary columns, and
# rename_axis(None, axis='columns') clears the leftover name on the
# column axis.
(
    sensors
    .melt(id_vars=['Group', 'Property'], var_name='Year')
    .pivot_table(index=['Group', 'Year'], columns='Property', values='value')
    .reset_index()
    .rename_axis(None, axis='columns')
)
  Group Year Flow Pressure Temperature
0 A 2012 819 928 1026
1 A 2013 806 873 1038
2 A 2014 861 814 1009
3 A 2015 882 973 1036
4 A 2016 856 870 1042
5 B 2012 887 817 1008
6 B 2013 899 877 1041
7 B 2014 837 914 1009
8 B 2015 824 806 1002
9 B 2016 873 942 1013
# Achieve the same reshaping with stack and unstack:
#   1. set_index moves Group and Property into the row index.
#   2. stack pivots the remaining column labels into the innermost index
#      level.
#   3. unstack('Property') moves the Property level out to the columns.
#   4. The two rename_axis calls label the index levels Group/Year and clear
#      the name on the column axis.
#   5. reset_index turns Group and Year back into ordinary columns.
(
    sensors
    .set_index(['Group', 'Property'])
    .stack()
    .unstack('Property')
    .rename_axis(['Group', 'Year'], axis='index')
    .rename_axis(None, axis='columns')
    .reset_index()
)
  Group Year Flow Pressure Temperature
0 A 2012 819 928 1026
1 A 2013 806 873 1038
2 A 2014 861 814 1009
3 A 2015 882 973 1036
4 A 2016 856 870 1042
5 B 2012 887 817 1008
6 B 2013 899 877 1041
7 B 2014 837 914 1009
8 B 2015 824 806 1002
9 B 2016 873 942 1013

猜你喜欢

转载自blog.csdn.net/weixin_48135624/article/details/114195719