Python中把多维数组展开成DataFrame

import numpy as np
import pandas as pd

################# Prepare the data #################
# The integers 1..100 as a flat vector, then arranged as a 2 x 5 x 10 cube.
a1 = np.arange(1, 101)
a3 = np.reshape(a1, (2, 5, 10))
a3
'''
array([[[  1,   2,   3,   4,   5,   6,   7,   8,   9,  10],
        [ 11,  12,  13,  14,  15,  16,  17,  18,  19,  20],
        [ 21,  22,  23,  24,  25,  26,  27,  28,  29,  30],
        [ 31,  32,  33,  34,  35,  36,  37,  38,  39,  40],
        [ 41,  42,  43,  44,  45,  46,  47,  48,  49,  50]],        
       [[ 51,  52,  53,  54,  55,  56,  57,  58,  59,  60],
        [ 61,  62,  63,  64,  65,  66,  67,  68,  69,  70],
        [ 71,  72,  73,  74,  75,  76,  77,  78,  79,  80],
        [ 81,  82,  83,  84,  85,  86,  87,  88,  89,  90],
        [ 91,  92,  93,  94,  95,  96,  97,  98,  99, 100]]])
'''

################# Prepare the labels #################
def _make_labels(prefix, count):
    """Return a Series holding the labels prefix1, prefix2, ..., prefix<count>."""
    numbers = pd.Series(np.arange(1, count + 1))
    return prefix + numbers.astype(str)


# Labels for dimension 1
index1 = _make_labels('A', 10)
index1
'''
0     A1
1     A2
2     A3
3     A4
4     A5
5     A6
6     A7
7     A8
8     A9
9    A10
'''

# Labels for dimension 2
index2 = _make_labels('B', 5)
index2
'''
0    B1
1    B2
2    B3
3    B4
4    B5
'''

# Labels for dimension 3
index3 = _make_labels('C', 2)
index3
'''
0    C1
1    C2
'''

################# Flatten the data #################
# Unroll the three-dimensional array into a single named column.
value = pd.Series(a3.flatten(), name='value')
value
'''
0       1
1       2
2       3
     ... 
97     98
98     99
99    100
Name: value, Length: 100, dtype: int64
'''

################# 展开标签 #################
import itertools

# Cartesian product of the labels, built directly as a DataFrame.
# NOTE: the higher dimension goes first and the lower dimension last.
prod = pd.DataFrame(
    list(itertools.product(index3, index2, index1)),
    columns=['C', 'B', 'A'],
)
prod.T
'''
   0   1   2   3   4   5   6   7   8    9  ...   90  91  92  93  94  95  96  \
C  C1  C1  C1  C1  C1  C1  C1  C1  C1   C1 ...   C2  C2  C2  C2  C2  C2  C2   
B  B1  B1  B1  B1  B1  B1  B1  B1  B1   B1 ...   B5  B5  B5  B5  B5  B5  B5   
A  A1  A2  A3  A4  A5  A6  A7  A8  A9  A10 ...   A1  A2  A3  A4  A5  A6  A7   
   97  98   99  
C  C2  C2   C2  
B  B5  B5   B5  
A  A8  A9  A10  
[3 rows x 100 columns]
'''

################# Final result #################
# Place the label columns and the value column side by side in one DataFrame.
pd.concat(objs=[prod, value], axis='columns')
'''
     C   B    A  value
0   C1  B1   A1      1
1   C1  B1   A2      2
2   C1  B1   A3      3
3   C1  B1   A4      4
4   C1  B1   A5      5
5   C1  B1   A6      6
6   C1  B1   A7      7
7   C1  B1   A8      8
8   C1  B1   A9      9
9   C1  B1  A10     10
10  C1  B2   A1     11
11  C1  B2   A2     12
12  C1  B2   A3     13
13  C1  B2   A4     14
14  C1  B2   A5     15
15  C1  B2   A6     16
16  C1  B2   A7     17
17  C1  B2   A8     18
18  C1  B2   A9     19
19  C1  B2  A10     20
20  C1  B3   A1     21
21  C1  B3   A2     22
22  C1  B3   A3     23
23  C1  B3   A4     24
24  C1  B3   A5     25
25  C1  B3   A6     26
26  C1  B3   A7     27
27  C1  B3   A8     28
28  C1  B3   A9     29
29  C1  B3  A10     30
..  ..  ..  ...    ...
70  C2  B3   A1     71
71  C2  B3   A2     72
72  C2  B3   A3     73
73  C2  B3   A4     74
74  C2  B3   A5     75
75  C2  B3   A6     76
76  C2  B3   A7     77
77  C2  B3   A8     78
78  C2  B3   A9     79
79  C2  B3  A10     80
80  C2  B4   A1     81
81  C2  B4   A2     82
82  C2  B4   A3     83
83  C2  B4   A4     84
84  C2  B4   A5     85
85  C2  B4   A6     86
86  C2  B4   A7     87
87  C2  B4   A8     88
88  C2  B4   A9     89
89  C2  B4  A10     90
90  C2  B5   A1     91
91  C2  B5   A2     92
92  C2  B5   A3     93
93  C2  B5   A4     94
94  C2  B5   A5     95
95  C2  B5   A6     96
96  C2  B5   A7     97
97  C2  B5   A8     98
98  C2  B5   A9     99
99  C2  B5  A10    100
[100 rows x 4 columns]
'''



猜你喜欢

转载自blog.csdn.net/lyghe/article/details/80827743
今日推荐