# Python数据分析(九):pandas层次化索引

# -*- coding: utf-8 -*-
import pandas as pd

from pandas import Series,DataFrame

import numpy as np
# ---------------------------------------------------------------------------
# Hierarchical (multi-level) indexing on a Series
# ---------------------------------------------------------------------------
# Passing a list of two parallel label lists as the index yields a two-level
# index: the first list is the outer level, the second the inner level.
# NOTE: the values are unseeded random numbers, so every sample output shown
# below is illustrative only and will differ from run to run.
outer_labels = ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd']
inner_labels = [1, 2, 3, 1, 2, 3, 1, 2, 6, 2]
data = Series(np.random.randn(10), index=[outer_labels, inner_labels])
print(data)
# a  1   -0.130178
#    2    0.137005
#    3    1.941608
# b  1    0.768033
#    2    0.739661
#    3   -0.227032
# c  1   -0.106498
#    2    0.158814
# d  6   -0.291577
#    2    0.170918

# Partial indexing: a single outer label returns the inner-level sub-Series.
print(data['a'])
# 1   -0.322706
# 2    0.474766
# 3   -1.054212
# dtype: float64

# A label slice on the outer level is inclusive of both endpoints.
print(data['a':'c'])
# a  1    0.080364
#    2    0.216486
#    3   -0.038789
# b  1    1.365482
#    2   -0.283908
#    3    1.202427
# c  1    1.054704
#    2    0.120473

# A list of outer labels picks exactly those groups.
print(data[['a', 'c']])
# a  1    1.753260
#    2   -0.169620
#    3   -0.263574
# c  1   -0.840767
#    2   -0.901693

# Select on the inner level: every outer group, inner label 2.
print(data[:, 2])
# a    0.137005
# b    0.739661
# c    0.158814
# d    0.170918

# Pivot the inner level into columns to get a table; combinations that do
# not exist in the index show up as NaN.
table = data.unstack()
print(table)
#           1         2         3         6
# a -0.130178  0.137005  1.941608       NaN
# b  0.768033  0.739661 -0.227032       NaN
# c -0.106498  0.158814       NaN       NaN
# d       NaN  0.170918       NaN -0.291577

# stack() is the inverse of unstack(): columns fold back into the inner level.
print(table.stack())
# a  1   -0.130178
#    2    0.137005
#    3    1.941608
# b  1    0.768033
#    2    0.739661
#    3   -0.227032
# c  1   -0.106498
#    2    0.158814
# d  2    0.170918
#    6   -0.291577


# ---------------------------------------------------------------------------
# Hierarchical indexing on a DataFrame (both rows and columns)
# ---------------------------------------------------------------------------
# Rows are keyed by (letter, digit-string) pairs and columns by (name, sex)
# pairs. Note the inner row labels are the strings '1' and '2', not integers.
data = DataFrame(
    np.arange(12).reshape(4, 3),
    index=[['a', 'a', 'b', 'c'], ['1', '2', '1', '2']],
    columns=[['hhb', 'hhb', 'zjx'], ['man', 'men', 'man']],
)
print(data)
#     hhb     zjx
#     man men man
# a 1   0   1   2
#   2   3   4   5
# b 1   6   7   8
# c 2   9  10  11

# Give every index level a name so later calls can refer to levels by name.
data.index.names = ['str', 'num']
data.columns.names = ['name', 'sex']
print(data)
# name    hhb     zjx
# sex     man men man
# str num
# a   1     0   1   2
#     2     3   4   5
# b   1     6   7   8
# c   2     9  10  11

# Partial column indexing: pick every column under the outer label 'hhb'.
print(data['hhb'])
# sex      man  men
# str num
# a   1      0    1
#     2      3    4
# b   1      6    7
# c   2      9   10

# Transposing swaps the row and column hierarchies.
print(data.T)
# str       a     b   c
# num       1  2  1   2
# name sex
# hhb  man  0  3  6   9
#      men  1  4  7  10
# zjx  man  2  5  8  11

# ---------------------------------------------------------------------------
# Reordering levels
# ---------------------------------------------------------------------------
# swaplevel exchanges two levels but leaves the row order untouched.
print(data.swaplevel('str', 'num'))
# name    hhb     zjx
# sex     man men man
# num str
# 1   a     0   1   2
# 2   a     3   4   5
# 1   b     6   7   8
# 2   c     9  10  11

# Sort the rows by the values of one level. DataFrame.sortlevel() was removed
# in pandas 1.0; sort_index(level=...) is the supported replacement.
sorted_by_num = data.sort_index(level=1)
print(sorted_by_num)
# name    hhb     zjx
# sex     man men man
# str num
# a   1     0   1   2
# b   1     6   7   8
# a   2     3   4   5
# c   2     9  10  11

# Swap first, then sort on the new outermost level so equal keys are grouped.
swapped_sorted = data.swaplevel(0, 1).sort_index(level=0)
print(swapped_sorted)
# name    hhb     zjx
# sex     man men man
# num str
# 1   a     0   1   2
#     b     6   7   8
# 2   a     3   4   5
#     c     9  10  11

# ---------------------------------------------------------------------------
# Summary statistics by level
# ---------------------------------------------------------------------------
# DataFrame.sum(level=...) was removed in pandas 2.0; group on the level and
# aggregate instead.
by_num = data.groupby(level='num').sum()
print(by_num)
# name hhb     zjx
# sex  man men man
# num
# 1      6   8  10
# 2     12  14  16

# Aggregating over a column level: transpose so the column level becomes a
# row level, group on it, then transpose back. This avoids the deprecated
# groupby(axis=1) form.
by_sex = data.T.groupby(level='sex').sum().T
print(by_sex)
# sex      man  men
# str num
# a   1      2    1
#     2      8    4
# b   1     14    7
# c   2     20   10

# ---------------------------------------------------------------------------
# Using DataFrame columns as the (hierarchical) row index
# ---------------------------------------------------------------------------
frame = DataFrame({
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one', 'one', 'one', 'two', 'two', 'two', 'two'],
    'd': [0, 1, 2, 0, 1, 2, 3],
})
print(frame)
#    a  b    c  d
# 0  0  7  one  0
# 1  1  6  one  1
# 2  2  5  one  2
# 3  3  4  two  0
# 4  4  3  two  1
# 5  5  2  two  2
# 6  6  1  two  3

# set_index turns one or more columns into row-index levels and returns a
# new DataFrame; by default those columns are removed from the data.
key_columns = ['c', 'd']
data = frame.set_index(key_columns)
print(data)
#        a  b
# c   d
# one 0  0  7
#     1  1  6
#     2  2  5
# two 0  3  4
#     1  4  3
#     2  5  2
#     3  6  1

# drop=False keeps the key columns in the data as well as in the index.
data2 = frame.set_index(key_columns, drop=False)
print(data2)
#        a  b    c  d
# c   d
# one 0  0  7  one  0
#     1  1  6  one  1
#     2  2  5  one  2
# two 0  3  4  two  0
#     1  4  3  two  1
#     2  5  2  two  2
#     3  6  1  two  3

# reset_index is the inverse: the index levels move back into ordinary
# columns (placed first) and a default integer index is restored.
frame = data.reset_index()
print(frame)
#      c  d  a  b
# 0  one  0  0  7
# 1  one  1  1  6
# 2  one  2  2  5
# 3  two  0  3  4
# 4  two  1  4  3
# 5  two  2  5  2
# 6  two  3  6  1

# Disabled example kept from the original notes: fetching Apple stock data
# with pandas_datareader. NOTE: enabling the first import would shadow the
# `data` variable defined above.
# from pandas_datareader import data, wb
#
# import pandas_datareader as pdr
#
# print(pdr.get_data_yahoo('AAPL'))

# 猜你喜欢

# 转载自 blog.csdn.net/qq_38788128/article/details/80695563