Python数据分析六:Series与DataFrame的计算

# -*- coding: utf-8 -*-
import pandas as pd

from pandas import Series,DataFrame

import numpy as np

'''
pandas最重要的一个功能是对不同索引的对象进行算术运算
'''
# Series arithmetic aligns on index labels, not on position. A label that
# exists in only one operand ('c' and 'f' below) yields NaN in the result,
# which is why the result is float64 even though both inputs hold integers.
s1 = Series([1, 2, 3, 4], index=list('abcd'))
s2 = Series([5, 6, 7, 8], index=list('abdf'))
print(s1.add(s2))
# a     6.0
# b     8.0
# c     NaN
# d    11.0
# f     NaN
# dtype: float64

'''
DataFrame的运算
'''
# Two frames whose row labels and column labels only partially overlap:
# they share rows 'Oh'/'Te' and columns 'b'/'d'.
df1 = DataFrame(
    np.arange(9).reshape((3, 3)),
    index=['Oh', 'Te', 'Co'],
    columns=['b', 'c', 'd'],
)
df2 = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=['Ut', 'Oh', 'Te', 'Or'],
    columns=['b', 'd', 'e'],
)
print(df1)
#     b  c  d
# Oh  0  1  2
# Te  3  4  5
# Co  6  7  8
print(df2)
#     b   d   e
# Ut  0   1   2
# Oh  3   4   5
# Te  6   7   8
# Or  9  10  11
'''
DataFrame相加时,只有行标签和列标签都对应的元素才会相加,其余位置为NaN;结果与行、列的排列顺序无关
'''
# The result is indexed by the union of both frames' row and column labels;
# only cells present in BOTH frames get a value, every other cell is NaN.
print(df1.add(df2))
#       b   c     d   e
# Co  NaN NaN   NaN NaN
# Oh  3.0 NaN   6.0 NaN
# Or  NaN NaN   NaN NaN
# Te  9.0 NaN  12.0 NaN
# Ut  NaN NaN   NaN NaN


'''
DataFrame算术运算的填充方法:为缺失的值指定填充值,而不是默认得到NaN
'''
# df1 is 3x4 (columns a-d); df2 is 4x5 (columns a-e). df2 therefore has one
# extra row (3) and one extra column ('e') that df1 lacks.
df1 = DataFrame(
    np.arange(12).reshape((3, 4)),
    columns=['a', 'b', 'c', 'd'],
)
print(df1)
#    a  b   c   d
# 0  0  1   2   3
# 1  4  5   6   7
# 2  8  9  10  11
df2 = DataFrame(
    np.arange(20).reshape((4, 5)),
    columns=['a', 'b', 'c', 'd', 'e'],
)
print(df2)
#     a   b   c   d   e
# 0   0   1   2   3   4
# 1   5   6   7   8   9
# 2  10  11  12  13  14
# 3  15  16  17  18  19

# Plain `+` leaves NaN wherever one of the operands has no value.
print(df1 + df2)
#       a     b     c     d   e
# 0   0.0   2.0   4.0   6.0 NaN
# 1   9.0  11.0  13.0  15.0 NaN
# 2  18.0  20.0  22.0  24.0 NaN
# 3   NaN   NaN   NaN   NaN NaN

'''
设置值
'''
# With fill_value=0, a cell missing from one operand is treated as 0 before
# the addition, so the value from the other operand carries through.
df3 = df1.add(df2, fill_value=0)
print(df3)
#       a     b     c     d     e
# 0   0.0   2.0   4.0   6.0   4.0
# 1   9.0  11.0  13.0  15.0   9.0
# 2  18.0  20.0  22.0  24.0  14.0
# 3  15.0  16.0  17.0  18.0  19.0

# Reindex df1 onto df2's columns; the new column 'e' is filled with 0
# instead of NaN.
df4 = df1.reindex(
    columns=df2.columns,
    fill_value=0,
)
print(df4)
#    a  b   c   d  e
# 0  0  1   2   3  0
# 1  4  5   6   7  0
# 2  8  9  10  11  0

'''
DataFrame和Series的运算
'''

# NumPy broadcasting: subtracting a 1-D row from a 2-D array applies the
# subtraction to every row of the array.
arr = np.arange(12).reshape((3, 4))
print(arr)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]
print(arr[0, :])
# [0 1 2 3]
print(arr - arr[0, :])
# [[0 0 0 0]
#  [4 4 4 4]
#  [8 8 8 8]]


# DataFrame broadcasting: arithmetic between a DataFrame and a Series matches
# the Series' index against the DataFrame's columns by default and applies
# the operation to every row.
frame = DataFrame(
    np.arange(12).reshape(4, 3),
    columns=list('bde'),
    index=['Ut', 'Oh', 'Te', 'Or'],
)
print(frame)
#     b   d   e
# Ut  0   1   2
# Oh  3   4   5
# Te  6   7   8
# Or  9  10  11

# Take the first row by position. The original used `frame.ix[0]`; the `.ix`
# indexer was removed in pandas 1.0, and `.iloc` is the positional replacement.
series = frame.iloc[0]
print(series)
# b    0
# d    1
# e    2
# Name: Ut, dtype: int64
# (the integer dtype follows NumPy's platform default and may print as int32)

print(frame - series)
#     b  d  e
# Ut  0  0  0
# Oh  3  3  3
# Te  6  6  6
# Or  9  9  9

series2 = Series(range(3), index=list('bef'))
print(series2)
# b    0
# e    1
# f    2
# dtype: int64

# Labels found in only one operand ('d' in frame, 'f' in series2) become
# all-NaN columns in the result.
print(series2 + frame)
#       b   d     e   f
# Ut  0.0 NaN   3.0 NaN
# Oh  3.0 NaN   6.0 NaN
# Te  6.0 NaN   9.0 NaN
# Or  9.0 NaN  12.0 NaN


series3 = frame['d']
print(series3)
# Ut     1
# Oh     4
# Te     7
# Or    10
# Name: d, dtype: int64

# axis=0 matches series3 against the row index instead of the columns, so
# column 'd' is subtracted from every column.
print(frame.sub(series3, axis=0))
#     b  d  e
# Ut -1  0  1
# Oh -1  0  1
# Te -1  0  1
# Or -1  0  1

猜你喜欢

转载自blog.csdn.net/qq_38788128/article/details/80652921