Pandas data selection

loc

loc selects rows and columns by label, i.e. by index name and column name. It takes two arguments: the first selects rows by index label, and the second selects columns by column name.

import pandas as pd
import numpy as np

# Divider printed between the individual examples.
SEPARATOR = "--------------------"

# Fix the seed so that every run produces the same test data.
np.random.seed(147258)

# Five dates starting at 2020-01-01, used as the row index.
dates = pd.date_range(start='20200101', periods=5)
# 5x4 two-dimensional array of integers in the range [0, 20).
data = np.random.randint(low=0, high=20, size=(5, 4))

df = pd.DataFrame(data=data, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
print(SEPARATOR)

# Row labelled 20200101, columns A and B.
first_day_ab = df.loc['20200101', ['A', 'B']]
print(first_day_ab)
print(SEPARATOR)

# All rows, columns A and C.
all_rows_ac = df.loc[:, ['A', 'C']]
print(all_rows_ac)
print(SEPARATOR)

# Rows labelled 20200101 through 20200103, columns A through C
# (label slices include both endpoints).
first_three_days_a_to_c = df.loc['20200101':'20200103', 'A':'C']
print(first_three_days_a_to_c)
print(SEPARATOR)

# Rows labelled 20200101 through 20200103, all columns.
first_three_days = df.loc['20200101':'20200103', :]
print(first_three_days)
print(SEPARATOR)

# A single value: the given row label and the given column label.
first_day_a = df.loc['20200101', 'A']
print(first_day_a)

place

iloc

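iloc works like loc, but it selects by integer position instead of by label. Positions start at 0. Note that an iloc slice excludes its end position (0:2 selects positions 0 and 1), whereas a loc slice includes both endpoint labels.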

import pandas as pd
import numpy as np

# Divider printed between the individual examples.
SEPARATOR = "--------------------"

# Fix the seed so that every run produces the same test data.
np.random.seed(147258)

# Five dates starting at 2020-01-01, used as the row index.
dates = pd.date_range(start='20200101', periods=5)
# 5x4 two-dimensional array of integers in the range [0, 20).
data = np.random.randint(low=0, high=20, size=(5, 4))

df = pd.DataFrame(data=data, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
print(SEPARATOR)

# The first row.
first_row = df.iloc[0]
print(first_row)
print(SEPARATOR)

# All rows, first two columns.
first_two_columns = df.iloc[:, 0:2]
print(first_two_columns)
print(SEPARATOR)

# First two rows, all columns.
first_two_rows = df.iloc[0:2, :]
print(first_two_rows)
print(SEPARATOR)

# Rows 2 through 4 (positions 1-3), first two columns (positions 0-1);
# the end position of a positional slice is excluded.
middle_rows_first_two_columns = df.iloc[1:4, 0:2]
print(middle_rows_first_two_columns)
print(SEPARATOR)

# Rows 2, 3 and 5 together with columns 2 and 4, given as position lists.
picked_rows_and_columns = df.iloc[[1, 2, 4], [1,3]]
print(picked_rows_and_columns)
print(SEPARATOR)

# A single value: first row, first column.
top_left_value = df.iloc[0, 0]
print(top_left_value)

iat and at

at and iat select the single value at a specified row and column: at selects by label, while iat selects by integer position.

import pandas as pd
import numpy as np

# Divider printed between the table and the scalar lookups.
SEPARATOR = "--------------------"

# Fix the seed so that every run produces the same test data.
np.random.seed(147258)

# Five dates starting at 2020-01-01, used as the row index.
dates = pd.date_range(start='20200101', periods=5)
# 5x4 two-dimensional array of integers in the range [0, 20).
data = np.random.randint(low=0, high=20, size=(5, 4))

df = pd.DataFrame(data=data, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
print(SEPARATOR)

# Scalar lookup by label: row 20200101, column A.
value_by_label = df.at['20200101', 'A']
print(value_by_label)

# The same cell looked up by position: first row, first column.
value_by_position = df.iat[0, 0]
print(value_by_position)

Shorthand

import pandas as pd
import numpy as np

# Divider printed between the individual examples.
SEPARATOR = "--------------------"

# Fix the seed so that every run produces the same test data.
np.random.seed(147258)

# Five dates starting at 2020-01-01, used as the row index.
dates = pd.date_range(start='20200101', periods=5)
# 5x4 two-dimensional array of integers in the range [0, 20).
data = np.random.randint(low=0, high=20, size=(5, 4))

df = pd.DataFrame(data=data, index=dates, columns=['A', 'B', 'C', 'D'])
print(df)
print(SEPARATOR)

# Column B.
column_b = df['B']
print(column_b)
print(SEPARATOR)

# Columns C and D.
columns_c_and_d = df[['C', 'D']]
print(columns_c_and_d)
print(SEPARATOR)

# The first three rows (positional slice).
first_three_rows = df[0:3]
print(first_three_rows)
print(SEPARATOR)

# Rows labelled 20200101 through 20200103 (label slice).
first_three_days = df['20200101':'20200103']
print(first_three_days)

Guess you like

Origin blog.csdn.net/trayvontang/article/details/103787620