2. pandas学习笔记DataFrame入门

版权声明:转载请注明出处 https://blog.csdn.net/deephacking/article/details/82563422
import numpy as np
import pandas as pd

# Load the grade table from a CSV file located in the working directory.
grades_info = pd.read_csv('Grade.csv')

# Show the container type (read_csv returns a DataFrame), then the dtype
# of every column.
print(type(grades_info), grades_info.dtypes, sep="\n")

# Preview the top of the table: head() defaults to 5 rows, then ask for 10.
print(grades_info.head())
print(grades_info.head(n=10))

# Keep the first 5 rows under their own name and display them.
grades_head = grades_info.head(n=5)
print(grades_head)

# Keep the last 10 rows under their own name and display them.
grades_tail = grades_info.tail(n=10)
print(grades_tail)

# Column labels, followed by the (row count, column count) shape tuple.
print(grades_info.columns, grades_info.shape, sep="\n")


# Row selection by index label via .loc
# A single label (0) selects one row; print the type of what comes back.
data0 = grades_info.loc[0]
print(type(data0))

# A label slice with .loc includes BOTH endpoints: labels 2 through 8.
data1 = grades_info.loc[2:8]
# A list of labels selects exactly the rows labelled 3, 5 and 7.
data2 = grades_info.loc[[3, 5, 7]]

print(data1)
print(data2)

# Column selection
# One column label gives a single column; a list of labels gives a sub-table.
names = grades_info["name"]
data3 = grades_info[["name", "patient ID"]]

print(names)
print(data3)

# Materialise the column labels as a plain Python list and show them.
columns_name = grades_info.columns.tolist()
print(columns_name)

# Keep only the columns whose label ends with ")" and print just those.
newcolumns = [label for label in columns_name if label.endswith(")")]
print(grades_info[newcolumns])

# Element-wise arithmetic on a column: add 6 to every "patient ID" value.
patient_new_ID = grades_info["patient ID"] + 6
print(patient_new_ID)

# Store the result as a new column, then list all column labels.
grades_info["patient new ID"] = patient_new_ID
print(grades_info.columns.tolist())

# Largest value found in the "patient ID" column.
max_patient_ID = grades_info["patient ID"].max()
print(max_patient_ID)

# Scale the column by dividing every value by that maximum.
normalized_patient_ID = grades_info["patient ID"] / max_patient_ID
print(normalized_patient_ID)

# Data preprocessing with pandas
# Sort the rows by "patient ID" from largest to smallest (ascending=False).
# inplace=True reorders grades_info itself rather than returning a new table.
grades_info.sort_values(by="patient ID", ascending=False, inplace=True)
print(grades_info)



猜你喜欢

转载自blog.csdn.net/deephacking/article/details/82563422