Pandas入门学习之一 Series和DataFrame

参考:https://www.jianshu.com/u/ff0242a57145

import pandas as pd
import numpy as np

# 1. Make good use of dir() and help() to explore the pandas API
# print(dir(pd))
# print(help(pd.Series))

# 2. Series
# 2.1 Create from a list. The second argument supplies the labels; as this
#     example shows, labels do not have to be unique.
S_with_user_label = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'a', 'b'])
S_with_default_label = pd.Series([1, 2, 3, 4, 5])
print(S_with_default_label)
print(S_with_user_label)
# 2.2 Create from a dict: without an explicit index the dict keys become the
#     labels; with an index, the Series is built following that index.
S_with_user_label_1 = pd.Series({'a': 1, 'b': 2, 'c': 3})
print(S_with_user_label_1)
# 2.3 Create from a scalar: the same value is repeated for every label.
S_from_scalar = pd.Series(5, index=['a', 'b', 'c'])
print(S_from_scalar)
# 2.4 Operations
# 2.4.1 Slicing and positional access
# rand(1, 5) returns a 2-D array of shape (1, 5); rand(5) would return a
# 1-D array of shape (5,), which is why row 0 is taken below.
random_data = np.random.rand(1, 5)
print(random_data[0])
print(random_data[0, 4])
Series = pd.Series(random_data[0], index=['A', 'B', 'C', 'D', 'E'])
print(Series)
# Positional access goes through .iloc: the labels here are strings, and
# treating integer keys passed to [] as positions is deprecated in pandas.
print(Series.iloc[1])
print(Series.iloc[1:3])
print(f"Max is {Series.max():f}, Min is {Series.min():f}, Median is {Series.median():f}")
print(Series[Series > Series.mean()])  # keep only values above the mean
print(Series.iloc[[3, 1]])  # a list of positions is looked up in list order
# 2.4.2 A Series can be passed directly to NumPy functions
print(np.exp(Series))
print(np.sin(Series))
# 2.4.3 Dict-like access by label
print(Series['A'])  # look up by label
Series['G'] = 10  # assignment: updates the label if present, otherwise adds it
print('B' in Series)  # membership is tested against the labels
Series.at['F'] = -5  # .at can also add a new label
print(Series)
# 2.4.4 Arithmetic (+ - * /) is aligned on labels, so only matching labels
#       combine with each other.
print(Series + Series)
print(Series * 3)

# 3. DataFrame: described by its index, columns and values
# 3.1 Build from a dict whose values mix a labelled Series with plain lists
Data = dict(
    A=pd.Series([1, 2, 3], index=['a', 'b', 'c']),
    B=[4, 5, 6],
    C=[7, 8, 9],
)
DataFra = pd.DataFrame(Data)
print(Data)
print(DataFra)
print(DataFra.index, DataFra.columns, DataFra.values)
# 3.2 Build from a dict of lists plus an explicit row index
Data1 = dict(A=[1, 2, 3], B=[4, 5, 6], C=[7, 8, 9])
DataFra_1 = pd.DataFrame(data=Data1, index=['A1', 'B1', 'C1'])
print(DataFra_1)
# 3.3 Build from a structured np.ndarray plus an explicit row index;
#     the field names of the array become the column names.
Data2 = np.array(
    [(1, 2, 'hello'), (3, 4, 'world'), (5, 6, 'ly')],
    dtype=[("A1", "f4"), ("B1", "f4"), ("C1", "U10")],
)
DataFra_2 = pd.DataFrame(data=Data2, index=['AA1', 'BB1', 'CC1'])
print(DataFra_2)
# 3.4 Understanding dtype codes such as i4, f8, c10, a25, u25
dt = np.dtype('i4')  # look the codes up in the NumPy dtype documentation
# 3.5 Basic operations
print(DataFra_2['A1'])  # [] with a column label selects that column
DataFra_2['D1'] = [7, 8, 'liu']  # add a column by assignment (appended last)
DataFra_2.insert(loc=2, column='AA', value=[11, 22, 33])  # add a column at position 2
print(DataFra_2)
del DataFra_2['D1']  # remove a column; del yields no value
print(DataFra_2)
A = DataFra_2.pop('AA')  # remove a column and get it back as the return value
print(DataFra_2)
# Row lookup: .loc goes by row label, .iloc by row position.
print(DataFra_2.loc['AA1'], DataFra_2.iloc[1], sep='\n')
# Drops the ROW labelled 'CC1' (a row, not a column).
DataFra_2.drop(labels=['CC1'], axis=0, inplace=True)
print(DataFra_2)
# Assigning to a row label that does not exist appends a new row.
# NOTE(review): the new row is labelled 'C1', which is also a column name -
# confirm that 'CC1' was not intended.
DataFra_2.loc['C1'] = [111, 222, '333']
print(DataFra_2)
# 3.6 Common attributes, methods and operations, shown on a .csv file
# 'iris.csv' is a relative path, so it is resolved against the current
# working directory.
data_new = pd.read_csv('iris.csv')
# Print the describe() summary first, then the first and last three rows.
for view in (data_new.describe(), data_new.head(3), data_new.tail(3)):
    print(view)

猜你喜欢

转载自blog.csdn.net/liuyang_1106/article/details/88375764