版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/made_in_china_too/article/details/82079313
# Series 对象可以理解为带索引(标签)的一维数组:下面输出中左列是索引,右列是对应的值
s = pd.Series([4, 2, 5, 0, 6, 3])
s
0 4
1 2
2 5
3 0
4 6
5 3
dtype: int64
# DataFrame 是带行索引和列标签的二维表格型对象,它的每一行、每一列取出来都是一个 Series
df = pd.DataFrame(np.random.randn(6,4), columns=list('ABCD'))
df
A B C D
0 0.968762 1.501239 -0.284952 -0.456468
1 1.413471 -0.309746 0.407559 1.536548
2 -0.399065 -0.040439 1.339359 -0.318217
3 -0.152205 -0.121888 0.841658 -1.493958
4 0.248414 -0.676985 1.326487 -0.455541
5 0.906221 -2.158694 -0.201354 -0.024769
df.iloc[0]
A 0.968762
B 1.501239
C -0.284952
D -0.456468
Name: 0, dtype: float64
df.A
0 0.968762
1 1.413471
2 -0.399065
3 -0.152205
4 0.248414
5 0.906221
Name: A, dtype: float64
print("Row data type: {}".format(type(df.iloc[0])))
print("Column data type: {}".format(type(df.A)))
Row data type: <class 'pandas.core.series.Series'>
Column data type: <class 'pandas.core.series.Series'>
df.shape
(6, 4)
df.head(3)
A B C D
0 0.968762 1.501239 -0.284952 -0.456468
1 1.413471 -0.309746 0.407559 1.536548
2 -0.399065 -0.040439 1.339359 -0.318217
df.tail(2)
A B C D
4 0.248414 -0.676985 1.326487 -0.455541
5 0.906221 -2.158694 -0.201354 -0.024769
df.columns
Index([u'A', u'B', u'C', u'D'], dtype='object')
df.index
RangeIndex(start=0, stop=6, step=1)
df.describe()
A B C D
count 6.000000 6.000000 6.000000 6.000000
mean 0.497600 -0.301086 0.571459 -0.202068
std 0.709385 1.178177 0.719762 0.986474
min -0.399065 -2.158694 -0.284952 -1.493958
25% -0.052050 -0.585176 -0.049126 -0.456236
50% 0.577317 -0.215817 0.624608 -0.386879
75% 0.953127 -0.060802 1.205280 -0.098131
max 1.413471 1.501239 1.339359 1.536548
df.sort_index(axis=1, ascending=False)
D C B A
0 -0.456468 -0.284952 1.501239 0.968762
1 1.536548 0.407559 -0.309746 1.413471
2 -0.318217 1.339359 -0.040439 -0.399065
3 -1.493958 0.841658 -0.121888 -0.152205
4 -0.455541 1.326487 -0.676985 0.248414
5 -0.024769 -0.201354 -2.158694 0.906221
df.sort_values(by='B')
A B C D
5 0.906221 -2.158694 -0.201354 -0.024769
4 0.248414 -0.676985 1.326487 -0.455541
1 1.413471 -0.309746 0.407559 1.536548
3 -0.152205 -0.121888 0.841658 -1.493958
2 -0.399065 -0.040439 1.339359 -0.318217
0 0.968762 1.501239 -0.284952 -0.456468
df[3:5]
A B C D
3 -0.152205 -0.121888 0.841658 -1.493958
4 0.248414 -0.676985 1.326487 -0.455541
df[['A', 'B', 'D']]
A B D
0 0.968762 1.501239 -0.456468
1 1.413471 -0.309746 1.536548
2 -0.399065 -0.040439 -0.318217
3 -0.152205 -0.121888 -1.493958
4 0.248414 -0.676985 -0.455541
5 0.906221 -2.158694 -0.024769
df.loc[3, 'A']
-0.15220488957687467
df.iloc[3, 0]
-0.15220488957687467
df.iloc[2:5, 0:2]
A B
2 -0.399065 -0.040439
3 -0.152205 -0.121888
4 0.248414 -0.676985
df[df.C > 0]
A B C D
1 1.413471 -0.309746 0.407559 1.536548
2 -0.399065 -0.040439 1.339359 -0.318217
3 -0.152205 -0.121888 0.841658 -1.493958
4 0.248414 -0.676985 1.326487 -0.455541
df["TAG"] = ["cat", "dog", "cat", "cat", "cat", "dog"]
df
A B C D TAG
0 0.968762 1.501239 -0.284952 -0.456468 cat
1 1.413471 -0.309746 0.407559 1.536548 dog
2 -0.399065 -0.040439 1.339359 -0.318217 cat
3 -0.152205 -0.121888 0.841658 -1.493958 cat
4 0.248414 -0.676985 1.326487 -0.455541 cat
5 0.906221 -2.158694 -0.201354 -0.024769 dog
df.groupby('TAG').sum()
A B C D
TAG
cat 0.665906 0.661926 3.222551 -2.724184
dog 2.319691 -2.468440 0.206205 1.511778