import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Sorting a Series: start from ten samples drawn from the standard normal
# distribution, labelled with the default integer index 0..9.
s1 = pd.Series(np.random.randn(10))
s1
Out[5]:
0 -1.293472
1 0.017588
2 -0.654741
3 0.495720
4 -1.626396
5 -0.651238
6 0.776535
7 -0.746762
8 -1.358951
9 0.247930
dtype: float64
# Sort by value. Ascending is the default order; it is spelled out here so
# the contrast with the descending example is explicit. The original index
# labels travel with their values.
s2 = s1.sort_values(ascending=True)
s2
Out[10]:
4 -1.626396
8 -1.358951
0 -1.293472
7 -0.746762
2 -0.654741
5 -0.651238
1 0.017588
9 0.247930
3 0.495720
6 0.776535
dtype: float64
# Sort by value in descending order (largest value first).
s2 = s1.sort_values(axis=0, ascending=False)
s2
Out[13]:
6 0.776535
3 0.495720
9 0.247930
1 0.017588
5 -0.651238
2 -0.654741
7 -0.746762
0 -1.293472
8 -1.358951
4 -1.626396
dtype: float64
# Sort by index label instead of by value; for descending order, pass
# ascending=False exactly as with sort_values.
s2.sort_index(ascending=True)
Out[14]:
0 -1.293472
1 0.017588
2 -0.654741
3 0.495720
4 -1.626396
5 -0.651238
6 0.776535
7 -0.746762
8 -1.358951
9 0.247930
dtype: float64
# Sorting a DataFrame: build an 8-row by 5-column frame of standard-normal
# samples with columns labelled A through E.
df1 = pd.DataFrame(np.random.randn(8, 5), columns=list('ABCDE'))
df1
Out[17]:
A B C D E
0 -0.364749 -2.234539 0.560983 -0.205768 -0.685511
1 1.500545 0.669751 -0.810748 -1.499093 -0.369835
2 0.894716 -0.282788 0.293292 1.260618 -0.107138
3 -0.262395 1.970482 1.268629 -0.626314 -0.726878
4 -1.756154 0.471681 -0.204594 -0.978793 -2.082535
5 0.476344 0.588654 -0.303897 1.863167 -1.466623
6 -1.704993 -0.136662 -0.034966 0.159871 -0.848923
7 1.117809 0.548713 -1.713026 1.153380 -1.529988
# Pull out a single column (a Series named 'A') and sort just that column
# by value; the other columns are not part of the result.
df1.loc[:, 'A'].sort_values()
Out[18]:
4 -1.756154
6 -1.704993
0 -0.364749
3 -0.262395
5 0.476344
2 0.894716
7 1.117809
1 1.500545
Name: A, dtype: float64
# Sort the whole DataFrame by column A: entire rows are reordered so that
# the values in A are ascending.
df1.sort_values(by='A')
Out[19]:
A B C D E
4 -1.756154 0.471681 -0.204594 -0.978793 -2.082535
6 -1.704993 -0.136662 -0.034966 0.159871 -0.848923
0 -0.364749 -2.234539 0.560983 -0.205768 -0.685511
3 -0.262395 1.970482 1.268629 -0.626314 -0.726878
5 0.476344 0.588654 -0.303897 1.863167 -1.466623
2 0.894716 -0.282788 0.293292 1.260618 -0.107138
7 1.117809 0.548713 -1.713026 1.153380 -1.529988
1 1.500545 0.669751 -0.810748 -1.499093 -0.369835
# Same row sort on column A, but in descending order; keep the result as df2.
df2 = df1.sort_values(by='A', ascending=False)
df2
Out[22]:
A B C D E
1 1.500545 0.669751 -0.810748 -1.499093 -0.369835
7 1.117809 0.548713 -1.713026 1.153380 -1.529988
2 0.894716 -0.282788 0.293292 1.260618 -0.107138
5 0.476344 0.588654 -0.303897 1.863167 -1.466623
3 -0.262395 1.970482 1.268629 -0.626314 -0.726878
0 -0.364749 -2.234539 0.560983 -0.205768 -0.685511
6 -1.704993 -0.136662 -0.034966 0.159871 -0.848923
4 -1.756154 0.471681 -0.204594 -0.978793 -2.082535
# Sort the rows by index label (axis=0), which restores the original
# 0..7 row order after the sort on column A.
df2.sort_index(axis=0)
Out[23]:
A B C D E
0 -0.364749 -2.234539 0.560983 -0.205768 -0.685511
1 1.500545 0.669751 -0.810748 -1.499093 -0.369835
2 0.894716 -0.282788 0.293292 1.260618 -0.107138
3 -0.262395 1.970482 1.268629 -0.626314 -0.726878
4 -1.756154 0.471681 -0.204594 -0.978793 -2.082535
5 0.476344 0.588654 -0.303897 1.863167 -1.466623
6 -1.704993 -0.136662 -0.034966 0.159871 -0.848923
7 1.117809 0.548713 -1.713026 1.153380 -1.529988
# A small worked example: rank the movies in movie_metadata.csv by IMDb score.
# The `with` block closes the file handle as soon as the CSV has been parsed;
# previously the handle returned by open() was never closed.
with open('movie_metadata.csv') as f:
    movie = pd.read_csv(f)
# Keep only the title, director and score columns, highest score first.
imdb = movie[["movie_title", "director_name", "imdb_score"]].sort_values("imdb_score", ascending=False)
# Write the ranking to imdb.csv. With to_csv's default settings the row
# index is written as the first column.
imdb.to_csv('imdb.csv')
Pandas玩转数据(二) -- Series和DataFrame排序
猜你喜欢
转载自blog.csdn.net/weixin_39778570/article/details/81106289
今日推荐
周排行