数据分析(numpy)

导入numpy并查看版本


import numpy as np # 后面遇到的np就代表numpy
np.__version__
'1.13.1'
什么是numpy？ Numpy即Numeric Python，python经过扩展可以支持数组和矩阵类型，并且具有大量的函数可以计算这些数组和矩阵。这些数组一般是多维的，而这个扩展的程序包就是numpy。 【注】numpy是数据分析和机器学习中最基本的工具，后面许多API和工具都是建立在它的基础上，如：pandas、scipy、matplotlib等

一、创建ndarray
numpy中最基础数据结构就是ndarray：即数组

1. 使用np.array()由python list创建

#创建一个列表
data = [1,2,3,4,5,6,7,8,9,0,10,False]

nd1 = np.array(data)
print(nd1)
[ 1  2  3  4  5  6  7  8  9  0 10  0]

print(type(nd1))
<class 'numpy.ndarray'>

data = [1,2,3,5,'hi']

nd2 = np.array(data)
print(nd2)
['1' '2' '3' '5' 'hi']
【注意】1、numpy数组中所有元素的类型都一样；2、如果列表中的元素类型不一样，会按照优先级转化成同一种类型，优先级：str>float>int>boolean

【注意】图片是可以转化成数组的（图片在numpy里面可以用一个三维数组来表示）


import matplotlib.pyplot as plt # 用这个库引入图片



#引入一张图片
m = plt.imread("./source/girl.jpg")

print(m)

type(m)
numpy.ndarray

m.dtype
dtype('uint8')

plt.imshow(m)
<matplotlib.image.AxesImage at 0x27f5573d898>

plt.show()

2. 使用np的routines函数创建
1)np.ones(shape,dtype=None,order='C')

shape参数代表数组的形状，可以是一个整数或一个元组，如shape=5代表创建一个含五个元素的一维数组，shape=(3,4)代表创建一个3x4的二维数组


ones = np.ones((4,5,6),dtype='float')
ones

# 创建一张图片
boy = np.ones((666,666,3),dtype='uint8')
boy

plt.imshow(boy)
plt.show()


boy[::,::,:1] = 0
boy

plt.imshow(boy)
plt.show()
2）np.zeros(shape,dtype="float",order="C")


zeros = np.zeros((345,678,3))
zeros
3）np.full(shape,fill_value,dtype=None)


nd = np.full(12,fill_value=121,dtype='float')
nd
array([ 121.,  121.,  121.,  121.,  121.,  121.,  121.,  121.,  121.,
        121.,  121.,  121.])
4）np.eye(N,M,k=0,dtype='float')


nd = np.eye(6) # 生成一个单位矩阵
nd   # 一个矩阵行数和列数相等，称该矩阵为方阵，
#如果一个方阵对角线上全为1，其他地方全为0，称这个矩阵为单位矩阵，
#所有的矩阵都可以由单位矩阵生成
array([[ 1.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  1.]])
5）np.linspace(start,stop,num=50)


nd = np.linspace(10,100,num=11) # 平均切分把一个区间按照目标个数进行切分
nd  
# 在这里就是把[10,100]平均切分出11个数，即把这个区间划分10段
array([  10.,   19.,   28.,   37.,   46.,   55.,   64.,   73.,   82.,
         91.,  100.])

nd = np.logspace(0,40,11) # 了解
nd
6）np.arange([start,]stop,[step,]dtype=None) "[]"中是可选项


np.arange(20,30,2)
array([20, 22, 24, 26, 28])
7）np.random.randint(low,high=None,size=None,dtype='l')


a = np.random.randint(10,100,100)
a
array([94, 57, 41, 21, 31, 34, 90, 14, 15, 70, 53, 32, 25, 94, 74, 95, 28,
       63, 31, 96, 21, 79, 23, 97, 19, 72, 96, 55, 12, 40, 77, 23, 25, 77,
       24, 45, 80, 99, 98, 59, 54, 48, 26, 31, 86, 96, 41, 34, 65, 97, 38,
       19, 19, 84, 98, 94, 39, 97, 74, 69, 80, 56, 47, 63, 83, 39, 77, 90,
       49, 19, 41, 76, 56, 47, 21, 84, 90, 10, 22, 45, 63, 61, 51, 75, 85,
       72, 58, 77, 88, 40, 40, 62, 47, 48, 42, 59, 17, 96, 16, 44])

a.max()
99

a.min()
10

b = np.random.randint(0,100,size=(5,6))
b
array([[64, 58, 42, 83,  4, 56],
       [16, 52, 63, 86, 70, 13],
       [42, 50,  3, 39, 68, 45],
       [20, 56, 70, 30, 64, 58],
       [34, 32,  2, 76, 87, 66]])

# 生成一个boy的相片
boy = np.random.randint(0,255,size=(480,780,3),dtype="uint8")
boy

plt.imshow(boy)
plt.show()

8）np.random.randn(d0,d1,...,dn) 从第一维度到第n维度生成一个数组，数组中的数字符合标准正态分布


np.random.randn(2,3,4,5) 
9）np.random.normal(loc=0.0,scale=1.0,size=None)


np.random.normal(loc=175,scale=20,size=1000)
10）np.random.random(size=None)


np.random.random(100) # 随机生成一个小数（0-1之间的小数）

im = np.random.random(size=(100,100,3))
plt.imshow(im)
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-7eb1c693d4d4> in <module>()
----> 1 im = np.random.random(size=(100,100,3))
      2 plt.imshow(im)
      3 plt.show()

NameError: name 'np' is not defined

二、ndarray的属性
数组的常用属性：

维度 ndim， 大小 size， 形状 shape， 元素类型 dtype， 每项大小 itemsize， 数据 data


import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
# 使用绘图的魔法指令，将show嵌入到代码中，魔法指令后面不能加注释

girl = plt.imread("./source/meinv.jpg")
girl

tigger = plt.imread("./source/tigger.jpg")
tigger

girl.ndim
3

girl.data
<memory at 0x0000027891E5EE58>

tigger.data
<memory at 0x0000027895A1E048>

girl.size
2066400

tigger.size
2829600

girl.shape
(984, 700, 3)

tigger.shape
(786, 1200, 3)

tigger.dtype
dtype('uint8')

girl.itemsize
1

nd = np.random.random(10)
nd
array([ 0.47649406,  0.4357848 ,  0.2501734 ,  0.69621024,  0.51931843,
        0.67417236,  0.47050849,  0.95578777,  0.85247871,  0.97373996])

nd.itemsize
8

nd = np.array(['1234',1341,10.6889])
nd
array(['1234', '1341', '10.6889'],
      dtype='<U7')

nd.itemsize
28

plt.imshow(girl)
三、ndarray的基本操作
1、索引

#一维数组的索引和列表基本一致
l = [1,2,3,4,5,6,7,8,9]
l[7]

8

nd = np.random.randint(0,100,size=10)
nd
array([52,  5, 53, 87, 47, 95, 37, 37, 81, 32])

nd[4]
47

nd = np.random.randint(0,100,size=(20,8))
nd

nd[1,6] # 访问多维数组的某个元素
29

nd[1]
array([58,  9, 77, 51, 30, 48, 29, 42])
修改数据


nd[2,7] = 88
nd

nd[1] = 88
nd

nd[1] = np.random.randint(0,10,size=8)
nd


2、切片

nd = np.random.randint(0,200,size=(10,6))
nd
array([[148, 159,  64, 118,  95,  67],
       [141, 136, 177, 161, 108, 193],
       [171,   4, 162, 116,  56,  57],
       [102, 145, 133,  29, 124,  16],
       [185, 164, 102, 159, 117,  74],
       [155,  82,  58,  93, 139, 139],
       [ 17, 186,   2,  27,  30,  12],
       [ 79, 154, 152, 109, 112, 167],
       [193,  87,  40,  38,  36, 184],
       [114, 180, 120,   4,  99, 195]])

nd[3:]  
array([[102, 145, 133,  29, 124,  16],
       [185, 164, 102, 159, 117,  74],
       [155,  82,  58,  93, 139, 139],
       [ 17, 186,   2,  27,  30,  12],
       [ 79, 154, 152, 109, 112, 167],
       [193,  87,  40,  38,  36, 184],
       [114, 180, 120,   4,  99, 195]])

nd[:-7]
array([[148, 159,  64, 118,  95,  67],
       [141, 136, 177, 161, 108, 193],
       [171,   4, 162, 116,  56,  57]])

nd[:2,1:3]
array([[159,  64],
       [136, 177]])
切片赋值


nd[1:4,4:7] = np.random.randint(0,10,size=(3,1))
nd 
# ndarray的广播机制，如果赋值的时候数据不够，numpy会自行复制
array([[148, 159,  64, 118,  95,  67],
       [141, 136, 177, 161,   6,   6],
       [171,   4, 162, 116,   1,   1],
       [102, 145, 133,  29,   5,   5],
       [185, 164, 102, 159, 117,  74],
       [155,  82,  58,  93, 139, 139],
       [ 17, 186,   2,  27,  30,  12],
       [ 79, 154, 152, 109, 112, 167],
       [193,  87,  40,  38,  36, 184],
       [114, 180, 120,   4,  99, 195]])
数据的翻转 数据翻转的实质就是，把步长变成负的；在生成一个新的数组时，根据步长从后开始往前数


nd1 = np.random.randint(0,100,size=10)
nd1
array([13, 96, 63, 90,  0, 66, 13, 16, 61, 94])

nd1[::-1]
array([94, 61, 16, 13, 66,  0, 90, 63, 96, 13])

nd1[::-2]
array([94, 16, 66, 90, 96])

nd = np.random.randint(0,100,size=(4,4))
nd
array([[18, 33, 47, 50],
       [ 5, 62, 81, 66],
       [58, 17, 43, 23],
       [97, 79, 93, 45]])

nd[::-1]
array([[97, 79, 93, 45],
       [58, 17, 43, 23],
       [ 5, 62, 81, 66],
       [18, 33, 47, 50]])

nd[::1,::-1]
array([[50, 47, 33, 18],
       [66, 81, 62,  5],
       [23, 43, 17, 58],
       [45, 93, 79, 97]])

nd[::-1,::-1]
array([[45, 93, 79, 97],
       [23, 43, 17, 58],
       [66, 81, 62,  5],
       [50, 47, 33, 18]])

girl1 = girl[::3,::-3,::-1]

plt.imshow(girl1)
<matplotlib.image.AxesImage at 0x278960e4908>





3、变形

plt.imshow(tigger)
1) 改变数据成分


tigger_f = tigger / 255.0
tigger_f

plt.imshow(tigger_f)
<matplotlib.image.AxesImage at 0x27896691c50>


girl2 = plt.imread("./source/girl2.jpg")
girl2.shape
(300, 300, 3)

tigger[300:600,200:500] = girl2

plt.imshow(tigger)
<matplotlib.image.AxesImage at 0x278966faa58>

2）改变数据的形状


nd = np.random.randint(0,100,size=(6,6))
nd

nd.ravel() # 将一个数组降维（即多维变成一维）

nd.resize(1,1,2,3,6)
nd

nd.shape
(1, 1, 2, 3, 6)

nd.reshape(6,6)
array([[37, 93, 94, 94, 91, 98],
       [63,  9,  8, 74, 20, 51],
       [ 4, 21, 43, 65,  5, 30],
       [64, 89, 29, 15, 39, 14],
       [56, 67, 11, 87, 32,  1],
       [11, 38, 51, 95, 84, 45]])

nd
array([[[[[37, 93, 94, 94, 91, 98],
          [63,  9,  8, 74, 20, 51],
          [ 4, 21, 43, 65,  5, 30]],

         [[64, 89, 29, 15, 39, 14],
          [56, 67, 11, 87, 32,  1],
          [11, 38, 51, 95, 84, 45]]]]])
【注意】在改变数据的形状时有resize和reshape两个方法

resize方法：直接在原来的数组上就地修改形状，原数组本身会被改变
reshape方法：不修改原数组，而是按照新的形状返回一个新的数组对象，原来的数组本身形状不变








4、级联

nd1 = np.random.randint(0,100,size=(4,6))
nd2 = np.random.randint(0,100,size=(4,3))
print(nd1)
print(nd2)
[[81 53 10  5 51 78]
 [37 22 19 81 86 26]
 [ 1 64 98 23 87 50]
 [54 71 82 72 37 28]]
[[31 75 80]
 [83 95 58]
 [57 13  2]
 [26 74 36]]

np.concatenate([nd1,nd2],axis=1)
array([[81, 53, 10,  5, 51, 78, 31, 75, 80],
       [37, 22, 19, 81, 86, 26, 83, 95, 58],
       [ 1, 64, 98, 23, 87, 50, 57, 13,  2],
       [54, 71, 82, 72, 37, 28, 26, 74, 36]])

np.concatenate([nd1,nd2],axis=0) 
# 这样不能级联，两个数组的列数不一样，不能参与行级联

nd3 = np.random.randint(0,100,size=(6,6))
nd3
array([[32, 65, 93, 31, 58, 69],
       [33, 48, 86, 98, 28,  1],
       [ 7, 97, 34, 82, 48, 93],
       [41,  1, 30,  4,  7, 22],
       [33, 85, 88, 26, 93, 82],
       [15, 99, 93, 30, 26, 76]])

np.concatenate([nd1,nd3],axis=0)
array([[81, 53, 10,  5, 51, 78],
       [37, 22, 19, 81, 86, 26],
       [ 1, 64, 98, 23, 87, 50],
       [54, 71, 82, 72, 37, 28],
       [98, 80, 30, 79, 51, 27],
       [52, 24, 86,  4, 80, 96],
       [23, 37, 14, 38, 34,  7],
       [ 3, 52, 93, 38, 73, 83],
       [33, 86, 45, 63, 85, 79],
       [61, 89, 40, 48,  1, 34]])
axis代表参与级联的维度，在这里0代表行参与级联，1代表列参与级联 【注意】如果axis=0两个数组列数必须一致，如果axis=1两个数组的行数必须一样


np.hstack(nd3)
array([32, 65, 93, 31, 58, 69, 33, 48, 86, 98, 28,  1,  7, 97, 34, 82, 48,
       93, 41,  1, 30,  4,  7, 22, 33, 85, 88, 26, 93, 82, 15, 99, 93, 30,
       26, 76])

np.vstack(nd3)
array([[32, 65, 93, 31, 58, 69],
       [33, 48, 86, 98, 28,  1],
       [ 7, 97, 34, 82, 48, 93],
       [41,  1, 30,  4,  7, 22],
       [33, 85, 88, 26, 93, 82],
       [15, 99, 93, 30, 26, 76]])

nd4 = np.random.randint(0,100,size=(10,1))
nd4

nd5 = np.hstack(nd4)
nd5
array([45, 42, 18, 38, 27, 58, 23,  4, 31, 41])

np.vstack(nd5)
5、切分

nd = np.random.randint(20,50,size=(5,6))
nd
array([[42, 37, 42, 40, 47, 36],
       [42, 28, 35, 22, 49, 25],
       [40, 30, 41, 22, 20, 39],
       [29, 44, 33, 31, 39, 41],
       [36, 49, 40, 37, 31, 48]])

np.vsplit(nd,(2,5))

np.hsplit(nd,(1,3,4))
[array([[42],
        [42],
        [40],
        [29],
        [36]]), array([[37, 42],
        [28, 35],
        [30, 41],
        [44, 33],
        [49, 40]]), array([[40],
        [22],
        [22],
        [31],
        [37]]), array([[47, 36],
        [49, 25],
        [20, 39],
        [39, 41],
        [31, 48]])]

np.split(nd,[3])
[array([[42, 37, 42, 40, 47, 36],
        [42, 28, 35, 22, 49, 25],
        [40, 30, 41, 22, 20, 39]]), array([[29, 44, 33, 31, 39, 41],
        [36, 49, 40, 37, 31, 48]])]

np.split(nd,[3],axis=1)
[array([[42, 37, 42],
        [42, 28, 35],
        [40, 30, 41],
        [29, 44, 33],
        [36, 49, 40]]), array([[40, 47, 36],
        [22, 49, 25],
        [22, 20, 39],
        [31, 39, 41],
        [37, 31, 48]])]
vsplit(nd,(x,y))：沿第0轴（行方向）切分，即在行下标x和y处把nd数组切成上下几块；hsplit(nd,(x,y))：沿第1轴（列方向）切分，即在列下标x和y处把nd数组切成左右几块



6、副本
所有的赋值运算不会为ndarray创建任何元素的副本。对赋值以后的对象进行操作，也就是对原来的对象进行操作，因为两个变量名指向的是同一个对象（这里其实没有发生任何拷贝，这种情况常被不严格地称为浅拷贝）


m = np.random.randint(0,10,size=6)
m
array([5, 0, 2, 3, 8, 5])

nd = m
nd
array([5, 0, 2, 3, 8, 5])

nd[5] = 1000
nd
array([   5,    0,    2,    3,    8, 1000])

m
array([   5,    0,    2,    3,    8, 1000])
可以使用copy()创建副本


nd_cp = nd.copy()
nd_cp
array([   5,    0,    2,    3,    8, 1000])

nd_cp[5] = 12345
nd_cp
array([    5,     0,     2,     3,     8, 12345])

nd
array([   5,    0,    2,    3,    8, 1000])

l = [1,2,3,4,123,124]

nd_l = np.array(l)
nd_l
array([  1,   2,   3,   4, 123, 124])

nd_l[0] = 10
nd_l
array([ 10,   2,   3,   4, 123, 124])

l
[1, 2, 3, 4, 123, 124]
np.array()函数是对l进行拷贝以后放入了数组中

四、ndarray的聚合操作
1、求和

nd = np.random.randint(0,10,size=(6,6))
nd
array([[6, 1, 7, 1, 4, 7],
       [1, 4, 0, 1, 5, 3],
       [8, 4, 4, 2, 2, 9],
       [6, 4, 8, 2, 9, 0],
       [4, 5, 7, 5, 4, 8],
       [2, 8, 2, 1, 2, 3]])

nd.sum(axis=1)
array([26, 14, 29, 29, 33, 18])
2、最值

nd.max()
9

nd.min()
0

nd.argmax(axis=0)  # 出现的第一个最大值的下标（按照维度线性增加）
24

nd.argmin()
8


3、其他聚合操作

np.power(nd,1) # 给每一个元素求乘方

nd.mean()
51.456249999999997

nd.sum(axis=(0,1))
149


思考题：给定一个4维矩阵，如何得到最后两维的和？

nd = np.random.randint(0,9,size=(2,3,4,5))
nd
array([[[[6, 1, 2, 5, 1],
         [5, 2, 0, 2, 8],
         [6, 0, 6, 3, 6],
         [5, 2, 8, 0, 3]],

        [[4, 2, 6, 8, 1],
         [5, 4, 4, 5, 4],
         [7, 8, 1, 6, 4],
         [4, 0, 5, 2, 3]],

        [[0, 2, 1, 7, 8],
         [6, 5, 2, 8, 2],
         [7, 4, 6, 4, 3],
         [4, 1, 6, 5, 5]]],


       [[[8, 2, 1, 3, 5],
         [4, 2, 0, 6, 7],
         [0, 7, 2, 1, 6],
         [3, 2, 8, 2, 2]],

        [[4, 8, 3, 5, 6],
         [6, 5, 1, 0, 6],
         [1, 0, 8, 0, 0],
         [1, 8, 7, 0, 2]],

        [[5, 5, 7, 6, 1],
         [2, 3, 6, 3, 2],
         [5, 3, 2, 7, 3],
         [5, 3, 4, 0, 7]]]])

nd.sum(axis=(2,3))
array([[71, 83, 86],
       [71, 71, 79]])
思考题：如何根据第3列来对一个5*5矩阵排序？

nd = np.random.randint(0,100,size=(5,5))
nd
array([[80, 43, 90, 36, 24],
       [99, 87, 80,  1, 29],
       [99, 23, 89, 73, 84],
       [26, 99, 68, 44, 33],
       [14, 63, 31, 48,  6]])

nd.argmin(axis=1)
array([4, 3, 1, 0, 4], dtype=int64)

nd[::,3]
array([36,  1, 73, 44, 48])

np.sort(nd[::,3])
array([ 1, 36, 44, 48, 73])

sort_index = np.argsort(nd[::,3])
sort_index
array([1, 0, 3, 4, 2], dtype=int64)

nd[[0,1,3]]
array([[80, 43, 90, 36, 24],
       [99, 87, 80,  1, 29],
       [26, 99, 68, 44, 33]])

nd[[3,1,0]]
array([[26, 99, 68, 44, 33],
       [99, 87, 80,  1, 29],
       [80, 43, 90, 36, 24]])

nd[sort_index]
array([[99, 87, 80,  1, 29],
       [80, 43, 90, 36, 24],
       [26, 99, 68, 44, 33],
       [14, 63, 31, 48,  6],
       [99, 23, 89, 73, 84]])
思路

# 1、如何取到第三列？
nd[1] # 取行
nd[::,1] # 取列（行需要从头到尾全取）
nd[::,3]
array([36,  1, 73, 44, 48])

# 能不能对第三列排序？排序的时候不要数值本身，而要它们的下标
nd.argmin(axis=1)
dt = np.sort(nd[::,3])
index = np.argsort(nd[::,3])
index
array([1, 0, 3, 4, 2], dtype=int64)

# 如何取多行
nd[[1,2,3]]
# 根据上面获取到的排序以后的行下标来取
nd[index]
array([[99, 87, 80,  1, 29],
       [80, 43, 90, 36, 24],
       [26, 99, 68, 44, 33],
       [14, 63, 31, 48,  6],
       [99, 23, 89, 73, 84]])
五、ndarray的矩阵操作
1. 基本矩阵操作
1）算术运算（即加减乘除）
即矩阵和数字之间的运算，把矩阵中所有的元素都和数字进行运算


nd = np.random.randint(0,10,size=(3,3))
nd
array([[5, 1, 7],
       [5, 2, 9],
       [4, 9, 7]])

nd + 3
array([[ 8,  4, 10],
       [ 8,  5, 12],
       [ 7, 12, 10]])

nd - 10
array([[-5, -9, -3],
       [-5, -8, -1],
       [-6, -1, -3]])

nd * 2
array([[10,  2, 14],
       [10,  4, 18],
       [ 8, 18, 14]])

nd / 2
array([[ 2.5,  0.5,  3.5],
       [ 2.5,  1. ,  4.5],
       [ 2. ,  4.5,  3.5]])


2）矩阵积

nd1 = np.random.randint(0,10,size=(2,3))
nd2 = np.random.randint(0,10,size=(3,5))
print(nd1)
print(nd2)
[[1 3 1]
 [6 5 0]]
[[9 2 6 7 9]
 [8 7 5 7 8]
 [1 5 3 4 1]]

# print(nd1*nd2)

np.dot(nd1,nd2)
array([[34, 28, 24, 32, 34],
       [94, 47, 61, 77, 94]])
我们拿第一个矩阵的第一行和第二个矩阵的每一列对应元素相乘，然后把乘积相加，结果分别放在结果矩阵第一行的每一列；然后拿第一个矩阵后面的行和第二个矩阵的所有列依次相乘，结果放在第二行的每一列，以此类推 【注意】1、可以相乘的条件：第一个矩阵的列数要和第二个矩阵的行数相同 2、第一个矩阵的行数决定了最后结果的行数，第二个矩阵的列数决定了最终结果的列数





2. 广播机制
ndarray的广播机制的两条规则：

1、为缺失维度补1
2、假定缺失的元素用已有值填充

m = np.ones((2,3))
m
array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

a = np.arange(3)
a
array([0, 1, 2])

m+a
array([[ 1.,  2.,  3.],
       [ 1.,  2.,  3.]])

b = np.arange(3).reshape((3,1))
b
array([[0],
       [1],
       [2]])

c = np.arange(3)
c
array([0, 1, 2])

b+c
array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])
维度不够，前面的来凑

六、ndarray的排序
用以上所学的numpy的知识，对一个ndarray对象进行选择排序


nd = np.random.randint(0,20,size=10)
nd
array([ 7,  7,  3,  1,  2,  4, 17, 15, 17, 10])

def sort_sel(nd):
    """Sort ``nd`` in place in ascending order and return it.

    For each position, every element from that position onwards is
    compared with it and swapped in whenever it is smaller, so after the
    inner loop finishes the position holds the minimum of the remaining
    elements.

    Args:
        nd: Array-like exposing ``.size`` and integer indexing; the code
            indexes with a single integer up to ``nd.size``, so a 1-D
            ndarray is expected.

    Returns:
        The same object that was passed in, now sorted.
    """
    count = nd.size
    for left in range(count):
        for right in range(left, count):
            # A smaller element further right belongs at ``left``.
            if nd[right] < nd[left]:
                nd[left], nd[right] = nd[right], nd[left]
    return nd

sort_sel(nd)
array([ 1,  2,  3,  4,  7,  7, 10, 15, 17, 17])
#改进一层for循环


def sort_sel2(nd):
    """Selection-sort ``nd`` in place (ascending) using a single loop.

    The inner scan of a classic selection sort is replaced by
    ``np.argmin`` over the not-yet-sorted tail of the array.

    Args:
        nd: Array exposing ``.size``, slicing and integer indexing; a
            1-D ndarray is expected.

    Returns:
        The same object that was passed in, now sorted.
    """
    for start in range(nd.size):
        # argmin is relative to the tail slice, so shift it back to an
        # index into the full array.
        smallest = start + np.argmin(nd[start:])
        # Move the minimum of the tail into the current position.
        nd[start], nd[smallest] = nd[smallest], nd[start]
    return nd
    

sort_sel2(nd)
array([ 1,  2,  3,  4,  7,  7, 10, 15, 17, 17])
1. 快速排序
np.sort()与ndarray.sort()都可以，但有区别：

np.sort()不改变输入
ndarray.sort()就地处理，不占用额外空间，但会改变输入

nd = np.random.randint(0,100,10)
nd
array([ 3, 99,  5, 17, 17, 40, 58, 15, 56, 69])

np.sort(nd)
array([ 3,  5, 15, 17, 17, 40, 56, 58, 69, 99])

nd
array([ 3, 99,  5, 17, 17, 40, 58, 15, 56, 69])

nd.sort()

nd
array([ 3,  5, 15, 17, 17, 40, 56, 58, 69, 99])


2. 部分排序
np.partition(a,k)

有的时候我们不是对全部数据感兴趣，我们可能只对最小或最大的一部分感兴趣。

当k为正时，我们想要得到最小的k个数
当k为负时，我们想要得到最大的k个数

nd = np.random.randint(0,100,50)
nd
array([ 0, 33,  5, 46,  9, 83, 52, 70, 16, 20,  0, 50, 24, 33, 10, 14, 42,
       90, 36, 32, 51, 82, 64, 20, 98, 29,  5, 27, 24,  9, 33, 96, 29, 22,
       68, 51, 20, 90, 83, 56, 95, 94,  8, 91, 67, 77, 98, 84, 56, 87])

np.partition(nd,20)[:20]
array([ 0, 24,  5,  5,  9, 20, 20, 22, 16, 20,  0,  9, 24,  8, 10, 14, 27,
       29, 29, 32])
猜你喜欢