机器学习番外篇 06 numpy之聚合操作

聚合操作

import numpy as np

ll=np.random.random(100)

ll

>>>array([0.70549308, 0.13813122, 0.68676378, 0.15042523, 0.83681194,
       0.87218509, 0.32134463, 0.41481414, 0.1859884 , 0.96850687,
       0.31416608, 0.21008812, 0.99428592, 0.46115633, 0.72684796,
       0.90903028, 0.55136231, 0.71917139, 0.85957803, 0.4053327 ,
       0.0562395 , 0.4537071 , 0.55295704, 0.58172866, 0.79453829,
       0.99137889, 0.53550284, 0.5597568 , 0.98417863, 0.87541461,
       0.21142854, 0.46313995, 0.96038217, 0.67166078, 0.87117283,
       0.30316081, 0.21839507, 0.94697902, 0.32928672, 0.08789202,
       0.86067533, 0.65499007, 0.13422996, 0.06142825, 0.13129524,
       0.75900217, 0.96175142, 0.18348081, 0.958543  , 0.56115103,
       0.79457103, 0.01238344, 0.07013364, 0.52053143, 0.16673591,
       0.03879356, 0.14954314, 0.5416357 , 0.6304953 , 0.99728983,
       0.56702189, 0.37792629, 0.92756374, 0.88349488, 0.89474025,
       0.14834338, 0.23984749, 0.65604293, 0.74447131, 0.45614988,
       0.56456757, 0.35956011, 0.53239017, 0.85920012, 0.83132901,
       0.95266354, 0.53890193, 0.88001859, 0.30787305, 0.48542745,
       0.0968102 , 0.29474133, 0.54043127, 0.6773138 , 0.89778729,
       0.37065375, 0.04918902, 0.54808586, 0.16099947, 0.74635076,
       0.05428566, 0.66934161, 0.59624292, 0.2375817 , 0.50423199,
       0.71066706, 0.70948914, 0.71501401, 0.81700958, 0.11208359])

sum(ll)

>>>53.78488763487191

np.sum(ll) # 与内置 sum 的结果在末位略有差异：两者的浮点累加顺序不同（NumPy 使用成对求和），属于正常的浮点误差

>>>53.78488763487189

np.min(ll)

>>>0.012383438681552783

np.max(ll)

>>>0.9972898306008766

mm=np.arange(16).reshape(4,-1)

mm

>>>array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

np.sum(mm,axis=0) # axis=0：沿着行的方向压缩，得到每一列的和

>>>array([24, 28, 32, 36])

np.sum(mm,axis=1) # axis=1：沿着列的方向压缩，得到每一行的和

>>>array([ 6, 22, 38, 54])

# 所有元素乘积

np.prod(mm)

>>>0

np.prod(mm+1)

>>>20922789888000

# 求平均值，中位数
np.mean(mm)

>>>7.5

np.median(mm)

>>>7.5

# 百分位点
for percent in [0,25,50,75,100]:
    print(np.percentile(ll,q=percent))

>>>0.012383438681552783
0.28101787029097997
0.5521596797739299
0.8001806692562233
0.9972898306008766

# 方差
np.var(mm)

>>>21.25

# 标准差
np.std(mm)

>>>4.6097722286464435

索引

ll

>>>array([0.70549308, 0.13813122, 0.68676378, 0.15042523, 0.83681194,
       0.87218509, 0.32134463, 0.41481414, 0.1859884 , 0.96850687,
       0.31416608, 0.21008812, 0.99428592, 0.46115633, 0.72684796,
       0.90903028, 0.55136231, 0.71917139, 0.85957803, 0.4053327 ,
       0.0562395 , 0.4537071 , 0.55295704, 0.58172866, 0.79453829,
       0.99137889, 0.53550284, 0.5597568 , 0.98417863, 0.87541461,
       0.21142854, 0.46313995, 0.96038217, 0.67166078, 0.87117283,
       0.30316081, 0.21839507, 0.94697902, 0.32928672, 0.08789202,
       0.86067533, 0.65499007, 0.13422996, 0.06142825, 0.13129524,
       0.75900217, 0.96175142, 0.18348081, 0.958543  , 0.56115103,
       0.79457103, 0.01238344, 0.07013364, 0.52053143, 0.16673591,
       0.03879356, 0.14954314, 0.5416357 , 0.6304953 , 0.99728983,
       0.56702189, 0.37792629, 0.92756374, 0.88349488, 0.89474025,
       0.14834338, 0.23984749, 0.65604293, 0.74447131, 0.45614988,
       0.56456757, 0.35956011, 0.53239017, 0.85920012, 0.83132901,
       0.95266354, 0.53890193, 0.88001859, 0.30787305, 0.48542745,
       0.0968102 , 0.29474133, 0.54043127, 0.6773138 , 0.89778729,
       0.37065375, 0.04918902, 0.54808586, 0.16099947, 0.74635076,
       0.05428566, 0.66934161, 0.59624292, 0.2375817 , 0.50423199,
       0.71066706, 0.70948914, 0.71501401, 0.81700958, 0.11208359])

np.min(ll)

>>>0.012383438681552783

np.argmin(ll) # 返回的索引值

>>>51

ll[51]

>>>0.012383438681552783

np.argmax(ll) # 返回的索引值

>>>59

排序和使用索引

x=np.arange(16)
x

>>>array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

np.random.shuffle(x) # 原地打乱 x，函数本身返回 None
x

>>>array([ 7,  0, 12,  3,  5,  8,  6,  4,  2,  1, 11, 15,  9, 13, 10, 14])

np.sort(x) # 返回排序后的新数组，x本身不会改变

>>>array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

x # x 仍然是打乱后的顺序

>>>array([ 7,  0, 12,  3,  5,  8,  6,  4,  2,  1, 11, 15,  9, 13, 10, 14])

x.sort() # 原地排序，x会改变，方法本身返回 None
x

>>>array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

xx=np.random.randint(10,size=(4,4))
xx

>>>array([[9, 2, 6, 1],
       [7, 1, 1, 1],
       [2, 4, 3, 7],
       [0, 5, 7, 9]])

np.sort(xx) # 默认 axis=-1，即沿最后一个轴排序；对二维数组就是每一行内部排序，与 axis=1 结果相同

>>>array([[1, 2, 6, 9],
       [1, 1, 1, 7],
       [2, 3, 4, 7],
       [0, 5, 7, 9]])

np.sort(xx,axis=1)

>>>array([[1, 2, 6, 9],
       [1, 1, 1, 7],
       [2, 3, 4, 7],
       [0, 5, 7, 9]])

np.sort(xx,axis=0)

>>>array([[0, 1, 1, 1],
       [2, 2, 3, 1],
       [7, 4, 6, 7],
       [9, 5, 7, 9]])

x

>>>array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

np.random.shuffle(x) # 原地打乱 x，函数本身返回 None
x

>>>array([14, 13,  3, 12,  9,  0, 10,  1,  8,  6, 11, 15,  5,  2,  4,  7])

np.argsort(x) # 返回能使数组有序的索引：结果的第 i 项是排序后第 i 个元素在原数组中的索引

>>>array([ 5,  7, 13,  2, 14, 12,  9, 15,  8,  4,  6, 10,  3,  1,  0, 11])

np.partition(x,4) # 第二个参数是 kth（位置而不是数值）：排序后应位于索引4的元素就位，比它小的在左边，比它大的在右边，两侧内部不保证有序；这里 x 是 0~15 的排列，所以该元素恰好是 4

>>>array([ 1,  0,  2,  3,  4,  5,  6,  7,  8, 13, 11, 15, 10,  9, 12, 14])

机器学习 番外篇 06 numpy之聚合操作

聚合操作

索引

排序和使用索引

猜你喜欢

机器学习番外篇 06 numpy之聚合操作