Python数据分析基础工具 01 numpy 基本使用

1）ndarray

>>> import numpy as np
>>>
>>> data=[[1,2,3],[4,5,6]]
# 将Python列表data转换成numpy.array形式（变量不要命名为list，否则会覆盖内置的list类型）
>>> nlist=np.array(data)
# 指定元素类型为浮点型；np.float别名已在NumPy 1.24中移除，应使用np.float64或内置的float
>>> nlistf=np.array(data,dtype=np.float64)
# nlist的行数和列数
>>> nlist.shape
(2, 3)
# nlist的维数，2维
>>> nlist.ndim
2
# nlist中元素的类型
>>> nlist.dtype
dtype('int64')
# nlistf中元素的类型
>>> nlistf.dtype
dtype('float64')
# nlist中每个元素的大小，每个元素为8byte，64bit
>>> nlist.itemsize
8
# nlist中的元素个数
>>> nlist.size
6

2）常用Array

# 生成指定行数与列数的零矩阵
>>> np.zeros([2,4])
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

# 生成全1矩阵
>>> np.ones([3,5])
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

# [0,1)之间的随机数，2行4列
>>> np.random.rand(2,4) 
array([[0.08050753, 0.97414903, 0.80820358, 0.14523922],
       [0.01359425, 0.02753265, 0.60444699, 0.98612778]])

# [1,10)之间的3个随机的整数
>>> np.random.randint(1,10,3) 
array([9, 6, 7])

# 标准正态分布的随机数,2行4列
>>> np.random.randn(2,4)  
array([[ 1.71601963, -0.16823096, -0.04227458, -0.97308526],
       [ 0.85647333, -2.52831235,  0.8150042 , -0.34464529]])

# 从给定的候选值中随机选取一个元素
>>> np.random.choice([10,20,30]) 
20

3）常用操作

# 生成[1,11)的等差数列
>>> np.arange(1,11)
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

# 步长为2的数列
>>> np.arange(1,11,2)
array([1, 3, 5, 7, 9])

# 重塑为2行的矩阵，-1表示该维度的大小由NumPy根据元素总数自动推断（此处为5列）
>>> lst=np.arange(1,11).reshape([2,-1])
>>> lst
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

# e^lst
>>> np.exp(lst)
array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
        1.48413159e+02],
       [4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03,
        2.20264658e+04]])

# 2^lst
>>> np.exp2(lst)
array([[   2.,    4.,    8.,   16.,   32.],
       [  64.,  128.,  256.,  512., 1024.]])

# 开根号，平方根
>>> np.sqrt(lst)
array([[1.        , 1.41421356, 1.73205081, 2.        , 2.23606798],
       [2.44948974, 2.64575131, 2.82842712, 3.        , 3.16227766]])

# 三角函数
>>> np.sin(lst)
array([[ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111]])

# 求ln()
>>> np.log(lst)
array([[0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791],
       [1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509]])

# 对list所有元素求和
>>> lst.sum()
55

# 对list每列求和
>>> lst.sum(axis=0)
array([ 7,  9, 11, 13, 15])

# 对list每行求和
>>> lst.sum(axis=1)
array([15, 40])
>>>

# 求list中最大的元素
>>> lst.max()
10

# list中每列上最大的元素
>>> lst.max(axis=0)
array([ 6,  7,  8,  9, 10])

# list中每行上最小的元素
>>> lst.min(axis=1)
array([1, 6])


>>> lst1=np.array([10,20,30,40])
>>> lst1
array([10, 20, 30, 40])

>>> lst2=np.array([1,2,3,4])
>>> lst2
array([1, 2, 3, 4])

>>> lst1+lst2
array([11, 22, 33, 44])

>>> lst1-lst2
array([ 9, 18, 27, 36])

>>> lst1*lst2
array([ 10,  40,  90, 160])

>>> lst1/lst2
array([10., 10., 10., 10.])

>>> lst1**lst2
array([     10,     400,   27000, 2560000])


# 矩阵乘法
>>> np.dot(lst1.reshape([2,2]),lst2.reshape([2,2])) 
array([[ 70, 100],
       [150, 220]])

# 拼接两个list，拼接成一行
>>> np.concatenate((lst1,lst2),axis=0 )
array([10, 20, 30, 40,  1,  2,  3,  4])

# 竖直拼接
>>> np.vstack((lst1,lst2))
array([[10, 20, 30, 40],
       [ 1,  2,  3,  4]])

# 水平拼接
>>> np.hstack((lst1,lst2))
array([10, 20, 30, 40,  1,  2,  3,  4])

# 拆分一个list，变成2个
>>> np.split(lst1,2) 
[array([10, 20]), array([30, 40])]

# 复制一个数组（得到独立的副本，修改副本不会影响原数组）
>>> lst1c=np.copy(lst1)
>>> lst1c
array([10, 20, 30, 40])

4）线性方程组和矩阵运算

>>> from numpy.linalg import inv, det, eig, solve
# 单位矩阵
>>> np.eye(3) 
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
>>> lst=np.array([[1.,2.],[3.,4.]])
>>> lst
array([[1., 2.],
       [3., 4.]])

# 矩阵的逆
>>> inv(lst) 
array([[-2. ,  1. ],
       [ 1.5, -0.5]])

# 矩阵的转置
>>> lst.transpose() 
array([[1., 3.],
       [2., 4.]])

# 矩阵的行列式
>>> det(lst) 
-2.0000000000000004

# 特征值和特征向量：第一个array是特征值，第二个array的每一列是对应特征值的（单位化）特征向量
>>> eig(lst) 
(array([-0.37228132,  5.37228132]), array([[-0.82456484, -0.41597356],
       [ 0.56576746, -0.90937671]]))

# 解线性方程组
>>> y=np.array([[5.],[7.]])
>>> solve(lst,y)
array([[-3.],
       [ 4.]])

5）其他应用

# fft快速傅里叶变换
>>> np.fft.fft(np.array([1,1,1,1,1])) 
array([5.+0.j, 0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j])

# 相关系数
>>> np.corrcoef([1,0,1],[0,2,1]) 
array([[ 1.       , -0.8660254],
       [-0.8660254,  1.       ]])

# 生成一元多项式，系数按降幂排列（此处为 2x^2 + x + 3）
>>> np.poly1d([2,1,3]) 
poly1d([2, 1, 3])
>>> print(np.poly1d([2,1,3]))
   2
2 x + 1 x + 3

Python数据分析基础工具 01 numpy 基本使用

猜你喜欢