(Dos) numpy y pandas

Uno, Numpy

1.1 Crear

import numpy as np  # every snippet below refers to NumPy through the `np` alias

# Create arrays by converting Python lists
a = np.array([1, 2, 3])  # array([1, 2, 3])

b = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
'''
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])
'''
np.random.seed(666)  # fix the random seed so the random examples are reproducible

# ------arange方法
np.arange(6)  # [0 1 2 3 4 5]

np.arange(2,6,1)  # [2 3 4 5]

# ------randint方法
np.random.randint(0,10)  #  1

np.random.randint(1,3,8)  # array([2, 2, 1, 1, 2, 1, 2, 1])

np.random.randint(0,10,size=(3,5))
'''
array([[4, 9, 3, 2, 9],
       [3, 7, 5, 9, 7],
       [9, 5, 5, 0, 4]])
'''

# ------random方法
np.random.random()  # 0.7004371218578347

np.random.random((3,5))
'''
array([[0.84418664, 0.67651434, 0.72785806, 0.95145796, 0.0127032 ],
       [0.4135877 , 0.04881279, 0.09992856, 0.50806631, 0.20024754],
       [0.74415417, 0.192892  , 0.70084475, 0.29322811, 0.77447945]])
'''

np.random.normal()  # one float drawn from a normal distribution with mean 0 and standard deviation 1 (the defaults)

np.random.normal(0,1,(3,5))  # 3x5 matrix of draws with mean 0 and standard deviation 1 (the second argument is the standard deviation, not the variance)
'''
array([[-0.57577075, -1.68290077,  0.22918525, -1.75662522,  0.84463262],
       [ 0.27721986,  0.85290153,  0.1945996 ,  1.31063772,  1.5438436 ],
       [-0.52904802, -0.6564723 , -0.2015057 , -0.70061583,  0.68713795]])
'''

# ------linspace方法
np.linspace(2,5,4) # 初始值为2,终值为5,元素个数为4个的等差数组 [2. 3. 4. 5.]

# ------zeros方法
np.zeros(5)  # [0. 0. 0. 0. 0.]

np.zeros(10, dtype=int)  # array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

np.zeros(shape=(3, 5),dtype=int )
'''
array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])
'''

# ------ones方法
np.ones(10)   #  array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])


np.ones((3, 5 ))
'''
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])
'''

# ------full方法
np.full(shape=(3,5),fill_value=666)
'''
array([[666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666]])
'''

1.2 Propiedades

x = np.arange(10)
x    #  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

X = np.arange(15).reshape(3,5)
X
'''
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
'''

# ------ndim查询是几维数组-----
x.ndim  # 1
X.ndim  # 2

# ------shape查询元素形状,返回元组
x.shape  # (10,)
X.shape  # (3, 5)

# -----size查询元素个数------
x.size  # 10
X.size  # 15

1.3 Acceso a datos

# ------ Index access ------
x[0]  # 0

x[-1]  # 9

X[0][0]  # 0   works, but chained indexing is not recommended

X[(2,4)]  # 14   element at row index 2, column index 4 (third row, fifth column)

X[2,4]  # 14   same as above

# ------ Slice access (1-D) ------
x[0:5]  # array([0, 1, 2, 3, 4])   from index 0 up to, but not including, index 5

x[:3]  # array([0, 1, 2])  from the start up to index 3 (exclusive)

x[3:]  # array([3, 4, 5, 6, 7, 8, 9])  from index 3 to the end

x[::3]  # array([0, 3, 6, 9])  whole array with a step of 3

x[::-1]  # array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])  reversed order

# ------切片访问------

X[:2,:3]  # 访问前两行的前三列
'''
array([[0, 1, 2],
       [5, 6, 7]])
'''

X[::-1,::-1]
'''
array([[14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5],
       [ 4,  3,  2,  1,  0]])
'''

X[0]  #  array([0, 1, 2, 3, 4])   取一行
X[0,:]  # 同上

X[:,1]  # array([ 1,  6, 11])  取一列

1.4 Modificación

a = np.array([1,2,3,4,5])

np.append(a,7)   # array([1, 2, 3, 4, 5, 7])

np.insert(a,0,0)  # array([0, 1, 2, 3, 4, 5])

np.delete(a,3)  # array([1, 2, 3, 5])

a[0] = 666
a               # array([666,   2,   3,   4,   5])

a[2:4] = [999,888]
a               # array([666,   2, 999, 888,   5])
x.ndim  # 1  维度

x.reshape(10,-1)   # 改变成十行,列自动确定
'''
array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])
'''

x1 = x.reshape(-1,10)  # array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]) 
x1.ndim  # 2  维度


a = np.array([[1,2,3],[4,5,6]])
'''
array([[1, 2, 3],
       [4, 5, 6]])
'''

a.reshape(3,2)
'''
array([[1, 2],
       [3, 4],
       [5, 6]])
'''

a.reshape(1,-1)  # array([[1, 2, 3, 4, 5, 6]])

a.reshape(-1)  # array([1, 2, 3, 4, 5, 6])
a = np.array([1,2,3,4,5],dtype=np.int64)

a.dtype  # dtype('int64')

a = a.astype(np.float32)

a.dtype   # dtype('float32')

1.5 Fusión

# ------一维------
x = np.array([1,2,3])
y = np.array([4,5,6])

np.concatenate([x,y])   # array([1, 2, 3, 4, 5, 6])

# ------二维------
a = np.array([[1,2,3],[4,5,6]])
'''
array([[1, 2, 3],
       [4, 5, 6]])
'''

np.concatenate([a,a])
'''
array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])
'''

np.concatenate([a,a],axis=1)
'''
array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])
'''

# ------一维、二维拼接------
b = np.array([666,888,999])
np.concatenate([a, b.reshape(1,-1)])
'''
array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 888, 999]])
'''

np.vstack([a,b])
'''
array([[  1,   2,   3],
       [  4,   5,   6],
       [666, 888, 999]])
'''

np.hstack([a,a])
'''
array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])
'''

1.6 Dividir

# ------一维
a = np.arange(10)

np.split(a,[3,8])   #  [array([0, 1, 2]), array([3, 4, 5, 6, 7]), array([8, 9])]

np.split(a,[6])  #  [array([0, 1, 2, 3, 4, 5]), array([6, 7, 8, 9])]

# ------二维
a = np.arange(20)
b = a.reshape((4,5))

# ---行分割
np.split(b,[2])
'''
[array([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]]),
 array([[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]])]
'''

# ---列分割
np.split(b,[2],axis=1)
'''
[array([[ 0,  1],
        [ 5,  6],
        [10, 11],
        [15, 16]]),
 array([[ 2,  3,  4],
        [ 7,  8,  9],
        [12, 13, 14],
        [17, 18, 19]])]
'''

np.vsplit(b,[3])
'''
[array([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]]),
 array([[15, 16, 17, 18, 19]])]
'''

np.hsplit(b,[2])
'''
[array([[ 0,  1],
        [ 5,  6],
        [10, 11],
        [15, 16]]),
 array([[ 2,  3,  4],
        [ 7,  8,  9],
        [12, 13, 14],
        [17, 18, 19]])]
'''

# ------示例------
data = np.arange(16).reshape(4,4)
'''
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
'''

X, y = np.hsplit(data, [-1])
'''
array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14]])

array([[ 3],
       [ 7],
       [11],
       [15]])
'''

y[:,0]  # 矩阵转换为向量  array([ 3,  7, 11, 15])

1.7 Operaciones con matrices 

L =np.arange(10)  #  array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

2 * L  #  array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

b = np.arange(1,16).reshape(3,5)
'''
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
'''

b + 8
'''
array([[ 9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23]])
'''

b * 3
'''
array([[ 3,  6,  9, 12, 15],
       [18, 21, 24, 27, 30],
       [33, 36, 39, 42, 45]])
'''

b**3
'''
array([[   1,    8,   27,   64,  125],
       [ 216,  343,  512,  729, 1000],
       [1331, 1728, 2197, 2744, 3375]], dtype=int32)
'''

np.abs(b)  # 绝对值

np.sin(b)  # 正弦函数
a = np.arange(4).reshape(2,2)
'''
array([[0, 1],
       [2, 3]])
'''

b = np.full((2,2),10)
'''
array([[10, 10],
       [10, 10]])
'''

a + b
'''
array([[10, 11],
       [12, 13]])
'''

a * b
'''
array([[ 0, 10],
       [20, 30]])
'''

# 备注:矩阵运算要保证维度相同
# ------矩阵的乘法:第一行乘第一列相加,第一行乘以第二列相加。。。------
a.dot(b)
'''
array([[10, 10],
       [50, 50]])
'''

# ------矩阵的转置------
a.T
'''
array([[0, 2],
       [1, 3]])
'''

1.8 Operaciones sobre matrices y vectores

a
'''
array([[0, 1],
       [2, 3]])
'''

b = np.array([1,2])
'''
array([1, 2])
'''

a + b   # b和a的每一行的相应元素相加
'''
array([[1, 3],
       [3, 5]])
'''

# ------将b堆叠,行堆叠两次,列不变
np.tile(b,(2,1))  
'''
array([[1, 2],
       [1, 2]])
'''

# ------将b堆叠,乘以a的行数
np.vstack([b]* a.shape[0])  # 同上

# ------b乘以第一列相应元素加和,b乘以第二列相应元素加和
b.dot(a)  # array([4, 7])

# ------a第一行乘以b相应元素加和,a第二行乘以b相应元素加和
a.dot(b)  # array([2, 8])

1.9 Operaciones agregadas 

# ------一维------
a  = np.random.random(100)

sum(a)  # 效率低

np.sum(a)  # 效率更高

np.min(a)

np.max(a)

# ------矩阵-----
data = np.arange(16).reshape(4,4)
np.sum(data)  # 120

np.sum(data,axis=0)   # array([24, 28, 32, 36])   每一列的和

np.sum(data,axis=1)  # array([ 6, 22, 38, 54])  每一行的和

np.prod(data)  # 0  所有数据相乘

np.mean(data)  # 7.5

np.var(data)

np.std(data)

Dos, pandas

2.1 Objeto Series

2.1.1 Crear

import pandas as pd  # the pandas snippets refer to the library through the `pd` alias

# Create a Series with explicit string labels
a = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
a
'''
a    1
b    2
c    3
dtype: int64
'''

# Without `index`, the labels default to 0..n-1
b = pd.Series([1, 2, 3])
b
'''
0    1
1    2
2    3
dtype: int64
'''

# From a dict: the keys become the labels
c = pd.Series({"a": 1, "b": 2, "c": 3})
c
'''
a    1
b    2
c    3
dtype: int64
'''

2.1.2 Consulta 

print(a['a'])   # 1   look up by label

# Use .iloc for positional access: plain [] with an integer on a
# string-labelled Series relies on a deprecated positional fallback.
print(a.iloc[2])  # 3   look up by position

print(a.values)  # a Series is made of two arrays; values and index are both available as attributes    [1 2 3]

print(a.index)  # Index(['a', 'b', 'c'], dtype='object')

print(a[['a','b']])  # select several entries by label
'''
a    1
b    2
dtype: int64

'''
print(a.iloc[[1,2]])   # select several entries by position
'''
b    2
c    3
dtype: int64
'''

print(a.iloc[:1])  # positional slice
'''
a    1
dtype: int64
'''

2.1.3 Recorrido de elementos

# Series.iteritems() was removed in pandas 2.0; items() yields the same (label, value) pairs
for index, value in a.items():
    print(index, value)  # index is the label, value is the data value

'''
a 1
b 2
c 3
'''

2.1.4 Agregar

# Series.append was removed in pandas 2.0; pd.concat returns a new Series
# with the entries of `a` followed by those of `c` (labels are kept as-is).
print(pd.concat([a, c]))
'''
a    1
b    2
c    3
a    1
b    2
c    3
dtype: int64
'''

2.1.5 Eliminar

print(a.drop('a'))
'''
b    2
c    3
dtype: int64
'''

2.2 DataFrame (marco de datos)

2.2.1 Crear

dic = {"a":[1,3],"b":[2,4]}  # the keys a and b become the column names
print(pd.DataFrame(dic,index=['item1','item2']))  # index sets the row labels
'''
       a  b
item1  1  2
item2  3  4
'''


arr = [{"a":1,"b":2},{"a":3,"b":4}]  # 每一个字典为一条记录
print(pd.DataFrame(arr))
'''
   a  b
0  1  2
1  3  4
'''


arr = [[1,2],[3,4]]
print(pd.DataFrame(arr,columns=['a','b']))  # columns指定列名
'''
   a  b
0  1  2
1  3  4
'''

2.2.2 Agregar

df = pd.DataFrame([[1, 2], [11, 12]], columns=['a', 'b'])

# DataFrame.append was removed in pandas 2.0. Wrap the new record in a
# one-row frame and concatenate; ignore_index=True renumbers the rows.
new_row = pd.DataFrame([{'a': 21, 'b': 22}])
print(pd.concat([df, new_row], ignore_index=True))
'''
    a   b
0   1   2
1  11  12
2  21  22
'''


# Appending one frame to another works the same way
print(pd.concat([df, df], ignore_index=True))
'''
    a   b
0   1   2
1  11  12
2   1   2
3  11  12
'''


arr = [[1,2],[11,12]]
df = pd.DataFrame(arr,columns=['a','b'])
df['c'] = [3,13]  # 添加新列c
df
'''
	a	b	c
0	1	2	3
1	11	12	13
'''


df.insert(0,'x',[0,10])  # 开始位置插入新列
df
'''
	x	a	b	c
0	0	1	2	3
1	10	11	12	13
'''

2.2.3 Eliminar

df = pd.DataFrame([[1,2],[11,12]],columns=['a','b'])  # sample frame with the default row labels 0 and 1
print(df.drop(1))  # drop the row labelled 1 (the second row); df itself is left unchanged
'''
   a  b
0  1  2
'''

print(df.drop('a',axis=1))  # drop column a (axis=1 selects columns)
'''
    b
0   2
1  12
'''

2.2.4 Objeto de índice

df = pd.DataFrame({"a":[1,3],"b":[2,4]},index=['L1','L2'])

# -----查看索引
print(df.index)  # 行索引   Index(['L1', 'L2'], dtype='object')
print(df.columns)  # 列索引   Index(['a', 'b'], dtype='object')

# ----修改索引
df.index=(['11','12'])
df
'''
    a	b
11	1	2
12	3	4
'''

print(df.reindex(columns=['b','a']))
'''
    b  a
11  2  1
12  4  3
'''

2.2.5 Acceso a datos

df = pd.DataFrame([[1,2],[11,12]],columns=['a','b'])

# ----访问列
print(df['a'])
'''
0     1
1    11
'''

print(df[['a','b']])
'''
    a   b
0   1   2
1  11  12
'''

# ----切片访问行
print(df[:1])  # 返回的是一个DataFrame类型的数据
'''
   a  b
0  1  2
'''

# ----条件访问
print(df[df['a']==11]) # 筛选a值为11的所有行

print(df[(df['a']>10) & (df['a']<20)])  #多条件筛选
'''
    a   b
1  11  12
'''

2.2.6 Acceso a elementos específicos

df = pd.DataFrame([[1,2],[11,12]],columns=['a','b'])
'''
    a	b
0	1	2
1	11	12
'''

# 重点:loc和iloc,二者区别在于loc访问数据时使用行索引和列名,iloc方法使用行下标和列下标。
df.iloc[0,0]  # 用下标访问数据  1

df.loc[0,'b']  # 访问单个元素  2

 

Supongo que te gusta

Origin blog.csdn.net/qq_29644709/article/details/114801455
Recomendado
Clasificación