table of Contents

numpy module

The numpy module is mainly used for data analysis and for scientific computing on numpy arrays.

The main attributes and methods are listed below; all of them are accessed with dot notation on an object created by numpy (for example `arr.shape`).

import numpy as np

Attributes	description
T	Transpose of the array: rows and columns are swapped (e.g. a 2x3 array becomes a 3x2 array with two elements per row)
dtype	Data type of the array elements (e.g. int32 or float64)
size	The number of array elements
ndim	Number of dimensions of the array
shape	Size of each dimension of the array, as a tuple (number of rows, number of columns)
astype	Data type conversion

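Common method	description
Element slicing	[:,:] The part before the comma selects rows, the part after it selects columns
Boolean indexing	Select the elements of a numpy array that satisfy a condition, e.g. elements > 4
Assignment	Select an element of a numpy array by index and assign to it, e.g. arr[0,0] = 0
Horizontal merge (hstack)	Joins arrays side by side: each row of the first array is extended with the matching row of the second
Vertical merge (vstack)	Stacks the rows of the second array below the first, similar to extending a list with more elements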

Array Functions	description
np.array()	Converts a list into an array; you can optionally specify dtype
np.ones()	Takes the shape (rows, columns); all values are 1
np.zeros()	Takes the shape (rows, columns); all values are 0
np.eye()	Takes the number of rows and columns; values on the main diagonal are 1 and all others are 0
np.arange()	Like the built-in range(), but returns an array and supports floating-point steps
np.linspace()	Similar to arange(), but the third parameter is the number of evenly spaced values to generate (the length of the array)
np.empty()	Creates an array without initializing its elements, so the values are arbitrary
np.reshape()	Changes the shape of an array; the total number of elements must stay the same
Array Operations	Element-wise arithmetic on arrays: + - * /


Generates a random number (common)	np.random.rand(x, y)
	np.random.random((x, y))
	np.random.choice(a, size)
	np.random.shuffle(a)

numpy Statistical Methods	description
sum	Sum
cumsum	Cumulative sum
mean	Mean (average)
std	Standard deviation
var	Variance
min	Minimum
max	Maximum
argmin	Index of the minimum
argmax	Index of the maximum
sort	Sorting

The code below demonstrates each of these in detail.

lt1 = [1,2,3]
lt2 = [4,5,6]

# To multiply two plain Python lists element by element we have to iterate
# ourselves; zip() pairs the items up so no manual indexing is needed.
lt = [left * right for left, right in zip(lt1, lt2)]
print(lt)


import numpy as np

# With numpy the same element-wise multiplication needs no explicit loop
arr1 = np.array((1, 2, 3))
arr2 = np.array((4, 5, 6))
product = arr1 * arr2
print(product)
# [ 4 10 18]




# Creating numpy arrays with np.array()  -->  arrays are a mutable data type
# One-dimensional array (usually not used); note that the printed form has
# no commas between the elements.
arr = np.array([1,2,3])
print(arr)
# [1 2 3]

# Two-dimensional array: 2 rows x 3 columns
arr = np.array([
    [1,2,3],
    [4,5,6]
])
print(arr)
# [[1 2 3]
#  [4 5 6]]

# Two-dimensional array with three rows: 3 rows x 3 columns.
# NOTE: adding a row does not add a dimension, so this is still a 2-D array
# (arr.ndim == 2). A real three-dimensional array needs one more level of
# nesting, e.g. np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]); such arrays
# are usually not used.
arr = np.array([
    [1,2,3],
    [4,5,6],
    [7,8,9]
])
print(arr)
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]


# Attributes of a numpy array
arr = np.array([
    [1,2,3],
    [4,5,6]
])

# T: transpose of the array (rows and columns are swapped).
# sep="\n" prints each array starting on its own line so both stay aligned.
print(arr, arr.T, sep="\n")
# [[1 2 3]
#  [4 5 6]]
# [[1 4]
#  [2 5]
#  [3 6]]

# dtype: data type of the array elements.
# int32 / float64 are numpy's own element types rather than built-in
# Python types.
print(arr.dtype)
# int32 on the original author's machine; the default integer type is
# platform dependent (e.g. int64 on most 64-bit Linux/macOS installs)

# size: number of elements in the array
print(arr.size)
# 6

# ndim: number of dimensions
print(arr.ndim)
# 2

# shape: size of each dimension, as a tuple (rows, columns)
print(arr.shape)
# (2, 3)

# astype: convert the element type, here to float64
arr = arr.astype(np.float64)
print(arr)
# [[1. 2. 3.]
#  [4. 5. 6.]]

# Slicing a numpy array
arr = np.array([
    [1, 2, 3],
    [4, 5, 6]
])

print(arr[:,:])     # [rows, columns]: a bare ":" selects everything
# [[1 2 3]
#  [4 5 6]]
print(arr[0,0])     # row 0, column 0
# 1
print(arr[1,2])     # row 1, column 2
# 6
print(arr[:,-2:])   # all rows, last two columns
# [[2 3]
#  [5 6]]

# Boolean indexing: keep only the elements greater than 4.
# The result is a flat one-dimensional array.
print(arr[arr > 4])
# [5 6]

# Assignment through an index modifies the array in place
arr[0,0] = 0
print(arr)
# [[0 2 3]
#  [4 5 6]]

# Merging arrays
arr1 = np.array([[1, 2, 3], [4, 5, 6]])

# This array mixes numbers and strings, so every element is stored as a
# string; that is why the merged results below print quoted values.
arr2 = np.array([[7, 8, 9], ['a', 'b', 'c']])

# Horizontal merge: arr2's columns are appended to the right of arr1
side_by_side = np.hstack((arr1, arr2))
print(side_by_side)
# [['1' '2' '3' '7' '8' '9']
#  ['4' '5' '6' 'a' 'b' 'c']]

# Vertical merge: arr2's rows are appended below arr1
stacked = np.vstack((arr1, arr2))
print(stacked)
# [['1' '2' '3']
#  ['4' '5' '6']
#  ['7' '8' '9']
#  ['a' 'b' 'c']]

# concatenate joins along axis 0 by default (rows are appended, like vstack);
# axis=1 appends columns instead, giving the same result as hstack
joined = np.concatenate((arr1, arr2), axis=1)
print(joined)
# [['1' '2' '3' '7' '8' '9']
#  ['4' '5' '6' 'a' 'b' 'c']]

# Creating numpy arrays with helper functions

# 2x3 array filled with ones
print(np.ones((2,3)))
# [[1. 1. 1.]
#  [1. 1. 1.]]

# 2x3 array filled with zeros
print(np.zeros((2,3)))
# [[0. 0. 0.]
#  [0. 0. 0.]]

# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere
print(np.eye(3,3))
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

# 10 evenly spaced values from 1 to 100, both ends included
print(np.linspace(1,100,10))
# [  1.  12.  23.  34.  45.  56.  67.  78.  89. 100.]

# Integers from 2 up to (but not including) 10
print(np.arange(2,10))
# [2 3 4 5 6 7 8 9]

# Reshaping
arr1 = np.zeros((2,6))
# The new shape must hold the same number of elements (2 * 6 == 3 * 4)
print(arr1.reshape((3,4)))
# [[0. 0. 0. 0.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]]


# Arithmetic on numpy arrays
# + - * / with a scalar are applied to every element
arr = 4 * np.ones((3, 4))
print(arr)
# [[4. 4. 4. 4.]
#  [4. 4. 4. 4.]
#  [4. 4. 4. 4.]]

arr = 4 + np.ones((3, 4))
print(arr)
# [[5. 5. 5. 5.]
#  [5. 5. 5. 5.]
#  [5. 5. 5. 5.]]

# Mathematical functions are applied element by element as well
# (just be aware that they exist)
sines = np.sin(arr)
print(sines)
# [[-0.95892427 -0.95892427 -0.95892427 -0.95892427]
#  [-0.95892427 -0.95892427 -0.95892427 -0.95892427]
#  [-0.95892427 -0.95892427 -0.95892427 -0.95892427]]

# Matrix multiplication (dot product): (2x3) . (3x2) -> (2x2)
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[1, 2], [4, 5], [6, 7]])
matrix_product = arr1.dot(arr2)
print(matrix_product)
# [[27 33]
#  [60 75]]

# Matrix inverse
arr = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [9, 8, 9],
])
inverse = np.linalg.inv(arr)
print(inverse)
# [[ 0.5        -1.          0.5       ]
#  [-3.          3.         -1.        ]
#  [ 2.16666667 -1.66666667  0.5       ]]





# Mathematical and statistical methods of numpy arrays

arr = np.array([[1, 2, 3], [4, 5, 6]])
total = arr.sum()
print(total)
# 21

# Random numbers (no seed is set, so the sample outputs below will differ
# from run to run)
# rand takes the dimensions as separate arguments
rand_values = np.random.rand(3, 4)
print(rand_values)
# [[0.76654824 0.23510842 0.79989748 0.93094884]
#  [0.97155472 0.29956374 0.27754847 0.91103403]
#  [0.43714323 0.7549109  0.14547903 0.20511579]]

# random takes the shape as a single tuple
random_values = np.random.random((3, 4))
print(random_values)
# [[0.91673193 0.15218486 0.32976182 0.41812734]
#  [0.33360061 0.20190749 0.48689467 0.46679115]
#  [0.12490532 0.50441629 0.95525997 0.5402791 ]]


# Pick random value(s) from a one-dimensional sequence
picked = np.random.choice([1, 2, 3], 1)
print(picked)
# [1]

# Random integers within a given range, arranged in a 3x4 array
random_ints = np.random.randint(1, 100, (3, 4))
print(random_ints)
# [[33 40 93 18]
#  [80 65 64 51]
#  [66  6 83 10]]

matplotlib module

The matplotlib module is used for drawing plots.

# Bar chart

from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties

# Load a font that contains Chinese glyphs; otherwise the labels render as
# garbled text. NOTE(review): this path only exists on Windows.
font = FontProperties(fname=r"C:\Windows\Fonts\simsun.ttc")

# Plot style
plt.style.use("ggplot")

# Categories for the x axis and the value of each category
clas = ["3班","4班","5班","6班"]
students = [50,55,45,60]
clas_index = range(len(clas))

# Draw the bars
plt.bar(clas_index,students,color="darkblue")

# Text keyword names must be lowercase: current matplotlib releases reject
# mixed-case spellings such as FontProperties= or Fontsize=.
plt.xlabel("学生",fontproperties=font)
# The head-count label belongs on the y axis; setting the x label a second
# time would overwrite the label above and leave the y axis unlabelled.
plt.ylabel("学生人数",fontproperties=font)
plt.title("班级-学生人数",fontproperties=font,fontsize=25,fontweight=20)
# Replace the numeric tick positions with the class names
plt.xticks(clas_index,clas,fontproperties=font)

# Show the figure
plt.show()

1569746051687

# Histogram
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties

# Load a font that contains Chinese glyphs; otherwise the titles render as
# garbled text. NOTE(review): this path only exists on Windows.
font = FontProperties(fname=r"C:\Windows\Fonts\simsun.ttc")
plt.style.use("ggplot")

# Two samples of 10000 normally distributed random values
x1 = np.random.randn(10000)
x2 = np.random.randn(10000)

# Create the figure
fig = plt.figure()

# add_subplot(rows, columns, position): one row with two plots side by side
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2)

ax1.hist(x1,bins=50,color="darkblue")
ax2.hist(x2,bins=50,color="y")

# Text keyword names must be lowercase: current matplotlib releases reject
# the mixed-case spelling FontProperties=.
fig.suptitle("两个正态分布",fontproperties=font,fontsize=20)
ax1.set_title("x1的正态分布",fontproperties=font)
ax2.set_title("x2的正态分布",fontproperties=font)

# Show the figure
plt.show()

1569746069693

# Line chart

import numpy as np
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties

# Load a font that contains Chinese glyphs; otherwise the legend renders as
# garbled text
font = FontProperties(fname=r"C:\Windows\Fonts\simsun.ttc")
plt.style.use("ggplot")

# Fixed seed so the same four curves are produced on every run
np.random.seed(10)

# Four series, each the cumulative sum of 40 random values
x1 = np.random.randn(40).cumsum()
x2 = np.random.randn(40).cumsum()
x3 = np.random.randn(40).cumsum()
x4 = np.random.randn(40).cumsum()

# One entry per curve: (data, colour, line style, marker, legend label)
line_specs = [
    (x1, "r", "-", "o", "红圆线"),
    (x2, "y", "--", "*", "黄虚线"),
    (x3, "b", "-.", "s", "蓝方线"),
    (x4, "black", ":", "s", "黑方线"),
]
for series, colour, dash, symbol, caption in line_specs:
    plt.plot(series, color=colour, linestyle=dash, marker=symbol, label=caption)
plt.legend(loc="best", prop=font)

# Show the figure
plt.show()

1569746079556

# Scatter plot + line plot
import numpy as np
from matplotlib import pyplot as plt  # conventional alias
from matplotlib.font_manager import FontProperties  # used to change the font

# Load a font that contains Chinese glyphs; otherwise the titles render as
# garbled text. A raw string keeps the backslashes in the Windows path from
# being read as escape sequences.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
plt.style.use('ggplot')

# One figure with two plots side by side
fig = plt.figure()
ax1 = fig.add_subplot(1,2,1)
ax2 = fig.add_subplot(1,2,2)


# Both plots show y = x squared for x = 0..19
x = np.arange(20)
y = x ** 2
x2 = np.arange(20)
y2 = x2 ** 2

# Points ...
ax1.scatter(x,y,color="r",label="红")
ax2.scatter(x2,y2,color="b",label="蓝")

# ... and a line through the same points
ax1.plot(x,y)
ax2.plot(x2,y2)

# Text keyword names must be lowercase: current matplotlib releases reject
# the mixed-case spelling FontProperties=.
fig.suptitle("两张图",fontproperties=font,fontsize=15)
ax1.set_title("散点图",fontproperties=font)
ax2.set_title("折线图",fontproperties=font)
ax1.legend(prop=font)

# Show the figure
plt.show()

1569746089270

pandas module

The pandas module is used to read and write excel / json / sql / ini / csv files.

import pandas as pd
import numpy as np

# Fixed seed so the random values below are reproducible
np.random.seed(10)

# Six consecutive month-end dates, starting with January 2019.
# An explicit MonthEnd offset is used instead of the "M" alias, which newer
# pandas versions deprecate in favour of "ME"; the offset object yields the
# same dates on old and new versions.
index = pd.date_range("2019-01-01",periods=6,freq=pd.offsets.MonthEnd())
print(index)
columns = ["c1","c2","c3","c4"]

# 6x4 array of random values: one row per date, one column per name
val = np.random.randn(6,4)


df = pd.DataFrame(index=index,columns=columns,data=val)
print(df)
#                   c1        c2        c3        c4
# 2019-01-31  1.331587  0.715279 -1.545400 -0.008384
# 2019-02-28  0.621336 -0.720086  0.265512  0.108549
# 2019-03-31  0.004291 -0.174600  0.433026  1.203037
# 2019-04-30 -0.965066  1.028274  0.228630  0.445138
# 2019-05-31 -1.136602  0.135137  1.484537 -1.079805
# 2019-06-30 -1.977728 -1.743372  0.266070  2.384967


# Save the DataFrame to an xlsx file
df.to_excel("date_c.xlsx")
# Read the file back, using the first column as the index
df = pd.read_excel("date_c.xlsx",index_col=[0])
print(df)
#                   c1        c2        c3        c4
# 2019-01-31  1.331587  0.715279 -1.545400 -0.008384
# 2019-02-28  0.621336 -0.720086  0.265512  0.108549
# 2019-03-31  0.004291 -0.174600  0.433026  1.203037
# 2019-04-30 -0.965066  1.028274  0.228630  0.445138
# 2019-05-31 -1.136602  0.135137  1.484537 -1.079805
# 2019-06-30 -1.977728 -1.743372  0.266070  2.384967


# The three parts of a DataFrame: row labels, column labels, raw values
for part in (df.index, df.columns, df.values):
    print(part)

# Select several columns by name
print(df[['c1', 'c2']])

# Select rows by index label with .loc, either one label or a label range
# (plain df['2019-01-31'] is not used here: [] with a single string looks up
# a column name)
print(df.loc['2019-01-31'])
print(df.loc['2019-01-31':'2019-05-31'])

# Select by integer position with .iloc
print(df)
print(df.iloc[0, 0])

# Overwrite the whole first row with zeros
df.iloc[0, :] = 0
print(df)

Python- data analysis module

numpy module

matplotlib module

pandas module

Guess you like