NumPy, Matplotlib and Pandas

1. NumPy

1. Creation and Access

1. Differences from a list: all elements of an array have the same data type. The underlying implementation is highly optimized C code, so computation is fast. NumPy also provides a comprehensive set of mathematical functions that can be applied directly to arrays.
2. The array type it defines is called ndarray, which is short for n-dimensional array.

import numpy as np  # imported under the conventional alias

a = np.array([1, 2, 3])    # equivalently: a = np.array((1, 2, 3))

print(a)  # [1 2 3]
print(type(a))  # <class 'numpy.ndarray'>

insert image description here

3. The way to create ndarray

import numpy as np

# ================== Create from existing values ==================
a_list = np.array([1, 2, 3])
a_tuple = np.array((1, 2, 3))

# ================== Create pre-filled arrays ==================
a_zeros = np.zeros((2, 3))
a_ones = np.ones((2, 3))
a_empty = np.empty((2, 3))

# ================== Create evenly spaced sequences ==================
a_ar = np.arange(6)
a_lin = np.linspace(0, 10, num=5)

# ================== Create random arrays ==================
# Fix the random seed
np.random.seed(10)
# Array of shape (3, 1) with random floats between 0 and 1
t1 = np.random.rand(3, 1)
# Array of shape (2, 2) with random floats between 0 and 100
t2 = np.random.uniform(0, 100, (2, 2))
# Array of shape (2, 2) with random integers from 0 up to (but not including) 20
t3 = np.random.randint(0, 20, (2, 2))
# Normal distribution with the given mean (0), standard deviation (1) and shape
t4 = np.random.normal(0, 1, (2, 2))
# Standard normal distribution, i.e. a normal distribution with mean 0 and standard deviation 1
t5 = np.random.standard_normal(size=(2, 2))

4. Accessing arrays
Array elements can be accessed by indexing or slicing.

import numpy as np

# Create a 5x4 two-dimensional array (5 rows, 4 columns)
c = np.array([[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23], [30, 31, 32, 33], [40, 41, 42, 43]])

print(c)

# Indexing: the element at row 1, column 2 (indices are zero-based)
print(c[1, 2])  # 12

# Slicing: rows 1-2, columns 2-3 (the result is an array)
print(c[1:3, 2:4])

# Slicing: rows 1-2, column 2 only (the result is an array)
print(c[1:3, 2])

# Step of 2: selects rows 1 and 3, columns 2-3
print(c[1:6:2, 2:4])

# Take index 2 along the last axis
print(c[:, 2])

# With many dimensions, writing one ':' per axis gets tedious;
# '...' stands for any number of leading or trailing axes
print(c[..., 2])

2. Array operation

1. Modify the array shape
insert image description here

import numpy as np

# Build a 1-D array of 8 consecutive integers, then view it as 4 rows x 2 columns.
a = np.arange(8)
print('原始数组:')  # label printed before the original array
print(a)
b = a.reshape(4, 2)
print('修改后的数组:')  # label printed before the reshaped array
print(b)

insert image description here

2. Matplotlib

1. Getting Started

import matplotlib.pyplot as plt
import numpy as np

x = np.linspace(-1, 1, 50)   # x: 50 evenly spaced points from -1 to 1 (x may be an array, list or tuple)
y = 2 * x + 1   # y as a function of x
plt.plot(x, y)   # draws the figure; plot can draw points and lines and control their style
plt.show()  # display the figure

insert image description here

2. Passing in data

1. x is the x-axis data, y is the y-axis data

import matplotlib.pyplot as plt

x = [3, 4, 5]  # a [list]
y = [2, 3, 2]  # x and y must contain the same number of elements N
plt.plot(x, y)
plt.show()

insert image description here

2. x and y can each be passed in as a (tuple), a [list], an np.array or a pd.Series

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

x=(3,4,5) # a (tuple)
y1=np.array([3,4,3]) # an np.array
y2=pd.Series([4,5,4]) # a pd.Series

plt.plot(x,y1)
plt.plot(y2)  # x may be omitted; it then defaults to the increasing sequence [0, 1, ..., N-1]
plt.show() # several plt.plot() calls may precede plt.show(); they are drawn on the same figure

insert image description here

3. Multiple sets of x, y can be passed in

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

x=(3,4,5)
y1=np.array([3,4,3])
y2=pd.Series([4,5,4])

plt.plot(x,y1,x,y2) # several x, y pairs in one call; here x can no longer be omitted
plt.show()

insert image description here
4. x or y can be passed in as a two-dimensional array

import matplotlib.pyplot as plt
import numpy as np

# Two 3x3 arrays: matching columns of x and y are paired up, one line per column.
lst1 = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
x = np.array(lst1)
lst2 = [[2, 3, 2], [3, 4, 3], [4, 5, 4]]
y = np.array(lst2)
print(x)
print(y)
plt.plot(x, y)
plt.show()

insert image description here
Blue: x1(0,3,6) y1(2,3,4)
Orange: x2(1,4,7) y2(3,4,5)
Green: x3(2,5,8) y3(2,3,4)

3. Graphics Control

1.plt.plot(x, y, "Format Control String")

import matplotlib.pyplot as plt
import numpy as np

lst1 = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
x = np.array(lst1)
lst2 = [[2, 3, 2], [3, 4, 3], [4, 5, 4]]
y = np.array(lst2)

plt.plot(x, y, "ob:")  # "b" = blue, "o" = circle marker, ":" = dotted line
plt.show()

insert image description here
2. "Format control string" can include up to three parts, "color", "point type", "line type"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

color = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
line_style = ['-', '--', '-.', ':']
dic1 = [[0, 1, 2], [3, 4, 5]]
x = pd.DataFrame(dic1)
dic2 = [[2, 3, 2], [3, 4, 3], [4, 5, 4], [5, 6, 5]]
y = pd.DataFrame(dic2)
# Loop over every "color" and "line style": 2 x-rows times 4 y-rows gives 8 lines,
# one per color, with the four line styles used in turn
for i in range(2):
    for j in range(4):
        plt.plot(x.loc[i], y.loc[j], color[i * 4 + j] + line_style[j])
plt.show()

insert image description here

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 24 entries so that each of the 6 x 4 plotted lines below gets one marker
marker = ['.', ',', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|', '_',
          '.', ',']
dic1 = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 13, 14], [15, 16, 17]]
x = pd.DataFrame(dic1)
dic2 = [[2, 3, 2.5], [3, 4, 3.5], [4, 5, 4.5], [5, 6, 5.5]]
y = pd.DataFrame(dic2)
# Loop over every "marker type"
for i in range(6):
    for j in range(4):
        plt.plot(x.loc[i], y.loc[j], "b" + marker[i * 4 + j] + ":")  # "b" = blue, ":" = dotted line
plt.show()

insert image description here
Color
"c" cyan
"r" red
"g" green
"b" blue
"w" white
"k" black
"y" yellow
"m" magenta

Line type
":" dotted line
"-." dash-dot line
"--" dashed line
"-" solid line

Point type
insert image description here
3.plt.plot(x, y, "format control string", keyword=parameter)

In addition to the "format control string", you can also append keyword=value arguments after it.

import matplotlib.pyplot as plt

y = [2, 3, 2]
# Blue line, line width 20, circle markers, marker size 50,
# marker fill red, marker edge width 6, marker edge grey
plt.plot(y, color="blue", linewidth=20, marker="o", markersize=50,
         markerfacecolor="red", markeredgewidth=6, markeredgecolor="grey")
plt.show()

insert image description here

4. Advanced

insert image description here
Line Chart
insert image description here
Histogram
insert image description here
Bar Chart
insert image description here
Scatter Chart
Method: plt.scatter(x,y)

5. Three-dimensional surface

import matplotlib.pyplot as plt
import numpy as np

# Create a figure with a single 3-D axes
fig = plt.figure()
ax = fig.add_subplot(projection="3d")

# Sample x and y from -5 up to 5 in steps of 0.25, then expand them into a 2-D grid
x = np.arange(-5, 5, 0.25)
y = np.arange(-5, 5, 0.25)
x, y = np.meshgrid(x, y)
# Height of the surface: distance of each grid point from the origin
z = np.sqrt(x ** 2 + y ** 2)

ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.set_zlabel("Z")
ax.set_title("3D surface plot")

# Draw the surface, colored with the coolwarm colormap
ax.plot_surface(x, y, z,
                       rstride=1,
                       cstride=1,
                       cmap=plt.cm.coolwarm,
                       linewidth=0,
                       antialiased=False)

plt.show()

insert image description here

3. Pandas

pandas is a NumPy-based tool created to solve data analysis tasks. Pandas incorporates a large number of libraries and some standard data models, providing the tools needed to efficiently manipulate large datasets. pandas provides a large number of functions and methods that allow us to process data quickly and easily.

Two different data structures are built on the basis of ndarray arrays (arrays in NumPy), namely Series (one-dimensional data structure) and DataFrame (two-dimensional data structure).

  • Series is a labeled one-dimensional array, where the label can be understood as an index, but this index is not limited to integers, it can also be a character type, such as a, b, c, etc.;
  • A DataFrame is a tabular data structure that has both row and column labels.

insert image description here

1.Series sequence

It is a structure similar to a one-dimensional array, consisting of a set of data values and a set of labels, with a one-to-one correspondence between labels and data values.

A Series can hold any data type, such as integers, strings, floating-point numbers or Python objects, and its labels default to integers starting from 0 and increasing sequentially.
insert image description here

insert image description here

import pandas as pd
import numpy as np

print(pd.Series([], dtype='float64'))  # 1. Empty Series

# Create from an array
data = np.array(['a', 'b', 'c', 'd', 'e'])
print(pd.Series(data))  # 2. Labels default to integers increasing from 0
print(pd.Series(data, index=[101, 102, 103, 104, 105]))  # 3. Custom labels

# Create from key-value pairs (a dict)
data_dict = {
    
    'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}
print(pd.Series(data_dict))  # 4. The keys become the labels, the values become the data
print(pd.Series(data_dict, index=['b', 'd', 'a', 'f']))  # 5. An index label with no matching value is filled with NaN (Not a Number)

insert image description here
insert image description here

import pandas as pd

s = pd.Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])

# Positional access: use .iloc. Plain s[int] on a Series whose index is not
# integer-based relies on a deprecated positional fallback of Series.__getitem__,
# so .iloc is the explicit, version-independent way to select by position.
print(s.iloc[0])  # 1. Single element
print(s.iloc[:3])  # 2. Slice
print(s.iloc[[1, 4, 3]])  # 3. Several non-contiguous elements
# Label access
print(s['a'])  # 4. Single element
print(s[:'d'])  # 5. Slice (a label slice includes its end label)
print(s[['a', 'd', 'e']])  # 6. Several non-contiguous elements

insert image description here
insert image description here
insert image description here
head() & tail(): viewing data

import pandas as pd
import numpy as np

# A Series of 8 random integers
s = pd.Series(np.random.randint(1, 100, 8))

print(s)

print(s.head())  # first 5 rows by default
print(s.head(3))  # first n rows, here n = 3

print(s.tail())  # last 5 rows by default
print(s.tail(3))  # last n rows, here n = 3

isnull() & notnull(): detecting missing values

import pandas as pd

s = pd.Series(['a', 'b', 'c', None])

print(pd.isnull(s))  # True where the value does not exist or is missing
print(pd.notnull(s))  # True where the value is not missing

2. DataFrame table structure

insert image description here
insert image description here
insert image description here

import pandas as pd

# 1. Create an empty DataFrame
print(pd.DataFrame())
# 2. Create a DataFrame from a single flat list
print(pd.DataFrame(['a', 'b', 'c', 'd']))
# 3. Create a DataFrame from a nested list, naming the columns explicitly
print(pd.DataFrame([['Alex', 10], ['Bob', 12], ['Clarke', 13]], columns=['name', 'age']))
# 4. Create from a dict of lists; the dict keys are used as the column names
print(pd.DataFrame({
    
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'], 'Age': [28, 34, 29, 42]}))
# 5. Create from a list of dicts; the dict keys are used as the column names
#    (the first dict has no 'c' key, so that cell is left missing)
print(pd.DataFrame([{
    
    'a': 1, 'b': 2}, {
    
    'a': 5, 'b': 10, 'c': 20}]))

insert image description here
insert image description here
insert image description here
insert image description here

Guess you like

Origin blog.csdn.net/m0_46692607/article/details/126728324