Table of Contents
One, pandas module
pandas is a BSD-licensed open-source library that provides high-performance, easy-to-use data structures and data analysis tools for the Python programming language.
pandas module: works with Excel / JSON / SQL / INI / CSV (configuration) files.
To process Excel documents with pandas, two additional plug-ins need to be installed; the data that pd reads from an Excel file is of the DataFrame type.
import numpy as np
import pandas as pd

np.random.seed(10)

# Six timestamps at monthly spacing starting from 2019-01-01
# (periods = how many timestamps, freq = the spacing unit).
index = pd.date_range('2019-01-01', periods=6, freq='M')
print(index)

columns = ['c1', 'c2', 'c3', 'c4']
print(columns)

val = np.random.randn(6, 4)
print(val)

# Build the frame from the row index, the column labels and the values.
df = pd.DataFrame(index=index, columns=columns, data=val)
print(df)

# Save the file.
# One shared path, so the read below opens the file that was just written.
# NOTE(review): writing the legacy .xls format needs an xls writer engine that
# recent pandas releases may no longer ship -- confirm whether this should
# become an .xlsx file.
excel_path = 'date_c1.xls'
df.to_excel(excel_path)

# Read the file back; index_col=[0] uses column 0 as the row index.
df = pd.read_excel(excel_path, index_col=[0])
print(df)

# Print the row and column labels, then use them to pick out rows for
# further processing.
print(df.index)    # row index
print(df.columns)  # column index
print(df.values)   # all values

# loc[] selects by index label (note the square brackets).
print(df.loc['2019-01-31'])               # the data stored under 2019-01-31
print(df.loc['2019-01-31':'2019-05-31'])  # everything between the two dates
print(df)
Two, matplotlib module
Matplotlib is a Python 2D plotting library which produces publication-quality figures in a variety of hardcopy formats and interactive environments across platforms. Matplotlib can be used in Python scripts, the Python and IPython shells, the Jupyter notebook, web application servers, and four graphical user interface toolkits.
Matplotlib tries to make easy things easy and hard things possible. With just a few lines of code you can generate plots, histograms, power spectra, bar charts, error charts, scatter plots and the like.
For simple plotting, the pyplot module provides a MATLAB-like interface, particularly when combined with IPython. Advanced users have full control of line styles, font properties, axes properties and so on, either through an object-oriented interface or through a set of functions familiar to MATLAB users.
matplotlib module: used to draw various charts.
1. Bar Chart
from matplotlib import pyplot as plt  # conventional alias
from matplotlib.font_manager import FontProperties  # used to select a custom font

# Font used for the Chinese labels. A raw string keeps the backslashes of the
# Windows path literal instead of treating \W, \F and \s as escape sequences.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
plt.style.use('ggplot')  # plot style / background

clas = ['3班', '4班', '5班', '6班']
stuents = [50, 45, 55, 60]
clas_index = range(len(clas))  # one x position per class

plt.bar(clas_index, stuents, color='darkblue')  # draw the bars
plt.xlabel('学生', fontproperties=font)
# fontsize / fontweight change how the y-axis label is rendered.
plt.ylabel('学生人数', fontproperties=font, fontsize=20, fontweight=25)
# Put the class names at the bar positions on the x axis.
plt.xticks(clas_index, clas, fontproperties=font)
plt.show()  # display the figure
2. Histogram
import numpy as np
from matplotlib import pyplot as plt  # conventional alias
from matplotlib.font_manager import FontProperties  # used to select a custom font

# Raw string: the backslashes of the Windows path must stay literal rather
# than being parsed as (invalid) escape sequences.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
plt.style.use('ggplot')

# Two independent samples of 10000 normally distributed values.
x1 = np.random.randn(10000)
x2 = np.random.randn(10000)

fig = plt.figure()              # create a figure (canvas)
ax1 = fig.add_subplot(1, 2, 1)  # 1x2 grid, first panel
ax2 = fig.add_subplot(1, 2, 2)  # 1x2 grid, second panel

ax1.hist(x1, bins=50, color='darkblue')
ax2.hist(x2, bins=50, color='y')

fig.suptitle('两个正态分布', fontproperties=font, fontsize=20)  # figure-level title
ax1.set_title('x1的正态分布', fontproperties=font)  # per-panel titles
ax2.set_title('x2的正态分布', fontproperties=font)
plt.show()
3. Line Chart
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties

# Raw string: the backslashes of the Windows path must stay literal rather
# than being parsed as (invalid) escape sequences.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
plt.style.use('ggplot')

np.random.seed(10)
# Four series, each the running sum of 40 normally distributed steps.
x1 = np.random.randn(40).cumsum()
x2 = np.random.randn(40).cumsum()
x3 = np.random.randn(40).cumsum()
x4 = np.random.randn(40).cumsum()

# One line per series, each with its own colour, line style and marker.
plt.plot(x1, c='r', linestyle='-', marker='o', label='红圆线')
plt.plot(x2, color='y', linestyle='--', marker='*', label='黄虚线')
plt.plot(x3, color='b', linestyle='-.', marker='s', label='蓝方线')
plt.plot(x4, color='black', linestyle=':', marker='s', label='黑方线')

plt.legend(loc='best', prop=font)  # show the labels given above
plt.show()
4. Scatter Plot + Line Chart
import numpy as np
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties

# Raw string: the backslashes of the Windows path must stay literal rather
# than being parsed as (invalid) escape sequences.
font = FontProperties(fname=r'C:\Windows\Fonts\simsun.ttc')
plt.style.use('ggplot')

fig = plt.figure()              # create a new figure (canvas)
ax1 = fig.add_subplot(1, 2, 1)  # 1x2 grid, first panel
ax2 = fig.add_subplot(1, 2, 2)  # 1x2 grid, second panel

x = np.arange(20)  # x values 0..19
y = x ** 2
x2 = np.arange(20)
y2 = x2

# First panel: both data sets as scatter points.
ax1.scatter(x, y, c='r', label='红')
ax1.scatter(x2, y2, c='b', label='蓝')
# Second panel: the same data drawn as lines.
ax2.plot(x, y)
ax2.plot(x2, y2)

# The text keyword is the lower-case `fontproperties`, as used by the
# set_title calls below.
fig.suptitle('两张图', fontproperties=font, fontsize=15)
ax1.set_title('散点图', fontproperties=font)
ax2.set_title('折线图', fontproperties=font)
ax1.legend(prop=font)  # show the labels of the scatter series
plt.show()
Three, numpy
NumPy is the fundamental package for scientific computing with Python. It contains, among other things:
- A powerful N-dimensional array object
- Sophisticated (broadcasting) functions
- Tools for integrating C/C++ and Fortran code
- Useful linear algebra, Fourier transform and random number capabilities
Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data types can be defined. This allows NumPy to integrate seamlessly and quickly with a wide variety of databases.
NumPy is licensed under the BSD license, which allows it to be reused with few restrictions.
numpy: a module for analysing data; it can perform matrix calculations.
Element-wise multiplication of two arrays
# Plain Python lists: the element-wise product has to be built pair by pair.
lt1 = [1, 2, 3]
lt2 = [4, 5, 6]
lt = [a * b for a, b in zip(lt1, lt2)]
print(lt)

import numpy as np

# numpy arrays: `*` already multiplies element by element.
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])
print(arr1 * arr2)
# [4, 10, 18]
# [ 4 10 18]  (an array is not the same thing as a list)
# One-dimensional array
# One-dimensional array.
arr3 = np.array([1, 2, 3])

# Two-dimensional array: two rows of three elements.
block_2x3 = [[1, 2, 3], [4, 5, 6]]
arr4 = np.array(block_2x3)

# Three-dimensional array: the same 2x3 block repeated twice.
arr5 = np.array([block_2x3, block_2x3])

print(arr3)  # [1 2 3]
print(arr4)
# [[1 2 3]
#  [4 5 6]]
print(arr5)
# [[[1 2 3]
#   [4 5 6]]
#  [[1 2 3]
#   [4 5 6]]]
Only two-dimensional arrays are discussed here.
numpy array attributes
T: the transpose of the array
# A 2x3 array, printed together with its transpose.
arr = np.array([[1, 2, 3], [4, 5, 6]])
transposed = arr.T
print(arr, '\n', transposed)
# The transpose is printed as:
# [[1 4]
#  [2 5]
#  [3 6]]
dtype: the data type of the array's elements. The array itself is an object handled by the Python interpreter, whereas int32 / float64 are data types that belong to numpy.
print(arr.dtype)  # e.g. int32 -- NOTE(review): the default integer dtype is platform-dependent and is often int64
size: the number of elements in the array
print(arr.size)  # 6 -- total number of elements
ndim: the number of dimensions of the array
print(arr.ndim)  # 2 -- number of dimensions
shape: the size of each dimension of the array (as a tuple)
print(arr.shape[0])  # 2 -- number of rows
print(arr.shape[1])  # 3 -- number of columns
astype: data type conversion
# Convert the elements to float64; the result of astype is bound back to `arr`.
arr = arr.astype(np.float64)
print(arr)
# [[1. 2. 3.]
#  [4. 5. 6.]]  (the fractional part of these floats is zero, so it is not printed)
Slicing numpy arrays
# List slicing for comparison: everything from index 1 onwards.
lt = [1, 23, 4]
print(lt[1:])

arr = np.array([[11, 2, 3], [4, 5, 6]])

# Array slicing resembles list slicing but is addressed as [row, column],
# much like coordinates; a colon stands for "the whole row/column", so
# [:, 0] picks column 0.
first_column = arr[:, 0]
print(first_column)

# Boolean (mask) indexing: select every value greater than 4 as a 1-D array.
greater_than_four = arr > 4
print(arr[greater_than_four])  # [11  5  6]
Assignment
# Slice assignment replaces the contents of the list in place.
lt = [1, 2, 3]
lt[:] = [0] * 3
print(lt)

arr = np.array([[1, 2, 3], [4, 5, 6]])

# A single element is addressed by its [row, column] coordinates.
arr[0, 0] = 0
print(arr)
# [[0 2 3]
#  [4 5 6]]

# A whole column (or row) can be replaced in one assignment.
arr[:, 0] = [2, 5]
print(arr)
# [[2 2 3]
#  [5 5 6]]
Merging arrays: arrays can be joined side by side (horizontally) or stacked vertically, provided the corresponding number of rows or columns matches.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
# This array mixes numbers and strings; the outputs below show every
# element as a string.
arr2 = np.array([[7, 8, 9], ['a', 's', 'g']])

# The arrays to merge are passed together as one tuple.
pair = (arr1, arr2)

print(np.hstack(pair))               # horizontal: arr2 is appended to the right
print(np.vstack(pair))               # vertical: arr2 is appended underneath
print(np.concatenate(pair, axis=1))  # axis=0 (the default) is vertical, axis=1 horizontal
# hstack:
# [['1' '2' '3' '7' '8' '9']
#  ['4' '5' '6' 'a' 's' 'g']]
# vstack:
# [['1' '2' '3']
#  ['4' '5' '6']
#  ['7' '8' '9']
#  ['a' 's' 'g']]
# concatenate with axis=1:
# [['1' '2' '3' '7' '8' '9']
#  ['4' '5' '6' 'a' 's' 'g']]
Creating numpy arrays with functions
# 2x3 array filled with ones (float elements).
print(np.ones((2, 3)))
# [[1. 1. 1.]
#  [1. 1. 1.]]

# 2x3 array filled with zeros.
print(np.zeros((2, 3)))
# [[0. 0. 0.]
#  [0. 0. 0.]]

# 3x3 identity matrix; the size is a plain integer, not a tuple.
print(np.eye(3))
# [[1. 0. 0.]
#  [0. 1. 0.]
#  [0. 0. 1.]]

# 10 evenly spaced values from 1 to 100, both ends included.
print(np.linspace(1, 100, 10))
# [  1.  12.  23.  34.  45.  56.  67.  78.  89. 100.]

# 1-D array of the integers 2..9 (the stop value is excluded).
print(np.arange(2, 10))
# [2 3 4 5 6 7 8 9]

# 1-D array from 1 up to 19 in steps of 2.
print(np.arange(1, 20, 2))
# [ 1  3  5  7  9 11 13 15 17 19]

# reshape((rows, columns)) rearranges the 12 elements into a 3x4 layout.
arr1 = np.zeros((1, 12))
print(arr1.reshape((3, 4)))
# [[0. 0. 0. 0.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]]
numpy array operations
The array operations here are essentially the matrix operations of mathematics and follow the usual rules of matrix arithmetic.
# Scalar multiplication: every element of the 3x4 array of ones becomes 4.
ones_3x4 = np.ones((3, 4))
arr1 = ones_3x4 * 4
print(arr1)
# [[4. 4. 4. 4.]
#  [4. 4. 4. 4.]
#  [4. 4. 4. 4.]]

# Trigonometric functions such as sine are applied to every element.
print(np.sin(arr1))
# Matrix multiplication (dot product) of a 2x3 and a 3x2 array.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[1, 2], [4, 5], [8, 7]])
arr3 = np.dot(arr1, arr2)
print(arr3)
# [[33 33]
#  [72 75]]

# Matrix inverse of the 2x2 product.
print(np.linalg.inv(arr3))
# [[ 0.75757576 -0.33333333]
#  [-0.72727273  0.33333333]]

# numpy's mathematical and statistical helpers.
first_row = arr3[0, :]
print(np.sum(first_row))  # sum of the first row

# Alternative kept for reference: seed the global generator so the random
# numbers are reproducible, then draw a 3x4 array.
# np.random.seed(1)
# print(np.random.random((3, 4)))
print(np.random.rand(3, 4))  # 3x4 uniformly distributed random numbers

# A separately seeded generator; per the original note it yields the same
# values as calling seed(1) on the global generator.
s = np.random.RandomState(1)
print(s.random((3, 4)))

arr = np.array([
    [1, 2, 3, 4],
    [4, 5, 6, 7],
    [7, 8, 9, 10],
    [11, 23, 45, 67],
])
# Shuffles in place, treating each row as one unit: the rows change order
# but their contents stay together.
np.random.shuffle(arr)
print(arr)  # the same four rows, in a random order

print(np.random.choice([1, 2, 3], 1))     # pick one value at random
print(np.random.randint(1, 100, (3, 4)))  # 3x4 random integers from 1 up to (not including) 100