所使用的csv文件数据:
duration,protocol_type,service,flag,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,num_failed_logins,logged_in,num_compromised,root_shell,su_attempted,num_root,num_file_creations,num_shells,num_access_files,num_outbound_cmds,is_hot_login,is_guest_login,count,srv_count,serror_rate,srv_serror_rate,rerror_rate,srv_rerror_rate,same_srv_rate,diff_srv_rate,srv_diff_host_rate,dst_host_count,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,
0,tcp,http,SF,215,45076,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,normal.
0,tcp,http,SF,162,4528,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,1,0,0,1,1,1,0,1,0,0,0,0,0,normal.
0,tcp,http,SF,236,1228,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,2,2,1,0,0.5,0,0,0,0,0,normal.
0,tcp,http,SF,233,2032,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,1,0,0,3,3,1,0,0.33,0,0,0,0,0,normal.
0,tcp,http,SF,239,486,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,1,0,0,4,4,1,0,0.25,0,0,0,0,0,normal.
0,tcp,http,SF,238,1282,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,1,0,0,5,5,1,0,0.2,0,0,0,0,0,normal.
0,tcp,http,SF,235,1337,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0,0,0,0,1,0,0,6,6,1,0,0.17,0,0,0,0,0,normal.
0,tcp,http,SF,234,1364,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0,0,0,0,1,0,0,7,7,1,0,0.14,0,0,0,0,0,normal.
0,tcp,http,SF,239,1295,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0,0,0,0,1,0,0,8,8,1,0,0.12,0,0,0,0,0,normal.
0,tcp,http,SF,181,5450,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0,0,0,0,1,0,0,9,9,1,0,0.11,0,0,0,0,0,normal.
0,tcp,http,SF,184,124,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,10,10,1,0,0.1,0,0,0,0,0,normal.
0,tcp,http,SF,185,9020,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,1,0,0,11,11,1,0,0.09,0,0,0,0,0,normal.
0,tcp,http,SF,239,1295,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,0,12,12,1,0,0.08,0,0,0,0,0,normal.
0,tcp,http,SF,181,5450,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,1,0,0,13,13,1,0,0.08,0,0,0,0,0,normal.
0,tcp,http,SF,236,1228,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0,1,0,0,14,14,1,0,0.07,0,0,0,0,0,normal.
0,tcp,http,SF,233,2032,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,4,0,0,0,0,1,0,0,15,15,1,0,0.07,0,0,0,0,0,normal.
0,tcp,http,SF,238,1282,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,5,5,0,0,0,0,1,0,0,16,16,1,0,0.06,0,0,0,0,0,normal.
0,tcp,http,SF,235,1337,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6,6,0,0,0,0,1,0,0,17,17,1,0,0.06,0,0,0,0,0,normal.
0,tcp,http,SF,234,1364,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7,0,0,0,0,1,0,0,18,18,1,0,0.06,0,0,0,0,0,normal.
0,tcp,http,SF,239,486,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,8,0,0,0,0,1,0,0,19,19,1,0,0.05,0,0,0,0,0,normal.
1.创建数组与向量与处理
import numpy
# Build a one-dimensional array (a vector) and a two-dimensional array
# (a matrix) from plain Python sequences, then show both.
vector = numpy.array((5, 7, 8, 9))
matrix_rows = [
    [12, 21, 55],
    [99, 65, 75],
    [25, 74, 36],
]
matrix = numpy.array(matrix_rows)
for item in (
    vector,
    "===============================================================================================",
    matrix,
):
    print(item)
运行结果:
[5 7 8 9] =============================================================================================== [[12 21 55] [99 65 75] [25 74 36]]
import numpy as np
# .shape reports the length of every axis: a 1-tuple for a vector,
# (rows, columns) for a matrix.
numbers = np.array((1, 5, 8, 6, 9))
matrix = np.array(
    [
        [12, 21, 55],
        [99, 65, 75],
        [25, 74, 36],
        [27, 74, 66],
    ]
)
print(
    numbers,
    "===============================================================================================",
    numbers.shape,  # number of elements in the vector
    matrix.shape,   # (row count, column count) of the matrix
    sep="\n",
)
运行结果:
[1 5 8 6 9] =============================================================================================== (5,) (4, 3)
2.csv文件读取与数据处理
# Load the CSV file into an array of strings:
#   delimiter=","  -> fields are separated by commas
#   dtype=str      -> every field is read as text
#   skip_header=1  -> the first line of the file is skipped
# The path is a raw string so the backslash is kept literally instead of
# relying on "\s" being an unrecognised escape sequence (which newer Python
# versions warn about).
cup99 = numpy.genfromtxt(r"F:\study.csv", delimiter=",", dtype=str, skip_header=1)
print(cup99)
输出结果:
[['0' 'tcp' 'http' 'SF' '215' '45076' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '162' '4528' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '2' '2' '0' '0' '0' '0' '1' '0' '0' '1' '1' '1' '0' '1' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '236' '1228' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0' '1' '0' '0' '2' '2' '1' '0' '0.5' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '233' '2032' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '2' '2' '0' '0' '0' '0' '1' '0' '0' '3' '3' '1' '0' '0.33' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '239' '486' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '3' '3' '0' '0' '0' '0' '1' '0' '0' '4' '4' '1' '0' '0.25' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '238' '1282' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '4' '4' '0' '0' '0' '0' '1' '0' '0' '5' '5' '1' '0' '0.2' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '235' '1337' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '5' '5' '0' '0' '0' '0' '1' '0' '0' '6' '6' '1' '0' '0.17' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '234' '1364' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '6' '6' '0' '0' '0' '0' '1' '0' '0' '7' '7' '1' '0' '0.14' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '239' '1295' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '7' '7' '0' '0' '0' '0' '1' '0' '0' '8' '8' '1' '0' '0.12' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '181' '5450' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '8' '8' '0' '0' '0' '0' '1' '0' '0' '9' '9' '1' '0' '0.11' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '184' '124' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '1' 
'0' '0' '0' '0' '1' '0' '0' '10' '10' '1' '0' '0.1' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '185' '9020' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '2' '2' '0' '0' '0' '0' '1' '0' '0' '11' '11' '1' '0' '0.09' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '239' '1295' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0' '1' '0' '0' '12' '12' '1' '0' '0.08' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '181' '5450' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '2' '2' '0' '0' '0' '0' '1' '0' '0' '13' '13' '1' '0' '0.08' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '236' '1228' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '3' '3' '0' '0' '0' '0' '1' '0' '0' '14' '14' '1' '0' '0.07' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '233' '2032' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '4' '4' '0' '0' '0' '0' '1' '0' '0' '15' '15' '1' '0' '0.07' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '238' '1282' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '5' '5' '0' '0' '0' '0' '1' '0' '0' '16' '16' '1' '0' '0.06' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '235' '1337' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '6' '6' '0' '0' '0' '0' '1' '0' '0' '17' '17' '1' '0' '0.06' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '234' '1364' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '7' '7' '0' '0' '0' '0' '1' '0' '0' '18' '18' '1' '0' '0.06' '0' '0' '0' '0' '0' 'normal.'] ['0' 'tcp' 'http' 'SF' '239' '486' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '8' '8' '0' '0' '0' '0' '1' '0' '0' '19' '19' '1' '0' '0.05' '0' '0' '0' '0' '0' 'normal.']]
筛选列表中的元素
# `cup99` is the string array loaded with numpy.genfromtxt in the previous
# snippet. A 2-D array is indexed as [row, column], both zero-based, so
# [0, 5] is row 0 / column index 5 and [2, 5] is row 2 / column index 5.
cup99_sel1, cup99_sel2 = (cup99[row, 5] for row in (0, 2))
print(cup99_sel1, cup99_sel2, sep="\n")
输出结果:
45076 1228
3.筛选与处理向量与数组中的数据
向量中筛选
vector = numpy.array((5, 10, 15, 87, 66))
# Slice from index 2 (inclusive) up to index -1, i.e. the last element,
# which is excluded.
middle = vector[slice(2, -1)]
print(middle)
输出结果:
[15 87]
数组中筛选
# 2-D selection uses [rows, columns]; a bare ":" keeps every index on that axis.
matrix = numpy.array([[51, 8, 12], [87, 12, 89], [78, 65, 19]])
last_column = matrix[:, -1]   # every row, last column
inner_block = matrix[:2, 1:2]  # rows 0 and 1, column 1 only (stop indices are excluded)
print(
    last_column,
    "-----------------------------------------------------------------------------------",
    inner_block,
    sep="\n",
)
输出结果:
[12 89 19] ----------------------------------------------------------------------------------- [[ 8] [12]]
数组与向量进行判断:
# Boolean masks: comparing an array with a scalar gives an array of
# True/False values that can then be used to read or overwrite the
# matching elements.
divider = "-----------------------------------------------------------------------------------"
matrix = numpy.array([[51, 8, 12], [87, 12, 89], [78, 65, 19]])

# Independent copy of column 1 with an extra leading axis -> shape (1, 3, 1).
matrix_eight = matrix[numpy.newaxis, :, 1:2].copy()

# NOTE: despite its name, equal_to_eight marks the elements equal to 12.
equal_to_eight = numpy.equal(matrix_eight, 12)
# True where the element is 8 or 16.
equal_to_one = numpy.logical_or(matrix_eight == 8, matrix_eight == 16)

print(
    matrix_eight,
    divider,
    equal_to_eight,
    divider,
    matrix_eight[equal_to_eight],  # values picked out by the first mask
    divider,
    matrix_eight[equal_to_one],    # values picked out by the second mask
    divider,
    sep="\n",
)

# Overwrite every element selected by the second mask.
matrix_eight[equal_to_one] = 9999
print(matrix_eight)
输出结果:
[[[ 8] [12] [65]]] ----------------------------------------------------------------------------------- [[[False] [ True] [False]]] ----------------------------------------------------------------------------------- [12] ----------------------------------------------------------------------------------- [8] ----------------------------------------------------------------------------------- [[[9999] [ 12] [ 65]]]
更改数据中的type
# astype() returns a copy of the array converted to the requested dtype.
vector = numpy.array(list("123"))       # three one-character strings
print(vector.dtype, vector, sep="\n")
vector = vector.astype(numpy.float64)   # strings -> floating point numbers
print(vector.dtype, vector, sep="\n")
输出结果:
<U1 ['1' '2' '3'] float64
[1. 2. 3.]
查找最大值最小值与求和:
vector = numpy.array([18, 98, 58, 11, 12, 33])
print(vector.min())  # smallest element
print(vector.max())  # largest element
print("-----------------------------------------------------------------------------------")
admire = numpy.array([
[51,8,12],
[87,12,89],
[78,65,19]
])
# Sum the matrix defined just above (the snippet previously summed a
# variable named `matrix` left over from an earlier snippet).
row_sums = admire.sum(axis=1)  # axis=1 adds along each row: one total per row
col_sums = admire.sum(axis=0)  # axis=0 adds down each column: one total per column
print(col_sums)
输出结果:
11 98 ----------------------------------------------------------------------------------- [216 85 120]
创建数组或向量:
import numpy as np
# arange(n) yields the integers 0 .. n-1; the stop value itself is excluded.
first_fifteen = np.arange(15)
print(
    first_fifteen,
    "-----------------------------------------------------------------------------------",
    sep="\n",
)
a = np.reshape(first_fifteen, (3, 5))  # the same 15 values laid out as 3 rows x 5 columns
print(a)
a.shape                      # (rows, columns)
np.arange(55, 105, step=6)   # start at 55, step by 6, stop before 105
a.ndim                       # number of axes
a.size                       # total number of elements
输出结果:
Out[7]:(3, 5)
Out[7]:array([ 55, 61, 67, 73, 79, 85, 91, 97, 103])
Out[7]:2
Out[7]:15
4.初始化矩阵
# A matrix of 3 rows and 4 columns in which every element is zero.
np.zeros(shape=(3, 4))
输出结果:
array([[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]])
# A three-dimensional array of shape (2, 3, 4) filled with ones, stored as int32.
np.ones(shape=(2, 3, 4), dtype=np.int32)
输出结果:
array([[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]], [[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]])
# A 3x3 matrix of random floats drawn between 0 and 1.
np.random.random(size=(3, 3))
输出结果:
array([[0.82787562, 0.16647778, 0.175306 ], [0.23084511, 0.73509222, 0.42879961], [0.74894781, 0.25325923, 0.59442485]])
from numpy import pi
# 100 evenly spaced sample points covering the interval from 0 to 2*pi.
np.linspace(start=0, stop=2 * pi, num=100)
输出结果:
array([0. , 0.06346652, 0.12693304, 0.19039955, 0.25386607, 0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866, 0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126, 0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385, 1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644, 1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903, 1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162, 2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421, 2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 , 2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939, 3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199, 3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458, 3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717, 4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976, 4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652235, 4.75998887, 4.82345539, 4.88692191, 4.95038842, 5.01385494, 5.07732146, 5.14078798, 5.2042545 , 5.26772102, 5.33118753, 5.39465405, 5.45812057, 5.52158709, 5.58505361, 5.64852012, 5.71198664, 5.77545316, 5.83891968, 5.9023862 , 5.96585272, 6.02931923, 6.09278575, 6.15625227, 6.21971879, 6.28318531])
5.矩阵的加减乘与判断
# Arithmetic and comparison operators on arrays work element by element.
divider = "-----------------------------------------------------------------------------------"
a = np.array((20, 30, 40, 50))
b = np.arange(4)

c = np.subtract(a, b)  # element-wise difference of two equally sized arrays
print(c, divider, sep="\n")

c = np.subtract(c, 1)  # the scalar 1 is subtracted from every element
print(c, divider, sep="\n")

# NOTE: ** returns a new array with each element squared; the result is not
# stored here, so `b` printed below is unchanged.
b ** 2
print(b, divider, sep="\n")

print(np.less(a, 35))  # element-wise test: True where the element is below 35
输出结果:
[20 29 38 47] ----------------------------------------------------------------------------------- [19 28 37 46] ----------------------------------------------------------------------------------- [0 1 2 3] ----------------------------------------------------------------------------------- [ True True False False]
# Matrix product of two 3x3 matrices holding the values 0..8.
# For 2-D arrays a @ b, a.dot(b) and np.dot(a, b) all compute this product.
a = np.reshape(np.arange(9), (3, 3))
b = a.copy()
c = a @ b
c
输出结果:
array([[ 15, 18, 21], [ 42, 54, 66], [ 69, 90, 111]])
6.numpy中常用的函数
# floor / ravel / reshape / transpose on a random 3x4 matrix.
divider = "-----------------------------------------------------------------------------------"
a = np.random.random(size=(3, 4))
b = np.floor(10 * a)  # round every scaled value down to a whole number
print(
    a,
    divider,
    b,
    divider,
    b.ravel(),  # the matrix flattened into a 1-D vector
    sep="\n",
)
b = b.reshape(6, 2)  # the same 12 values arranged as 6 rows x 2 columns
c = np.transpose(b)  # transpose: rows become columns -> 2 rows x 6 columns
print(divider, c, sep="\n")
输出结果:
[[0.9312249 0.85143506 0.45119295 0.06834031] [0.14747723 0.09853935 0.79532103 0.21991663] [0.30461187 0.42756043 0.94496506 0.42769361]] ----------------------------------------------------------------------------------- [[9. 8. 4. 0.] [1. 0. 7. 2.] [3. 4. 9. 4.]] ----------------------------------------------------------------------------------- [9. 8. 4. 0. 1. 0. 7. 2. 3. 4. 9. 4.] ----------------------------------------------------------------------------------- [[9. 4. 1. 7. 3. 9.] [8. 0. 0. 2. 4. 4.]]
矩阵拼接
# Two random 2x2 matrices of whole numbers, joined along each axis.
divider = "-----------------------------------------------------------------------------------"
A, B = (np.floor(10 * np.random.random((2, 2))) for _ in range(2))
stacked_rows = np.vstack((A, B))     # B placed underneath A -> 4 rows x 2 columns
stacked_columns = np.hstack((A, B))  # B placed to the right of A -> 2 rows x 4 columns
print(
    A,
    divider,
    B,
    divider,
    stacked_rows,
    divider,
    stacked_columns,
    sep="\n",
)
输出结果:
[[7. 4.] [7. 3.]] ----------------------------------------------------------------------------------- [[9. 7.] [1. 2.]] ----------------------------------------------------------------------------------- [[7. 4.] [7. 3.] [9. 7.] [1. 2.]] ----------------------------------------------------------------------------------- [[7. 4. 9. 7.] [7. 3. 1. 2.]]
矩阵切分
# Splitting matrices into pieces.
divider = "-----------------------------------------------------------------------------------"
a = np.floor(np.random.random((2, 12)) * 10)
print(a, divider, sep="\n")

equal_parts = np.hsplit(a, 3)            # cut the columns into 3 equal pieces
print(equal_parts, divider, sep="\n")

cut_at_indices = np.hsplit(a, (3, 4))    # cut before column 3 and before column 4
print(cut_at_indices, divider, sep="\n")

b = np.floor(np.random.random((12, 2)) * 10)
row_parts = np.vsplit(b, 3)              # cut the rows into 3 equal pieces
print(row_parts)
输出结果:
[[2. 9. 3. 1. 6. 4. 0. 8. 2. 7. 1. 8.] [8. 8. 7. 3. 7. 1. 0. 1. 7. 5. 1. 1.]] ----------------------------------------------------------------------------------- [array([[2., 9., 3., 1.], [8., 8., 7., 3.]]), array([[6., 4., 0., 8.], [7., 1., 0., 1.]]), array([[2., 7., 1., 8.], [7., 5., 1., 1.]])] ----------------------------------------------------------------------------------- [array([[2., 9., 3.], [8., 8., 7.]]), array([[1.], [3.]]), array([[6., 4., 0., 8., 2., 7., 1., 8.], [7., 1., 0., 1., 7., 5., 1., 1.]])] ----------------------------------------------------------------------------------- [array([[1., 3.], [6., 2.], [0., 8.], [0., 8.]]), array([[2., 9.], [7., 5.], [0., 6.], [9., 0.]]), array([[3., 8.], [6., 2.], [6., 0.], [8., 7.]])]
查找最大值
import numpy as np
# argmax returns the index of the largest value along the chosen axis:
# axis=0 searches down each column, axis=1 searches along each row.
angles = np.arange(20).reshape(5, 4)
data = np.sin(angles)
print(data)
ind = np.argmax(data, axis=0)
ind
输出结果:
矩阵的复制:
# tile repeats an array: reps=(4, 2) stacks it 4 times vertically and
# 2 times horizontally.
a = np.arange(start=0, stop=40, step=10)
b = np.tile(a, reps=(4, 2))
print(a, b, sep="\n")
输出结果:
[ 0 10 20 30] [[ 0 10 20 30 0 10 20 30] [ 0 10 20 30 0 10 20 30] [ 0 10 20 30 0 10 20 30] [ 0 10 20 30 0 10 20 30]]