发现机器学习就根本停不下来
今天用RNN算法来爽爽僵尸网络宿主预测
首先我们下载好数据,然后打开我们可爱的熊猫
# Numerical / tabular tooling.
import numpy as np
import pandas as pd

# Plotting stack used by the histogram section further down.
import matplotlib.pyplot as plt
import seaborn as sns

from subprocess import check_output

# Read the CSV into a DataFrame. The path is a local Windows location;
# adjust it to wherever network.csv lives on your machine.
csv_path = 'F:\\machine_learning\\network.csv'
df = pd.read_csv(csv_path)

# In a notebook this renders the first two rows; in a plain script the
# expression result is simply discarded.
df.head(2)

# Explicitly print the default preview (first five rows).
print(df.head())
下面继续处理数据:
按每天的数据包总量来分
# Turn the raw 'date' column into real datetime values so the .dt accessor
# can be used below.
df['date'] = pd.to_datetime(df['date'])

# Collapse to one row per (date, l_ipn) pair by summing the remaining
# columns; as_index=False keeps the grouping keys as ordinary columns.
group_keys = ['date', 'l_ipn']
df = df.groupby(group_keys, as_index=False).sum()

# Calendar features derived from the date: day of the year and day of the
# week (pandas numbers Monday as 0).
dates = df['date']
df['yday'] = dates.dt.dayofyear
df['wday'] = dates.dt.dayofweek
对每个ip进行分类:
# Split the aggregated frame into one sub-frame per 'l_ipn' value 0..9.
# The individual names ip0..ip9 are kept because later code refers to them.
ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7, ip8, ip9 = (
    df[df['l_ipn'] == ip_index] for ip_index in range(10)
)

# Largest 'f' value observed in each sub-frame, kept under max0..max9.
max0, max1, max2, max3, max4, max5, max6, max7, max8, max9 = (
    np.max(ip_frame['f'])
    for ip_frame in (ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7, ip8, ip9)
)

# Notebook-style preview of the first two rows for l_ipn == 0.
ip0.head(2)
然后我们输出一下ip0的头
很棒,我们已经成功按ip分类了
然后我们按每天的数据包总量来对每个ip进行图形化计数(直方图)
首先举个例子:
# Worked example on the l_ipn == 0 frame: bin its 'f' values into 10
# equal-width bins. `count` holds the number of rows per bin and
# `division` holds the 11 bin edges.
ip0_flows = ip0['f']
count, division = np.histogram(ip0_flows, bins=10)

# Notebook-style display of the bin edges.
division
它会输出这些
array([ 68., 810., 1552., 2294., 3036., 3778., 4520., 5262., 6004., 6746., 7488.])
然后我们开始对每个ip下手
# Draw a 5x2 grid of bar charts, one per local IP (0..9), each showing a
# 10-bin histogram of that IP's 'f' values.
f, axarray = plt.subplots(5, 2, figsize=(15, 20))

# axarray.flat walks the grid row by row: [0,0], [0,1], [1,0], ... so the
# n-th frame lands on the same cell the hand-written version used.
ip_frames = (ip0, ip1, ip2, ip3, ip4, ip5, ip6, ip7, ip8, ip9)
for ip_index, (ip_frame, ax) in enumerate(zip(ip_frames, axarray.flat)):
    count, division = np.histogram(ip_frame['f'], bins=10)

    # np.histogram returns one more edge than there are bins; label each
    # bar with the left edge of its bin.
    bars = sns.barplot(x=division[:-1], y=count, ax=ax)
    if ip_index == 0:
        # Keep the handle of the first plot under its original name `g`.
        g = bars

    # Title goes on the same axis that was just drawn. The copy-pasted
    # version put the IP 4 title on axarray[2,1], leaving the IP 4 panel
    # untitled.
    ax.set_title("Local IP {} Flow".format(ip_index))