logstash处理HDFS与Hive日志

1.建立虚拟机共享文件夹

打开VMware->编辑虚拟机设置->选项->共享文件夹->添加->D:\workspace\share

在linux中:

mkdir /mnt/hgfs
vmhgfs-fuse .host:/ /mnt/hgfs #如果不设置自动挂载每次打开虚拟机都要执行这条语句

将hdfs-audit.log与hive日志放入windows的share中

2.处理hdfs-audit.log日志

2.1 新建配置文件/config/hdfs.conf
# hdfs.conf — tail the HDFS audit log, split each entry into key=value fields,
# and insert write-type operations (delete/create/mkdirs/setOwner/...) into
# the MySQL table log_hdfs.
input {
	file {
		path => "/mnt/hgfs/share/hdfs-audit.log"
		start_position => "beginning"   # also read pre-existing lines on first run
	}
}
filter {
	# Tokenize the audit line. HDFS audit timestamps look like
	# "2020-10-19 14:22:11,549" (ISO8601-style), which %{DATESTAMP} alone does
	# not match; accept TIMESTAMP_ISO8601 first and fall back to DATESTAMP so
	# legacy/other formats still parse. Each remaining token is a key=value pair.
	grok {
        match => {
			"message" => "(?<data_time>%{TIMESTAMP_ISO8601}|%{DATESTAMP})%{SPACE}*%{WORD:level}%{SPACE}*%{NOTSPACE}%{SPACE}*%{NOTSPACE:allowed}%{SPACE}*%{NOTSPACE:ugi}%{SPACE}*%{NOTSPACE}%{SPACE}*%{NOTSPACE:ip}%{SPACE}*%{NOTSPACE:cmd}%{SPACE}*%{NOTSPACE:src}%{SPACE}*%{NOTSPACE:dst}%{SPACE}*%{NOTSPACE:perm}%{SPACE}*%{NOTSPACE:proto}"
        }
    }
	# Each captured token is "key=value"; split on "=" and keep the value part.
	# add_field runs after split within a mutate block, so %{[field][1]} is
	# the text to the right of the "=".
	mutate {
		split => ["allowed","="]
		add_field =>   {"allowed_value" => "%{[allowed][1]}"}
		split => ["ugi","="]
		add_field =>   {"ugi_value" => "%{[ugi][1]}"}
		split => ["ip","="]
		add_field =>   {"ip_value" => "%{[ip][1]}"}
		split => ["cmd","="]
		add_field =>   {"cmd_value" => "%{[cmd][1]}"}
		split => ["src","="]
		add_field =>   {"src_value" => "%{[src][1]}"}
		split => ["dst","="]
		add_field =>   {"dst_value" => "%{[dst][1]}"}
		split => ["perm","="]
		add_field =>   {"perm_value" => "%{[perm][1]}"}
		split => ["proto","="]
		add_field =>   {"proto_value" => "%{[proto][1]}"}
	}
	# Rename the *_value fields back over the originals and drop the raw line.
	mutate {
		rename => ["allowed_value", "allowed" ]
		rename => ["ugi_value", "ugi" ]
		rename => ["ip_value", "ip" ]
		rename => ["cmd_value", "cmd" ]
		rename => ["src_value", "src" ]
		rename => ["dst_value", "dst" ]
		rename => ["perm_value", "perm" ]
		rename => ["proto_value", "proto" ]
		remove_field => ["message"]
	}
	# "ip" still carries a leading "/" (e.g. "/10.0.0.1"); extract the bare IP.
	grok {
		match => {
			"ip" => "%{IP:client}"
		}
	}
	mutate {
		rename => ["client", "ip" ]
	}
}
output {
	stdout{}
	# Persist only mutating operations; reads are too high-volume to store.
	if [cmd] == "delete" or [cmd] == "create" or [cmd] == "mkdirs" or [cmd] == "setOwner" or [cmd] == "setPermission" or [cmd] == "setStoragePolicy" {
		jdbc {
			driver_jar_path => "/var/local/mysql-connector-java-8.0.13.jar"
			# Connector/J 8.x driver class; "com.mysql.jdbc.Driver" is the
			# deprecated 5.x name and only works via a warning-emitting alias.
			driver_class => "com.mysql.cj.jdbc.Driver"
			# NOTE(review): credentials are embedded in plain text here — move
			# them to a secrets store / environment variable for production.
			connection_string => "jdbc:mysql://10.0.77.136:3306/logstash?user=root&password=123456&serverTimezone=GMT%2B8"
			statement => [ "insert into log_hdfs (TIME,level,allowed,ugi,IP,cmd,src,dst,perm,proto) values (?,?,?,?,?,?,?,?,?,?)","%{data_time}","%{level}","%{allowed}","%{ugi}","%{ip}","%{cmd}","%{src}","%{dst}","%{perm}","%{proto}" ]			
		}
	}
}
2.2 log_hdfs建表
-- Destination table for the hdfs.conf jdbc output.
-- Column names must stay exactly as the INSERT in hdfs.conf spells them.
CREATE TABLE IF NOT EXISTS log_hdfs(
    ID INT NOT NULL AUTO_INCREMENT,
    TIME VARCHAR(25) NOT NULL,       -- audit timestamp, stored as text
    level VARCHAR(20) NOT NULL,      -- log level (e.g. INFO)
    allowed VARCHAR(20) NOT NULL,    -- "true"/"false" from allowed=
    ugi VARCHAR(100) NOT NULL,       -- user/group info; Kerberos principals
                                     -- like user/host@REALM exceed 20 chars
    IP VARCHAR(45) NOT NULL,         -- 45 chars fits IPv6 (incl. IPv4-mapped)
    cmd VARCHAR(50) NOT NULL,        -- HDFS operation (delete, mkdirs, ...)
    src VARCHAR(200),                -- source path (may be "null")
    dst VARCHAR(200),                -- destination path (may be "null")
    perm VARCHAR(50),                -- permission string
    proto VARCHAR(20),               -- protocol (rpc/webhdfs)
    PRIMARY KEY (ID)
);
2.3 清空表
truncate table log_hdfs;
2.4 下载jdbc输出
./bin/logstash-plugin install logstash-output-jdbc
2.5 启动logstash
./bin/logstash -f ./config/hdfs.conf --path.data=/root/logstash

3. 处理hive日志

3.1 新建配置文件/config/hive.conf
# hive.conf — tail HiveServer2 (Ranger-style JSON) audit logs, parse each
# line as JSON, and insert the fields into the MySQL table log_hive.
input {
	file {
		path => "/mnt/hgfs/share/hiveServer2/*.log"
		start_position => "beginning"   # also read pre-existing lines on first run
	}
}
filter {
	# Each log line is a JSON object; promote its keys to top-level fields
	# and drop the raw line plus fields we do not persist.
	json {
		source => "message"
		remove_field => ["message"]
		remove_field => ["id"]
		remove_field => ["additional_info"]
	}        
}
output {
	stdout{}
	jdbc {
			driver_jar_path => "/var/local/mysql-connector-java-8.0.13.jar"
			# Connector/J 8.x driver class; "com.mysql.jdbc.Driver" is the
			# deprecated 5.x name and only works via a warning-emitting alias.
			driver_class => "com.mysql.cj.jdbc.Driver"
			# NOTE(review): credentials are embedded in plain text here — move
			# them to a secrets store / environment variable for production.
			connection_string => "jdbc:mysql://10.0.77.136:3306/logstash?user=root&password=123456&serverTimezone=GMT%2B8"
			statement => [ "insert into log_hive (repoType,repo,reqUser,evtTime,access,resource,resType,action,result,agent,policy,enforcer,sess,cliType,cliIP,reqData,agentHost,logType,seq_num,event_count,event_dur_ms,cluster_name,policy_version) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)","%{repoType}","%{repo}","%{reqUser}","%{evtTime}","%{access}","%{resource}","%{resType}","%{action}","%{result}","%{agent}","%{policy}","%{enforcer}","%{sess}","%{cliType}","%{cliIP}","%{reqData}","%{agentHost}","%{logType}","%{seq_num}","%{event_count}","%{event_dur_ms}","%{cluster_name}","%{policy_version}" ]
	}
}
3.2 log_hive建表
-- Destination table for the hive.conf jdbc output.
-- Column names must stay exactly as the INSERT in hive.conf spells them.
CREATE TABLE IF NOT EXISTS log_hive(
    ID INT NOT NULL AUTO_INCREMENT,
    repoType VARCHAR(20),        -- Ranger repository type
    repo VARCHAR(25),            -- Ranger repository name
    reqUser VARCHAR(20),         -- requesting user
	evtTime VARCHAR(50),         -- event timestamp, stored as text
	access VARCHAR(20),          -- access type (SELECT, USE, ...)
	resource text,               -- db/table/column path; can be long
	resType VARCHAR(20),
	action VARCHAR(20),
	result VARCHAR(20),          -- allowed/denied result code
	agent VARCHAR(20),
	policy VARCHAR(20),          -- matching Ranger policy id
	enforcer VARCHAR(50),
	sess VARCHAR(100),           -- session id
	cliType VARCHAR(50),         -- client type (HIVESERVER2, ...)
	cliIP VARCHAR(45),           -- 45 chars fits IPv6 (incl. IPv4-mapped)
	reqData text,                -- full query text; can be long
	agentHost VARCHAR(50),
	logType VARCHAR(50),
	seq_num VARCHAR(20),
	event_count VARCHAR(20),
	event_dur_ms VARCHAR(20),
	cluster_name VARCHAR(20),
	policy_version VARCHAR(20),
    PRIMARY KEY (ID)
);
3.3 修改logstash运行内存
vim config/jvm.options
3.4 启动logstash
./bin/logstash -f ./config/hive.conf --path.data=/root/logstash/hive

猜你喜欢

转载自blog.csdn.net/weixin_44224087/article/details/109157704