Logstash processes HDFS and Hive logs

1. Create a virtual machine shared folder

Open VMware -> Edit virtual machine settings -> Options -> Shared Folders -> Add, and share D:\workspace\share.

In the Linux guest:

mkdir /mnt/hgfs
vmhgfs-fuse .host:/ /mnt/hgfs    # if auto-mount is not configured, this command must be run every time the VM is started
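
To avoid re-running the mount command on every boot, the share can be mounted automatically from /etc/fstab. A minimal sketch, assuming open-vm-tools with vmhgfs-fuse is installed:

# /etc/fstab entry: mount the VMware host share at boot
.host:/    /mnt/hgfs    fuse.vmhgfs-fuse    allow_other,defaults    0    0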

Copy hdfs-audit.log and the Hive logs into the Windows shared folder.
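
The files should then be visible from the guest:

ls /mnt/hgfs/share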

2. Process hdfs-audit.log

2.1 Create a new configuration file ./config/hdfs.conf
input {
	file {
		path => "/mnt/hgfs/share/hdfs-audit.log"
		start_position => "beginning"	# only applies the first time the file is seen; the read position is then tracked in sincedb
	}
}
filter {
	# Split each audit line into whitespace-separated tokens; most fields still look like "key=value" at this point
	grok {
		match => {
			"message" => "%{DATESTAMP:data_time}%{SPACE}*%{WORD:level}%{SPACE}*%{NOTSPACE}%{SPACE}*%{NOTSPACE:allowed}%{SPACE}*%{NOTSPACE:ugi}%{SPACE}*%{NOTSPACE}%{SPACE}*%{NOTSPACE:ip}%{SPACE}*%{NOTSPACE:cmd}%{SPACE}*%{NOTSPACE:src}%{SPACE}*%{NOTSPACE:dst}%{SPACE}*%{NOTSPACE:perm}%{SPACE}*%{NOTSPACE:proto}"
		}
	}
	# Each token is "key=value": split on "=" and keep the value half
	mutate {
		split => ["allowed","="]
		add_field =>   {"allowed_value" => "%{[allowed][1]}"}
		split => ["ugi","="]
		add_field =>   {"ugi_value" => "%{[ugi][1]}"}
		split => ["ip","="]
		add_field =>   {"ip_value" => "%{[ip][1]}"}
		split => ["cmd","="]
		add_field =>   {"cmd_value" => "%{[cmd][1]}"}
		split => ["src","="]
		add_field =>   {"src_value" => "%{[src][1]}"}
		split => ["dst","="]
		add_field =>   {"dst_value" => "%{[dst][1]}"}
		split => ["perm","="]
		add_field =>   {"perm_value" => "%{[perm][1]}"}
		split => ["proto","="]
		add_field =>   {"proto_value" => "%{[proto][1]}"}
	}
	# Replace the original "key=value" fields with the bare values
	mutate {
		rename => ["allowed_value", "allowed" ]
		rename => ["ugi_value", "ugi" ]
		rename => ["ip_value", "ip" ]
		rename => ["cmd_value", "cmd" ]
		rename => ["src_value", "src" ]
		rename => ["dst_value", "dst" ]
		rename => ["perm_value", "perm" ]
		rename => ["proto_value", "proto" ]
		remove_field => ["message"]
	}
	# The ip value still has a leading "/" (e.g. "/10.0.77.1"); extract just the address
	grok {
		match => {
			"ip" => "%{IP:client}"
		}
	}
	mutate {
		rename => ["client", "ip" ]
	}
}
output {
	stdout{}
	# Persist only write-type operations to MySQL
	if [cmd] == "delete" or [cmd] == "create" or [cmd] == "mkdirs" or [cmd] == "setOwner" or [cmd] == "setPermission" or [cmd] == "setStoragePolicy" {
		jdbc {
			driver_jar_path => "/var/local/mysql-connector-java-8.0.13.jar"
			driver_class => "com.mysql.cj.jdbc.Driver"	# Connector/J 8.x driver class; com.mysql.jdbc.Driver is deprecated in 8.0
			connection_string => "jdbc:mysql://10.0.77.136:3306/logstash?user=root&password=123456&serverTimezone=GMT%2B8"
			statement => [ "insert into log_hdfs (TIME,level,allowed,ugi,IP,cmd,src,dst,perm,proto) values (?,?,?,?,?,?,?,?,?,?)","%{data_time}","%{level}","%{allowed}","%{ugi}","%{ip}","%{cmd}","%{src}","%{dst}","%{perm}","%{proto}" ]			
		}
	}
}
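
For reference, a typical hdfs-audit.log line looks like the following (a made-up example; actual values depend on the cluster). The grok pattern splits it into key=value tokens, and the mutate filters then reduce each field to its bare value:

2020-10-19 10:23:45,678 INFO FSNamesystem.audit: allowed=true	ugi=root (auth:SIMPLE)	ip=/10.0.77.1	cmd=mkdirs	src=/user/hive/warehouse/test	dst=null	perm=root:supergroup:rwxr-xr-x	proto=rpc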
2.2 Create the log_hdfs table
CREATE TABLE log_hdfs(
    ID INT NOT NULL AUTO_INCREMENT,
    TIME VARCHAR(25) NOT NULL,
    level VARCHAR(20) NOT NULL,
    allowed VARCHAR(20) NOT NULL,
    ugi VARCHAR(20) NOT NULL,
    IP VARCHAR(20) NOT NULL,
    cmd VARCHAR(50) NOT NULL,
    src VARCHAR(200),
    dst VARCHAR(200),
    perm VARCHAR(50),
    proto VARCHAR(20),
    PRIMARY KEY (ID)
);
2.3 Clear the table
truncate table log_hdfs;
2.4 Install the logstash-output-jdbc plugin
./bin/logstash-plugin install logstash-output-jdbc
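
The installation can be verified by listing the installed plugins:

./bin/logstash-plugin list | grep jdbc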
2.5 Start Logstash
./bin/logstash -f ./config/hdfs.conf --path.data=/root/logstash

--path.data gives this instance its own data directory; every concurrently running Logstash instance needs a distinct one.
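
Once events start flowing, the captured operations can be spot-checked in MySQL with an illustrative query such as:

SELECT cmd, COUNT(*) AS cnt FROM log_hdfs GROUP BY cmd;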

3. Process the Hive logs

3.1 Create a new configuration file ./config/hive.conf
input {
	file {
		path => "/mnt/hgfs/share/hiveServer2/*.log"	# pick up every HiveServer2 audit log in the folder
		start_position => "beginning"
	}
}
filter {
	# Each line is a self-contained JSON audit record; parse it into top-level fields
	json {
		source => "message"
		remove_field => ["message"]
		remove_field => ["id"]
		remove_field => ["additional_info"]
	}
}
output {
	stdout{}
	jdbc {
		driver_jar_path => "/var/local/mysql-connector-java-8.0.13.jar"
		driver_class => "com.mysql.cj.jdbc.Driver"	# Connector/J 8.x driver class
		connection_string => "jdbc:mysql://10.0.77.136:3306/logstash?user=root&password=123456&serverTimezone=GMT%2B8"
		statement => [ "insert into log_hive (repoType,repo,reqUser,evtTime,access,resource,resType,action,result,agent,policy,enforcer,sess,cliType,cliIP,reqData,agentHost,logType,seq_num,event_count,event_dur_ms,cluster_name,policy_version) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)","%{repoType}","%{repo}","%{reqUser}","%{evtTime}","%{access}","%{resource}","%{resType}","%{action}","%{result}","%{agent}","%{policy}","%{enforcer}","%{sess}","%{cliType}","%{cliIP}","%{reqData}","%{agentHost}","%{logType}","%{seq_num}","%{event_count}","%{event_dur_ms}","%{cluster_name}","%{policy_version}" ]
	}
}
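
For reference, each audit line is a single JSON object along these lines (a made-up record; the field names follow the Apache Ranger audit format, which is what the insert statement above expects):

{"repoType":3,"repo":"hivedev","reqUser":"root","evtTime":"2020-10-19 10:23:45.678","access":"SELECT","resource":"default/test","resType":"@table","action":"select","result":1,"agent":"hiveServer2","policy":6,"enforcer":"ranger-acl","sess":"4d8f0f80-1a2b","cliType":"HIVESERVER2","cliIP":"10.0.77.1","reqData":"select * from test","agentHost":"node1","logType":"RangerAudit","seq_num":1,"event_count":1,"event_dur_ms":12,"cluster_name":"cluster1","policy_version":1}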
3.2 Create the log_hive table
CREATE TABLE log_hive(
    ID INT NOT NULL AUTO_INCREMENT,
    repoType VARCHAR(20),
    repo VARCHAR(25),
    reqUser VARCHAR(20),
    evtTime VARCHAR(50),
    access VARCHAR(20),
    resource text,
    resType VARCHAR(20),
    action VARCHAR(20),
    result VARCHAR(20),
    agent VARCHAR(20),
    policy VARCHAR(20),
    enforcer VARCHAR(50),
    sess VARCHAR(100),
    cliType VARCHAR(50),
    cliIP VARCHAR(20),
    reqData text,
    agentHost VARCHAR(50),
    logType VARCHAR(50),
    seq_num VARCHAR(20),
    event_count VARCHAR(20),
    event_dur_ms VARCHAR(20),
    cluster_name VARCHAR(20),
    policy_version VARCHAR(20),
    PRIMARY KEY (ID)
);
3.3 Adjust the Logstash JVM heap size
vim config/jvm.options
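
Heap size is controlled by the standard JVM flags in that file. For example, to cap each instance at 512 MB (an illustrative value; the default is 1 GB):

-Xms512m
-Xmx512m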
3.4 Start Logstash
./bin/logstash -f ./config/hive.conf --path.data=/root/logstash/hive

Note the separate --path.data, which lets this instance run alongside the one from step 2.5.
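
As before, the inserted rows can be spot-checked in MySQL with an illustrative query:

SELECT reqUser, access, resource, evtTime FROM log_hive ORDER BY ID DESC LIMIT 10;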
