Disclaimer: This is an original post by the blogger, released under the CC 4.0 BY-SA license. If you reproduce it, please include a link to the original source and this statement.
1. Package the project as a jar and run it on Linux.
2. Start nginx. Its purpose here is to generate access logs; it can also provide load balancing and reverse proxying for future updates. The focus is the configuration file:
#user  nobody;
worker_processes  1;

#error_log  logs/error.log;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

#pid        logs/nginx.pid;

events {
    worker_connections  1024;
}

http {
    include       mime.types;
    default_type  application/octet-stream;

    log_format  main  '$remote_addr';

    #access_log  logs/access.log  main;

    sendfile        on;
    #tcp_nopush     on;

    #keepalive_timeout  0;
    keepalive_timeout  65;

    #gzip  on;

    upstream frame-tomcat {
        server hdp-3:8888;
        ## the upstream address that nginx forwards requests to
    }

    server {
        listen       80;
        server_name  hdp-1;
        ## the address nginx itself serves on

        #charset koi8-r;

        access_log  logs/log.frame.access.log  main;
        ## path and format of the generated access log file

        location / {
            # root   html;
            # index  index.html index.htm;
            proxy_pass http://frame-tomcat;
            ## proxy pass (forward requests to the upstream)
        }

        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }
    }

    server {
        listen       80;
        server_name  localhost;

        #charset koi8-r;

        #access_log  logs/host.access.log  main;

        location / {
            root   html;
            index  index.html index.htm;
        }

        #error_page  404  /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }

        # proxy the PHP scripts to Apache listening on 127.0.0.1:80
        #
        #location ~ \.php$ {
        #    proxy_pass   http://127.0.0.1;
        #}

        # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
        #
        #location ~ \.php$ {
        #    root           html;
        #    fastcgi_pass   127.0.0.1:9000;
        #    fastcgi_index  index.php;
        #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
        #    include        fastcgi_params;
        #}

        # deny access to .htaccess files, if Apache's document root
        # concurs with nginx's one
        #
        #location ~ /\.ht {
        #    deny  all;
        #}
    }

    # another virtual host using mix of IP-, name-, and port-based configuration
    #
    #server {
    #    listen       8000;
    #    listen       somename:8080;
    #    server_name  somename  alias  another.alias;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}

    # HTTPS server
    #
    #server {
    #    listen       443;
    #    server_name  localhost;

    #    ssl                  on;
    #    ssl_certificate      cert.pem;
    #    ssl_certificate_key  cert.key;

    #    ssl_session_timeout  5m;

    #    ssl_protocols  SSLv2 SSLv3 TLSv1;
    #    ssl_ciphers  HIGH:!aNULL:!MD5;
    #    ssl_prefer_server_ciphers   on;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}
}
3. Use Flume to collect the log file generated by nginx and sink it to Kafka. The focus is the configuration file:
a1.sources = source1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.source1.type = exec
## the source takes its data from the output of an executable command
a1.sources.source1.command = tail -F /usr/local/nginx/logs/log.frame.access.log
## the executable command: tail (follow) the contents of the access log file
# Describe the sink
## sink type: write the events to Kafka
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = animal
a1.sinks.k1.brokerList = hdp-2:9092, hdp-3:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.source1.channels = c1
a1.sinks.k1.channel = c1
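Before starting the Flume agent, the target topic (animal in this configuration) has to exist unless the brokers auto-create topics. Below is a minimal sketch of creating it from Java with Kafka's AdminClient; the partition count and replication factor are assumed values for illustration, not taken from the original setup.

import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.NewTopic;

public class CreateTopicDemo {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // brokers taken from the Flume sink's brokerList above
        props.put("bootstrap.servers", "hdp-2:9092,hdp-3:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // topic name matches a1.sinks.k1.topic; 1 partition and
            // replication factor 1 are assumptions for this sketch
            NewTopic topic = new NewTopic("animal", 1, (short) 1);
            admin.createTopics(Collections.singleton(topic)).all().get();
        }
    }
}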
4. Start Kafka (start ZooKeeper first). A consumer receives the data and writes it to a temporary local path (stored with a log4j rolling file appender). The log4j configuration and the consumer code follow:
### settings ###
#log4j.rootLogger=debug,stdout,genlog
log4j.rootLogger=logRollingFile,stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.Target=System.out
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%-5p] %d{yyyy-MM-dd HH:mm:ss,SSS} method:%l%n%m%n
###
log4j.logger.logRollingFile= DEBUG,test1
log4j.appender.test1 = org.apache.log4j.RollingFileAppender
log4j.appender.test1.layout = org.apache.log4j.PatternLayout
log4j.appender.test1.layout.ConversionPattern = %m%n
log4j.appender.test1.Threshold = DEBUG
log4j.appender.test1.ImmediateFlush = TRUE
log4j.appender.test1.Append = TRUE
log4j.appender.test1.File = f:/testlog/access.log
log4j.appender.test1.MaxFileSize = 10KB
log4j.appender.test1.MaxBackupIndex = 200
### log4j.appender.test1.Encoding = UTF-8
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.log4j.Logger;
public class ConsumerDemo {
    private static KafkaConsumer<String, String> consumer;
    private static Properties props;

    // static initializer block, runs before main
    static {
        props = new Properties();
        // Kafka broker address the consumer connects to
        props.put("bootstrap.servers", "hdp-1:9092");
        // key and value deserializers
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // consumer group
        props.put("group.id", "yangk");
    }

    /**
     * Fetch data from Kafka (Spring Boot also integrates Kafka)
     */
    private static void ConsumerMessage() {
        Logger logger = Logger.getLogger("logRollingFile");
        // allow automatic offset commits
        props.put("enable.auto.commit", true);
        consumer = new KafkaConsumer<String, String>(props);
        consumer.subscribe(Collections.singleton("first_kafka"));
        // poll for data in a loop; consumed messages are only removed after the configured
        // retention time, so an already-consumed message can be read again via its offset
        try {
            File file = new File("F:/xiu.txt");
            FileOutputStream fos = new FileOutputStream(file);
            while (true) {
                // the records read from Kafka in this poll
                ConsumerRecords<String, String> records = consumer.poll(100);
                for (ConsumerRecord<String, String> r : records) {
                    String a = "topic:" + r.topic() + ",offset:" + r.offset() + ",key:" + r.key() + ",value:" + r.value() + "\r\n";
                    // write the data to F:/xiu.txt
                    // fos.write(a.getBytes());
                    System.out.printf("topic = %s, offset = %s, key = %s, value = %s", r.topic(), r.offset(),
                            r.key(), r.value() + "\n");
                    logger.debug(r.value());
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            consumer.close();
        }
    }

    public static void main(String[] args) {
        // call the message-receiving method
        ConsumerMessage();
    }
}
5. Upload the collected file to HDFS and create an external table in Hive.
Table creation statement: create external table xiangmu1(ip string) row format delimited location '/usr/';
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;
import java.util.TimerTask;
public class UploadData extends TimerTask {
    public void run() {
        URI uri = null;
        try {
            uri = new URI("hdfs://hdp-1:9000");
            Configuration conf = new Configuration();
            conf.set("dfs.replication", "2");   // name, value: number of replicas
            conf.set("dfs.blocksize", "64m");   // block size
            String user = "root";
            FileSystem fs = FileSystem.get(uri, conf, user);
            Path src = new Path("F:/testlog/access.log");
            Path dst = new Path("/usr/a.txt");
            fs.copyFromLocalFile(src, dst);
            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
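UploadData extends TimerTask, but the scheduling driver is not shown above. A minimal sketch is below, assuming the upload should run periodically; the 10-minute interval is an assumed value, not from the original post.

import java.util.Timer;

public class UploadScheduler {
    public static void main(String[] args) {
        Timer timer = new Timer();
        // run UploadData immediately, then repeat every 10 minutes (interval is an assumption)
        timer.schedule(new UploadData(), 0, 10 * 60 * 1000L);
    }
}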
6. Analyze total PV: select count(*) from xiangmu1;
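If HiveServer2 is running, the same PV count can also be issued from Java over JDBC. A minimal sketch follows, assuming HiveServer2 listens on hdp-1:10000 and the hive-jdbc driver is on the classpath (both are assumptions).

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class PvCountDemo {
    public static void main(String[] args) throws Exception {
        // assumed HiveServer2 address; adjust to the actual host/port
        String url = "jdbc:hive2://hdp-1:10000/default";
        try (Connection conn = DriverManager.getConnection(url, "root", "");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery("select count(*) from xiangmu1")) {
            if (rs.next()) {
                System.out.println("total PV = " + rs.getLong(1));
            }
        }
    }
}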