用Nginx采集日志通过flume将日志文件保存到HDFS上

  1. 安装Tomcat

到官网下载apache-tomcat-7.0.69

打开eclipse->window->preferences->server->runtime environments

编写项目

  1. Nginx的介绍及其安装部署(所有操作以root用户执行)
  • 创建目录:
    mkdir /opt/modules/nginx

  • 解压nginx
    tar -zxvf tengine-2.1.0.tar.gz -C ./

  • 安装部署

  1. 下载依赖
yum install -y gcc openssl-devel pcre-devel zlib-devel
cd tengine-2.1.0
  1. 编译安装
    预编译:./configure
./configure --prefix=/opt/modules/nginx/tengine-2.1.0/ --error-log-path=/var/log/nginx/error.log --http-log-path=/var/log/nginx/access.log --pid-path=/var/run/nginx/nginx.pid  --lock-path=/var/lock/nginx.lock --with-http_ssl_module --with-http_flv_module --with-http_stub_status_module --with-http_gzip_static_module --http-client-body-temp-path=/var/tmp/nginx/client/ --http-proxy-temp-path=/var/tmp/nginx/proxy/ --http-fastcgi-temp-path=/var/tmp/nginx/fcgi/ --http-uwsgi-temp-path=/var/tmp/nginx/uwsgi --http-scgi-temp-path=/var/tmp/nginx/scgi --with-pcre

编译

make

安装

make install
  1. 启动服务
cd /opt/modules/nginx/tengine-2.1.0/
sbin/nginx
  1. 第一次启动会报错(临时目录 /var/tmp/nginx/client/ 不存在),需要手动创建该目录:
mkdir -p /var/tmp/nginx/client/
  1. 创建目录后重新启动即可:
sbin/nginx
  • nginx使用:
    查看帮助:
sbin/nginx -h
  • 关闭:
sbin/nginx -s stop

配置使用service命令管理nginx服务
将提供的nginx脚本放入/etc/init.d目录下

#!/bin/bash
#
# chkconfig: - 85 15
# description: nginx is a World Wide Web server. It is used to serve
# Load the init helper library (daemon, killproc and status, used below, are not defined in this file).
. /etc/rc.d/init.d/functions
 
# Load the system networking settings (presumably defines NETWORKING, tested below).
. /etc/sysconfig/network
 
# Exit quietly with status 0 when networking is disabled (NETWORKING=no).
[ "$NETWORKING" = "no" ] && exit 0
 
nginx="/opt/modules/nginx/tengine-2.1.0/sbin/nginx"
prog=$(basename $nginx)
 
NGINX_CONF_FILE="/opt/modules/nginx/tengine-2.1.0/conf/nginx.conf"
 
# Optional overrides from /etc/sysconfig/nginx are currently disabled:
#[ -f /etc/sysconfig/nginx ] && . /etc/sysconfig/nginx
 
# Marker file created by start() on success and removed by stop() on success.
lockfile=/var/lock/subsys/nginx
 
#make_dirs() {
#   # make required directories
#   user=`nginx -V 2>&1 | grep "configure arguments:" | sed 's/[^*]*--user=\([^ ]*\).*/\1/g' -`
#   options=`$nginx -V 2>&1 | grep 'configure arguments:'`
#   for opt in $options; do
#       if [ `echo $opt | grep '.*-temp-path'` ]; then
#           value=`echo $opt | cut -d "=" -f 2`
#           if [ ! -d "$value" ]; then
#               # echo "creating" $value
#               mkdir -p $value && chown -R $user $value
#           fi
#       fi
#   done
#}
 
# Start nginx with the configured configuration file.
# Globals: nginx, NGINX_CONF_FILE, prog, lockfile (read); retval (written)
# Exits:   5 if the nginx binary is missing or not executable,
#          6 if the configuration file is missing.
# Returns: the exit status of daemon; the lock file is touched only on success.
start() {
    # The expansions are quoted so that an empty or unset path fails the
    # check instead of collapsing to the always-true one-argument test `[ -x ]`.
    [ -x "$nginx" ] || exit 5
    [ -f "$NGINX_CONF_FILE" ] || exit 6
#    make_dirs
    echo -n $"Starting $prog: "
    daemon "$nginx" -c "$NGINX_CONF_FILE"
    retval=$?
    echo
    [ "$retval" -eq 0 ] && touch "$lockfile"
    return "$retval"
}
 
# Stop nginx by passing -QUIT to killproc; remove the lock file when that succeeds.
# Returns the exit status of killproc.
stop() {
    printf '%s' $"Stopping $prog: "
    killproc $prog -QUIT
    retval=$?
    printf '\n'
    if [ $retval -eq 0 ]; then
        rm -f $lockfile
    fi
    return $retval
}
 
# Validate the configuration first, then stop nginx, wait one second and start it.
# Returns configtest's status when validation fails, otherwise start's status.
restart() {
    if configtest; then
        stop
        sleep 1
        start
    else
        # In the else branch $? still holds configtest's exit status.
        return $?
    fi
}
 
# Validate the configuration and, if it is valid, signal nginx to reload it.
# Globals: nginx, prog (read); retval (written)
# Returns: configtest's status when validation fails, otherwise the exit
#          status of killproc.
reload() {
    configtest || return $?
    echo -n $"Reloading $prog: "
    # -HUP is nginx's graceful-reload signal.
    killproc "$nginx" -HUP
    retval=$?
    echo
    # Propagate killproc's status explicitly; falling off the end of the
    # function would report the status of the echo above (always 0) and
    # hide a failed reload from the caller.
    return "$retval"
}
 
# Handler for the "force-reload" action: simply performs a full restart.
force_reload() { restart; }
 
# Run the nginx binary in test mode (-t) against NGINX_CONF_FILE.
# Returns the exit status of that nginx invocation.
configtest() {
    $nginx -t -c $NGINX_CONF_FILE
    return $?
}
 
# Report the state of the nginx process via the external `status` helper.
# Returns whatever `status` returns.
rh_status() {
    status $prog
    return $?
}
 
# Quiet variant of rh_status: same exit status, stdout and stderr discarded.
rh_status_q() {
    rh_status &>/dev/null
}
 
# Dispatch on the action given as the first argument.
# Exit codes set here: 0 when start/stop/condrestart have nothing to do,
# 7 when reload is requested but nginx is not running, 2 for an unknown action.
case "$1" in
    start)
        # Already running: nothing to do.
        rh_status_q && exit 0
        "$1"
        ;;
    stop)
        # Not running: nothing to do.
        rh_status_q || exit 0
        "$1"
        ;;
    restart|configtest)
        "$1"
        ;;
    reload)
        rh_status_q || exit 7
        "$1"
        ;;
    force-reload)
        force_reload
        ;;
    status)
        rh_status
        ;;
    condrestart|try-restart)
        # Restart only when the service is currently running. The restart
        # call was missing, which made both actions silent no-ops.
        rh_status_q || exit 0
        restart
        ;;
    *)
        echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload|configtest}"
        exit 2
        ;;
esac

注意:

  • 脚本第14行(nginx=...):nginx可执行文件的路径,需按实际安装位置手动修改
  • 脚本第17行(NGINX_CONF_FILE=...):nginx配置文件的路径,需按实际安装位置手动修改
    修改权限
chmod 755 /etc/init.d/nginx

设置开机启动

chkconfig nginx on

nginx的配置文件

more /opt/modules/nginx/tengine-2.1.0/conf/nginx.conf
  • log_format:用于配置nginx记录哪些字段
  • access_log:定义请求访问的日志位置及格式
  • location:用于标记用户请求的资源信息

项目中的SDK

  • SDK的功能:用于监听用户的行为,将用户行为数据进行收集发送给nginx
  • SDK的设计:
    • 尽量减少对业务系统的侵入
    • 尽量收集多的字段,避免数据丢失
  • 本次项目中的SDK:
  • 监听事件的类型
    • launch事件:用户第一次打开网页,就会触发launch事件
    • pageView事件:用户每访问一个页面就会触发pageview事件
    • event事件:其他事件类型
    • chargeRequest事件:订单请求事件

根据业务需求配置nginx接收的日志

  • 修改日志记录格式
    • $remote_addr:客户端的IP地址
    • $msec:写入日志时的服务器时间,Unix时间戳格式(单位为秒,精确到毫秒)
    • $http_host:请求主机地址
    • $request_uri:客户端所发送过来的URI
log_format   user_log  '$remote_addr^A$msec^A$http_host^A$request_uri';

定义请求资源的日志记录

mkdir -p /opt/datas/nginx/user_log/
location =/hpsk.jpg{
	# MIME type used for the response
    default_type image/jpg;
	# File that requests to this location are logged to, and the log format (user_log) to use
    access_log  /opt/datas/nginx/user_log/access.log  user_log;
	# Directory the requested resource is served from
    root /opt/datas/nginx/html;
}

重启nginx服务,重新加载配置文件

service nginx restart

在浏览器地址栏中访问 http://主机名(或IP)/hpsk.jpg,例如:

       http://192.168.235.xx/hpsk.jpg
# Flume agent configuration (the flume-ng command shown after this file
# loads it as case/nginx-mem-part.properties).
# The configuration file needs to define the sources,
# the channels and the sinks.
# Sources, channels and sinks are defined per agent,
# in this case called 'a1'

a1.sources = s1
a1.channels = c1
a1.sinks = k1

# For each one of the sources, the type is defined
# The source takes its events from the output of a command
a1.sources.s1.type = exec
# The command follows the Nginx access log file
a1.sources.s1.command = tail -F  /opt/datas/nginx/user_log/access.log

# The channel can be defined as follows.
a1.sources.s1.channels = c1

# Each sink's type must be defined
# Write the events to HDFS
a1.sinks.k1.type = hdfs
# Target directory, partitioned by day and hour through the %Y%m%d / %H escapes.
# The original note says the directory must be created on HDFS beforehand —
# TODO confirm; the HDFS sink is documented to create missing directories.
a1.sinks.k1.hdfs.path =  /flume/nginx/user_log/part/daystr=%Y%m%d/hour=%H
# DataStream: events are written as a plain, uncompressed stream
# (not wrapped in a SequenceFile)
a1.sinks.k1.hdfs.fileType = DataStream
# 0 disables time-based file rolling
a1.sinks.k1.hdfs.rollInterval = 0
# Roll the current file once it reaches this size, in bytes
a1.sinks.k1.hdfs.rollSize = 131072000
# 0 disables rolling based on the number of events
a1.sinks.k1.hdfs.rollCount = 0
# Resolve the time escapes in hdfs.path from the agent's local time instead of
# a timestamp header on the event
a1.sinks.k1.hdfs.useLocalTimeStamp = true

# Specify the channel the sink should use
a1.sinks.k1.channel = c1

# Each channel's type is defined.
# Channel type: memory here — fast, but not durable (buffered events are lost
# if the agent stops)
a1.channels.c1.type = memory

# Other config values specific to each type of channel(sink or source)
# can be defined as well
# In this case, it specifies the capacity of the memory channel
# Maximum number of events the channel can hold
a1.channels.c1.capacity = 1000
# Maximum number of events handled per transaction
a1.channels.c1.transactionCapacity = 100 
cd /opt/modules/flume-1.6.0-cdh5.7.6-bin/

bin/flume-ng agent --conf conf/ --name a1 --conf-file case/nginx-mem-part.properties -Dflume.root.logger=INFO,console

运行配置文件

格式:
bin/flume-ng agent --conf|-c flume_conf_dir --name|-n agent_name --conf-file|-f  file_path

--conf|-c:指定flume的配置文件目录
--conf-file|-f:指定agent配置文件(.properties)的路径
--name|-n:指定要运行的agent名称,必须与配置文件中的agent名称一致(本例为a1)
-Dflume.root.logger=INFO,console:将flume的日志级别设为INFO,并输出到控制台

猜你喜欢

转载自blog.csdn.net/lz6363/article/details/82470343