后面写一个分析,
文件先贴出来。
#! /bin/sh
###file_ver=2.0.3
PATH=$PATH:.

#monitor the application
#create by leonlaili,2006-12-6
#
# Flow (see "Main" at the bottom): load common.sh, verify the calling user,
# run the "monitor" checks, run the "resolve" actions if anything failed, and
# post the failed processes / ports to the report URL.
#
# Names used here but not defined in this file: run_config, report, and the
# variables user, app_name, vip, ip_type, ip_inner, ip_outer, install_path,
# runing_file. NOTE(review): presumably all of them are provided by
# common.sh -- confirm.
#
# The shebang is /bin/sh, so the code avoids bash-only syntax such as
# `for (( ... ))`. `local` is not POSIX, but this script already relied on it.

####### Custom variables begin  ########
##todo: add custom variables here

#get script path
dir_pre=$(dirname "$(which "$0")")

####### Custom variables end    ########

# Source common.sh from the script directory when it exists.
load_lib()
{
    common_file=$dir_pre/common.sh
    if [ -f "$common_file" ]; then
        . "$common_file"
    fi
}

# Abort with exit status 1 unless the current user equals $user.
check_user()
{
    if [ "$user" != "$(whoami)" ]; then
        echo "Only $user can execute this script"
        exit 1
    fi
}

# Print help information (currently a placeholder that prints nothing).
print_help()
{
    ##todo: output help information here
    # echo ....
    return
}

# Validate script parameters; prints help and exits 1 when $ok is not "true".
check_params()
{
    ok="true"
    ##todo: add addition parameters checking statement here...

    if [ "$ok" != "true" ]; then
        echo "Some of the parameters are invalid. "
        print_help
        exit 1
    fi
}

# Look up the allowed process-count range of an application.
# Arguments: $1 - application name; a trailing ":min,max" is ignored, so the
#                 value check_process receives can be passed through as-is.
# Globals:   app_name (read)  - space separated "name" / "name:min,max" items
#            numbers, num1, num2 (written) - num1/num2 default to
#            1 / 999999999 when the entry gives no value.
get_app_num()
{
    local wanted entry

    wanted=${1%%:*}
    numbers=""
    # $app_name is split on whitespace on purpose: one entry per word.
    for entry in $app_name; do
        case "$entry" in
            "$wanted":*)
                # keep only the second ':'-separated field, i.e. "min,max"
                numbers=${entry#*:}
                numbers=${numbers%%:*}
                break
                ;;
        esac
    done

    num1=${numbers%%,*}
    case "$numbers" in
        *,*)
            num2=${numbers#*,}
            num2=${num2%%,*}
            ;;
        *)
            num2=""
            ;;
    esac

    if [ "${num1}" = "" ]; then
        num1=1
    fi
    if [ "${num2}" = "" ]; then
        num2=999999999
    fi
}

# Probe host $1, TCP port $2 up to $3 times, sleeping one second after each
# failed attempt. Uses the netcat binary selected in $nc_cmd.
# Returns 0 as soon as one probe succeeds, 1 otherwise.
_probe_port_retry()
{
    local attempt=0

    while [ "$attempt" -lt "$3" ]; do
        if "$nc_cmd" -zn -w4 "$1" "$2"; then
            return 0
        fi
        sleep 1
        attempt=$((attempt + 1))
    done
    return 1
}

#check port
# Arguments: $1 - address to probe, $2 - TCP port
# Globals:   vip (read), nc_cmd (written), err_port (appended on failure)
# Returns:   0 when the port answers, 1 otherwise.
check_port()
{
    nc_cmd="/usr/bin/nc"
    if [ ! -f "$nc_cmd" ]; then
        nc_cmd="/usr/bin/netcat"
    fi

    if "$nc_cmd" -zn -w4 "$1" "$2"; then
        return 0
    fi

    # first probe failed: retry on the same address
    if _probe_port_retry "$1" "$2" 5; then
        return 0
    fi

    #check VIP again
    if [ "$vip" != "" ]; then
        if _probe_port_retry "$vip" "$2" 5; then
            return 0
        fi
    fi

    # Record the port that was actually probed ($2) instead of depending on
    # a loop variable named "p" in the caller.
    err_port="$err_port $2"
    return 1
}

#check process
# Arguments: $1 - application entry ("name" or "name:min,max")
# Globals:   app, num (written), err_app (appended on failure)
# Returns:   0 when the number of processes reported by `ps -C` lies within
#            [num1, num2], 1 otherwise.
check_process()
{
    get_app_num "$1"
    app=${1%%:*}
    # drop the ps header line, then count what is left
    num=$(ps -C "$app" | sed -e "1d" | wc -l)
    if [ "$num" -lt "$num1" ] || [ "$num" -gt "$num2" ]; then
        err_app="$err_app $app"
        return 1
    fi
    return 0
}

#check if application is ok
# Does nothing (returns 0) when the file named by $runing_file is missing.
# Otherwise selects bind_ip from ip_type, clears err_app / err_port and runs
# the "monitor" configuration.
check_app()
{
    if [ ! -f "$runing_file" ]; then
        return 0
    fi

    case "$ip_type" in
        0) bind_ip=$ip_inner ;;
        1) bind_ip=$ip_outer ;;
        2) bind_ip="0.0.0.0" ;;
        3) bind_ip=$vip ;;
        4) bind_ip=127.0.0.1 ;;
    esac

    ##todo: add application checking statement here

    err_app=""
    err_port=""
    run_config "monitor"
}

#resolve the problems of application
resolve_app()
{
    # send the alarm message
    report "Monitor: restart [process:${err_app}][port:${err_port}]"

    ##todo: add custom statement here
    run_config "resolve"
    return
}

#report monitor result infomation
# Arguments: $1 - type ('app' or 'port' in this script), $2 - comma separated
#            element list, $3 - action
# Globals:   ip_inner, install_path (read); report_ip, url_head (written)
# The HTTP response is not used, so it is discarded via /dev/null rather
# than written to a fixed, predictable file name under /tmp.
# Always returns 0: reporting is best effort.
rpt_info()
{
    local rtype="$1"
    local elem="$2"
    local action="$3"

    report_ip=172.16.211.50
    url_head="http://$report_ip/pkg/monitor_rpt.php"

    wget -T 10 --connect-timeout=5 -O /dev/null \
        "${url_head}?ip=${ip_inner}&install_path=${install_path}&type=${rtype}&elem=${elem}&action=${action}" \
        > /dev/null 2>&1
    return 0
}

###### Main Begin ########
if [ "$1" = "--help" ]; then
    print_help
    exit 0
fi

load_lib
check_user
check_params
check_app

if [ "$err_app" != "" ] || [ "$err_port" != "" ]; then
    resolve_app
fi

# err_app / err_port are space separated with a leading blank; turn them
# into comma separated lists without leading or trailing commas.
if [ "$err_app" != "" ]; then
    err_app_list=$(echo "$err_app" | sed -e 's/ /,/g' -e 's/^,//' -e 's/,$//')
    rpt_info 'app' "$err_app_list" "restart"
fi

if [ "$err_port" != "" ]; then
    err_port_list=$(echo "$err_port" | sed -e 's/ /,/g' -e 's/^,//' -e 's/,$//')
    rpt_info 'port' "$err_port_list" "restart"
fi
###### Main End   ########
framework_ver=2.0.7
<!--Package descriptor: basic settings plus the shell fragments for link, start, stop, monitor and resolve. NOTE(review): presumably parsed by the framework's common.sh / run_config; confirm before reformatting.-->
<!--Basic package information-->
<base_info>
#Author of the program
author="funnychen"
#Product the package belongs to
product="third"
#Module
module=""
#Package name
name="nginx"
#Process names must not exceed 15 characters; separate several processes with spaces; a process count may be given as app:min,max, e.g. app_name:1,20
app_name="nginx"
#TCP ports the processes listen on (TCP only); separate several ports with spaces
port="80"
#IP type the TCP ports are bound to (TCP only) 0:inner network;1:outer network;2:0.0.0.0;3:VIP;4:127.0.0.1
ip_type="2"
#Package version
version="0.7.67"
#Owning user
user="root"
#Signal used to stop the processes
kill_sig="KILL"
#Start at boot [0|1]
auto_start="1"
#File that holds the boot start-up script
boot_path="/usr/local/services/etc/svc.sh"
#Log directory (leave empty when logs are stored directly in the install directory)
log_dir="/data/log/$name-$version"
#Alarm feature ID (normally does not need to be changed)
rpt_port="5570"
#Base install directory (do not modify)
install_base="/usr/local/services"
#Variable used by the system, do not modify
install_path="/usr/local/services/nginx-0.7.67"
#Whether to force installation into the old directory (do not modify)
force_install="false"
</base_info>
<!--File links created at install time-->
<link>
cd $install_path
ln -s log logs
rm /usr/local/nginx 2>/dev/null
ln -s $install_path /usr/local/nginx
</link>
<!--How the program is started; use paths relative to the bin directory-->
<start>
#Limit the core file size to 4k; used for process coredump monitoring
ulimit -c 4 -S
ulimit -n 100001
#-----Packages with a single process name: add the start command here-----
./nginx
#-----Packages with several process names: add the start commands here-----
#Copy the if statement once per process; keep the if logic and replace the sample process name app_test inside it
#The variable app_to_start holds the name of the process to start; it does not need to be changed
#if [ "$app_to_start" = "app_test" -o "$app_to_start" = "all" ];then
# ./app_test &
#fi
</start>
<!--Custom crontab schedule; use the /usr/local/services/nginx-0.7.67 placeholder-->
<!--Example: "0 0 * * * /usr/local/services/nginx-0.7.67/admin/start.sh &" -->
<!--NOTE(review): the first three jobs below all redirect with ">" to the same crontab.log, so each run truncates the output of the others; confirm this is intended.-->
<crontab>
*/3 * * * * /usr/local/services/nginx-0.7.67/admin/monitor.sh > /usr/local/services/nginx-0.7.67/log/crontab.log 2>&1 &
*/15 * * * * /usr/local/services/nginx-0.7.67/admin/clear.sh file > /usr/local/services/nginx-0.7.67/log/crontab.log 2>&1 &
15 8 * * * /usr/local/services/nginx-0.7.67/admin/md5sum.sh check > /usr/local/services/nginx-0.7.67/log/crontab.log 2>&1 &
0 0 * * * /usr/local/services/nginx-0.7.67/tools/log_clear.sh > /usr/local/services/nginx-0.7.67/log/log_clear.log 2>&1 &
</crontab>
<!--How the program is stopped-->
<stop>
#Seconds to wait after stopping a process
sleep_count=1
#The variable app_to_stop holds the name of the process to stop
if [ $app_count -gt 1 -a "$app_to_stop" != "all" ];then
    kill_app $app_to_stop $kill_sig
    sleep $sleep_count
    return
fi
for app_info in $app_name
do
    app=`echo $app_info | awk -F: '{print $1}'`
    kill_app $app $kill_sig
    sleep $sleep_count
done
</stop>
<!--Program status check; on failure add the failing process name to err_app or the failing port to err_port-->
<monitor>
#Process count check
for app_info in `cat $runing_file`
do
    check_process "$app_info"
done
#Port check
for p in `echo $port`
do
    check_port "$bind_ip" "$p"
done
</monitor>
<!--Failure handling; whether it runs depends on the result of the status check-->
<resolve>
for app in `echo $err_app`
do
    $install_path/admin/restart.sh $app
done
</resolve>
<!--Files to md5-check; use paths relative to the install directory; entries with a "<space>-" suffix are excluded from the check-->
<md5>
bin/*
lib/*
admin/*
bin/*.pid -
bin/*core* -
bin/*.tmp -
bin/*.stat -
bin/*.log -
bin/*.data -
bin/*.dat -
bin/*.bin -
bin/*.info -
</md5>
<!--Files whose content is substituted at install time; use paths relative to the install directory-->
<substitute>
init.xml
conf/*
etc/*
</substitute>
<!--Data clean-up configuration-->
<clear_file>
#directory #threshold #command #argument #target
log 85%:2000M tar 10 *.log
admin/data/backup 90%:50M tar 10 *
admin/data/tmp 90%:10M delete 1 *
#----Notes-----
#directory: directory to watch, relative to the install directory
#threshold: condition that triggers the clean-up [partition usage percentage:maximum directory size<M|m>]
#command: delete(remove files older than the given time),tar(compress files older than the given time),clear(empty files larger than the given size)
#argument: delete,tar(days by default, suffix h for hours, m for minutes),clear(file size in k)
#target: files that may be cleaned, wildcards accepted
#----Examples-----
#directory #threshold #command #argument #target
#log 80%:10M delete 30 stat*.log
#data 90%:10M tar 30 */*.dat
#log 90%:10M clear 50000 debug/err*.log
</clear_file>
<install_on_complete>
chown user_00.users $install_path/conf/*
</install_on_complete>