python监控mysql连接数 批量杀进程 解决too many connections问题

线上django服务偶尔会因为机器访问mysql过多,造成too many connections 问题,导致服务挂掉。

之前调大了最大连接数,有点治标不治本。

所以今天抽空写个监控mysql连接数的服务,如果连接数超过某个阈值,就杀掉一部分连接。

核心代码。 

20191928更新:

之前直接time.sleep有点low,这回换了个Python定时任务的第三方框架——schedule,还挺好用的

参考:

https://schedule.readthedocs.io/en/stable/

https://schedule.readthedocs.io/en/stable/faq.html#what-if-my-task-throws-an-exception

更新后的核心代码如下:

# -*- coding: utf-8 -*-
import getpass
import time
import functools

import MySQLdb
import schedule

from config import *
from log_setup import set_log

if getpass.getuser() == 'wangjinyu':
    from loc_config import *

logger = set_log(__name__)


def catch_exceptions(cancel_on_failure=False):
    def catch_exceptions_decorator(job_func):
        @functools.wraps(job_func)
        def wrapper(*args, **kwargs):
            try:
                return job_func(*args, **kwargs)
            except:
                import traceback
                logger.info(traceback.format_exc())
                if cancel_on_failure:
                    return schedule.CancelJob
        return wrapper
    return catch_exceptions_decorator

@catch_exceptions()
def handle_process_num():
    conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD)
    cur = conn.cursor()
    cur.execute('show full processlist')
    process_list = list(cur.fetchall())
    cur_process_num = len(process_list)
    cur.close()
    if cur_process_num > MAX_CON_NUM:
        logger.info('current process num: %d' % cur_process_num)
        logger.info('start kill process')
        try:
            kill_process(process_list, cur_process_num)
        except Exception:
            logger.exception('process error ocurr')


def kill_process(process_list, cur_process_num):
    killed_num = 0
    # order by connected time
    process_list.sort(key=lambda x: x[5], reverse=True)
    conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD)
    cur = conn.cursor()
    # try to kill 1/3 of processes
    for i in range(cur_process_num // 2):
        command = process_list[i][4]
        time = int(process_list[i][5])
        # if command is Sleep and Sleep time over 600s then kill process
        if command == 'Sleep' and time > MAX_SLEEP_TIME:
            try:
                cur.execute('kill %s' % process_list[i][0])
            except Exception:
                logger.exception('kill process %d fail' % process_list[i][0])
                continue
            killed_num += 1
    cur.execute('show full processlist')
    post_process_num = len(cur.fetchall())
    cur.close()
    logger.info('this time kill %d process' % killed_num)
    logger.info('remain process num:%d' % post_process_num)


if __name__ == '__main__':
    # run job every 30 minutes
    schedule.every(INTERVAL_TIME).seconds.do(handle_process_num)
    logger.info('mysql monitor service startup success')
    while True:
        schedule.run_pending()
        time.sleep(1)
# -*- coding: utf-8 -*-
import time

import MySQLdb
import getpass

from config import *
from log_setup import set_log


logger = set_log(__name__)


def hanlde_process_num():
    conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD)
    cur = conn.cursor()
    cur.execute('show full processlist')
    process_list = list(cur.fetchall())
    cur_process_num = len(process_list)
    cur.close()
    if cur_process_num > MAX_CON_NUM:
        logger.info('current process num: %d' % cur_process_num)
        logger.info('start kill process')
        try:
            kill_process(process_list)
        except Exception:
            logger.exception('process error ocurr')


def kill_process(process_list):
    killed_num = 0
    #order by connected time
    process_list.sort(key=lambda x:x[5],reverse=True)
    conn = MySQLdb.connect(host=MYSQL_HOST, port=MYSQL_PORT, user=MYSQL_USER, password=MYSQL_PASSWORD)
    cur = conn.cursor()
    #try to kill 1/3 of processes
    for i in range(MAX_CON_NUM // 3):
        Command = process_list[i][4]
        Time = int(process_list[i][5])
        #if command is Sleep and Sleep time over 600s then kill process
        if Command == 'Sleep' and Time > MAX_SLEEP_TIME:
            try:
                cur.execute('kill %s' % process_list[i][0])
            except Exception:
                logger.exception('kill process %d fail'%process_list[i][0])
                continue
            killed_num += 1
    cur.execute('show full processlist')
    post_process_num = len(cur.fetchall())
    cur.close()
    logger.info('this time kill %d process' % killed_num)
    logger.info('remain process num:%d' % post_process_num)


def sever():
    logger.info('mysql monitor startup success')
    while True:
        try:
            hanlde_process_num()
        except Exception:
            logger.exception("hanlde process num error")
            time.sleep(10)
            continue
        #check mysql connect num every an hour
        time.sleep(3600)


if __name__ == '__main__':
    sever()
发布了115 篇原创文章 · 获赞 34 · 访问量 9万+

猜你喜欢

转载自blog.csdn.net/u011519550/article/details/102724113
今日推荐