AWS RDS Slowlog collection

  • Overall structure

[Figure: overall architecture of the slow log collection pipeline]

  • mysql

    Because the MySQL slow log is integrated with CloudWatch Logs, you only need to configure the RDS instance in the console to publish the slow log directly to CloudWatch Logs.
    Then, in the CloudWatch Logs console, configure the log group to stream to Elasticsearch; AWS automatically generates the corresponding Lambda function, which forwards the logs to Elasticsearch. Enabling the export can also be scripted, as sketched below.
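
    A minimal sketch of enabling the export with boto3 (the instance identifier is a placeholder, and the slow query log itself must already be turned on in the instance's parameter group):

# Sketch: publish the MySQL slow query log to CloudWatch Logs.
# The instance identifier is a placeholder; slow_query_log must already be
# enabled in the instance's parameter group.
import boto3

rds = boto3.client('rds')
rds.modify_db_instance(
    DBInstanceIdentifier='webrdsprod-mysql',    # placeholder instance id
    CloudwatchLogsExportConfiguration={
        'EnableLogTypes': ['slowquery']         # start exporting the slow log
    },
    ApplyImmediately=True
)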

  • postgresql

    PostgreSQL does not support importing the slow log into CloudWatch Logs this way, so you need to implement the Lambda function yourself. The general logic of the function is to use boto3 to download the slow log and write it into Elasticsearch,
    and finally to trigger the Lambda function every hour through a CloudWatch Events rule (a sketch of the scheduling setup follows the function code below).

#!/usr/bin/env python
# coding: utf-8
# Ship the RDS PostgreSQL slow log to Elasticsearch

import re 
import os 
import boto3 
from datetime import datetime,timedelta 
from elasticsearch import Elasticsearch,RequestsHttpConnection 
from requests_aws4auth import AWS4Auth 

endpoint = 'xxx'  # Elasticsearch domain endpoint
# Because the ES cluster was not created inside a VPC and exposes a public endpoint,
# requests have to be signed with an access key.
awsauth = AW4Auth(os.environ['ACCESS_KEY'], os.environ['SECRET_KEY'], 'us-west-2', 'es') if False else AWS4Auth(os.environ['ACCESS_KEY'], os.environ['SECRET_KEY'], 'us-west-2', 'es')
index_name = 'logstash-pg-slowlog-{0}'.format(datetime.now().strftime('%Y.%m.%d'))

# Mapping used when creating the daily index
indexDoc = {
    "dataRecord": {
        "properties": {
            "timestamp": {
                "type": "date"
            },
            "message": {
                "type": "string"
            }
        }
    }
}

def get_pg_slowlog(rds_instance_id):
    """Download the previous hour's PostgreSQL log file and yield its slow-query entries."""
    client = boto3.client('rds')
    marker = '0'
    additionalDataPending = True
    logfile_prefix = 'error/postgresql.log.'
    logfile_name = logfile_prefix + (datetime.now() - timedelta(hours=1)).strftime('%Y-%m-%d-%H')
    while additionalDataPending:
        response = client.download_db_log_file_portion(
            DBInstanceIdentifier=rds_instance_id,
            LogFileName=logfile_name,
            Marker=marker,
            NumberOfLines=4000
        )
        marker = response['Marker']
        additionalDataPending = response['AdditionalDataPending']
        # Keep lines containing "duration" together with their tab-indented continuation lines
        p = r'.*duration.*\n(?:(?:\t){1,}.*\n){0,}'
        try:
            content = re.findall(p, response['LogFileData'])
            yield ''.join(content)
        except Exception as e:
            print(f'no slowlog found in file {logfile_name} [Error]{e}')

def connect_es(EndPoint):
    try:
        esclient = Elasticsearch(
            hosts=[{'host':EndPoint, 'port':443}],
            use_ssl=True,
            http_auth=awsauth,
            verify_certs=True,
            connection_class=RequestsHttpConnection
        )
        print('connect es ok')
        return esclient
    except Exception as e:
        print(f'Unable to connect to {EndPoint}: {e}')
        exit(1)


def put_slowlog(message):
    body = {
        'message': message,
        'timestamp': datetime.now().isoformat()
    }
    es_client = connect_es(endpoint)
    # Create the daily index with the mapping above; ignore=400 suppresses the
    # error when the index already exists.
    es_client.indices.create(
        index=index_name,
        ignore=400,
        body=indexDoc
    )
    try:
        es_client.index(
            index = index_name,
            doc_type = 'pg-slowlog',
            body = body
        )
        print('put slowlog succeeded')
    except Exception as e:
        print(f"put slowlog failed:{e}")

def lambda_handler(event, context):
    for rds in ['webrdsprod','account-prod']:
        slow_log = get_pg_slowlog(rds)
        try: 
            for line in slow_log:
                put_slowlog(line)
        except Exception as e:
            print(f'No Slow Log:{e}')
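
The hourly trigger mentioned above is a CloudWatch Events scheduled rule targeting this Lambda function. A minimal sketch of creating it with boto3 follows; the rule name, function name, and ARN are placeholders, not values from the original setup:

# Sketch: invoke the Lambda above every hour via a CloudWatch Events rule.
# Rule name, function name, and the target ARN are placeholders.
import boto3

events = boto3.client('events')
lambda_client = boto3.client('lambda')

rule = events.put_rule(
    Name='pg-slowlog-hourly',              # placeholder rule name
    ScheduleExpression='rate(1 hour)',
    State='ENABLED'
)

# Allow CloudWatch Events to invoke the function
lambda_client.add_permission(
    FunctionName='pg-slowlog-to-es',       # placeholder function name
    StatementId='pg-slowlog-hourly-event',
    Action='lambda:InvokeFunction',
    Principal='events.amazonaws.com',
    SourceArn=rule['RuleArn']
)

# Point the rule at the function
events.put_targets(
    Rule='pg-slowlog-hourly',
    Targets=[{
        'Id': 'pg-slowlog-lambda',
        'Arn': 'arn:aws:lambda:us-west-2:123456789012:function:pg-slowlog-to-es'  # placeholder ARN
    }]
)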


Origin blog.51cto.com/3379770/2638134