Python 解析 hadoop-namenode.log

用 Python 解析 hadoop-namenode.log 并将结果写入 MySQL

# -*- coding:utf-8 -*-

import sys
import pandas as pd
from sqlalchemy import create_engine
import MySQLdb

# Python 2 only: force the process-wide default string encoding to UTF-8.
# `reload` is not a builtin and `sys.setdefaultencoding` does not exist on
# Python 3, so the hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding("utf8")

def log(path="/Users/hadoop/software/hadoop-2.7.3/logs/"
             "hadoop-hadoop-namenode-HadoopdeMacBook-Pro.local.log"):
    """Extract the timestamps of a NameNode log file into a DataFrame.

    A line is kept only when the text before its first comma is exactly
    19 characters long (the length of a ``YYYY-MM-DD HH:MM:SS`` stamp) and
    contains at least one space separating the date from the time. Every
    other line (stack traces, continuation lines, ...) is skipped.

    Args:
        path: Log file to read. Defaults to the NameNode log location that
            was previously hard-coded in this function.

    Returns:
        A pandas DataFrame with the columns ``date``, ``time``,
        ``date_time`` (date and time joined by three spaces) and
        ``web_name`` (always ``"www.baidu.com"``), one row per kept line.
    """
    # Column name -> list of values; filled row by row below.
    log_dict = {
        'date': [],
        'time': [],
        'date_time': [],
        'web_name': [],
    }

    # The context manager guarantees the handle is closed even if parsing
    # fails part-way through.
    with open(path) as fr:
        for line in fr:
            st = line.split(',')[0]
            if len(st) != 19:
                continue

            parts = st.split(' ')
            if len(parts) < 2:
                # 19 characters but no space: not a "date time" stamp.
                continue

            date_part, time_part = parts[0], parts[1]
            log_dict['date'].append(date_part)
            log_dict['time'].append(time_part)
            log_dict['date_time'].append(date_part + '   ' + time_part)
            log_dict['web_name'].append("www.baidu.com")

    return pd.DataFrame(data=log_dict)

def to_mysql(dataframe):
    """Append the rows of *dataframe* to the MySQL table ``namenode_log``.

    Args:
        dataframe: pandas DataFrame whose rows are written to
            ``mysql_python.namenode_log``; rows are appended if the table
            already exists.
    """
    # The user, password, host and port are the same ones delete_data()
    # passes to MySQLdb.connect. The previous URL had its
    # "password@host" part mangled and could not be parsed as intended.
    connect = create_engine(
        'mysql+mysqldb://root:20180423@127.0.0.1:3306/mysql_python?charset=utf8'
    )
    dataframe.to_sql(
        "namenode_log",
        con=connect,
        schema="mysql_python",
        if_exists="append",
    )


def delete_data():
    """Delete every row from the ``mysql_python.namenode_log`` table.

    Connects to the MySQL server on 127.0.0.1:3306 as ``root``, runs the
    DELETE, commits it and closes the connection. The cursor and the
    connection are released even when the statement or the commit raises.
    """
    db = MySQLdb.connect(
        host='127.0.0.1',
        user='root',
        passwd='20180423',
        db='mysql_python',
        port=3306,
        charset='utf8',
    )
    try:
        cursor = db.cursor()
        try:
            cursor.execute("delete from mysql_python.namenode_log;")
        finally:
            cursor.close()
        # Commit only after the DELETE succeeded; on failure the
        # connection is closed without committing.
        db.commit()
    finally:
        db.close()


if __name__ == '__main__':
    # Empty the target table first so that re-running the script does not
    # duplicate rows, then parse the log and load the result.
    delete_data()
    dataframe = log()
    to_mysql(dataframe)
    # Parenthesised form prints the same text on Python 2 and is valid
    # syntax on Python 3, unlike the bare print statement.
    print("finish!!!")

回到 MySQL 中查看写入的数据:

猜你喜欢

转载自www.cnblogs.com/RHadoop-Hive/p/9445045.html