1.python 根据 oracle字段名和类型 生成 hive建表语句

一、问题

遇到一个场景,要根据oracle表结构创建hive表
本来想写一个根据Oracle连接信息和表名自动在Hive中建表的脚本。但如果放到集群上运行,Python操作Oracle所依赖的cx_Oracle需要在集群的每个节点上都安装一遍;时间紧急,干活要紧,先凑合写个在本地使用的脚本吧。

二、解决

# -*- coding=utf8 -*-
class GetHiveDDLFromOracleMeta:
    """Build a Hive ``create external table`` statement from Oracle column metadata.

    The metadata is a comma-separated string of ``COLUMN_NAME DATA_TYPE``
    entries, for example ``"ID NUMBER,NAME VARCHAR2,CREATED TIMESTAMP(6)"``.

    Limitation: because entries are separated by commas, a type that itself
    contains a comma (such as ``NUMBER(10,2)``) cannot be expressed in this
    format and is rejected as a malformed entry.
    """

    def get_oralce_2_hive_datatype_map(self):
        """Return the Oracle-to-Hive type lookup table.

        Returns:
            dict: Oracle type name -> Hive type name.  An empty-string value
            marks an Oracle type that has no Hive mapping; columns of such a
            type are rejected by ``transfrom_from_oracle_to_hive``.
        """
        type_map = {
            "INTEGER": "double",
            "NUMBER": "double",
            "NUMBER(10)": "bigint",
            "NUMBER(11)": "bigint",
            "NUMBER(12)": "bigint",
            "NUMBER(13)": "bigint",
            "NUMBER(14)": "bigint",
            "NUMBER(15)": "bigint",
            "NUMBER(16)": "bigint",
            "NUMBER(17)": "bigint",
            "NUMBER(18)": "bigint",
            "NUMBER(5)": "int",
            "NUMBER(6)": "int",
            "NUMBER(7)": "int",
            "NUMBER(8)": "int",
            "NUMBER(9)": "int",
            "NUMBER(2)": "smallint",
            "NUMBER(3)": "smallint",
            "NUMBER(4)": "smallint",
            "NUMBER(P,S)": "decimal",
            "NUMBER(1)": "tinyint",
            "FLOAT": "double",
            "BINARY_FLOAT": "double",
            "BINARY_DOUBLE": "double",
            "DATE": "timestamp",
            "TIMESTAMP(1)": "timestamp",
            "TIMESTAMP(2)": "timestamp",
            "TIMESTAMP(3)": "timestamp",
            "TIMESTAMP(4)": "timestamp",
            "TIMESTAMP(5)": "timestamp",
            "TIMESTAMP(6)": "timestamp",
            "CHAR": "string",
            "NCHAR": "string",
            "NVARCHAR2": "string",
            "VARCHAR2": "string",
            "NVARCHAR": "string",
            # Types below with an empty value have no Hive mapping.
            "BLOB": "",
            "BFILE": "",
            "NCLOB": "",
            "CLOB": "string",
            "ROWID": "",
            "UROWID": "",
            "TIMESTAMP with time zone": "",
            "TIMESTAMP with local time zone": "",
            "ANYDATA": "",
            "VARRAY": "",
            "NESTEDTAB": "",
            "Object": "",
            "REF": "",
            "RAW": "",
        }
        return type_map

    def split_oracl_meta(self, oracle_meta):
        """Parse the metadata string into ``(column_name, column_type)`` tuples.

        Whitespace around entries and repeated whitespace between the name and
        the type are tolerated; blank entries (for example from a trailing
        comma) are skipped.  Everything after the column name is kept as the
        type, so a multi-word type is not silently truncated.

        Args:
            oracle_meta (str): comma-separated ``NAME TYPE`` entries.

        Returns:
            list: ``(column_name, column_type)`` tuples in input order.

        Raises:
            ValueError: if an entry has no type, or no column is found at all.
        """
        name_types = []
        for entry in oracle_meta.split(","):
            tokens = entry.split()
            if not tokens:
                # Blank entry: stray whitespace or a trailing comma.
                continue
            if len(tokens) < 2:
                raise ValueError(
                    "malformed column entry (expected 'NAME TYPE'): " + repr(entry)
                )
            name_types.append((tokens[0], " ".join(tokens[1:])))
        if not name_types:
            raise ValueError("no columns found in oracle_meta: " + repr(oracle_meta))
        return name_types

    def transfrom_from_oracle_to_hive(self, oracle_meta_maps):
        """Translate each column's Oracle type into its Hive type.

        Args:
            oracle_meta_maps: iterable of ``(column_name, oracle_type)`` pairs.

        Returns:
            list: ``(column_name, hive_type)`` tuples in input order.

        Raises:
            ValueError: if a column's Oracle type is missing from the lookup
                table or is mapped to an empty string (unsupported).
        """
        oracle_hive_maps = self.get_oralce_2_hive_datatype_map()
        hive_name_type_maps = []
        for name_type in oracle_meta_maps:
            column_name = name_type[0]
            oracle_type = name_type[1]
            # .get() so that an unknown type reports the offending column
            # instead of surfacing as a bare KeyError.
            hive_type = oracle_hive_maps.get(oracle_type)
            if not hive_type:
                raise ValueError(" ERROR!!!  no map column and type is " + str(name_type))
            hive_name_type_maps.append((column_name, hive_type))
        return hive_name_type_maps

    def get_dml_from_hive_name_type_maps(self, hive_name_type_maps, hive_table_name):
        """Assemble the Hive ``create external table`` statement.

        The field delimiter is written as the two-character escape ``\\t``
        rather than a raw TAB character, so the statement stays readable and
        survives being copied from a console.

        Args:
            hive_name_type_maps: iterable of ``(column_name, hive_type)`` pairs.
            hive_table_name (str): name of the Hive table to create.

        Returns:
            str: the complete statement, terminated by ``;``.
        """
        dml_sql_prefix = "create external table " + hive_table_name + " ("
        dml_sql_middle = ", ".join(
            name_type[0] + " " + name_type[1] for name_type in hive_name_type_maps
        )
        dml_sql_suffix = ") " \
                         " row format delimited fields terminated by '\\t' stored as textfile ;"
        return dml_sql_prefix + dml_sql_middle + dml_sql_suffix

    def create_hive_table_dml_by_oracle_meta(self, oracle_meta, hive_table_name):
        """Convert an Oracle metadata string into a Hive create-table statement.

        Args:
            oracle_meta (str): comma-separated ``NAME TYPE`` entries.
            hive_table_name (str): name of the Hive table to create.

        Returns:
            str: the Hive ``create external table`` statement.

        Raises:
            ValueError: if the metadata is malformed or contains a type that
                has no Hive mapping.
        """
        oracle_name_types = self.split_oracl_meta(oracle_meta)
        hive_name_types = self.transfrom_from_oracle_to_hive(oracle_name_types)
        return self.get_dml_from_hive_name_type_maps(hive_name_types, hive_table_name)

if __name__ == "__main__":
    # Script entry point: edit the two values below, then run the file.
    # oracle_meta is the comma-separated "COLUMN_NAME DATA_TYPE" list copied from Oracle;
    # hive_table_name is the name of the Hive table the statement should create.
    oracle_meta = "BIAOZHIPAIDIANNAOBIANHAO NUMBER,BIAOZHIPAIFUKAYOUXIAOQICONG TIMESTAMP(6),BIAOZHIPAIFUKAYOUXIAOQIZHI TIMESTAMP(6),BIAOZHIPAIHAOMA VARCHAR2,BIAOZHIPAILEIXING VARCHAR2,CHELIANGDIANNAOBIANHAO NUMBER,CHELIANGTUICHURIQI TIMESTAMP(6),CHEXINGXIANZHI VARCHAR2,CUSTOM_OPER_TYPE VARCHAR2,CUSTOM_UPDATE_TIME TIMESTAMP(6),DAIYONGKAYOUXIAOQICONG TIMESTAMP(6),DAIYONGKAYOUXIAOQIZHI TIMESTAMP(6),FUKAHAO VARCHAR2,GUANLIJIBIE VARCHAR2,JINGYINGQIXIAN TIMESTAMP(6),JINGYQUANDANWEIDIANNAOBIANHAO NUMBER,NITOURUCHELIANGRIQI TIMESTAMP(6),PIWENHAO VARCHAR2,PIZHUNJIGUAN VARCHAR2,PIZHUNSHIJIAN TIMESTAMP(6),PM_PRIMARYKEY NUMBER,QIDIANDI VARCHAR2,SHENQINGRIQI TIMESTAMP(6),SHIFOUYIFAPAI VARCHAR2,SHIYONGDANWEIDIANNAOBIANHAO NUMBER,SHOUCITOURUSHIJIAN TIMESTAMP(6),SHUJUZHUANGTAI VARCHAR2,SYS_XINZENGREN VARCHAR2,SYS_XINZENGSHIJIAN TIMESTAMP(6),SYS_ZUIJINXIUGAIREN VARCHAR2,SYS_ZUIJINXIUGAISHIJIAN TIMESTAMP(6),XIAQUSHENG VARCHAR2,XIAQUSHI VARCHAR2,XIAQUXIAN VARCHAR2,XIAQUZHEN VARCHAR2,XUKERIQI TIMESTAMP(6),YUANCHEPAIHAO VARCHAR2,ZHIBIAODIANNAOBIANHAO NUMBER,ZHIBIAOYOUXIAORIQI TIMESTAMP(6),ZHIBIAOZHONGLEI VARCHAR2,ZHUANGBEITIAOJIAN VARCHAR2,ZHUANGTAI VARCHAR2,ZUIDICHELIANGJISHUDENGJI VARCHAR2,ZUIDICHELIANGZHUANGBEIDENGJI VARCHAR2"
    hive_table_name = "ST_DLYS_KEYUNBAOCHEXIANLU"

    ddl_builder = GetHiveDDLFromOracleMeta()
    hive_ddl = ddl_builder.create_hive_table_dml_by_oracle_meta(oracle_meta, hive_table_name)

    # First the type of the returned object, then the generated statement itself.
    print(type(hive_ddl))
    print(hive_ddl)

三、使用

1.根据自己需求调整get_oralce_2_hive_datatype_map方法中的类型映射

2.在能连oracle的工具中,例如plsql,navicat,dbeaver等工具,连接到oracle库,执行下面的sql

注意:TABLE_NAME请改成创建hive表依据的oracle表名

-- Produce one row holding "COLUMN_NAME DATA_TYPE,COLUMN_NAME DATA_TYPE,..."
-- for the given table (replace TABLE_NAME with the Oracle table name).
-- Columns are ordered by COLUMN_ID, i.e. the order in which they are defined
-- in the table, so the generated Hive columns line up positionally with data
-- exported in table order.
-- NOTE: LISTAGG output is bounded by the VARCHAR2 maximum length; a very wide
-- table may fail with ORA-01489 -- verify on your database.
select listagg(A.COLUMN_NAME || ' ' || A.DATA_TYPE, ',') within group (order by A.COLUMN_ID)
from user_tab_columns A
where A.TABLE_NAME = upper('TABLE_NAME');

查询结果类似这样

BIAOZHIPAIDIANNAOBIANHAO NUMBER,BIAOZHIPAIFUKAYOUXIAOQICONG TIMESTAMP(6),BIAOZHIPAIFUKAYOUXIAOQIZHI TIMESTAMP(6),BIAOZHIPAIHAOMA VARCHAR2,BIAOZHIPAILEIXING VARCHAR2,CHELIANGDIANNAOBIANHAO NUMBER,CHELIANGTUICHURIQI TIMESTAMP(6),CHEXINGXIANZHI VARCHAR2,CUSTOM_OPER_TYPE VARCHAR2,CUSTOM_UPDATE_TIME TIMESTAMP(6),DAIYONGKAYOUXIAOQICONG TIMESTAMP(6),DAIYONGKAYOUXIAOQIZHI TIMESTAMP(6),FUKAHAO VARCHAR2,GUANLIJIBIE VARCHAR2,JINGYINGQIXIAN TIMESTAMP(6),JINGYQUANDANWEIDIANNAOBIANHAO NUMBER,NITOURUCHELIANGRIQI TIMESTAMP(6),PIWENHAO VARCHAR2,PIZHUNJIGUAN VARCHAR2,PIZHUNSHIJIAN TIMESTAMP(6),PM_PRIMARYKEY NUMBER,QIDIANDI VARCHAR2,SHENQINGRIQI TIMESTAMP(6),SHIFOUYIFAPAI VARCHAR2,SHIYONGDANWEIDIANNAOBIANHAO NUMBER,SHOUCITOURUSHIJIAN TIMESTAMP(6),SHUJUZHUANGTAI VARCHAR2,SYS_XINZENGREN VARCHAR2,SYS_XINZENGSHIJIAN TIMESTAMP(6),SYS_ZUIJINXIUGAIREN VARCHAR2,SYS_ZUIJINXIUGAISHIJIAN TIMESTAMP(6),XIAQUSHENG VARCHAR2,XIAQUSHI VARCHAR2,XIAQUXIAN VARCHAR2,XIAQUZHEN VARCHAR2,XUKERIQI TIMESTAMP(6),YUANCHEPAIHAO VARCHAR2,ZHIBIAODIANNAOBIANHAO NUMBER,ZHIBIAOYOUXIAORIQI TIMESTAMP(6),ZHIBIAOZHONGLEI VARCHAR2,ZHUANGBEITIAOJIAN VARCHAR2,ZHUANGTAI VARCHAR2,ZUIDICHELIANGJISHUDENGJI VARCHAR2,ZUIDICHELIANGZHUANGBEIDENGJI VARCHAR2

复制查询结果

3.修改main函数里的参数

  1. 用上面复制的字段名和类型拼接成的字符串替换main函数里的参数oracle_meta=右侧的字符串
  2. 修改参数hive_table_name=右侧的表名为想要创建的hive表名
  3. (以脚本保存为GetHiveDDLFromOracleMeta.py为例)在PyCharm里直接运行,或者在命令行中执行 $PYTHON_HOME/python GetHiveDDLFromOracleMeta.py(Windows的cmd中请把$PYTHON_HOME写成%PYTHON_HOME%)
  4. pycharm控制台或者cmd输出的create语句即为所需的hive建表语句了

4.优化(TODO)

后面有时间的话,会把"手动执行SQL并复制查询结果"这一步改为由脚本直接连接Oracle查询;生成的Hive建表语句也不再需要粘贴出来手动执行,而是由脚本直接在Hive中自动建表。

猜你喜欢

转载自blog.csdn.net/qq_39945938/article/details/108674346