DataX: Reading Hive Data

1. Write the job script read-hive.json

The job reads the Hive table's data files directly from its warehouse directory on HDFS via hdfsreader; the hadoopConfig block carries the NameNode HA settings so the client can resolve the hdfs-ha nameservice.

{
    "job": {
        "setting": {
            "speed": {
                "channel": 3
            }
        },
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "path": "/user/hive/warehouse/ods.db/ods_emp/*",
                        "defaultFS": "hdfs://hdfs-ha",
                        "hadoopConfig": {
                            "dfs.nameservices": "hdfs-ha",
                            "dfs.ha.namenodes.hdfs-ha": "nn1,nn2",
                            "dfs.namenode.rpc-address.hdfs-ha.nn1": "node01:8020",
                            "dfs.namenode.rpc-address.hdfs-ha.nn2": "node02:8020",
                            "dfs.client.failover.proxy.provider.hdfs-ha": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
                        },
                        "column": ["*"],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": ","
                    }
                },
                "writer": {
                    "name": "streamwriter",
                    "parameter": {
                        "print": true
                    }
                }
            }
        ]
    }
}
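Here "column": ["*"] reads every field as a string. hdfsreader can also project specific fields by position; a minimal sketch of an alternative column setting (the indexes and types are assumptions for illustration, chosen for a hypothetical table whose first three fields are an id, a name, and a hire date):

    "column": [
        { "index": 0, "type": "long" },
        { "index": 1, "type": "string" },
        { "index": 2, "type": "date" }
    ]

Each entry gives the zero-based field position in the delimited file and the DataX type it should be parsed as.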

2. Run the job

/datax/bin/datax.py ./read-hive.json
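Before running, it can help to confirm that the path in the job actually matches files on HDFS (assuming the hdfs client is available on this node):

    hdfs dfs -ls /user/hive/warehouse/ods.db/ods_emp/

With streamwriter and "print": true, a successful run prints each record it reads to stdout, which makes this job a quick sanity check of the reader configuration before pointing it at a real writer.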


Reposted from blog.csdn.net/docsz/article/details/116303786