1. Write the job script read-hive.json
The hdfsreader reads the Hive table's underlying data files directly from HDFS; the hadoopConfig block supplies the NameNode HA settings so the client can fail over between nn1 and nn2.
{
    "job": {
        "setting": {
            "speed": {
                "channel": 3
            }
        },
        "content": [
            {
                "reader": {
                    "name": "hdfsreader",
                    "parameter": {
                        "path": "/user/hive/warehouse/ods.db/ods_emp/*",
                        "defaultFS": "hdfs://hdfs-ha",
                        "hadoopConfig": {
                            "dfs.nameservices": "hdfs-ha",
                            "dfs.ha.namenodes.hdfs-ha": "nn1,nn2",
                            "dfs.namenode.rpc-address.hdfs-ha.nn1": "node01:8020",
                            "dfs.namenode.rpc-address.hdfs-ha.nn2": "node02:8020",
                            "dfs.client.failover.proxy.provider.hdfs-ha": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider"
                        },
                        "column": ["*"],
                        "fileType": "text",
                        "encoding": "UTF-8",
                        "fieldDelimiter": ","
                    }
                },
                "writer": {
                    "name": "streamwriter",
                    "parameter": {
                        "print": true
                    }
                }
            }
        ]
    }
}
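Before running, two things are worth verifying: fieldDelimiter must match the delimiter the Hive table was created with (Hive's default for text tables is '\u0001'; the ',' above assumes the table was created with FIELDS TERMINATED BY ','), and the wildcard path must resolve to actual data files. A quick check from any node whose HDFS client is configured for the hdfs-ha nameservice:

# List the table's storage directory; the path matches the reader config above
hdfs dfs -ls /user/hive/warehouse/ods.db/ods_emp/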
2. Run the job
python /datax/bin/datax.py ./read-hive.json
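With streamwriter's "print": true, each record read is echoed to stdout, followed by DataX's job statistics, which makes this a convenient smoke test before switching to a real writer. To reuse the script for other tables, datax.py also accepts -p/--params for ${variable} substitution in the job JSON; a sketch, where the ${table} placeholder is hypothetical and would have to replace ods_emp in the path above:

# Substitute ${table} in read-hive.json at launch time (hypothetical placeholder)
python /datax/bin/datax.py -p "-Dtable=ods_emp" ./read-hive.json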