MapReduce 输出结果导入 Oracle 时报“无效字符”错误
使用 Map 读取数据,然后将结果写入 Oracle。相信下面这段代码大家都不陌生,但运行时一直报“无效字符”错误。
// Map-only job that reads the input files and writes each output record to an
// Oracle table through the stock DBOutputFormat. This is the configuration that
// fails with the "invalid character" error, because the INSERT statement that
// DBOutputFormat generates ends with a semicolon.
Job job = new Job(conf, "Query_Job");
job.setJarByClass(ImportDriver.class);
job.setMapperClass(ImportMapper.class);
job.setOutputKeyClass(ActiveIpD.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(input));
// Send the result to Oracle.
job.setOutputFormatClass(DBOutputFormat.class);
// Placeholders: substitute the real JDBC driver class, connection URL, user name
// and password. The settings are stored on the job's own configuration.
DBConfiguration.configureDB(job.getConfiguration(), driverClass, dbUrl, userName, password);
// Target table followed by the column names to insert into.
DBOutputFormat.setOutput(job, "tableName",
    "id",
    "name");
// No reduce phase: mapper output goes straight to the output format.
job.setNumReduceTasks(0);
后来查看 DBOutputFormat 的源码发现:constructQuery 方法在拼接完 SQL 之后,又在语句末尾追加了一个分号;而 Oracle 无法识别这个分号,因此报“无效字符”错误。
/**
 * Builds the parameterised INSERT statement used to write records to the table.
 *
 * The result has the form {@code INSERT INTO table (f1,f2) VALUES (?,?);} and
 * always ends with a semicolon.
 *
 * @param table      name of the table to insert into
 * @param fieldNames column names; the column list is omitted when the array is
 *                   empty or its first element is null, but one "?" placeholder
 *                   is still emitted per array element
 * @return the INSERT statement text, terminated by ";"
 * @throws IllegalArgumentException if fieldNames is null
 */
public String constructQuery(String table, String[] fieldNames) {
if(fieldNames == null) {
throw new IllegalArgumentException("Field names may not be null");
}
StringBuilder query = new StringBuilder();
query.append("INSERT INTO ").append(table);
// Emit the explicit column list only when real column names were supplied.
if (fieldNames.length > 0 && fieldNames[0] != null) {
query.append(" (");
for (int i = 0; i < fieldNames.length; i++) {
query.append(fieldNames[i]);
if (i != fieldNames.length - 1) {
query.append(",");
}
}
query.append(")");
}
query.append(" VALUES (");
// One "?" bind placeholder per field, comma separated.
for (int i = 0; i < fieldNames.length; i++) {
query.append("?");
if(i != fieldNames.length - 1) {
query.append(",");
}
}
// NOTE: the statement is closed with ");" - this trailing semicolon is the
// character the surrounding article identifies as the cause of the Oracle error.
query.append(");");
return query.toString();
}
解决办法:继承 DBOutputFormat 并重写 constructQuery 方法,去掉语句末尾的分号。
package com.boco.querymr.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
/**
 * {@link DBOutputFormat} variant whose generated INSERT statement does not end
 * with a semicolon.
 *
 * <p>Overriding {@link #constructQuery(String, String[])} alone was observed not
 * to take effect (the override was never invoked), so the static
 * {@code setOutput} helpers are declared here as well; they register this class
 * as the job's output format.
 *
 * @param <K> record type written to the database
 * @param <V> value type
 */
public class MyDBOutputFormat<K extends DBWritable, V> extends DBOutputFormat<K, V> {

    private static final Log LOG = LogFactory.getLog(MyDBOutputFormat.class);

    /**
     * Builds {@code INSERT INTO table (f1,f2) VALUES (?,?)} with no trailing
     * semicolon, then reports the statement through the logger and stderr.
     *
     * @param table      name of the table to insert into
     * @param fieldNames column names; the column list is left out when the array
     *                   is empty or its first element is null, but one "?" is
     *                   still produced per array element
     * @return the INSERT statement text
     * @throws IllegalArgumentException if fieldNames is null
     */
    @Override
    public String constructQuery(String table, String[] fieldNames) {
        if (fieldNames == null) {
            throw new IllegalArgumentException("Field names may not be null");
        }

        final int columnCount = fieldNames.length;
        final boolean hasColumnNames = columnCount > 0 && fieldNames[0] != null;

        StringBuilder sql = new StringBuilder("INSERT INTO ");
        sql.append(table);

        if (hasColumnNames) {
            sql.append(" (");
            for (int col = 0; col < columnCount; col++) {
                if (col > 0) {
                    sql.append(",");
                }
                sql.append(fieldNames[col]);
            }
            sql.append(")");
        }

        sql.append(" VALUES (");
        for (int col = 0; col < columnCount; col++) {
            if (col > 0) {
                sql.append(",");
            }
            sql.append("?");
        }
        // Close the VALUES list without a terminating ';'.
        sql.append(")");

        final String statement = sql.toString();
        LOG.info(statement);
        System.err.println("导入sql" + statement);
        return statement;
    }

    /**
     * Configures the job to write to the given table through this output format.
     *
     * <p>When the first field name is non-null the names are stored in the job
     * configuration; otherwise only the number of fields is stored.
     *
     * @param job        the job to configure
     * @param tableName  the table to insert data into
     * @param fieldNames the column names; at least one element is required
     * @throws IllegalArgumentException if no field names are given
     * @throws IOException declared for parity with the other setOutput helpers
     */
    public static void setOutput(Job job, String tableName,
            String... fieldNames) throws IOException {
        if (fieldNames.length == 0) {
            throw new IllegalArgumentException(
                    "Field names must be greater than 0");
        }
        if (fieldNames[0] == null) {
            // Names are unknown: record only how many columns there are.
            setOutput(job, tableName, fieldNames.length);
            return;
        }
        DBConfiguration settings = setOutput(job, tableName);
        settings.setOutputFieldNames(fieldNames);
    }

    /**
     * Configures the job to write to the given table when only the number of
     * columns is known.
     *
     * @param job        the job to configure
     * @param tableName  the table to insert data into
     * @param fieldCount the number of fields in the table
     * @throws IOException declared for parity with the other setOutput helpers
     */
    public static void setOutput(Job job, String tableName, int fieldCount)
            throws IOException {
        DBConfiguration settings = setOutput(job, tableName);
        settings.setOutputFieldCount(fieldCount);
    }

    /**
     * Shared setup: selects this class as the job's output format, turns off
     * the reduce speculative-execution setting, and stores the table name.
     *
     * @param job       the job to configure
     * @param tableName the table to insert data into
     * @return the DBConfiguration wrapping the job's configuration
     */
    private static DBConfiguration setOutput(Job job,
            String tableName) throws IOException {
        job.setOutputFormatClass(MyDBOutputFormat.class);
        job.setReduceSpeculativeExecution(false);

        DBConfiguration settings = new DBConfiguration(job.getConfiguration());
        settings.setOutputTableName(tableName);
        return settings;
    }
}