mapReduce 输出结果导入Oracle,无效字符错误

mapReduce 输出结果导入Oracle,无效字符错误

使用 map 读取数据,然后输出到 Oracle。相信下面这段代码大家都不陌生,但运行时一直报错:ORA-00911 无效字符。

 // Map-only job that reads text input and writes each record to Oracle
 // through DBOutputFormat. (Original snippet had two syntax errors: a
 // missing ';' after configureDB and a trailing ',' in the setOutput call.)
 Job job = new Job(conf, "Query_Job");

        job.setJarByClass(ImportDriver.class);
        job.setMapperClass(ImportMapper.class);
        job.setOutputKeyClass(ActiveIpD.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(input));

        // Route the job's output to Oracle via JDBC.
        job.setOutputFormatClass(DBOutputFormat.class);
        // Placeholders: (conf, jdbcDriverClass, jdbcUrl, user, password).
        DBConfiguration.configureDB(conf, driverClass, dbUrl, user, password);
        DBOutputFormat.setOutput(job, "tableName",
                "id",
                "name");
        // No reduce phase — mappers write straight to the database.
        job.setNumReduceTasks(0);

后来观察 DBOutputFormat 源码发现:constructQuery 方法构造完 SQL 语句后,在末尾追加了一个分号;而 Oracle 的 JDBC 驱动不能识别语句内的分号(ORA-00911 无效字符),因此导致报错。

 // Hadoop's stock DBOutputFormat.constructQuery, reproduced verbatim to show
 // the root cause: the trailing ';' appended at the end of the statement.
 // Oracle's JDBC driver rejects a ';' inside the statement text with
 // ORA-00911 "invalid character".
 public String constructQuery(String table, String[] fieldNames) {
    if(fieldNames == null) {
      throw new IllegalArgumentException("Field names may not be null");
    }

    StringBuilder query = new StringBuilder();
    query.append("INSERT INTO ").append(table);

    // Emit the explicit column list only when column names were configured
    // (fieldNames[0] is null when only a field count was set).
    if (fieldNames.length > 0 && fieldNames[0] != null) {
      query.append(" (");
      for (int i = 0; i < fieldNames.length; i++) {
        query.append(fieldNames[i]);
        if (i != fieldNames.length - 1) {
          query.append(",");
        }
      }
      query.append(")");
    }
    query.append(" VALUES (");

    // One '?' placeholder per field for the PreparedStatement.
    for (int i = 0; i < fieldNames.length; i++) {
      query.append("?");
      if(i != fieldNames.length - 1) {
        query.append(",");
      }
    }
    query.append(");");  // <-- the BUG for Oracle: the ';' terminator

    return query.toString();
  }

解决办法:继承 DBOutputFormat,重写 constructQuery 方法,去掉末尾的分号:

package com.boco.querymr.util;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;


/**
 * A {@link DBOutputFormat} variant for Oracle.
 *
 * <p>Identical to Hadoop's stock implementation except that
 * {@link #constructQuery(String, String[])} does NOT append a trailing ';'
 * to the generated INSERT statement — Oracle's JDBC driver rejects a ';'
 * inside the statement text (ORA-00911 "invalid character").
 *
 * <p>Jobs must call {@code MyDBOutputFormat.setOutput(...)} (not the
 * parent's static method), because the parent's {@code setOutput} registers
 * {@code DBOutputFormat.class} as the output format and the overridden
 * {@code constructQuery} would never be invoked. That is why the static
 * {@code setOutput} helpers below are re-declared here.
 */
public class MyDBOutputFormat<K extends DBWritable, V> extends DBOutputFormat<K, V> {
    private static final Log LOG = LogFactory.getLog(MyDBOutputFormat.class);

    /**
     * Builds the parameterized INSERT statement for {@code table}.
     * Same logic as the parent implementation, minus the trailing ';'.
     *
     * @param table      target table name
     * @param fieldNames column names; {@code fieldNames[0]} may be null when
     *                   only a field count was configured
     * @return an {@code INSERT INTO ... VALUES (?,...)} statement with no
     *         statement terminator
     * @throws IllegalArgumentException if {@code fieldNames} is null
     */
    @Override
    public String constructQuery(String table, String[] fieldNames) {
        if (fieldNames == null) {
            throw new IllegalArgumentException("Field names may not be null");
        }

        StringBuilder query = new StringBuilder();
        query.append("INSERT INTO ").append(table);

        // Emit the explicit column list only when column names were configured.
        if (fieldNames.length > 0 && fieldNames[0] != null) {
            query.append(" (");
            for (int i = 0; i < fieldNames.length; i++) {
                query.append(fieldNames[i]);
                if (i != fieldNames.length - 1) {
                    query.append(",");
                }
            }
            query.append(")");
        }
        query.append(" VALUES (");

        // One '?' placeholder per field for the PreparedStatement.
        for (int i = 0; i < fieldNames.length; i++) {
            query.append("?");
            if (i != fieldNames.length - 1) {
                query.append(",");
            }
        }
        // Deliberately no ';' here — Oracle cannot parse it (ORA-00911).
        query.append(")");
        LOG.info(query.toString());
        return query.toString();
    }

    /**
     * Initializes the output settings of the job with explicit column names,
     * registering THIS class as the output format.
     *
     * @param job        the job
     * @param tableName  the table to insert data into
     * @param fieldNames the column names to insert; if the first element is
     *                   null, only the field count is configured
     * @throws IOException              propagated from configuration
     * @throws IllegalArgumentException if no field names are given
     */
    public static void setOutput(Job job, String tableName,
                                 String... fieldNames) throws IOException {
        if (fieldNames.length > 0 && fieldNames[0] != null) {
            DBConfiguration dbConf = setOutput(job, tableName);
            dbConf.setOutputFieldNames(fieldNames);
        } else if (fieldNames.length > 0) {
            setOutput(job, tableName, fieldNames.length);
        } else {
            throw new IllegalArgumentException(
                    "Field names must be greater than 0");
        }
    }

    /**
     * Initializes the output settings of the job with only a field count
     * (no explicit column names).
     *
     * @param job        the job
     * @param tableName  the table to insert data into
     * @param fieldCount the number of fields in the table
     * @throws IOException propagated from configuration
     */
    public static void setOutput(Job job, String tableName, int fieldCount)
            throws IOException {
        DBConfiguration dbConf = setOutput(job, tableName);
        dbConf.setOutputFieldCount(fieldCount);
    }

    /**
     * Common setup: registers this class as the job's output format,
     * disables speculative reduce execution (to avoid duplicate inserts),
     * and records the target table name.
     */
    private static DBConfiguration setOutput(Job job,
                                             String tableName) throws IOException {
        job.setOutputFormatClass(MyDBOutputFormat.class);
        job.setReduceSpeculativeExecution(false);

        DBConfiguration dbConf = new DBConfiguration(job.getConfiguration());

        dbConf.setOutputTableName(tableName);
        return dbConf;
    }

}
发布了21 篇原创文章 · 获赞 6 · 访问量 9312

猜你喜欢

转载自blog.csdn.net/qq1032350287/article/details/104952815