Goal:
We have an Informix database containing a table with 1,000,000 rows. The task is to import those 1,000,000 rows into HDFS via MapReduce, using Informix's paging SQL so that several map tasks can query the database concurrently.
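Informix supports pagination directly in the projection clause via SKIP and FIRST, so each map task can read a disjoint page of the table in parallel. For example, `select skip 100000 first 50000 int_id from tpa_sts_cell_ne` returns rows 100,001 through 150,000; this is exactly the query shape the mapper in step 1 builds.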
Implementation:
1. Write the MapReduce implementation class, named InformixLoader.
The map() body of its SqlMapper class is roughly as follows:
LOG.info("SqlMapper"); String url = context.getConfiguration().get("informix.url"); String[] str = value.toString().split("_"); long start = Long.parseLong(str[0]); long length = Long.parseLong(str[1]); Connection conn = null; Statement st = null; ResultSet rs = null; String sql = ""; try { Class.forName("com.informix.jdbc.IfxDriver"); conn = DriverManager.getConnection(url, "niosuser", "db"); st = conn.createStatement(); sql = "select skip " + start + " first " + length + " int_id from tpa_sts_cell_ne"; LOG.info("SqlMapper sql:" + sql); rs = st.executeQuery(sql); dataKey.set(value.toString()); Path file = new Path(StringUtils.unEscapeString(TMP_MAP_DIR + value.toString())); FileSystem fs = file.getFileSystem(context.getConfiguration()); fs.createNewFile(file); FSDataOutputStream output = fs.create(file); LOG.info("SqlMapper createNewFile OK!"); while (rs.next()) { String c1 = rs.getString(1)+"\n"; output.write(c1.getBytes()); output.flush(); } output.close(); // fs.close(); data.set(value.toString()); context.write(dataKey, value); LOG.info("SqlMapper OK!"); } catch (Exception e) { throw new IOException(sql, e.fillInStackTrace()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException e) { throw new IOException(e.fillInStackTrace()); } } if (st != null) { try { st.close(); } catch (SQLException e) { throw new IOException(e.fillInStackTrace()); } } if (conn != null) { try { conn.close(); } catch (SQLException e) { throw new IOException(e.fillInStackTrace()); } } }
2. Write the InputFormat class (named InputFormarter in the code).
The body of its getSplits() method is as follows:
LOG.info("InputFormarter"); String url = context.getConfiguration().get("informix.url"); Connection conn = null; Statement st = null; ResultSet rs = null; try { Class.forName("com.informix.jdbc.IfxDriver"); conn = DriverManager.getConnection(url, "niosuser", "db"); st = conn.createStatement(); String sql = "select count(*) from tpa_sts_cell_ne"; rs = st.executeQuery(sql); rs.next(); int count = rs.getInt(1); List<InputSplit> splits = new ArrayList<InputSplit>(); int size = 50000; int inv = count / size; int last = count % size; for (int i = 0; i < inv; i++) { SqlSplit s = new SqlSplit(i * size, size); splits.add(s); } if (last!=0){ SqlSplit s = new SqlSplit(inv * size, last); splits.add(s); } return splits; } catch (Exception e) { throw new IOException(e.fillInStackTrace()); } finally { if (rs != null) { try { rs.close(); } catch (SQLException e) { throw new IOException(e.fillInStackTrace()); } } if (st != null) { try { st.close(); } catch (SQLException e) { throw new IOException(e.fillInStackTrace()); } } if (conn != null) { try { conn.close(); } catch (SQLException e) { throw new IOException(e.fillInStackTrace()); } } }
3. Write the reducer class (SqlReducer).
The reduce() body is roughly as follows:
String keyStr = key.toString();
// Merge every map-side temp file for this key into one reduce-side temp file.
Path outFile = new Path(StringUtils.unEscapeString(TMP_RED_DIR + keyStr));
LOG.info("SqlReducer outfile:" + outFile.getName());
FileSystem outfs = outFile.getFileSystem(context.getConfiguration());
FSDataOutputStream output = outfs.create(outFile);
for (Text val : values) {
    String str = val.toString();
    LOG.info("SqlReducer file:" + str);
    Path inputFile = new Path(StringUtils.unEscapeString(TMP_MAP_DIR + str));
    FileSystem fs = inputFile.getFileSystem(context.getConfiguration());
    FSDataInputStream input = fs.open(inputFile);
    BufferedInputStream bi = new BufferedInputStream(input);
    byte[] buffer = new byte[1024];
    int length = bi.read(buffer);
    while (length != -1) {
        // Write only the bytes actually read; a read may fill less than the buffer.
        output.write(buffer, 0, length);
        length = bi.read(buffer);
    }
    bi.close();
    input.close();
    output.flush();
}
output.close();
result.set(key.toString());
context.write(key, result);
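Hadoop ships a helper that performs the same short-read-safe copy, so the manual buffer loop could be replaced with a single call; a sketch, reusing the same stream variables as above:

    // Copy 'bi' to 'output' with a 4 KB buffer; 'false' leaves both streams
    // open so they can still be closed explicitly afterwards.
    org.apache.hadoop.io.IOUtils.copyBytes(bi, output, 4096, false);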
4. Write the output format class (OutputFormater).
Its getRecordWriter() body is roughly as follows:
Path outFilePath = getDefaultWorkFile(context, "");
final FileSystem fs = outFilePath.getFileSystem(context.getConfiguration());
final FSDataOutputStream output = fs.create(outFilePath);
return new RecordWriter<Text, Text>() {
    @Override
    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
        output.flush();
        output.close();
    }

    @Override
    public void write(Text key, Text value) throws IOException, InterruptedException {
        LOG.info("RecordWriter filename:" + value.toString());
        // The value is a reduce-side temp file name; stream its contents
        // into the job's real output file.
        Path file = new Path(StringUtils.unEscapeString(TMP_RED_DIR + value.toString()));
        FileSystem fs = file.getFileSystem(context.getConfiguration());
        FSDataInputStream input = fs.open(file);
        BufferedInputStream bi = new BufferedInputStream(input);
        byte[] buffer = new byte[1024];
        int length = bi.read(buffer);
        while (length != -1) {
            // Write only the bytes actually read; a read may fill less than the buffer.
            output.write(buffer, 0, length);
            length = bi.read(buffer);
        }
        bi.close();
        input.close();
    }
};
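The class wrapping this method is not shown. Since the code calls getDefaultWorkFile(), a protected helper of FileOutputFormat, the OutputFormater class that the driver references presumably looks like this hypothetical skeleton:

    // Sketch only: assumed wrapper for the getRecordWriter() body in step 4.
    public static class OutputFormater extends FileOutputFormat<Text, Text> {
        @Override
        public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
                throws IOException, InterruptedException {
            // ... the body listed above goes here ...
        }
    }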
5. Write the driver code.
The code is roughly as follows:
File jarFile = EJob.createTempJar("bin");
EJob.addClasspath("I:\\work\\hadoop\\hadoop\\hadoop-site.xml");
ClassLoader classLoader = EJob.getClassLoader();
Thread.currentThread().setContextClassLoader(classLoader);

args = new String[] { "/tmp/sqlloader10/" }; // hard-coded output dir for testing
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length != 1) {
    System.err.println("Usage: informixLoader <out>");
    System.exit(2);
}
conf.set("informix.url",
        "jdbc:informix-sqli://10.0.2.36:8001/niosdb:INFORMIXSERVER=niosserver");

Job job = new Job(conf, "informix loader");
// Point the job at the temp jar so the cluster nodes can load our classes.
((JobConf) job.getConfiguration()).setJar(jarFile.toString());
job.setInputFormatClass(InputFormarter.class);
job.setJarByClass(InformixLoader.class);
job.setMapperClass(SqlMapper.class);
job.setReducerClass(SqlReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(OutputFormater.class);
// No FileInputFormat.addInputPath(): InputFormarter generates splits from SQL.
FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
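EJob appears to be a small helper, common in run-Hadoop-from-Eclipse write-ups, that packs the IDE's bin directory into a temporary jar so the job can be submitted without a separate build step; the cast to JobConf is needed because this Job version exposes no public setJar(). If you build the jar yourself, the EJob lines can be dropped and the job launched the usual way, for example (the jar name here is hypothetical):

    hadoop jar informix-loader.jar InformixLoader /tmp/sqlloader10/

Note that the Informix JDBC driver (ifxjdbc.jar) must also be on the task classpath, e.g. passed via the standard -libjars option that GenericOptionsParser understands.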
6. A detailed analysis will follow in a later write-up.