import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; public class ImportDataFromMongoReducer extends Reducer<Text, Text, Text, Text> { private static final Log LOG = LogFactory .getLog(ImportDataFromMongoReducer.class); private MultipleOutputs out; public void setup(Context context) { out = new MultipleOutputs(context); } private String generateFileName(Text k) { return k.toString() + "/part"; } @Override public void reduce(final Text pKey, final Iterable<Text> pValues, final Context pContext) throws IOException, InterruptedException { for (final Text value : pValues) { // pContext.write(pKey, value); out.write(NullWritable.get(), value, generateFileName(pKey)); } } protected void cleanup(Context context) throws IOException, InterruptedException { out.close(); } }
(Scraped blog-platform boilerplate, translated: "Scan the QR code to follow the official
account; reply 599670 to view this article." — not part of the code or its documentation.)
References
http://hadoop.apache.org/docs/current/api/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.html
http://www.infoq.com/articles/HadoopOutputFormat