1.Configuration
package com.mapreduce.test1;

import org.apache.hadoop.conf.Configuration;

/**
 * Small demo of Hadoop's {@link Configuration}: registers two XML resources
 * and prints the values of two properties.
 *
 * <p>The resources are added in a fixed order ({@code config-default.xml}
 * first, {@code config-site.xml} second). Per the accompanying notes, a
 * resource added later overrides properties from an earlier one, except for
 * properties declared final.
 */
public class Test1 {

    /**
     * Builds the configuration, adds both resources and prints the looked-up values.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Configuration configuration = new Configuration();

        // Order matters: the site file is registered after the default file.
        String[] resourceNames = {"config-default.xml", "config-site.xml"};
        for (String resourceName : resourceNames) {
            configuration.addResource(resourceName);
        }

        // Print each property on its own line, in the same order as before.
        String[] propertyNames = {"hadoop.tmp.dir", "height"};
        for (String propertyName : propertyNames) {
            System.out.println(configuration.get(propertyName));
        }
    }
}
多个配置文件按 addResource 的调用顺序依次加载:后加载的文件会覆盖先加载文件中的同名属性,但被声明为 final(即 <final>true</final>)的属性不会被后面的文件覆盖。
2.Mapper:Mapper 处理的数据是由 InputFormat 对输入数据集切分得到的 InputSplit;RecordReader 再把每个 split 解析成一条条 <key, value> 记录,交给 map() 方法逐条处理。
package com.mapReducce.test; import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class Map extends Mapper<LongWritable, Text, Text, FloatWritable> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); System.out.println(line); StringTokenizer token = new StringTokenizer(line, " "); String symb1 = token.nextToken(); String symb2 = token.nextToken(); context.write(new Text(symb1), new FloatWritable(Float.valueOf(symb2))); } }
3.Reduce(注意:下面贴出的代码与第 2 节的 Mapper 完全相同,并不是 Reducer 的实现;Reducer 的代码在此处缺失,需要补充。)
package com.mapReducce.test; import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class Map extends Mapper<LongWritable, Text, Text, FloatWritable> { @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); System.out.println(line); StringTokenizer token = new StringTokenizer(line, " "); String symb1 = token.nextToken(); String symb2 = token.nextToken(); context.write(new Text(symb1), new FloatWritable(Float.valueOf(symb2))); } }