Hadoop MapReduce程序开发（一）

1.Configuration

package com.mapreduce.test1;

import org.apache.hadoop.conf.Configuration;


/**
 * Small demo of Hadoop's {@code Configuration}: registers two XML resources
 * by name and prints the values of two properties.
 */
public class Test1 {
	/**
	 * Adds {@code config-default.xml} and then {@code config-site.xml} to a fresh
	 * {@code Configuration}, then prints {@code hadoop.tmp.dir} and {@code height},
	 * one per line.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		Configuration settings = new Configuration();

		// Registration order matters: the site file is added after the default file.
		for (String resourceName : new String[] {"config-default.xml", "config-site.xml"}) {
			settings.addResource(resourceName);
		}

		for (String propertyName : new String[] {"hadoop.tmp.dir", "height"}) {
			System.out.println(settings.get(propertyName));
		}
	}
}

多个配置文件按顺序加载，后加载的文件会覆盖先加载文件中的同名属性，但不能覆盖被标记为 final（即 final 为 true）的属性。

2.Mapper：InputFormat 将输入数据切分成若干 InputSplit，RecordReader 再把每个 split 解析成 <key, value> 对，交给 Mapper 处理

package com.mapReducce.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that turns each space-separated input line into a (text, float) pair.
 *
 * <p>The first token of the line becomes the output key; the second token is
 * parsed as a float and becomes the output value. Any further tokens are ignored.
 */
public class Map extends Mapper<LongWritable, Text, Text, FloatWritable> {

	/** Counter group used to report lines that were skipped. */
	private static final String COUNTER_GROUP = "Map";

	/** Counter name for lines that contain fewer than two tokens. */
	private static final String SHORT_LINE_COUNTER = "SHORT_LINES";

	/**
	 * Emits {@code (firstToken, secondTokenAsFloat)} for one input line.
	 *
	 * <p>Lines with fewer than two space-separated tokens (for example blank
	 * lines) are skipped and counted instead of failing the task with a
	 * {@code NoSuchElementException}.
	 *
	 * @param key input key; not used by this mapper
	 * @param value the input line
	 * @param context job context used to emit the output pair and to count skipped lines
	 * @throws IOException if writing the output pair fails
	 * @throws InterruptedException if writing the output pair is interrupted
	 * @throws NumberFormatException if the second token is not a valid float
	 */
	@Override
	protected void map(LongWritable key, Text value,
			Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		System.out.println(line);
		StringTokenizer token = new StringTokenizer(line, " ");

		// Guard before calling nextToken(): it throws when no token is left.
		if (token.countTokens() < 2) {
			context.getCounter(COUNTER_GROUP, SHORT_LINE_COUNTER).increment(1);
			return;
		}

		String symb1 = token.nextToken();
		String symb2 = token.nextToken();

		// parseFloat yields the primitive directly, avoiding a boxed Float.
		context.write(new Text(symb1), new FloatWritable(Float.parseFloat(symb2)));
	}

}

3.Reduce（注：下面贴出的代码与第 2 节的 Mapper 完全相同，并不是 Reducer 的实现，原文此处疑似贴错了代码）

package com.mapReducce.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that turns each space-separated input line into a (text, float) pair.
 *
 * <p>The first token of the line becomes the output key; the second token is
 * parsed as a float and becomes the output value. Any further tokens are ignored.
 *
 * <p>NOTE(review): this listing sits under the "Reduce" heading but it is a
 * {@code Mapper}, identical to the listing in the preceding section; no reducer
 * is defined here.
 */
public class Map extends Mapper<LongWritable, Text, Text, FloatWritable> {

	/** Counter group used to report lines that were skipped. */
	private static final String COUNTER_GROUP = "Map";

	/** Counter name for lines that contain fewer than two tokens. */
	private static final String SHORT_LINE_COUNTER = "SHORT_LINES";

	/**
	 * Emits {@code (firstToken, secondTokenAsFloat)} for one input line.
	 *
	 * <p>Lines with fewer than two space-separated tokens (for example blank
	 * lines) are skipped and counted instead of failing the task with a
	 * {@code NoSuchElementException}.
	 *
	 * @param key input key; not used by this mapper
	 * @param value the input line
	 * @param context job context used to emit the output pair and to count skipped lines
	 * @throws IOException if writing the output pair fails
	 * @throws InterruptedException if writing the output pair is interrupted
	 * @throws NumberFormatException if the second token is not a valid float
	 */
	@Override
	protected void map(LongWritable key, Text value,
			Context context)
			throws IOException, InterruptedException {
		String line = value.toString();
		System.out.println(line);
		StringTokenizer token = new StringTokenizer(line, " ");

		// Guard before calling nextToken(): it throws when no token is left.
		if (token.countTokens() < 2) {
			context.getCounter(COUNTER_GROUP, SHORT_LINE_COUNTER).increment(1);
			return;
		}

		String symb1 = token.nextToken();
		String symb2 = token.nextToken();

		// parseFloat yields the primitive directly, avoiding a boxed Float.
		context.write(new Text(symb1), new FloatWritable(Float.parseFloat(symb2)));
	}

}

Hadoop MapReduce程序开发（一）

猜你喜欢