Hadoop 2.8.5 Detailed Tutorial (11): MapReduce Programming

MapReduce Programming

Prerequisites: place winutils.exe and hadoop.dll in hadoop2.8.5/bin,
and set the HADOOP_HOME and PATH environment variables (HADOOP_HOME pointing at the Hadoop install directory, with its bin folder on PATH).
Note: when developing on Windows 10, open Eclipse as administrator.
1 Create a Java project.
2 Create a lib folder under the project and copy the Hadoop jar files into it.
3 Add all the jars in the lib folder to the Java build path.
4 Write the Map, Reduce, and Job (driver) code.

Map class:

package a;

import java.io.IOException;


import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

	// Reused Writable objects, to avoid allocating new ones on every call
	final IntWritable one = new IntWritable(1);
	Text word = new Text();

	@Override
	public void map(LongWritable ikey, Text ivalue, Context context) throws IOException, InterruptedException {
		// Split the input line on spaces and emit (word, 1) for each token
		String[] str = ivalue.toString().split(" ");
		for (String s : str) {
			word.set(s);
			context.write(word, one);
		}
	}
}
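
For example, given the input line "hello world hello", this mapper emits the pairs (hello, 1), (world, 1), (hello, 1); the framework then groups the pairs by key before they reach the reducer.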

Reduce class:

package a;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, LongWritable> {

	@Override
	public void reduce(Text _key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
		// Sum the 1s emitted by the map phase for this word
		long sum = 0;
		for (IntWritable val : values) {
			sum += val.get();
		}
		LongWritable result = new LongWritable(sum);
		context.write(_key, result);
	}
}
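
Continuing the example above, the reducer is called once per word: for hello it receives the value list [1, 1] and writes (hello, 2); for world it receives [1] and writes (world, 1).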

Job (driver) class:

package a;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

	public static void main(String[] args) {
		try {
			Configuration conf = new Configuration();
			String[] files = { "F:/mr_data/wordcount/input", "F:/mr_data/wordcount/output3" };
			Job job = Job.getInstance(conf, "Word Count");
			// Set the entry-point class
			job.setJarByClass(WordCountDriver.class);
			// Set the Mapper
			job.setMapperClass(WordCountMap.class);
			// Set the Reducer
			job.setReducerClass(WordCountReduce.class);
			// Output key/value types of the map phase
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(IntWritable.class);
			// Output key/value types of the reduce phase, i.e. the final output of the whole MR job
			job.setOutputKeyClass(Text.class);
			job.setOutputValueClass(LongWritable.class);

			// Input path of the job
			FileInputFormat.addInputPath(job, new Path(files[0]));
			// Output path of the job
			FileOutputFormat.setOutputPath(job, new Path(files[1]));

			System.exit(job.waitForCompletion(true) ? 0 : 1);
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
}
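
Two things worth knowing about the driver. First, the output directory ("F:/mr_data/wordcount/output3" here) must not exist before the job starts, or Hadoop aborts with a FileAlreadyExistsException. Second, with the default TextOutputFormat the results land in a part-r-00000 file inside the output directory, one word and its count per line separated by a tab, sorted by key; for instance, an input file containing the lines "hello world" and "hello hadoop" yields hadoop 1, hello 2, world 1.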

5 Run the job.
6 Check the results in the output folder on the F: drive.
7 Modify the code so it can run on Linux (a sketch follows this list).
8 Export a jar with Eclipse's export feature.
9 Start HDFS and YARN on the Linux cluster.
10 Upload the jar to Linux.
11 Run the program with hadoop jar XXXX.jar.
12 Check the results in HDFS.
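
For step 7, here is a minimal sketch of one common approach: read the input and output paths from the command-line arguments instead of hard-coding the F:/ paths, so the same driver works against HDFS on the cluster. Everything else matches the driver above; the jar name and HDFS paths used below are placeholders, not fixed by this tutorial.

package a;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf, "Word Count");
		job.setJarByClass(WordCountDriver.class);
		job.setMapperClass(WordCountMap.class);
		job.setReducerClass(WordCountReduce.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);
		// Take input/output paths from the command line instead of hard-coding F:/ paths,
		// so the same jar runs unchanged against HDFS on the cluster
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}

It can then be launched on the cluster with something like hadoop jar wordcount.jar a.WordCountDriver /mr_data/wordcount/input /mr_data/wordcount/output (naming the driver class is needed if the jar's manifest does not declare a main class), and the results inspected with hdfs dfs -cat /mr_data/wordcount/output/part-r-00000.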

Troubleshooting:
(error screenshot not shown)
If you hit this kind of error when running on Windows 10, open Eclipse as administrator and try again.
