MapReduce Programming
Prerequisite: put winutils.exe and hadoop.dll into hadoop2.8.5/bin
Configure the HADOOP_HOME and PATH environment variables
Note: if you are developing on Windows 10, open Eclipse as Administrator
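A quick way to confirm that the environment is actually visible to Java is a throwaway check like the one below (the EnvCheck class name is only an illustrative example, not part of the project):
public class EnvCheck {
    public static void main(String[] args) {
        // Print the Hadoop home directory that the JVM actually sees
        String home = System.getenv("HADOOP_HOME");
        System.out.println("HADOOP_HOME = " + home);
        if (home != null) {
            // winutils.exe must sit under %HADOOP_HOME%\bin for local MapReduce runs on Windows
            java.io.File winutils = new java.io.File(home, "bin/winutils.exe");
            System.out.println("winutils.exe found: " + winutils.exists());
        }
    }
}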
1 Create a Java project
2 Create a lib folder under the project and copy the Hadoop jar files into it
3 Add all jars in the lib folder to the Java Build Path
4 Write the map, reduce and job (driver) code
Map class:
package a;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused output objects so a new Writable is not allocated for every record
    final IntWritable one = new IntWritable(1);
    Text word = new Text();

    @Override
    public void map(LongWritable ikey, Text ivalue, Context context)
            throws IOException, InterruptedException {
        // ikey is the byte offset of the line, ivalue is the line of text
        String[] str = ivalue.toString().split(" ");
        for (String s : str) {
            word.set(s);
            context.write(word, one); // emit (word, 1) for every token
        }
    }
}
Reduce class:
package a;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordCountReduce extends Reducer<Text, IntWritable, Text, LongWritable> {

    @Override
    public void reduce(Text _key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum all the counts emitted by the mappers for this word
        long sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        LongWritable result = new LongWritable(sum);
        context.write(_key, result); // final output: (word, total count)
    }
}
Job (driver) class:
package a;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            String[] files = { "F:/mr_data/wordcount/input", "F:/mr_data/wordcount/output3" };

            Job job = Job.getInstance(conf, "Word Count");
            // Entry class (used to locate the jar when running on a cluster)
            job.setJarByClass(WordCountDriver.class);
            // Mapper class
            job.setMapperClass(WordCountMap.class);
            // Reducer class
            job.setReducerClass(WordCountReduce.class);
            // Output key/value types of the map phase
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            // Output key/value types of the reduce phase, i.e. the final output of the whole MR job
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            // Input path of the job
            FileInputFormat.addInputPath(job, new Path(files[0]));
            // Output path of the job (must not already exist)
            FileOutputFormat.setOutputPath(job, new Path(files[1]));

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
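One caveat: FileOutputFormat refuses to start the job if the output directory already exists, which is presumably why the example path ends in output3. A minimal sketch of clearing the directory first, assuming the files array from the driver above, adds the following inside main() before the job is submitted (it also needs import org.apache.hadoop.fs.FileSystem;):
FileSystem fs = FileSystem.get(conf);
Path outPath = new Path(files[1]);
if (fs.exists(outPath)) {
    fs.delete(outPath, true); // true = delete the old results recursively
}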
5 Run the job
6 Check the results in the output folder on the F: drive
7 Modify the code so it can run on the Linux cluster (see the sketch after this list)
8 Export a jar with Eclipse's Export feature
9 Start HDFS and YARN on the Linux cluster
10 Upload the jar to Linux
11 Run the program with hadoop jar XXXX.jar
12 Check the results in HDFS
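For step 7, the usual change is to stop hard-coding the F:/ paths and read the input and output locations from the command line, so they can point at HDFS when the job runs on the cluster. A minimal sketch (keeping the class names from above) replaces the files array in WordCountDriver.main() with:
// Read input/output paths from the command line instead of hard-coding Windows paths
if (args.length < 2) {
    System.err.println("Usage: WordCountDriver <input path> <output path>");
    System.exit(2);
}
String[] files = { args[0], args[1] };
A typical invocation on the cluster would then look like hadoop jar wordcount.jar a.WordCountDriver /wordcount/input /wordcount/output (the jar name and the HDFS paths here are only placeholders).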
Troubleshooting:
If you run into errors when running under Windows 10, open Eclipse as Administrator and try again (see the note at the top of this section).