Hadoop——Java编写MapReduce代码并运行

1、导入Hadoop的核心依赖包（以下目录均位于Hadoop安装目录下，需导入各目录中的jar包）

\share\hadoop\common
\share\hadoop\common\lib
\share\hadoop\hdfs
\share\hadoop\hdfs\lib
\share\hadoop\mapreduce
\share\hadoop\mapreduce\lib

2、Map类

package MapReduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;

import java.io.IOException;
/**
 * Word-count mapper: splits each input line on single spaces and emits
 * {@code (word, 1)} for every non-empty token.
 *
 * <p>The four type parameters are the input key/value types followed by the
 * output key/value types.
 */
public class Map extends Mapper<LongWritable, Text, Text, LongWritable> {
    /** Count of one that is emitted for every word occurrence. */
    private static final LongWritable ONE = new LongWritable(1);

    /**
     * Output key reused across calls to avoid allocating a new {@link Text}
     * per word (the usual Hadoop WordCount pattern).
     */
    private final Text outKey = new Text();

    /**
     * Emits {@code (word, 1)} for each word of one input line.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   one line of input text
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Convert the Hadoop Text value into a plain String.
        String line = value.toString();
        // Split the line on the space character.
        String[] words = StringUtils.split(line, ' ');
        for (String word : words) {
            // A blank line or leading/consecutive spaces produce empty tokens;
            // skip them so the empty string is not counted as a word.
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word);
            context.write(outKey, ONE);
        }
    }
}

3、Red类

package MapReduce;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Word-count reducer: adds up all counts received for a key and emits
 * {@code (key, total)}.
 */
public class Red extends Reducer<Text, LongWritable, Text, LongWritable> {
    /**
     * Writes the sum of {@code values} for {@code key} to the context.
     *
     * @param key     the key whose values are being aggregated
     * @param values  the counts grouped under {@code key}
     * @param context sink for the single {@code (key, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        LongWritable total = new LongWritable(sumOf(values));
        context.write(key, total);
    }

    /** Returns the sum of all counts in {@code counts}. */
    private static long sumOf(Iterable<LongWritable> counts) {
        long total = 0;
        for (LongWritable single : counts) {
            total += single.get();
        }
        return total;
    }
}

4、Run类

package MapReduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver that configures the word-count job ({@link Map} + {@link Red}),
 * submits it, and exits with a status reflecting the job result.
 */
public class Run {
    /** Input path used when no paths are given on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/input/";

    /** Output path used when no paths are given on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/output/";

    /**
     * Configures and runs the job, then terminates the JVM with exit status
     * 0 if the job succeeded and 1 otherwise.
     *
     * @param args optional; when exactly two arguments are given they are used
     *             as the input path and the output path, otherwise the
     *             defaults {@code /input/} and {@code /output/} are used
     * @throws IOException            propagated from job setup or execution
     * @throws ClassNotFoundException propagated from {@code waitForCompletion}
     * @throws InterruptedException   propagated from {@code waitForCompletion}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Only an exact <input> <output> pair overrides the defaults, so any
        // other argument count behaves exactly like the no-argument invocation.
        boolean pathsGiven = args.length == 2;
        String inputPath = pathsGiven ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = pathsGiven ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // Tell the job which jar contains the classes it uses.
        job.setJarByClass(Run.class);

        // Set the Mapper and Reducer classes.
        job.setMapperClass(Map.class);
        job.setReducerClass(Red.class);

        // Set the reducer (final) output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // Set the mapper output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // Set where the raw input data is read from.
        FileInputFormat.setInputPaths(job, new Path(inputPath));

        // Set where the results are written.
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Submit the job, wait for it to finish, and report failure through
        // the process exit status instead of discarding the result.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

5、打包为Jar包

https://blog.csdn.net/Asdzxc968/article/details/89057455

6、运行命令

创建input目录

hadoop fs -mkdir /input

提交文件

hadoop fs -put 文件路径 /input

运行（注意：输出目录 /output 不能事先存在，否则作业会报错；重新运行前请先执行 hadoop fs -rm -r /output）

hadoop jar MapReduce.jar MapReduce.Run

Ys2025

发布了49 篇原创文章 · 获赞 5 · 访问量 8779

私信关注

Hadoop——Java编写MapReduce代码并运行

1、导入Hadoop的核心依赖包

2、Map类

3、Red类

4、Run类

5、打包为Jar包

6、运行命令

猜你喜欢