大数据技术之Hadoop之MapReduce(3)——KeyValueTextInputFormat使用案例

3.1.7 KeyValueTextInputFormat使用案例

1.需求

按每一行的第一个单词分组，统计第一个单词相同的行数。
(1)输入数据

banzhang ni hao
xihuan hadoop banzhang
banzhang ni hao
xihuan hadoop banzhang

(2)期望结果数据

banzhang	2
xihuan	2
2.需求分析

在本地的 Hadoop 3.1.2 环境中运行程序处理输入数据，得到相应的结果。KeyValueTextInputFormat 按分隔符（本例设置为空格）把每一行切分为 key 和 value，因此每行的第一个单词就是 key，只需对相同的 key 计数即可。

3.代码实现

(1)编写Mapper类

/**
 * Mapper for the KeyValueTextInputFormat example.
 *
 * <p>For every input record it emits the record's key together with a
 * constant count of 1. The record's value is not used.
 *
 * @author zhangyong
 * @version 1.0 (2020/3/6 9:00)
 */
public class KVTextMapper extends Mapper<Text, Text, Text, LongWritable> {

    // Output value shared by all records: the constant count 1.
    LongWritable v = new LongWritable(1);

    /**
     * Writes {@code (key, 1)} to the context for the given record.
     *
     * @param key     key of the input record; forwarded unchanged as the output key
     * @param value   value of the input record; ignored
     * @param context task context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        // Sample input line: "banzhang ni hao".
        // Emit the key with a count of one.
        context.write(key, v);
    }
}

(2)编写Reducer类

/**
 * Reducer for the KeyValueTextInputFormat example.
 *
 * <p>Adds up all counts received for a key and emits the key with that total.
 *
 * @author zhangyong
 * @version 1.0 (2020/3/6 9:00)
 */
public class KVTextReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    // Output value object; it is overwritten with the total on every reduce call.
    LongWritable v = new LongWritable();

    /**
     * Sums the values of one key and writes {@code (key, total)} to the context.
     *
     * @param key     the key being reduced; forwarded unchanged as the output key
     * @param values  the counts collected for this key
     * @param context task context that receives the output pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Step 1: accumulate the counts for this key.
        long total = 0L;
        for (LongWritable count : values) {
            total += count.get();
        }

        // Step 2: emit the key with the accumulated total.
        v.set(total);
        context.write(key, v);
    }
}

(3)编写Driver类

/**
 * Driver (program entry point) for the KeyValueTextInputFormat example.
 *
 * <p>Configures a local-mode job that reads its input through
 * {@code KeyValueTextInputFormat} with a single space as the key/value
 * separator, wires in {@code KVTextMapper} and {@code KVTextReducer}, and
 * waits for the job to finish.
 *
 * @author zhangyong
 * @version 1.0 (2020/3/4 9:00)
 */
public class KVTextDriver {

    /** Input directory used when no paths are supplied on the command line. */
    private static final String DEFAULT_INPUT = "src/main/resources/kv/kvi2/";

    /** Output directory used when no paths are supplied on the command line. */
    private static final String DEFAULT_OUTPUT = "src/main/resources/kv/kvo2";

    /**
     * Configures, submits and waits for the job.
     *
     * @param args optional {@code [inputPath, outputPath]}; when fewer than two
     *             arguments are given the built-in default paths are used
     * @throws IOException            if file system access or job submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        // Use the command-line paths when both are present, otherwise the defaults.
        final boolean pathsGiven = args != null && args.length >= 2;
        final Path inputPath = new Path(pathsGiven ? args[0] : DEFAULT_INPUT);
        final Path outputPath = new Path(pathsGiven ? args[1] : DEFAULT_OUTPUT);

        // Run with the local job runner against the local file system.
        Configuration cfg = new Configuration();
        cfg.set("mapreduce.framework.name", "local");
        cfg.set("fs.defaultFS", "file:///");
        // Split each line into key and value at a single space.
        // NOTE(review): the constant name is spelled "SEPERATOR" as in the original code;
        // confirm whether the Hadoop version in use offers a differently spelled replacement.
        cfg.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, " ");

        // Remove a stale output directory. The existence check must target the
        // OUTPUT path (the original code tested the input path by mistake).
        final FileSystem filesystem = FileSystem.get(cfg);
        if (filesystem.exists(outputPath)) {
            filesystem.delete(outputPath, true);
        }

        // 1. Obtain the job object.
        Job job = Job.getInstance(cfg);

        // 2. Set the jar location and associate the mapper and reducer.
        job.setJarByClass(KVTextDriver.class);
        job.setMapperClass(KVTextMapper.class);
        job.setReducerClass(KVTextReducer.class);

        // 3. Key/value types emitted by the map phase.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // 4. Key/value types of the final output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // 5. Input path and input format.
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(KeyValueTextInputFormat.class);

        // 6. Output path.
        FileOutputFormat.setOutputPath(job, outputPath);

        // 7. Submit the job and wait; report failure through the exit status
        //    instead of silently discarding the result.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}

(4)项目结构及运行结果
在这里插入图片描述

发布了37 篇原创文章 · 获赞 7 · 访问量 1173

猜你喜欢

转载自blog.csdn.net/zy13765287861/article/details/104688911