MapReduce WordCount: Word Frequency Statistics

1. Requirements

1. Input file

[Figure: screenshot of the sample input file]

2. Requirement

Count the number of occurrences of each word in the data file and output the result as word-count pairs, for example (a 14).
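For illustration, given a small hypothetical input file with the two lines below (the original screenshot is not reproduced here), each distinct word is paired with its total count:

a b a c
b a

Expected output:

a 3
b 2
c 1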

2. Code implementation

1. Approach

In the map stage, each line of input is read and split on spaces, and every word is emitted to the reduce stage as a (word, 1) pair. Before the pairs reach the reducers, the framework's shuffle step partitions the intermediate results so that all pairs with the same word are grouped together and routed to the same reducer. The reduce stage then sums the counts for each word in its partition and writes the final result to the specified output folder, as sketched below.
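Using the small sample input above, the intermediate data flows roughly as follows (an illustrative sketch of the shuffle, not actual framework output):

map output:    (a,1) (b,1) (a,1) (c,1) (b,1) (a,1)
after shuffle: (a,[1,1,1]) (b,[1,1]) (c,[1])
reduce output: (a,3) (b,2) (c,1)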

2. Code

Mapper:

package com.worldcount.zqd;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Output key/value objects are reused across records to avoid repeated allocation
    Text k = new Text();
    LongWritable l = new LongWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();                 // read one line of data
        String[] words = StringUtils.split(line, " ");  // split the line on spaces with the utility class, returning an array
        for (String word : words) {
            k.set(word);            // the word becomes the output key
            l.set(1);               // each occurrence counts as 1
            context.write(k, l);    // emit the (word, 1) key-value pair
        }
    }
}
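Note that StringUtils.split from commons-lang skips empty tokens, so runs of consecutive spaces do not produce empty words. If commons-lang is not available, a minimal sketch of an equivalent split using only the JDK (an alternative, not the original author's code):

String[] words = line.trim().split("\\s+");  // split on any run of whitespace; trim() avoids a leading empty token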

Reducer:

package com.worldcount.zqd;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;

public class WCreducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    // Output value object, reused across keys
    LongWritable l = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long count = 0;     // running total for this word
        // An enhanced for loop walks all values grouped under this key and accumulates them
        for (LongWritable value : values) {
            count += value.get();
        }
        l.set(count);
        context.write(key, l);  // emit the (word, total count) pair
    }
}
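Because this reduce function is a pure associative sum, the same class can also serve as a combiner, pre-aggregating counts on the map side to cut shuffle traffic. This is a standard Hadoop option (a suggested optimization, not part of the original code), registered in the driver with:

job.setCombinerClass(WCreducer.class);  // run the reducer logic locally on each map's output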

Main program:

package com.worldcount.zqd;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class WCrunner {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(WCrunner.class);              // locate the jar containing this job
        job.setMapperClass(WCMapper.class);
        job.setReducerClass(WCreducer.class);
        job.setOutputKeyClass(Text.class);              // also used as the map output key type here
        job.setOutputValueClass(LongWritable.class);    // also used as the map output value type here
        // Local input/output paths; the output directory must not already exist
        FileInputFormat.setInputPaths(job, new Path("C:\\Users\\Lenovo\\Desktop\\hadoop_mr\\wc_input"));
        FileOutputFormat.setOutputPath(job, new Path("C:\\Users\\Lenovo\\Desktop\\hadoop_mr\\wc_out"));
        job.waitForCompletion(true);
    }
}
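Two practical notes on the driver: FileOutputFormat fails the job if the output directory (wc_out here) already exists, so it must be removed between runs; and the boolean returned by waitForCompletion can be propagated as the process exit code. A minimal sketch of the latter, replacing the last line of main (standard Hadoop usage, not in the original code):

System.exit(job.waitForCompletion(true) ? 0 : 1);  // exit 0 on success, 1 on failure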

The result is as follows:
[Figure: screenshot of the output file, one word and its count per line]

Origin: blog.csdn.net/me_1984/article/details/109005346