KeyValueTextInputFormat use case

For each distinct first word, count how many lines of the input file begin with that word.
(1) Input data
banzhang ni hao
xihuan hadoop banzhang
banzhang ni hao
xihuan hadoop banzhang
(2) Expected result data
banzhang 2
xihuan 2

map class

package KVText;


import java.io.IOException;

//banzhang ni hao

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class KVTextMapper extends Mapper<Text, Text, Text, LongWritable> {

	// Reused output value: each record contributes a count of 1 for its key.
	LongWritable v = new LongWritable(1);

	/**
	 * Emits {@code (key, 1)} for every input record. With
	 * {@code KeyValueTextInputFormat} the key is the text before the configured
	 * separator (here: the first word of the line), so the reducer can sum the
	 * occurrences per first word.
	 */
	@Override
	protected void map(Text key, Text value, Context context)
			throws IOException, InterruptedException {
		context.write(key, v);
	}
}

reducer class

package KVText;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class KVTextReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

	// Reused output value, refilled with the per-key total before each write.
	LongWritable v = new LongWritable();

	/**
	 * Sums all counts received for {@code key} and writes {@code (key, total)}.
	 *
	 * @param key    the first word of the input lines
	 * @param values the per-record counts (each 1) emitted by the mapper
	 */
	@Override
	protected void reduce(Text key, Iterable<LongWritable> values,
			Context context) throws IOException, InterruptedException {

		// Accumulate into a primitive long: the original boxed `Long sum`
		// autoboxed/unboxed on every iteration of the loop for no benefit.
		long sum = 0L;
		for (LongWritable value : values) {
			sum += value.get();
		}

		v.set(sum);

		// Emit the aggregated count for this key.
		context.write(key, v);
	}
}

driver class

package KVText;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class KVTextDriver {

	/**
	 * Configures and submits the first-word counting job.
	 *
	 * <p>Exits with status 0 on success, 1 on failure.
	 *
	 * @param args {@code args[0]} = input path, {@code args[1]} = output path;
	 *             local demo paths are used when fewer than two are supplied
	 */
	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

		// Fall back to the demo paths only when none were supplied; the
		// original unconditionally overwrote real command-line arguments.
		// ("e:input2" was also drive-relative; normalized to "e:/input2".)
		if (args.length < 2) {
			args = new String[] { "e:/input2", "e:/output8" };
		}

		// Set the key/value separator: split each line at the first space so
		// the first word becomes the map key. Without this, the default
		// separator is TAB and the whole line would be the key, so the job
		// could never produce the expected per-first-word counts.
		Configuration conf = new Configuration();
		conf.set(KeyValueLineRecordReader.KEY_VALUE_SEPERATOR, " ");

		// Create the job.
		Job job = Job.getInstance(conf);

		// Locate the jar by the driver class.
		job.setJarByClass(KVTextDriver.class);

		// Wire up mapper and reducer.
		job.setMapperClass(KVTextMapper.class);
		job.setReducerClass(KVTextReducer.class);

		// Map output key/value types.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);

		// Final output key/value types.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		// Input path and input format (key = text before separator).
		FileInputFormat.setInputPaths(job, new Path(args[0]));
		job.setInputFormatClass(KeyValueTextInputFormat.class);

		// Output path (must not already exist).
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// Submit and wait; propagate success/failure via exit code.
		boolean result = job.waitForCompletion(true);
		System.exit(result ? 0 : 1);
	}
}

You may also like

Origin blog.csdn.net/weixin_46457946/article/details/114132025