KeyValueTextInputFormat use case

Count how many lines in the input file start with the same first word, i.e. count the occurrences of each line's first word.
(1) Input data
banzhang ni hao
xihuan hadoop banzhang
banzhang ni hao
xihuan hadoop banzhang
(2) Expected output
banzhang 2
xihuan 2
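KeyValueTextInputFormat splits each input line at the first occurrence of a configurable separator: everything before the separator becomes the map input key, and everything after it becomes the map input value. A minimal sketch of that split, assuming the separator is set to a single space as in the driver below (the class name SeparatorSketch is only illustrative):

import org.apache.hadoop.io.Text;

public class SeparatorSketch {
	public static void main(String[] args) {
		String line = "banzhang ni hao";
		// only the first occurrence of the separator is used for the split
		int sep = line.indexOf(' ');
		Text key = new Text(line.substring(0, sep));     // "banzhang" -> map input key
		Text value = new Text(line.substring(sep + 1));  // "ni hao"   -> map input value
		System.out.println(key + " -> " + value);        // prints: banzhang -> ni hao
	}
}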

Mapper class

package KVText;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Input line example: "banzhang ni hao" -> key = "banzhang", value = "ni hao"
public class KVTextMapper extends Mapper<Text, Text, Text, LongWritable> {

	// output value: every occurrence of a key counts as 1
	private final LongWritable v = new LongWritable(1);

	@Override
	protected void map(Text key, Text value, Context context)
			throws IOException, InterruptedException {

		// write out (first word, 1); the rest of the line is not needed
		context.write(key, v);
	}
}

Reducer class

package KVText;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class KVTextReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

	private final LongWritable v = new LongWritable();

	@Override
	protected void reduce(Text key, Iterable<LongWritable> values, Context context)
			throws IOException, InterruptedException {

		// accumulate the total count for this key
		long sum = 0L;
		for (LongWritable value : values) {
			sum += value.get();
		}

		v.set(sum);

		// write out (first word, total count)
		context.write(key, v);
	}
}

Driver class

package KVText;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class KVTextDriver {

	public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

		// local input/output paths used for this example
		args = new String[] { "e:input2", "e:/output8" };

		Configuration conf = new Configuration();
		// set the key/value separator; the default is a tab, but here the key
		// must be the first word of each line, so a single space is used
		conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");

		// get the Job instance
		Job job = Job.getInstance(conf);

		// set the jar by the driver class
		job.setJarByClass(KVTextDriver.class);

		// associate the mapper and reducer
		job.setMapperClass(KVTextMapper.class);
		job.setReducerClass(KVTextReducer.class);

		// set the map output key/value types
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(LongWritable.class);

		// set the final output key/value types
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(LongWritable.class);

		// set the input path
		FileInputFormat.setInputPaths(job, new Path(args[0]));

		// set the input format to KeyValueTextInputFormat
		job.setInputFormatClass(KeyValueTextInputFormat.class);

		// set the output path
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// submit the job and wait for completion
		boolean result = job.waitForCompletion(true);
		System.exit(result ? 0 : 1);
	}
}
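For contrast, with the default TextInputFormat the map input key would be the byte offset of each line (a LongWritable), and the mapper itself would have to split out the first word. A hedged sketch of that alternative, where the class OffsetTextMapper is hypothetical and not part of the example above:

package KVText;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper for the default TextInputFormat: the input key is the
// byte offset of the line, so the first word has to be extracted manually.
public class OffsetTextMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

	private final Text k = new Text();
	private final LongWritable v = new LongWritable(1);

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {

		// take the text before the first space as the key
		String line = value.toString();
		int sep = line.indexOf(' ');
		k.set(sep == -1 ? line : line.substring(0, sep));

		context.write(k, v);
	}
}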


Origin blog.csdn.net/weixin_46457946/article/details/114132025