百度面试总结:MapReduce中的超类有哪些

MapReduce中的类以及超类:

package wordcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;

public class WordCount {	

	public static void main(String[] args) throws IOException {
		//设置配置文件
		Configuration conf = new Configuration();
		Job job = Job.getInstance(conf);
		
		//设置输入的路径
		Path filein = new Path("dfdd");
		FileInputFormat.addInputPath(job, filein );
		
		//设置输出的路径
		Path fileout = new Path("kdfslk");
			//如果这个输出的路径已经存在,那么就把它删除
		if(fileout.getFileSystem(conf).exists(fileout)) {
			fileout.getFileSystem(conf).delete(fileout,true);
		}
		FileOutputFormat.setOutputPath(job, fileout );
		
		//设置mapperclass,需要继承Mapper超类
		job.setMapperClass(MyMapper.calss);
		//设置map端key的输出类型
		job.setMapOutputKeyClass(LongWritable.class);
		//设置map端value的输出类型
		job.setMapOutputValueClass(Text.class);
		//设置map端的预聚合,combiner,需要继承自reducer.class
		/**
		 Multiple markers at this line
		 The method setCombinerClass(Class<? extends Reducer>) in the type Job is not applicable for the arguments 
		 (Class<MyCombiner>)
		 */
		job.setCombinerClass(MyCombiner.class);
		//设置reduce类,需要继承Reducer类
//		Multiple markers at this line
//		- The method setReducerClass(Class<? extends Reducer>) in the type Job is not applicable for the arguments 
//		 (Class<MyReducer>)
		job.setReducerClass(MyReducer.class);
		//设置reduce端key的输出类型
		job.setOutputKeyClass(Text.class);
		//设置reduce端value的输出类型
		job.setOutputValueClass(IntWritable.class);
		
		/**
		 * 在MapReduce中,需要实现的超类有:
		 * map extends mymapper.class
		 * combiner extends myReducer.class
		 * reducer extends myreducer.calss
		 * inputformat extends InputFormat
		 */
		job.setInputFormatClass(MyInputFormat.class);		
		//继承自:inputFormat
		
//		Multiple markers at this line
//		- The method setInputFormatClass(Class<? extends InputFormat>) in the type Job is not applicable for the arguments 
//		 (Class<MyInputFormat>)
//		- MyInputFormat cannot be resolved to a type
		job.setOutputFormatClass(MyOutputFormat.class);
		//继承自:outputFormat
//		Multiple markers at this line
//		- MyOutputFormat cannot be resolved to a type
//		- The method setOutputFormatClass(Class<? extends OutputFormat>) in the type Job is not applicable for the arguments 
//		 (Class<MyOutputFormat>)
		
		job.setPartitionerClass(MyPartition.class);
		//设置分区mypartition.class extends  Partitioner
//		Multiple markers at this line
//		- The method setPartitionerClass(Class<? extends Partitioner>) in the type Job is not applicable for the arguments 
//		 (Class<MyPartition>)
//		- MyPartition cannot be resolved to a type
		
		job.setSortComparatorClass(MySort.class);
		//MySort.class extends rawComparator.class
//		Multiple markers at this line
//		- MySort cannot be resolved to a type
//		- The method setSortComparatorClass(Class<? extends RawComparator>) in the type Job is not applicable for the arguments 
//		 (Class<MySort>)
		
		job.setNumReduceTasks(2);
		//设置reduce端task的个数
		
		
	}
}

在MapReduce中的超类:

/**
		 * 在MapReduce中有以下几个超类:
		 * 一:设置mapper:setMapperClass,MyMapper extends Mapper
		 * 二:设置combiner,也就是map端的预聚合:setCombinerClass,MyCombiner extends Reducer
		 * 三:设置reducer:setReducerClass,MyReducer extends Reducer
		 * 四:设置排序规则:setSortComparatorClass,MySort extends RawComparator
		 * 五:设置分区函数:setPartitionerClass,MyPartition extends Partitioner
		 * 六:设置输入格式:setInputFormatClass,MyInputFormat extends InputFormat
		 * 七:设置输出格式:setOutputFormatClass,MyOutputFormat extends OutputFormat
		 * 此外还可以设置reduce task的个数:
		 * job.setNumReduceTasks()
		 */

猜你喜欢

转载自blog.csdn.net/wyqwilliam/article/details/84671617
今日推荐