MapReduce中的类以及超类:
package wordcount;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
public class WordCount {
public static void main(String[] args) throws IOException {
//设置配置文件
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
//设置输入的路径
Path filein = new Path("dfdd");
FileInputFormat.addInputPath(job, filein );
//设置输出的路径
Path fileout = new Path("kdfslk");
//如果这个输出的路径已经存在,那么就把它删除
if(fileout.getFileSystem(conf).exists(fileout)) {
fileout.getFileSystem(conf).delete(fileout,true);
}
FileOutputFormat.setOutputPath(job, fileout );
//设置mapperclass,需要继承Mapper超类
job.setMapperClass(MyMapper.calss);
//设置map端key的输出类型
job.setMapOutputKeyClass(LongWritable.class);
//设置map端value的输出类型
job.setMapOutputValueClass(Text.class);
//设置map端的预聚合,combiner,需要继承自reducer.class
/**
Multiple markers at this line
The method setCombinerClass(Class<? extends Reducer>) in the type Job is not applicable for the arguments
(Class<MyCombiner>)
*/
job.setCombinerClass(MyCombiner.class);
//设置reduce类,需要继承Reducer类
// Multiple markers at this line
// - The method setReducerClass(Class<? extends Reducer>) in the type Job is not applicable for the arguments
// (Class<MyReducer>)
job.setReducerClass(MyReducer.class);
//设置reduce端key的输出类型
job.setOutputKeyClass(Text.class);
//设置reduce端value的输出类型
job.setOutputValueClass(IntWritable.class);
/**
* 在MapReduce中,需要实现的超类有:
* map extends mymapper.class
* combiner extends myReducer.class
* reducer extends myreducer.calss
* inputformat extends InputFormat
*/
job.setInputFormatClass(MyInputFormat.class);
//继承自:inputFormat
// Multiple markers at this line
// - The method setInputFormatClass(Class<? extends InputFormat>) in the type Job is not applicable for the arguments
// (Class<MyInputFormat>)
// - MyInputFormat cannot be resolved to a type
job.setOutputFormatClass(MyOutputFormat.class);
//继承自:outputFormat
// Multiple markers at this line
// - MyOutputFormat cannot be resolved to a type
// - The method setOutputFormatClass(Class<? extends OutputFormat>) in the type Job is not applicable for the arguments
// (Class<MyOutputFormat>)
job.setPartitionerClass(MyPartition.class);
//设置分区mypartition.class extends Partitioner
// Multiple markers at this line
// - The method setPartitionerClass(Class<? extends Partitioner>) in the type Job is not applicable for the arguments
// (Class<MyPartition>)
// - MyPartition cannot be resolved to a type
job.setSortComparatorClass(MySort.class);
//MySort.class extends rawComparator.class
// Multiple markers at this line
// - MySort cannot be resolved to a type
// - The method setSortComparatorClass(Class<? extends RawComparator>) in the type Job is not applicable for the arguments
// (Class<MySort>)
job.setNumReduceTasks(2);
//设置reduce端task的个数
}
}
在MapReduce中的超类:
/**
* 在MapReduce中有以下几个超类:
* 一:设置mapper:MyMapper extends Mapper
* 二:设置combiner,也就是map端的预聚合,MyCombiner extends Reducer
* 三:设置reducer,MyReducer extends Reducer
* 四:设置排序规则 setSortComparatorClass,MySort extends RawComparator
* 五:设置分区函数:setPartitionerClass,MyPartition extends Partitioner
* 六:设置InputFormat:setInputFormatClass,MyInputFormat extends InputFormat
* 七:设置OutputFormat:setOutputFormatClass,MyOutputFormat extends OutputFormat
* 此外还可以设置reduce task的个数
* job.setNumReduceTasks(int)
*/