版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
数据:
Input:
hello world hello world dog fish
dog fish
hadoop
spark hello world dog fish
hello world spark hello world dog fish
dog fish spark hello world hadoop dog fish spark hello world
hadoop dog fish hello world dog fish hadoop
hadoop hadoop dog fish
spark dog fish hello world
hello world spark dog fish dog fish
Output:
dog 12
fish 12
hello 10
world 10
hadoop 6
spark 6
代码:
package MapReducer05;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.BasicConfigurator;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Set;
import java.util.TreeSet;
public class WordCount {

    /**
     * Mapper: tokenizes each input line on whitespace and emits (word, 1).
     * Writable instances are reused across calls to avoid per-record allocation.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Text word = new Text();
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on runs of whitespace: split(" ") would yield empty tokens
            // for consecutive spaces and count "" as a word.
            for (String s : value.toString().split("\\s+")) {
                if (s.isEmpty()) {
                    continue; // leading whitespace produces one empty token
                }
                word.set(s);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Reducer: sums the counts per word, buffers every (word, count) pair in a
     * TreeSet ordered by descending count (ties broken by word, ascending),
     * then emits the sorted result once in cleanup().
     */
    public static class MyReducer extends Reducer<Text, IntWritable, WordWritable, NullWritable> {
        // Ordered by WordWritable.compareTo: count descending, word ascending.
        private final Set<WordWritable> set = new TreeSet<WordWritable>();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (IntWritable iw : values) {
                count += iw.get();
            }
            set.add(new WordWritable(key.toString(), count));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // All keys have been reduced; emit in sorted order.
            for (WordWritable w : set) {
                context.write(w, NullWritable.get());
            }
        }
    }

    /**
     * Job driver. Input and output paths may be supplied as the first and
     * second command-line arguments; the original hard-coded paths remain the
     * defaults for backward compatibility.
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        //System.setProperty("hadoop.home.dir", "F:\\hadoop-2.6.4");
        Configuration conf = new Configuration();
        BasicConfigurator.configure();
        Job job = Job.getInstance(conf, "mr");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(WordWritable.class);
        job.setOutputValueClass(NullWritable.class);
        String in = args.length > 0 ? args[0] : "C:\\Users\\Chen\\Desktop\\input\\wordcount";
        String out = args.length > 1 ? args[1] : "C:\\Users\\Chen\\Desktop\\20";
        FileInputFormat.addInputPath(job, new Path(in));
        FileOutputFormat.setOutputPath(job, new Path(out));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
class WordWritable implements WritableComparable<WordWritable>{
private String word;
private int count;
public WordWritable(){}
public WordWritable(String word, int count) {
this.word = word;
this.count = count;
}
public int compareTo(WordWritable o) {
int ff = o.getCount()-this.getCount();
if(ff==0){
return this.getWord().compareTo(o.getWord());
} else{
return ff;
}
}
@Override
public String toString() {
return this.getWord()+" "+this.getCount();
}
public void write(DataOutput out) throws IOException {
out.writeUTF(word);
out.writeInt(count);
}
public void readFields(DataInput in) throws IOException {
this.word = in.readUTF();
this.count = in.readInt();
}
public String getWord() {
return word;
}
public void setWord(String word) {
this.word = word;
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
}
}