A word counter based on the classic WordCount example.
The Map class:
package com.wordcount.map;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Map extends Mapper<Object, Text, Text, IntWritable> {

    private static Text word = new Text();
    private final static IntWritable one = new IntWritable(1);

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // value is one line of input
        StringTokenizer token = new StringTokenizer(value.toString());
        while (token.hasMoreTokens()) {
            word.set(token.nextToken().toLowerCase());
            context.write(word, one);
        }
    }
}
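Note that, unlike the stock WordCount example, this mapper lowercases every token, which is why "My World" in the sample input below is counted under my and world. A minimal local illustration of the tokenize-and-lowercase step (the class name TokenizeDemo is just for illustration, not part of the job):

import java.util.StringTokenizer;

public class TokenizeDemo {
    public static void main(String[] args) {
        // Same splitting and lowercasing as the map() method above
        StringTokenizer token = new StringTokenizer("My World");
        while (token.hasMoreTokens()) {
            System.out.println(token.nextToken().toLowerCase()); // prints "my", then "world"
        }
    }
}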
The Reduce class:
package com.wordcount.reduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    private static IntWritable result = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum all the 1s emitted for this word
        int total = 0;
        for (IntWritable value : values) {
            total += value.get();
        }
        result.set(total);
        context.write(key, result);
    }
}
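Between map and reduce, the framework groups all emitted (word, 1) pairs by key, so reduce() receives each distinct word together with all of its 1s, in sorted key order. A rough local sketch of that grouping-and-summing step, without Hadoop (the names LocalReduceSketch and emitted are illustrative only):

import java.util.TreeMap;

public class LocalReduceSketch {
    public static void main(String[] args) {
        // Pairs the mapper would emit for the line "hello hadoop hello hello world"
        String[] emitted = {"hello", "hadoop", "hello", "hello", "world"};

        // The shuffle phase groups by key; a TreeMap also mimics the sorted output order
        TreeMap<String, Integer> counts = new TreeMap<>();
        for (String word : emitted) {
            counts.merge(word, 1, Integer::sum); // same summation as reduce()
        }
        counts.forEach((w, n) -> System.out.println(w + "\t" + n));
    }
}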
After configuring Eclipse for Hadoop, run the driver class below:
package com.wordcount.main;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

import com.wordcount.map.Map;
import com.wordcount.reduce.Reduce;

public class WordCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] params = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (params.length != 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        Job job = new Job(conf, "WordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(params[0]));
        FileOutputFormat.setOutputPath(job, new Path(params[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
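Two notes on the driver. Using Reduce as the combiner is safe here because summing counts is associative and commutative, so partial sums on the map side do not change the final result. Also, new Job(conf, ...) still works but is marked deprecated on Hadoop 2.x and later; if you are on a newer release, the factory method is preferred:

// Preferred on Hadoop 2.x+ (the deprecated new Job(conf, ...) above still runs)
Job job = Job.getInstance(conf, "WordCount");

If you run the job outside Eclipse, the usual approach is to package the three classes into a jar and submit it, roughly like this (the jar name wordcount.jar is just an example):

hadoop jar wordcount.jar com.wordcount.main.WordCount hdfs://master:9000/data/input/words hdfs://master:9000/data/output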
The input file words:
hello hadoop hello hello world
world cup
just do it
it a test
just try
My World
Upload it to HDFS: hadoop fs -put ./words /data/input
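If /data/input does not exist on HDFS yet, you may need to create it first with the standard fs shell command:

hadoop fs -mkdir -p /data/input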
Run Configuration:
Arguments:
hdfs://master:9000/data/input/words hdfs://master:9000/data/output
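Note that FileOutputFormat refuses to start if the output directory already exists, so delete /data/output before re-running. Once the job finishes, the counts can be read back with something like the following (part-r-00000 assumes the default single reducer):

hadoop fs -cat /data/output/part-r-00000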
Result:
a 1
cup 1
do 1
hadoop 1
hello 3
it 2
just 2
my 1
test 1
try 1
world 3