Introduction: When I first looked at the contents of the Map and Reduce classes today, I was honestly confused — "who are you?" In the end I had no choice but to watch someone else's explanatory video on Bilibili and look up the Java concepts online myself. I finally understood the WordCount example and was ready to write it out again from memory! Honestly, it isn't that hard; it's just that I only have a little time each night to study the code before going to sleep.
Body: I really don't want to write too much explanation in the comments inside the code — give me a break!
Here is a good reference URL: https://www.cnblogs.com/houji/p/7161468.html
The code is shown below:
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hadoop;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * Classic Hadoop MapReduce WordCount example: counts the occurrences of each
 * whitespace-separated token in the input files and writes {@code <word, count>}
 * pairs to the output directory.
 */
public class WordCount {

  /**
   * Mapper&lt;KEYIN, VALUEIN, KEYOUT, VALUEOUT&gt;: the input key is the line's
   * byte offset, the input value is the line text; for every token in the line
   * it emits {@code <word, 1>}.
   */
  public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

    // Constant value 1 emitted once per token; reused to avoid per-record allocation.
    private final static IntWritable one = new IntWritable(1);
    // Reusable output key holding the current token.
    private Text word = new Text();

    @Override
    public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
      // StringTokenizer splits the line on whitespace by default.
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {           // true while more tokens remain
        word.set(itr.nextToken());            // assign the next token to the output key
        context.write(word, one);             // emit <'word', 1>
      }
    }
  }

  /**
   * Reducer (also used as the combiner): receives {@code <word, [1,1,1,...]>}
   * from the map phase and emits {@code <word, sum>}.
   */
  public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reusable output value holding the accumulated count.
    private IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;                            // counter for this word
      for (IntWritable val : values) {        // iterate over the list of 1s
        sum += val.get();                     // accumulate
      }
      result.set(sum);                        // store the total in the reusable value
      context.write(key, result);             // emit <word, count>
    }
  }

  /**
   * Job driver. Expects at least two path arguments: one or more input paths
   * followed by a single output path.
   *
   * @param args generic Hadoop options followed by input path(s) and output path
   * @throws Exception if job setup or execution fails
   */
  public static void main(String[] args) throws Exception {
    // Read the Hadoop configuration (core-site.xml etc.).
    Configuration conf = new Configuration();
    // Strip generic Hadoop options, leaving only the path arguments.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
      // Need at least one input path and exactly one output path.
      System.err.println("Usage: wordcount <in> [<in>...] <out>");
      System.exit(2);
    }

    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);

    // Wire up the map (split), combine (local merge), and reduce (final merge) stages.
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    // Declare the types of the job's output <key, value> pairs.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // All arguments except the last are input paths; the last is the output path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
      FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));

    // Run the job and exit 0 on success, 1 on failure.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}