[Read] The WordCount example in Hadoop

Introduction: When I first looked at the contents of the map and reduce classes today, I was honestly confused — "who are you?" In the end I had no choice but to watch someone else's commentary video on Bilibili, and to look up the Java concepts online myself. I finally understood the WordCount example and I'm ready to write it up again! Honestly, it's exhausting — every night I only have a little time to study the code before going to sleep.

Body: I really don't want to write too much explanation in the comments inside the code — give me a break!

Here is a good reference URL: https://www.cnblogs.com/houji/p/7161468.html

The code is as follows:

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.hadoop;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public  class the WordCount { // the WordCount class is name, use public class is modified, java program by the class (class) composition, a source file can contain multiple classes 

  public  static  class TokenizerMapper 
        the extends Mapper <Object, the Text, the Text, IntWritable> {
 / ** * 
               Mapper <the KEYIN, VALUEIN, KEYOUT, VALUEOUT> 
                      row offset input value the output value of the output key 
** * / 
    
    Private  Final  staticOne = IntWritable new new IntWritable (. 1 );
     Private the Text Word = new new the Text (); 
      
    public  void Map (Object Key, the Text value, the Context context 
                    ) throws IOException, InterruptedException { 
      StringTokenizer ITR = new new StringTokenizer (value.toString ()); // value.toString () acquires the input value, and separated by StringTokenizer (space by default) 
      the while (itr.hasMoreTokens ()) { // determines whether there itr string, returns true or to false 
        word.set (itr.nextToken ()) ; // SET to a method of assigning word, nextToken () returns the next token 
        context.write (word, One); // output < 'word', 1>
      } 
    } 
  } 
  
  Public  static  class IntSumReducer 
        the extends the Reducer <the Text, IntWritable, the Text, IntWritable> {
     Private IntWritable Result = new new IntWritable (); 

    public  void the reduce (the Text Key, the Iterable <IntWritable> values,   // values which stored data map output format 'Word list <1,1,1,1,1>' 
                       the context context 
                       ) throws IOException, InterruptedException { 

      int SUM = 0; // custom a counter 
      for (IntWritable Val: values) { // cycle list which the value 
        sum + = val.get (); // sum 
      } 
      result.set (SUM); // assigned to Result 
      context.write (Key, Result); 
    } 
  } 

  public  static  void main (String [] args) throws Exception { / / . 1, the Java program entry, public static void main (String [ ] args) {} is a fixed usage, public static void are keywords. 2, throws: declare an exception may be thrown 
    / ** * 
    the Create the Job A new new 
    ** * / 
    the Configuration the conf = new new the Configuration ();   // instantiate the Configuration, reads the configuration information Hadoop 
    String [] = otherArgs new new GenericOptionsParser ( the conf, args) .getRemainingArgs (); // read the argument fill address information Hadoop  
     IF (otherArgs.length <2) { // if the address filled in less than 2, and outputs an error "Usage: wordcount <in > [<in> ...] <OUT> " 
      System.err.println (" the Usage: WordCount <in> [<in> ...] <OUT> " ); 
      System.exit ( 2 ); 
    } 
    the Job Job Job.getInstance = (the conf, "Word COUNT"); // singleton the getInstance (), called the main function begins to return an instance of the object that is static, and retains it in memory referenced 
    job .setJarByClass (the WordCount. class );
     // set Map (split) the Job processing, Combiner (intermediate merge results) and Reduce (combined) correlation process based 
    job.setMapperClass (TokenizerMapper.class);
    job.setCombinerClass (IntSumReducer. class ); 
    Job.setReducerClass (IntSumReducer.class );
     // set the job output <key, value> in the key and value data types 
    job.setOutputKeyClass (the Text. class ); 
    . job.setOutputValueClass (IntWritable class );
     / ** * 
    call addInputPath () and setOutputPath ( ) O path is provided opposite 
    the InputFormat () method is used to generate a map for the processing of <key, value> pairs 
    ** * / 
    for ( int I = 0; I <otherArgs.length -. 1; ++ I) { 
      FileInputFormat.addInputPath (Job, new new the Path (otherArgs [I])); 
    } 
    FileOutputFormat.setOutputPath (Job, 
      new new the Path (otherArgs [otherArgs.length -. 1 ]));
    System.exit(job.waitForCompletion(true) ? 0 : 1); //运行job
  }
}

 

Guess you like

Origin www.cnblogs.com/CQ-LQJ/p/11504072.html