Counting word occurrences with MapReduce

1. Create a new Java project

2. Import the JARs

Add the MapReduce JARs from the Hadoop distribution to the project's build path, e.g.:

E:\tools\big data\big data to enhance data\01-software data\06-Hadoop\installer\Java1.8 compiler environment\hadoop-2.7.3\hadoop-2.7.3\share\hadoop\mapreduce

plus the JARs under the neighboring hdfs and common directories.

3. Write the code

3.1 WCMapper

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    /*
     * map: input <0, "tom lili tom">, output <"tom", 1>
     */

    // public class WCMapper extends Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    // input key: long (byte offset of the line), input value: String (the line);
    // output key: String (the word), output value: long (the count)

    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // the input value is one line of text, e.g. "tom lili tom"
        // split the line on tab characters
        String[] split = value.toString().split("\t");
        for (String name : split) {
            // emit <word, 1> for every word on the line
            context.write(new Text(name), new LongWritable(1));
        }
    }
}
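The mapper's whole job is the split-and-emit loop above. As a quick sanity check, the same logic can be run as plain Java without Hadoop (a sketch; the sample line and class name are made up for illustration):

public class SplitCheck {
    public static void main(String[] args) {
        // the same tab-split the mapper applies to each input line
        String line = "tom\tlili\ttom";
        for (String name : line.split("\t")) {
            // mirrors context.write(new Text(name), new LongWritable(1))
            System.out.println(name + "\t1");
        }
    }
}

Each word is printed with a count of 1, which is exactly the <word, 1> stream the mapper hands to the shuffle.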

3.2 WCReduce

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WCReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

    // input <"tom", {1,1,1,1,1,1,1}>, output <"tom", 7>

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values,
            Reducer<Text, LongWritable, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // iterate over the values for this key and accumulate the counts
        long sum = 0;
        for (LongWritable value : values) {
            sum += value.get();
        }
        // emit <word, total>
        context.write(key, new LongWritable(sum));
    }
}
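After the shuffle, all the 1s emitted for a given word arrive grouped under that key, and the reducer simply sums them. A plain-Java illustration of that step (a sketch; the literal values are made up):

java.util.List<Long> valuesForTom = java.util.Arrays.asList(1L, 1L, 1L);
long sum = 0;
for (long v : valuesForTom) {
    sum += v;
}
System.out.println("tom\t" + sum); // prints "tom" and 3, tab-separated

Note that the reducer sums value.get() rather than adding 1 per element; this keeps it correct even when the incoming values are partial sums greater than 1 (see the combiner note after WCApp below).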

3.3 WCApp

package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WCApp {

    public static void main(String[] args) throws Exception {
        // create the configuration object
        Configuration configuration = new Configuration();
        // get a job instance
        Job job = Job.getInstance(configuration);
        // specify the class that identifies the job's jar
        job.setJarByClass(WCApp.class);

        // specify the job's mapper
        job.setMapperClass(WCMapper.class);
        // specify the mapper's output key and value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // specify the job's reducer and its output key and value types
        job.setReducerClass(WCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // specify the input file
        FileInputFormat.setInputPaths(job, new Path("/wc.txt"));
        // specify the output directory
        FileOutputFormat.setOutputPath(job, new Path("/myWCResult"));
        // submit the job and wait for completion
        job.waitForCompletion(true);
    }

}
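Two optional driver additions are worth knowing about (a sketch; neither call appears in the original code): MapReduce refuses to start if the output directory already exists, so it can be deleted up front, and because summing longs is associative, WCReduce can double as a combiner:

        // sketch: place before job.waitForCompletion(true) in main
        org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.get(configuration);
        org.apache.hadoop.fs.Path out = new org.apache.hadoop.fs.Path("/myWCResult");
        if (fs.exists(out)) {
            fs.delete(out, true); // recursively remove the stale output directory
        }
        job.setCombinerClass(WCReduce.class); // pre-aggregate counts on the map side

The combiner stays correct only because the reducer sums value.get(); if it counted elements with sum += 1, the map-side pre-aggregation would make it undercount.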

4. Package and upload

Package the project (a Java project is packaged as a jar; a web project as a war), upload it to Linux, and run the jar with hadoop jar WCAPP.jar.
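Assuming the input file has already been uploaded to HDFS and the jar's manifest names the main class (both are assumptions, not shown above), a typical run and result check looks like:

hdfs dfs -put wc.txt /
hadoop jar WCAPP.jar
hdfs dfs -cat /myWCResult/part-r-00000

If the manifest does not name a main class, pass it explicitly: hadoop jar WCAPP.jar com.zy.wc.WCApp.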

 
