Hadoop-1.2.1 word count example

package com.bjsxt.mr;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop. io.Text;
import org.apache.hadoop.mapreduce.Mapper;



/**
* word statistics
* @author tingyu
* @date 2016-02-29 00:44
*/

/**
* KEYIN: the position (offset) of the input line, supplied as a LongWritable
* VALUEIN: The input VALUE is text
* KEYOUT: The output KEY is text
* VALUEOUT: The output is a number
*/
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
/*
* Each time the map method is called, Pass in a row of data in split (shard), and the key is the subscript position of the row of data in the shard
*
*/
protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException {
String line=value.toString();
StringTokenizer st=new StringTokenizer(line); //The default is to split by spaces
while(st.hasMoreTokens( )){
String world=st.nextToken();
context.write(new Text(world), new IntWritable(1)); //map output
}

};
}








package com.bjsxt.mr;

import org.apache.hadoop .io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;


/**
*word count
* @author tingyu
* @date 2016-02-29 00:44
*/
/*
* KEYIN: the output key of the map
* VALUEIN: the output value of the map
* KEYOUT: text
* VALUEOUT: a number
*/
public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
protected void reduce(Text key, java.lang.Iterable<IntWritable> iterable, Context context)
throws java.io.IOException ,InterruptedException {
int sum=0;
for(IntWritable val:iterable){
sum+=val.get();
}
context.write(key, new IntWritable(sum));

};
}





package com.bjsxt.mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


/**
* word count
* @author tingyu
* @date 2016-02-29 00:44
*/
public class JobRun {

    /**
     * Configures the word-count job, submits it and waits for it to finish.
     *
     * <p>The JVM exits with status 0 when the job succeeds and with status 1 when the job
     * fails or when configuring/submitting it throws an exception.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Configuration config = new Configuration();
        // Must match the configuration in hadoop-1.2/conf/mapred-site.xml.
        // No leading/trailing whitespace: the value is parsed as host:port.
        config.set("mapred.job.tracker", "192.168.0.200:9001");
        config.set("fs.default.name", "hdfs://192.168.0.200:9000");
        // If running from the local Eclipse does not work, set the location of the jar file:
        //config.set("mapred.jar", "C:\\Users\\tingyu\\Desktop\\hadoop\\wordCount.jar");

        int exitCode;
        try {
            Job job = new Job(config, "world count");
            job.setJarByClass(JobRun.class);
            job.setMapperClass(WcMapper.class);
            job.setReducerClass(WcReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Set the number of reduce tasks:
            //job.setNumReduceTasks(2);

            FileInputFormat.addInputPath(job, new Path("/opt/input/wc"));
            FileOutputFormat.setOutputPath(job, new Path("/opt/output/wc"));

            exitCode = job.waitForCompletion(true) ? 0 : 1;
        } catch (Exception e) {
            // Report the failure and make sure the process does not exit with status 0.
            e.printStackTrace();
            exitCode = 1;
        }
        System.exit(exitCode);
    }
}

Guess you like

Origin http://10.200.1.11:23101/article/api/json?id=327066783&siteId=291194637