1. Create a new Java project
2. Import the JAR packages
Add the Hadoop JARs to the project's build path, e.g. from
E:\tools\big data\big data to enhance data\01-software data\06-Hadoop\installer\Java1.8 compiler environment\hadoop-2.7.3\hadoop-2.7.3\share\hadoop\mapreduce
plus the HDFS and common JARs.
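For reference, in a standard hadoop-2.7.3 distribution these JARs live under the following directories of share\hadoop (a sketch of the typical layout; exact file names vary by build):

    share\hadoop\common      (plus share\hadoop\common\lib)
    share\hadoop\hdfs        (plus share\hadoop\hdfs\lib)
    share\hadoop\mapreduce   (plus share\hadoop\mapreduce\lib)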
3. Write the project
3.1 WCMapper
package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>:
// input key is LongWritable (byte offset), input value is Text (one line),
// output key is Text (the word), output value is LongWritable (the count).
public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // map: input <0, "tom lili tom"> -> output <"tom", 1> for each word
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // the input value is one line of text, e.g. "tom lili tom";
        // split on the tab character -- adjust the delimiter to match the input file
        String[] words = value.toString().split("\t");
        for (String name : words) {
            // emit <word, 1> for every occurrence
            context.write(new Text(name), new LongWritable(1));
        }
    }
}
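To see what the map phase emits without running a cluster, here is a minimal standalone sketch of the same split-and-emit logic (the class name MapLogicDemo is hypothetical and not part of the project):

public class MapLogicDemo {
    public static void main(String[] args) {
        String line = "tom\tlili\ttom";          // one tab-separated input line
        for (String name : line.split("\t")) {
            // mirrors context.write(new Text(name), new LongWritable(1))
            System.out.println(name + "\t1");
        }
        // prints: tom 1, lili 1, tom 1 -- duplicate keys are merged in the reduce phase
    }
}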
3.2 WCReduce
package com.zy.wc;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// input <"tom", {1,1,1,1,1,1,1}> -> output <"tom", 7>
public class WCReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // iterate over the values and accumulate the 1s for this key
        long sum = 0;
        for (LongWritable v : values) {
            sum += v.get();
        }
        // emit <word, total count>
        context.write(key, new LongWritable(sum));
    }
}
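The reduce logic itself is plain accumulation; this standalone sketch (the class name ReduceLogicDemo is hypothetical) runs the same loop over a simulated input <"tom", {1, 1}>:

import java.util.Arrays;
import java.util.List;

public class ReduceLogicDemo {
    public static void main(String[] args) {
        List<Long> values = Arrays.asList(1L, 1L);  // simulated reduce input for key "tom"
        long sum = 0;
        for (long v : values) {
            sum += v;                               // mirrors sum += v.get()
        }
        System.out.println("tom\t" + sum);          // prints: tom 2
    }
}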
3.3 WCApp
package com.zy.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WCApp {
    public static void main(String[] args) throws Exception {
        // create the configuration object
        Configuration configuration = new Configuration();
        // get a job instance
        Job job = Job.getInstance(configuration);
        // specify the class whose JAR should run the job
        job.setJarByClass(WCApp.class);

        // specify the mapper and its output key/value types
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // specify the reducer and the job's final output key/value types
        job.setReducerClass(WCReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // specify the input file and the output directory (must not already exist)
        FileInputFormat.setInputPaths(job, new Path("/wc.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/myWCResult"));

        // submit the job and wait for completion
        job.waitForCompletion(true);
    }
}
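Note that the job fails if /myWCResult already exists, because FileOutputFormat refuses to overwrite an existing path. A common guard (not in the original code; a sketch using the standard FileSystem API) is to delete the directory before setting it:

// requires: import org.apache.hadoop.fs.FileSystem;
Path out = new Path("/myWCResult");
FileSystem fs = FileSystem.get(configuration);
if (fs.exists(out)) {
    fs.delete(out, true);   // true = delete recursively
}
FileOutputFormat.setOutputPath(job, out);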
4. Package and upload
Package the project (a Java project is packaged as a JAR, a web project as a WAR), upload it to the Linux server, and then run the JAR with: hadoop jar WCAPP.jar
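A typical session might look like the following (a sketch: the JAR name, input file, and paths follow the examples above, and the main class must be passed explicitly unless the JAR's manifest declares one):

hdfs dfs -put wc.txt /                  # stage the input file in HDFS
hadoop jar WCAPP.jar com.zy.wc.WCApp    # run the job
hdfs dfs -cat /myWCResult/part-r-00000  # inspect the result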