A Small MapReduce WordCount Example on Windows

(Prerequisite: Hadoop and its environment variables are already configured.)

Step 1: Prepare the jar packages (I am using Hadoop 2.8.0).
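The post does not say how the jars are managed; assuming you use Maven (an assumption, not part of the original), a single hadoop-client dependency is enough to compile and run this example:

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.8.0</version>
</dependency>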

Step 2: Download the winutils build matching your Hadoop version, copy winutils.exe into Hadoop's bin directory, and copy hadoop.dll into C:\Windows\System32.
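A quick sanity check from a new command prompt, assuming Hadoop was unpacked to F:\hadoop-2.8.0 (the same path WCApp uses below):

set HADOOP_HOME=F:\hadoop-2.8.0
set PATH=%PATH%;%HADOOP_HOME%\bin
hadoop version
rem winutils only works if winutils.exe and hadoop.dll are in place
winutils.exe ls C:\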

Write the code:

WCMapper
package mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * @Author zhaoxin
 * @Email [email protected]
 * @Description //TODO
 * @Date 2018/10/21
 **/
public class WCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        Text keyOut = new Text();
        IntWritable valueOut = new IntWritable();
        // split on runs of whitespace so tabs and repeated spaces do not produce empty tokens
        String[] arr = value.toString().split("\\s+");
        for (String s : arr) {
            if (s.isEmpty()) continue; // a leading space still yields one empty token
            keyOut.set(s);
            valueOut.set(1);
            context.write(keyOut, valueOut); // emit (word, 1) for each token
        }
    }
}

WCReducer

package mr;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * @Author zhaoxin
 * @Email [email protected]
 * @Description //TODO
 * @Date 2018/10/21
 **/
public class WCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable iw : values) {
            count += iw.get(); // sum the 1s the mappers emitted for this word
        }
        context.write(key, new IntWritable(count));
    }
}
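One optional tweak the original post does not make: because this reduce is a plain associative sum, the same class can double as a combiner, pre-aggregating counts on the map side and shrinking shuffle traffic. In WCApp you would add one line:

job.setCombinerClass(WCReducer.class); // safe here because summing is associative and commutative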

WCApp

package mr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * @Author zhaoxin
 * @Email [email protected]
 * @Description //TODO
 * @Date 2018/10/21
 **/
public class WCApp {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        // tell the Hadoop client where the local Hadoop (winutils) installation lives
        System.setProperty("hadoop.home.dir", "F:/hadoop-2.8.0");
        // note the capital FS: the property key is fs.defaultFS
        configuration.set("fs.defaultFS", "hdfs://192.168.136.128:9000");
        Job job = Job.getInstance(configuration);
        // set the job properties
        job.setJobName("WCApp");                        // job name
        job.setJarByClass(WCApp.class);                 // class used to locate the jar
        job.setInputFormatClass(TextInputFormat.class); // input format
        job.setMapperClass(WCMapper.class);             // mapper class
        // add the input path
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // set the output path (it must not exist yet)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setReducerClass(WCReducer.class);           // reducer class
        job.setNumReduceTasks(1);                       // number of reducers

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // true = print progress to the console; exit nonzero if the job fails
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
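With the three classes compiled into a jar, the job takes an input path and an output path as arguments. A sketch of a run, where wc.jar, words.txt, /input and /output are placeholder names:

hdfs dfs -mkdir -p /input
hdfs dfs -put words.txt /input
hadoop jar wc.jar mr.WCApp /input /output
hdfs dfs -cat /output/part-r-00000

Each line of the result is a word, a tab, and its count.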

Reposted from blog.csdn.net/qq_37668945/article/details/83243325