Hadoop MapReduce: A Detailed Introductory Example

WordCount: a MapReduce program that counts word frequencies. First, the mapper class:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Receive the input data: one line of text
        String line = value.toString();
        // Split the line into words
        String[] words = line.split(" ");
        // Loop over the words
        for (String w : words) {
            // Record a 1 for each occurrence, then send the pair on
            context.write(new Text(w), new LongWritable(1));
        }
    }
}
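For example, if the mapper receives the line "hello world hello", it emits three pairs: (hello, 1), (world, 1), and (hello, 1). The framework then shuffles these pairs so that all values for the same key arrive together at the reducer.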

Next comes the reducer, which sums the 1s collected for each word:

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        // Define a counter
        long counter = 0;
        // Accumulate the counts in a loop
        for (LongWritable l : values) {
            counter += l.get();
        }
        // Write out the accumulated total
        context.write(key, new LongWritable(counter));
    }
}
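Continuing the example, the reducer is called once per key: it receives (hello, [1, 1]) and (world, [1]) and writes out (hello, 2) and (world, 1), the final word counts.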
Finally, the driver class configures and submits the job:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    public static void main(String[] args) throws Exception {
        // Load the configuration
        Configuration conf = new Configuration();
        // Build the job object
        Job job = Job.getInstance(conf);
        // Set the main class, which Hadoop uses to locate the job's jar
        job.setJarByClass(WordCount.class);

        // Configure the mapper and the types of its intermediate output
        job.setMapperClass(WCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // Set the input path
        FileInputFormat.setInputPaths(job, new Path("hdfs://snow521:9000/user/snow/input0/words.txt"));
A note on paths: the first time I ran this, it failed with an "input path does not exist" error for words.txt, because the code I had copied hard-coded new Path("/words.txt"). After I changed it, the job ran, but I could not find output0; reading further down the code, setOutputPath(job, new Path("/wcout")) points at the HDFS root directory /, and that is where the results turned up. Remember: a job can take multiple input paths but only one output path, and "input" and "output" are just directory names you choose, not required values. Also, if you run the same code twice, the job fails with an error saying the output path already exists; delete the old output directory and run again (or clear it from the driver, as in the sketch after the listing).


        // Configure the reducer, which merges the intermediate results produced by the maps
        job.setReducerClass(WCReducer.class);
        // Set the key and value types of the job's final output. The result is a
        // <word, count> pair, so the key is Text (the Hadoop counterpart of Java's
        // String) and the value is LongWritable (the counterpart of Java's long)
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // Set the output path
        FileOutputFormat.setOutputPath(job, new Path("hdfs://snow521:9000/user/snow/output6/wcout"));

        // Submit the job and wait for it to finish
        job.waitForCompletion(true);
    }


}
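As the note above says, re-running the job fails once the output path exists. Rather than deleting the directory by hand each time, the driver can clear it before submitting. Below is a minimal sketch, assuming you keep the same hard-coded output path as above; Path.getFileSystem, FileSystem.exists, and FileSystem.delete are standard Hadoop filesystem API calls:

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Place this in main(), after creating conf and before FileOutputFormat.setOutputPath(...)
Path outPath = new Path("hdfs://snow521:9000/user/snow/output6/wcout");
FileSystem fs = outPath.getFileSystem(conf);  // resolve the filesystem this path lives on
if (fs.exists(outPath)) {
    // The second argument means "recursive", so the directory and its contents are removed
    fs.delete(outPath, true);
}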
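The comment on the reducer above mentions merging the maps' intermediate results to ease the pressure on network transfer. Strictly speaking, that map-side merging is the job of a combiner, which runs reduce logic locally on each map task's output before the shuffle. Because summing counts is associative, the existing WCReducer can double as the combiner; this one-liner is an optional extension, not part of the original code:

// In the driver, next to setReducerClass
job.setCombinerClass(WCReducer.class);

With the combiner set, a map task that sees "hello" a thousand times sends a single (hello, 1000) pair across the network instead of a thousand (hello, 1) pairs.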



I hope this helps other beginners like me get started.


Reposted from blog.csdn.net/qq_38558834/article/details/77964381