MapReduce Custom Data Types
Requirement: count the occurrences of each word in the input file, and emit each result as a custom data type that carries both the word and its count (printed as word--count).
Implementation steps
(1) Create a class that implements WritableComparable (or Writable)
(2) Define the fields you need and generate getters/setters
(3) Provide two constructors: a no-arg constructor and a parameterized one
(4) Implement the serialization (write) and deserialization (readFields) methods
(5) Implement the comparison method compareTo
(6) Implement toString
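Two details are easy to miss: Hadoop instantiates the type via reflection, so the no-arg constructor is mandatory, and readFields must read the fields in exactly the same order that write wrote them. These steps are realized in the UserTestWritable class shown below.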
Mapper code:
package com.huadian.bigdata.usertest;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class UserMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private Text mapOutKey = new Text();
    private final static IntWritable mapOutValue = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Business logic: split each line into words and emit (word, 1) pairs
        String row = value.toString(); // line content, e.g. "hadoop java spring springMvc"
        String[] strs = row.split(" ");
        for (String str : strs) {
            mapOutKey.set(str);
            // Emit the map result through the context
            context.write(mapOutKey, mapOutValue);
        }
    }
}
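For example, given the input line hadoop java spring springMvc, this Mapper emits the pairs (hadoop,1), (java,1), (spring,1), (springMvc,1).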
Reducer code:
package com.huadian.bigdata.usertest;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class UserReducer extends Reducer<Text, IntWritable, UserTestWritable, NullWritable> {

    private UserTestWritable outKey = new UserTestWritable();
    private NullWritable outValue = NullWritable.get();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum all counts for this word
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // Pack the word and its total into the custom output type
        outKey.setFirstKey(key.toString());
        outKey.setSecondKey(sum);
        context.write(outKey, outValue);
    }
}
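Note the output value type: since the word and its count are both packed into the composite key, the value carries no information, so the singleton NullWritable.get() is used as a zero-byte placeholder.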
Custom data type:
package com.huadian.bigdata.usertest;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class UserTestWritable implements WritableComparable<UserTestWritable> {

    private String firstKey;
    private int secondKey;

    // No-arg constructor is required: Hadoop creates instances via reflection
    public UserTestWritable() {
    }

    public UserTestWritable(String firstKey, int secondKey) {
        this.firstKey = firstKey;
        this.secondKey = secondKey;
    }

    @Override
    public int compareTo(UserTestWritable o) {
        // Order by firstKey first, then by secondKey when the first keys are equal
        int comp = this.getFirstKey().compareTo(o.getFirstKey());
        if (comp == 0) {
            return Integer.compare(this.secondKey, o.secondKey);
        }
        return comp;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Serialization: the field order here must match readFields()
        out.writeUTF(firstKey);
        out.writeInt(secondKey);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Deserialization: read the fields in the same order they were written
        this.firstKey = in.readUTF();
        this.secondKey = in.readInt();
    }

    public String getFirstKey() {
        return firstKey;
    }

    public void setFirstKey(String firstKey) {
        this.firstKey = firstKey;
    }

    public int getSecondKey() {
        return secondKey;
    }

    public void setSecondKey(int secondKey) {
        this.secondKey = secondKey;
    }

    @Override
    public String toString() {
        return firstKey + "--" + secondKey;
    }
}
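As a quick local sanity check, the write/readFields pair can be exercised with plain java.io streams, without running a MapReduce job. The following is a minimal sketch; the class name UserTestWritableCheck is made up for illustration:
package com.huadian.bigdata.usertest;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class UserTestWritableCheck {
    public static void main(String[] args) throws IOException {
        UserTestWritable original = new UserTestWritable("hadoop", 3);

        // Serialize: write() emits firstKey (UTF) followed by secondKey (int)
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize into a fresh instance created via the no-arg constructor
        UserTestWritable copy = new UserTestWritable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        System.out.println(copy);                     // hadoop--3
        System.out.println(original.compareTo(copy)); // 0
    }
}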
Driver code:
package com.huadian.bigdata.usertest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class UserMapReducerDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(this.getConf(), "UserMapReducerDriver");
        job.setJarByClass(UserMapReducerDriver.class);

        // Input
        Path inputPath = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inputPath);

        // Map phase
        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reduce phase
        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(UserTestWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(2);

        // Delete the output path if it already exists, otherwise the job fails
        FileSystem hdfs = FileSystem.get(this.getConf());
        Path outputPath = new Path(args[1]);
        if (hdfs.exists(outputPath)) {
            hdfs.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        try {
            int status = ToolRunner.run(configuration, new UserMapReducerDriver(), args);
            System.exit(status);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
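Extending Configured and implementing Tool lets ToolRunner handle generic Hadoop options (for example -D property overrides) before run() is invoked, which is why main() delegates to ToolRunner.run instead of calling run() directly.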
Once the code is written, package it into a jar and run it on Hadoop on Linux.
Because job.setNumReduceTasks(2) is set,
the reduce output is split across two part files.
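A typical launch command looks like the following; the jar name and HDFS paths are placeholders to adapt:
hadoop jar usertest.jar com.huadian.bigdata.usertest.UserMapReducerDriver /input/words.txt /output/usertest
After a successful run, the output directory contains one file per reducer, part-r-00000 and part-r-00001, plus a _SUCCESS marker.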