需求:将数据按照某一列分为两类
数据格式:
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-Btt73zcN-1581055525555)(C:\Users\acer\Desktop\大数据系列笔记\7\1581048781109.png)]
Step 1. 定义 Mapper
这个 Mapper 程序不做任何逻辑, 也不对 Key-Value 做任何改变, 只是接收数据, 然后往下发送
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/*
K1:行偏移量,LongWritable
V1:行文本数据,Text
K2:行文本数据,Text
V2:占位符,NullWritable
*/
public class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Pass-through map: emits each input line (V1) unchanged as the output key
     * (K2) with a NullWritable placeholder value. All the real work happens in
     * the custom Partitioner downstream.
     *
     * @param key     byte offset of the line within the split (K1); dropped
     * @param value   the raw line of text (V1); forwarded as K2
     * @param context Hadoop context used to emit the (K2, V2) pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws InterruptedException, IOException {
        // Forward the line as-is; the offset key is intentionally discarded.
        context.write(value, NullWritable.get());
    }
}
Step 2. 自定义 Partitioner
主要的逻辑就在这里, 这也是这个案例的意义, 通过 Partitioner 将数据分发给不同的 Reducer
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Routes each record to one of two reducers based on the "winnings" column
 * (the 6th tab-separated field of the line carried in the key).
 *
 * Fixes: the class was declared as {@code Partitioner<Text, LongWritable>},
 * which does not match the Mapper's {@code NullWritable} output value, so
 * {@code getPartition} never overrode the base method. The class name is also
 * corrected from "MyPartitoner" to match the driver's
 * {@code job.setPartitionerClass(MyPartitioner.class)} reference.
 */
public class MyPartitioner extends Partitioner<Text, NullWritable> {

    /**
     * Decides the partition (and therefore the reducer) for one record.
     *
     * @param text          the full input line (K2), tab-separated
     * @param nullWritable  placeholder value (V2), unused
     * @param numPartitions number of reduce tasks configured on the job
     * @return 1 when the winnings column (index 5) is greater than 15, else 0
     */
    @Override
    public int getPartition(Text text, NullWritable nullWritable, int numPartitions) {
        // Split the line (K2) on tabs to reach the winnings column.
        String[] split = text.toString().split("\t");
        // Partition rule: winnings > 15 go to reducer 1, everything else (<= 15) to reducer 0.
        if (Integer.parseInt(split[5]) > 15) {
            return 1;
        } else {
            return 0;
        }
    }
}
Step 3. 定义 Reducer 逻辑
这个 Reducer 也不做任何处理, 将数据原封不动地输出即可
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/*
K2:行文本数据,Text
V2:占位符,NullWritable
K3:行文本数据,Text
V3:占位符,NullWritable
*/
public class MyReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

    /**
     * Identity reduce: writes each incoming key (K2, the original line) back
     * out unchanged as K3 with a NullWritable placeholder value. The grouped
     * values are ignored.
     *
     * @param key     the line of text grouped by the shuffle (K2)
     * @param values  placeholder values (V2); not consumed
     * @param context Hadoop context used to emit the (K3, V3) pair
     */
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws InterruptedException, IOException {
        // Emit the key untouched; partitioning already did the classification.
        context.write(key, NullWritable.get());
    }
}
Step 4. Main 入口
/**
 * Driver for the two-way partitioning job: reads lines from HDFS, routes them
 * through MyPartitioner into two reducers, and writes each class to its own
 * output file.
 *
 * Fix: the second {@code setMapOutputValueClass} call was a duplicate; it is
 * now {@code setOutputValueClass} so the reducer's output value type (V3) is
 * actually declared.
 */
public class PartitionMain extends Configured implements Tool {

    /** Launches the job via ToolRunner and exits with its status code. */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new PartitionMain(), args);
        System.exit(exitCode);
    }

    /**
     * Configures and runs the MapReduce job.
     *
     * @param args command-line arguments (unused; paths are hard-coded)
     * @return 0 on success, 1 on failure
     */
    @Override
    public int run(String[] args) throws Exception {
        Job job = Job.getInstance(super.getConf(), PartitionMain.class.getSimpleName());
        job.setJarByClass(PartitionMain.class);

        // Input/output formats and HDFS paths.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path("hdfs://192.168.52.250:8020/partitioner"));
        TextOutputFormat.setOutputPath(job, new Path("hdfs://192.168.52.250:8020/outpartition"));

        // Mapper and its intermediate types (K2 = Text, V2 = NullWritable).
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Reducer and final output types (K3 = Text, V3 = NullWritable).
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class); // was a duplicated setMapOutputValueClass

        // Custom partitioner; the reduce-task count MUST match the number of
        // partitions MyPartitioner can return (0 and 1 -> 2 tasks).
        job.setPartitionerClass(MyPartitioner.class);
        job.setNumReduceTasks(2);

        boolean completed = job.waitForCompletion(true);
        return completed ? 0 : 1;
    }
}