1.驱动类PutInDcustomerJob.java
package com.chinalife.distributable.mergedcid.putIndcustomer;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.mapreduce.Job;
import com.chinalife.distributable.util.CustomKeyOut;
import com.chinalife.distributable.util.CustomPC;
import com.chinalife.distributable.util.Util;
/**
 * Driver of the "put into dcustomer" MapReduce job.
 *
 * <p>The job scans two HBase tables at once: the source table
 * ({@link #TABLE_STD}, restricted by an incremental time-stamp filter) and the
 * index table ({@link #TABLE_INDEX}, scanned completely). The mapper keys every
 * row by its customer index, a secondary sort puts the index-table row in
 * front of the source rows of the same index, and the reducer writes to
 * several tables through {@link MultiTableOutputFormat}.
 */
public class PutInDcustomerJob {
    /** Holding table for source rows whose fields are too incomplete to build an index. */
    public static final String TABLE_TEMP = "distributable:dcust_temporary";
    /** Final customer table. */
    public static final String TABLE_DCUST = "distributable:dcustomer";
    /** Index table (row key = customer index, column ci:dcid). */
    public static final String TABLE_INDEX = "distributable:dcustomer_index";
    /** Source table. */
    public static final String TABLE_STD = "distributable:dcust_std";

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args unused
     * @throws Exception   if the job cannot be set up or submitted
     * @throws IOException if the job finishes unsuccessfully
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // ZooKeeper quorum of the HBase cluster.
        conf.set("hbase.zookeeper.quorum", "weekend05:2181,weekend06:2181,weekend07:2181");
        // Table names are published through the configuration so that the
        // tasks can look them up at run time.
        conf.set("dcust_temporary", TABLE_TEMP);
        conf.set("dcustomer", TABLE_DCUST);
        conf.set("dcustomer_index", TABLE_INDEX);
        conf.set("dcust_std", TABLE_STD);

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "PutInDcustomerJob");
        job.setJarByClass(PutInDcustomerJob.class);

        // Time window of the incremental scan: [last stored stamp, now].
        // NOTE(review): the stamp name "PutInDcusomerJobStamp" looks misspelled but is
        // a lookup key, so it is kept byte-for-byte - confirm against the stored data.
        // NOTE(review): nothing in this class writes end_stamp back; confirm that the
        // stored stamp is advanced elsewhere.
        String start_stamp = Util.getLogicTimeFromHbase(TABLE_DCUST, "PutInDcusomerJobStamp");
        String end_stamp = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());

        ArrayList<Scan> scanList = new ArrayList<Scan>();
        // Source table: only the rows selected by the incremental filter.
        Scan sourceScan = newTableScan(TABLE_STD);
        sourceScan.setFilter(Util.getIncrFilterByStamp(start_stamp, end_stamp, "di"));
        scanList.add(sourceScan);
        // Index table: scanned without a filter.
        scanList.add(newTableScan(TABLE_INDEX));

        // The List<Scan> overload is the one to use for a multi-table input.
        TableMapReduceUtil.initTableMapperJob(scanList, PutInDcustomerMapper.class,
                CustomKeyOut.class, MapWritable.class, job);
        // The table name is left empty here; the output format is replaced right
        // below because the reducer writes to more than one table.
        TableMapReduceUtil.initTableReducerJob("", PutInDcustomerReducer.class, job);
        job.setOutputFormatClass(MultiTableOutputFormat.class);
        job.setNumReduceTasks(200);

        // Secondary sort: custom partitioner plus custom grouping comparator.
        job.setPartitionerClass(CustomPC.CustomPartitioner.class);
        job.setGroupingComparatorClass(CustomPC.CustomCombiner.class);

        if (!job.waitForCompletion(true)) {
            throw new IOException("error with job!");
        }
    }

    /**
     * Builds a scan bound to one table of the multi-table input, with the
     * caching settings shared by every scan of this job.
     *
     * @param tableName name of the table to scan
     * @return the configured scan
     */
    private static Scan newTableScan(String tableName) {
        Scan scan = new Scan();
        scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, tableName.getBytes());
        scan.setCaching(500);
        scan.setCacheBlocks(false);
        return scan;
    }
}
2.PutInDcustomerMapper.java
package com.chinalife.distributable.mergedcid.putIndcustomer;
import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import com.chinalife.distributable.mergedcid.TempUtil;
import com.chinalife.distributable.util.CustomKeyOut;
import com.chinalife.distributable.util.idCheck;
/**
 * Mapper over the rows of the index table and of the source table.
 *
 * <p>Output key: {@code CustomKeyOut(index, order)}; output value: a
 * {@link MapWritable}. The order numbers emitted here are:
 * <ul>
 *   <li>0 - row of the index table; the value carries {@code old_dcid};</li>
 *   <li>1 - source row for which an index could be built;</li>
 *   <li>2 - source row keyed {@code "surplusField"} (only a secondary contact is present);</li>
 *   <li>3 - source row keyed {@code "imperfectField"} (no index rule matched).</li>
 * </ul>
 *
 * <p>NOTE(review): bytes are decoded with the platform default charset
 * ({@code new String(byte[])}). The reducer encodes with the platform default
 * as well, so the charset must only be changed on both sides together.
 */
public class PutInDcustomerMapper extends TableMapper<CustomKeyOut, MapWritable> {
    private static final byte[] FAMILY = "ci".getBytes();
    private static final byte[] DCID_QUALIFIER = "dcid".getBytes();

    /**
     * Emits one (index, record) pair per input row.
     *
     * @param key     row key of the scanned row
     * @param value   cells of the scanned row
     * @param context task context the pair is written to
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        CustomKeyOut keyOut;
        MapWritable payload;
        // A row that has the column ci:dcid is treated as an index-table row.
        if (value.containsColumn(FAMILY, DCID_QUALIFIER)) {
            // Only the window [offset, offset + length) of the backing array is valid.
            String indexKey = new String(key.get(), key.getOffset(), key.getLength()).trim();
            keyOut = new CustomKeyOut(indexKey, 0);
            String dcid = new String(value.getValue(FAMILY, DCID_QUALIFIER)).trim();
            payload = new MapWritable();
            payload.put(new Text("old_dcid"), new Text(dcid));
        } else {
            // Source-table row: carry all fields plus the original row key.
            payload = getCustHashMap(value);
            Text rowKey = new Text();
            rowKey.set(key.get(), key.getOffset(), key.getLength());
            payload.put(new Text("src_rowKey"), rowKey);

            String index = getIndex(payload);
            if (index == null) {
                // No rule matched: mark the record for the holding table.
                keyOut = new CustomKeyOut("imperfectField", 3);
            } else {
                index = index.trim();
                keyOut = new CustomKeyOut(index, index.equals("surplusField") ? 2 : 1);
            }
        }
        context.write(keyOut, payload);
    }

    /**
     * Copies the cells of one source row into a {@link MapWritable}
     * (trimmed qualifier -> trimmed value).
     *
     * <p>A cell is copied only when {@code TempUtil.stringIsNullOrEmpty} returns
     * {@code true} for both its qualifier and its value.
     * NOTE(review): despite its name the helper is used throughout this class as a
     * "has a value" test - confirm against TempUtil.
     *
     * @param value the scanned row
     * @return the fields of the row
     */
    public MapWritable getCustHashMap(Result value) {
        MapWritable custHashMap = new MapWritable();
        for (Cell cell : value.rawCells()) {
            Text cellKey = new Text(new String(CellUtil.cloneQualifier(cell)).trim());
            Text cellValue = new Text(new String(CellUtil.cloneValue(cell)).trim());
            if (TempUtil.stringIsNullOrEmpty(cellKey) && TempUtil.stringIsNullOrEmpty(cellValue)) {
                custHashMap.put(cellKey, cellValue);
            }
        }
        return custHashMap;
    }

    /**
     * Picks the index of a record; the first matching rule wins. Every rule
     * requires the name.
     * <ol>
     *   <li>idNo: {@code reverse(idNo)~I~name}</li>
     *   <li>passportNo: {@code reverse(passportNo)~P~name}</li>
     *   <li>gender + birthday + officerNo / inCNNo / HKNo / TaiWanNo:
     *       {@code reverse(no)~S|R|G|W~name~gender~birthday}</li>
     *   <li>otherNo: {@code reverse(otherNo)~I~name} when
     *       {@code idCheck.isValidatedAllIdcard} accepts it, otherwise
     *       {@code reverse(otherNo)~O~name~gender~birthday} when gender and
     *       birthday are present</li>
     *   <li>mobileNo: {@code reverse(mobileNo)~name}</li>
     *   <li>wechatNo, email, QQ or otherContact: the literal {@code "surplusField"}</li>
     * </ol>
     *
     * @param custHashMap fields of one source row
     * @return the index, {@code "surplusField"}, or {@code null} when no rule matches
     */
    public static String getIndex(MapWritable custHashMap) {
        String name = field(custHashMap, "name");
        if (!hasValue(name)) {
            // Without a name no rule can match.
            return null;
        }

        // name + identity card number
        String idNo = field(custHashMap, "idNo");
        if (hasValue(idNo)) {
            return reverse(idNo) + "~I~" + name;
        }
        // name + passport number
        String passportNo = field(custHashMap, "passportNo");
        if (hasValue(passportNo)) {
            return reverse(passportNo) + "~P~" + name;
        }

        // name + gender + birthday + one further document number
        String gender = field(custHashMap, "gender");
        String birthday = field(custHashMap, "birthday");
        boolean hasDemographics = hasValue(gender) && hasValue(birthday);
        String demographics = name + "~" + gender + "~" + birthday;
        if (hasDemographics) {
            // Field name and tag of each document type, in priority order.
            String[][] documents = {
                {"officerNo", "~S~"}, {"inCNNo", "~R~"}, {"HKNo", "~G~"}, {"TaiWanNo", "~W~"}
            };
            for (String[] document : documents) {
                String number = field(custHashMap, document[0]);
                if (hasValue(number)) {
                    return reverse(number) + document[1] + demographics;
                }
            }
        }

        // "Other" document: indexed like an identity card when it validates as one.
        String otherNo = field(custHashMap, "otherNo");
        if (hasValue(otherNo)) {
            if (idCheck.isValidatedAllIdcard(otherNo)) {
                return reverse(otherNo) + "~I~" + name;
            }
            if (hasDemographics) {
                return reverse(otherNo) + "~O~" + demographics;
            }
        }

        // name + mobile number
        String mobileNo = field(custHashMap, "mobileNo");
        if (hasValue(mobileNo)) {
            return reverse(mobileNo) + "~" + name;
        }

        // Only a secondary contact is left.
        if (surplusField(name, field(custHashMap, "wechatNo"))
                || surplusField(name, field(custHashMap, "email"))
                || surplusField(name, field(custHashMap, "QQ"))
                || surplusField(name, field(custHashMap, "otherContact"))) {
            return "surplusField";
        }
        return null;
    }

    /**
     * Reads one field of a record.
     *
     * @return the trimmed text of the field, or {@code ""} when it is absent
     */
    private static String field(MapWritable record, String qualifier) {
        Object raw = record.get(new Text(qualifier));
        return raw == null ? "" : raw.toString().trim();
    }

    /** Single place where the presence test used by the index rules is delegated to TempUtil. */
    private static boolean hasValue(String text) {
        return TempUtil.stringIsNullOrEmpty(text);
    }

    /**
     * Tells whether a record has a name and the given secondary contact, i.e.
     * fields that are present but not covered by an index rule.
     *
     * @param name      the customer name
     * @param contactNo the secondary contact value
     * @return {@code true} when the TempUtil check passes for both arguments
     */
    public static boolean surplusField(String name, String contactNo) {
        return TempUtil.stringIsNullOrEmpty(name) && TempUtil.stringIsNullOrEmpty(contactNo);
    }

    /**
     * Reverses a string {@code char} by {@code char}.
     *
     * <p>Kept as a plain char reversal on purpose: {@code StringBuilder.reverse()}
     * treats surrogate pairs differently and could therefore produce keys that
     * differ from the ones built so far.
     *
     * @param str the string to reverse; must not be {@code null}
     * @return the reversed string
     */
    public static String reverse(String str) {
        char[] source = str.toCharArray();
        char[] reversed = new char[source.length];
        for (int i = 0; i < source.length; i++) {
            reversed[source.length - 1 - i] = source[i];
        }
        return new String(reversed);
    }
}
3.PutInDcustomerReducer.java
package com.chinalife.distributable.mergedcid.putIndcustomer;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import com.chinalife.distributable.util.CustomKeyOut;
/**
 * Reducer that assigns a dcid to every source record and writes the result to
 * several HBase tables; the output key of each write is the name of the
 * destination table.
 *
 * <p>One reduce call handles all records of one index:
 * <ol>
 *   <li>index {@code "imperfectField"}: the records go to the holding table
 *       (configuration key {@code dcust_temporary});</li>
 *   <li>index {@code "surplusField"}: every record gets a dcid of its own and
 *       goes to the customer table (configuration key {@code dcustomer});</li>
 *   <li>any other index: when the first value carries {@code old_dcid} that dcid
 *       is reused, otherwise a new one is generated and additionally stored in
 *       the index table (configuration key {@code dcustomer_index}); every
 *       source record then goes to the customer table under row key
 *       {@code dcid~src_rowKey}.</li>
 * </ol>
 */
public class PutInDcustomerReducer extends TableReducer<CustomKeyOut, MapWritable, ImmutableBytesWritable> {
    /** Running number of the dcids generated so far in this JVM. */
    private static int seq = 0;
    /** Last three characters of the task number of the current task attempt. */
    private static String taskID = null;

    /**
     * Derives {@code taskID} from the task attempt id: the fifth
     * {@code '_'}-separated part, cut down to its last three characters.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        taskID = context.getTaskAttemptID().toString().split("_")[4];
        taskID = taskID.substring(taskID.length() - 3, taskID.length());
    }

    /**
     * Routes the records of one index to their destination tables (see the class
     * comment).
     *
     * @param index        the index shared by all values of this call
     * @param mapWritables the records of this index
     * @param context      task context the puts are written to
     */
    @Override
    protected void reduce(CustomKeyOut index, Iterable<MapWritable> mapWritables, Context context)
            throws IOException, InterruptedException {
        String indexStr = index.getKeyOut().trim();
        // The sentinels are compared with equals(): a substring test would also
        // catch a real index whose name part merely contains one of the words.
        if (indexStr.equals("surplusField")) {
            writeSurplusRecords(mapWritables, context);
        } else if (indexStr.equals("imperfectField")) {
            writeImperfectRecords(mapWritables, context);
        } else {
            writeIndexedRecords(indexStr, mapWritables, context);
        }
    }

    /** Stores records that match no index rule, each under a freshly generated dcid. */
    private void writeSurplusRecords(Iterable<MapWritable> records, Context context)
            throws IOException, InterruptedException {
        ImmutableBytesWritable dcustomerTable = tableKey(context, "dcustomer");
        for (MapWritable record : records) {
            context.write(dcustomerTable, getDcustomerPut(record, getNewDcid()));
        }
    }

    /** Stores incomplete records in the holding table under their source row key. */
    private void writeImperfectRecords(Iterable<MapWritable> records, Context context)
            throws IOException, InterruptedException {
        ImmutableBytesWritable temporaryTable = tableKey(context, "dcust_temporary");
        for (MapWritable record : records) {
            String srcRowKey = record.get(new Text("src_rowKey")).toString();
            Put put = new Put(srcRowKey.getBytes());
            addFields(put, record);
            put.add("ci".getBytes(), "hstamp".getBytes(), getStringDate().getBytes());
            context.write(temporaryTable, put);
        }
    }

    /**
     * Stores the records of a regular index. The dcid is the {@code old_dcid} of
     * the first value when it has one; otherwise a new dcid is generated and
     * recorded in the index table.
     */
    private void writeIndexedRecords(String indexStr, Iterable<MapWritable> records, Context context)
            throws IOException, InterruptedException {
        ImmutableBytesWritable dcustomerTable = tableKey(context, "dcustomer");
        Iterator<MapWritable> iterator = records.iterator();
        MapWritable record = iterator.next();

        String dcid;
        Writable matchedDcid = record.get(new Text("old_dcid"));
        if (matchedDcid == null) {
            // No dcid known for this index: create one and register it.
            dcid = getNewDcid();
            Put indexPut = new Put(indexStr.getBytes());
            indexPut.add("ci".getBytes(), "dcid".getBytes(), dcid.getBytes());
            indexPut.add("ci".getBytes(), "hstamp".getBytes(), getStringDate().getBytes());
            context.write(tableKey(context, "dcustomer_index"), indexPut);
            // The first value is itself a source record.
            context.write(dcustomerTable, getDcustomerPut(record, dcid));
        } else {
            // The text is copied out before the iterator is advanced.
            dcid = matchedDcid.toString();
        }

        while (iterator.hasNext()) {
            record = iterator.next();
            if (record.get(new Text("src_rowKey")) != null) {
                context.write(dcustomerTable, getDcustomerPut(record, dcid));
            } else {
                logUnexpectedRecord(record, indexStr);
            }
        }
    }

    /** Builds the output key that names the table stored under {@code configKey}. */
    private static ImmutableBytesWritable tableKey(Context context, String configKey) {
        return new ImmutableBytesWritable(Bytes.toBytes(context.getConfiguration().get(configKey)));
    }

    /** Adds every field of the record except {@code src_rowKey} to family {@code ci}. */
    private static void addFields(Put put, MapWritable record) {
        Text rowKeyField = new Text("src_rowKey");
        for (Entry<Writable, Writable> field : record.entrySet()) {
            if (!field.getKey().equals(rowKeyField)) {
                put.add("ci".getBytes(), field.getKey().toString().getBytes(),
                        field.getValue().toString().getBytes());
            }
        }
    }

    /** Dumps a value that has no {@code src_rowKey} although a source record was expected. */
    private static void logUnexpectedRecord(MapWritable record, String indexStr) {
        System.out.println("-----------------Wrong Data-------------------");
        for (Entry<Writable, Writable> en : record.entrySet()) {
            System.out.println(en.getKey().toString() +" " +en.getValue().toString());
        }
        System.out.println("The src_rowkey is :" + record.get(new Text("src_rowKey")) + "--------" + indexStr);
        System.out.println("----------------------------------------------------");
    }

    /**
     * Builds the customer-table put of one source record.
     *
     * <p>Row key: {@code newDcid~src_rowKey}. Columns (family {@code ci}): every
     * field of the record except {@code src_rowKey}, {@code hstamp} with the
     * current time, and {@code analyzeflag} reset to {@code "N"}.
     *
     * @param mapWritable the source record; must contain {@code src_rowKey}
     * @param newDcid     the dcid the record is stored under
     * @return the put for the customer table
     */
    private Put getDcustomerPut(MapWritable mapWritable, String newDcid) {
        String srcRowKey = mapWritable.get(new Text("src_rowKey")).toString();
        Put dcustomerPut = new Put((newDcid + "~" + srcRowKey).getBytes());
        addFields(dcustomerPut, mapWritable);
        dcustomerPut.add("ci".getBytes(), "hstamp".getBytes(), getStringDate().getBytes());
        // Reset the address-analysis flag.
        dcustomerPut.add("ci".getBytes(), "analyzeflag".getBytes(), "N".getBytes());
        return dcustomerPut;
    }

    /**
     * Generates a new dcid: time stamp ({@code yyMMddHHmmss}) + {@code taskID} +
     * six-digit running number, with the whole string reversed.
     *
     * <p>The running number keeps only its last six digits, so it starts over
     * after 999999.
     *
     * @return the new dcid
     */
    public String getNewDcid() {
        String timestamp = new SimpleDateFormat("yyMMddHHmmss").format(new Date());
        seq++;
        String padded = "000000" + seq;
        String serial = padded.substring(padded.length() - 6, padded.length());
        return reverse(timestamp + taskID + serial);
    }

    /**
     * Reverses a string {@code char} by {@code char}.
     *
     * @param str the string to reverse; must not be {@code null}
     * @return the reversed string
     */
    public static String reverse(String str) {
        char[] source = str.toCharArray();
        char[] reversed = new char[source.length];
        for (int i = 0; i < source.length; i++) {
            reversed[source.length - 1 - i] = source[i];
        }
        return new String(reversed);
    }

    /**
     * Formats the current time as {@code yyyy-MM-dd HH:mm:ss}; used for the
     * {@code hstamp} column.
     *
     * @return the formatted current time
     */
    public static String getStringDate() {
        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
    }
}
4.CustomKeyOut.java
package com.chinalife.distributable.util;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
* 自定义的在mapreduce之间传递的类,用于二次排序的通用类。
* 两个默认字段,keyOut和order。
* 1.keyOut :原来的map阶段的keyOut字段
* 2.order :顺序号字段,用于第二次排序的顺序号
* @author wangqingchun
*
*/
/**
 * Composite map-output key for a secondary sort.
 *
 * <ul>
 *   <li>{@code keyOut} - the actual key of the map output;</li>
 *   <li>{@code order} - sequence number that orders the records sharing one
 *       {@code keyOut}.</li>
 * </ul>
 */
public class CustomKeyOut implements WritableComparable<CustomKeyOut> {
    /** The actual key of the map output. */
    public String keyOut;
    /** Sequence number used for the secondary sort. */
    public int order;

    /** No-argument constructor, needed so that the key can be created reflectively before {@link #readFields}. */
    public CustomKeyOut() {}

    /**
     * @param keyOut the actual key of the map output
     * @param order  sequence number used for the secondary sort
     */
    public CustomKeyOut(String keyOut, int order) {
        this.keyOut = keyOut;
        this.order = order;
    }

    /** Reads the fields in the order in which {@link #write} stores them. */
    @Override
    public void readFields(DataInput input) throws IOException {
        this.keyOut = input.readUTF();
        this.order = input.readInt();
    }

    /** Serializes {@code keyOut} followed by {@code order}. */
    @Override
    public void write(DataOutput output) throws IOException {
        output.writeUTF(keyOut);
        output.writeInt(order);
    }

    /**
     * Orders by {@code keyOut} first and by {@code order} second. Override this
     * method to define another ordering.
     *
     * <p>The order numbers are compared explicitly instead of being subtracted,
     * because a subtraction can overflow and flip the sign.
     */
    @Override
    public int compareTo(CustomKeyOut otherObj) {
        int res = this.keyOut.compareTo(otherObj.keyOut);
        if (res != 0) {
            return res;
        }
        if (this.order < otherObj.order) {
            return -1;
        }
        return this.order == otherObj.order ? 0 : 1;
    }

    /** Two keys are equal when both {@code keyOut} and {@code order} match, in line with {@link #compareTo}. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof CustomKeyOut)) {
            return false;
        }
        CustomKeyOut other = (CustomKeyOut) obj;
        if (order != other.order) {
            return false;
        }
        return keyOut == null ? other.keyOut == null : keyOut.equals(other.keyOut);
    }

    /**
     * Hashes {@code keyOut} only, so that all order numbers of one key share a
     * hash code (equal keys still have equal hash codes).
     */
    @Override
    public int hashCode() {
        return keyOut == null ? 0 : keyOut.hashCode();
    }

    /** Returns the {@code keyOut} field only. */
    @Override
    public String toString() {
        return getKeyOut();
    }

    public String getKeyOut() {
        return keyOut;
    }

    public void setKeyOut(String keyOut) {
        this.keyOut = keyOut;
    }

    public int getOrder() {
        return order;
    }

    public void setOrder(int order) {
        this.order = order;
    }
}
5.CustomPC.java
package com.chinalife.distributable.util;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Partitioner;
/**
* 自定义的分区,分组类
* @author wangqingchun
*
*/
/**
 * Holder of the partitioner and the grouping comparator that go with
 * {@link CustomKeyOut}.
 */
public class CustomPC {

    /**
     * Partitions by the {@code keyOut} field alone: the absolute value of
     * {@code hashCode % numReduceTasks}. The order number plays no part.
     */
    public static class CustomPartitioner extends Partitioner<CustomKeyOut,Object>{
        @Override
        public int getPartition(CustomKeyOut keyOut, Object valOut,
                int numReduceTasks) {
            int remainder = keyOut.getKeyOut().hashCode() % numReduceTasks;
            // The remainder may be negative for a negative hash code.
            return remainder < 0 ? -remainder : remainder;
        }
    }

    /**
     * Grouping comparator: two keys compare as equal when their {@code keyOut}
     * fields are equal; the order number is ignored.
     */
    public static class CustomCombiner extends WritableComparator{
        public CustomCombiner(){
            super(CustomKeyOut.class,true);
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            CustomKeyOut left = (CustomKeyOut)a;
            CustomKeyOut right = (CustomKeyOut)b;
            String leftKey = left.getKeyOut();
            return leftKey.compareTo(right.getKeyOut());
        }
    }
}