MapReduce code example — custom Partitioner

package partitioner;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Custom-partitioner demo.
 *
 * Column 2 of each tab-separated input line is either an IP address or a
 * mobile phone number; columns 21-24 hold four traffic counters.
 *
 * The job sums the traffic per key and, via {@code KpiPartitioner}, writes
 * phone-number totals to one output file and IP totals to another
 * (one reduce partition for each).
 *
 * @author Administrator
 */
public class KpiApp {
public static final String INPUT_PATH = "hdfs://hadoop:9000/in/kpi";
public static final String OUT_PATH = "hdfs://hadoop:9000/out";

/**
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
final Configuration conf = new Configuration();
final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
fileSystem.delete(new Path(OUT_PATH), true);


final Job job = new Job(conf, KpiApp.class.getSimpleName());
job.setJarByClass(KpiApp.class);

FileInputFormat.setInputPaths(job, INPUT_PATH);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(MyMapper.class);

job.setPartitionerClass(KpiPartitioner.class);
job.setNumReduceTasks(2);


job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(KpiWritable.class);
FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
job.setOutputFormatClass(TextOutputFormat.class);

job.waitForCompletion(true);

}

public static class MyMapper extends Mapper<LongWritable, Text, Text, KpiWritable>{

Text k2 = new Text();
KpiWritable v2 = new KpiWritable();

List<String> iplist = new ArrayList<String>();

protected void map(LongWritable key, Text value, org.apache.hadoop.mapreduce.Mapper<LongWritable,Text,Text,KpiWritable>.Context context) throws java.io.IOException ,InterruptedException {

//Counter ipcounter = context.getCounter("ip", "ipcounter");
String[] splited = value.toString().split("\t");
/*if(splited[2].matches("[0-9]*")){

}else{
if(!iplist.contains(splited[2])){
iplist.add(splited[2]);
ipcounter.increment(1L);
}
}*/
k2.set(splited[2]);
v2.set(splited[21], splited[22], splited[23], splited[24]);

context.write(k2, v2);
};
}

public static class MyReducer extends Reducer<Text,KpiWritable, Text,KpiWritable>{

KpiWritable v3 = new KpiWritable();
protected void reduce(Text key2, java.lang.Iterable<KpiWritable> value2s, org.apache.hadoop.mapreduce.Reducer<Text,KpiWritable,Text,KpiWritable>.Context context) throws java.io.IOException ,InterruptedException {

int sumuppacknum = 0;
int sumdownpacknum = 0;
int sumuppayload = 0;
int sumdownpayload = 0;

for (KpiWritable v2 : value2s) {
sumuppacknum += v2.uppacknum;
sumdownpacknum += v2.downpacknum;
sumuppayload += v2.uppayload;
sumdownpayload += v2.downpayload;
}
v3.set(sumuppacknum+"", sumdownpacknum+"", sumuppayload+"", sumdownpayload+"");

context.write(key2,v3);
};
}


/**
 * Value type carrying the four KPI counters: upstream/downstream packet
 * counts and upstream/downstream payload bytes.
 */
public static class KpiWritable implements Writable {

    int uppacknum;
    int downpacknum;
    int uppayload;
    int downpayload;

    /**
     * Populates all four counters from their decimal string form, as they
     * appear in the input file's columns.
     */
    public void set(String uppacknum, String downpacknum, String uppayload,
            String downpayload) {
        this.uppacknum = Integer.parseInt(uppacknum);
        this.downpacknum = Integer.parseInt(downpacknum);
        this.uppayload = Integer.parseInt(uppayload);
        this.downpayload = Integer.parseInt(downpayload);
    }

    // write() and readFields() must serialize the fields in the same order —
    // that ordering is the Writable wire format.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(uppacknum);
        out.writeInt(downpacknum);
        out.writeInt(uppayload);
        out.writeInt(downpayload);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.uppacknum = in.readInt();
        this.downpacknum = in.readInt();
        this.uppayload = in.readInt();
        this.downpayload = in.readInt();
    }

    /** Tab-separated counters — this is the text written by TextOutputFormat. */
    @Override
    public String toString() {
        return new StringBuilder()
                .append(uppacknum).append('\t')
                .append(downpacknum).append('\t')
                .append(uppayload).append('\t')
                .append(downpayload)
                .toString();
    }
}


/**
 * Routes phone-number keys to partition 0 and IP-address keys to partition 1.
 *
 * An IP key is recognized by containing a dot anywhere in the key; the
 * previous {@code indexOf(".") > 0} test misclassified a key that starts
 * with '.'. The returned partition is also clamped to {@code numPartitions}
 * so the partitioner stays valid if the job runs with a single reducer.
 */
public static class KpiPartitioner extends Partitioner<Text, KpiWritable> {

    @Override
    public int getPartition(Text key, KpiWritable value, int numPartitions) {
        if (numPartitions < 2) {
            return 0; // single reducer: everything goes to partition 0
        }
        return key.toString().contains(".") ? 1 : 0;
    }
}

}

Reprinted from jsh0401.iteye.com/blog/2111582