1. Requirement
Count the total upstream traffic, total downstream traffic, and total traffic consumed by each phone number.
(1) Input data
(2) Input data format (tab-separated fields): id, phone number, network IP, upstream traffic, downstream traffic, network status code
Sample line:
7    13560436666    120.196.100.99    1116    954    200
(3) Expected output data format: phone number, upstream traffic, downstream traffic, total traffic
Sample line:
13560436666    1116    954    2070
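Since totals are computed per phone number, all input records sharing a number are summed. For example, two hypothetical records for 13560436666 carrying upstream/downstream traffic of 1116/954 and 200/100 would produce a single output line:
13560436666    1316    1054    2370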
2. MapReduce program
(1) Write the FlowBean class to carry the traffic statistics
package com.atguigu.mapreduce.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

// 1. Implement the Writable interface
public class FlowBean implements Writable {

    private long upFlow;
    private long downFlow;
    private long sumFlow;

    // 2. Deserialization calls the no-arg constructor via reflection, so it must exist
    public FlowBean() {
        super();
    }

    public FlowBean(long upFlow, long downFlow) {
        super();
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    // Reset all three fields; the Mapper reuses a single FlowBean instance
    public void set(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    // 3. Serialization method
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    // 4. Deserialization method
    // 5. Fields must be read in exactly the same order they were written
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    // 6. toString method, to make the printed output easy to read
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    public long getUpFlow() { return upFlow; }
    public void setUpFlow(long upFlow) { this.upFlow = upFlow; }
    public long getDownFlow() { return downFlow; }
    public void setDownFlow(long downFlow) { this.downFlow = downFlow; }
    public long getSumFlow() { return sumFlow; }
    public void setSumFlow(long sumFlow) { this.sumFlow = sumFlow; }
}
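The read/write ordering contract is easy to get wrong, so here is a minimal sketch (not part of the original job; the class name FlowBeanRoundTrip is made up, and it is assumed to sit in the same package as FlowBean) that writes a bean to an in-memory stream and reads it back:

package com.atguigu.mapreduce.flowsum;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(1116, 954);

        // Serialize into an in-memory byte stream, much as Hadoop does during the shuffle
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize through the no-arg constructor plus readFields
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy); // prints: 1116	954	2070
    }
}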
(2) Write the Mapper class
package com.atguigu.mapreduce.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    FlowBean v = new FlowBean();
    Text k = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        // 1. Get one line
        String line = value.toString();

        // 2. Split the line into fields
        String[] fields = line.split("\t");

        // 3. Encapsulate the object
        // Take the phone number
        String phoneNum = fields[1];
        // Take the upstream and downstream traffic, counting from the end of the record
        long upFlow = Long.parseLong(fields[fields.length - 3]);
        long downFlow = Long.parseLong(fields[fields.length - 2]);

        k.set(phoneNum);
        v.set(upFlow, downFlow);

        // 4. Write out
        context.write(k, v);
    }
}
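The field indexing above is easiest to verify outside Hadoop. A minimal sketch (the class name MapParseDemo is made up, and it assumes the tab-separated sample record from section 1) of the same split-and-parse logic:

public class MapParseDemo {
    public static void main(String[] args) {
        String line = "7\t13560436666\t120.196.100.99\t1116\t954\t200";
        String[] fields = line.split("\t");

        // Same indexing as the Mapper: phone number is field 1; the traffic
        // columns are located relative to the end of the record
        String phoneNum = fields[1];
        long upFlow = Long.parseLong(fields[fields.length - 3]);
        long downFlow = Long.parseLong(fields[fields.length - 2]);

        System.out.println(phoneNum + " -> up=" + upFlow + ", down=" + downFlow);
        // prints: 13560436666 -> up=1116, down=954
    }
}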
(3) Write the Reducer class
package com.atguigu.mapreduce.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context)
            throws IOException, InterruptedException {

        long sum_upFlow = 0;
        long sum_downFlow = 0;

        // 1. Iterate over the beans, accumulating the upstream and downstream traffic separately
        for (FlowBean flowBean : values) {
            sum_upFlow += flowBean.getUpFlow();
            sum_downFlow += flowBean.getDownFlow();
        }

        // 2. Encapsulate the result object
        FlowBean resultBean = new FlowBean(sum_upFlow, sum_downFlow);

        // 3. Write out
        context.write(key, resultBean);
    }
}
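The accumulation loop can likewise be sketched without the Hadoop runtime. A self-contained example (the class name ReduceSumDemo is made up, with in-memory values standing in for the Iterable that the shuffle would deliver for one phone number):

package com.atguigu.mapreduce.flowsum;

import java.util.Arrays;
import java.util.List;

public class ReduceSumDemo {
    public static void main(String[] args) {
        // Two hypothetical FlowBean values arriving for the same phone-number key
        List<FlowBean> values = Arrays.asList(new FlowBean(1116, 954), new FlowBean(200, 100));

        long sum_upFlow = 0;
        long sum_downFlow = 0;
        for (FlowBean flowBean : values) {
            sum_upFlow += flowBean.getUpFlow();
            sum_downFlow += flowBean.getDownFlow();
        }

        // The constructor computes the total, so this prints: 1316	1054	2370
        System.out.println(new FlowBean(sum_upFlow, sum_downFlow));
    }
}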
(4) Write the Driver class
package com.atguigu.mapreduce.flowsum;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowsumDriver {

    public static void main(String[] args)
            throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {

        // Set the input and output paths to the actual paths on your machine
        args = new String[] { "e:/input/inputflow", "e:/output1" };

        // 1. Get the configuration information and a Job instance
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // 6. Specify the local path of the jar containing this program
        job.setJarByClass(FlowsumDriver.class);

        // 2. Specify the Mapper/Reducer business classes this job uses
        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        // 3. Specify the k/v types of the mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // 4. Specify the k/v types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // 5. Specify the directory of the job's raw input files, and the output directory
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 7. Submit the job, with its configured parameters and the jar containing
        //    its classes, to YARN and wait for it to finish
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
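The hard-coded args line above runs the job against local paths. To run it on a cluster instead, remove that line, package the classes into a jar, and submit it with Hadoop's standard launcher; the jar name and HDFS paths here are placeholders:

hadoop jar flowcount.jar com.atguigu.mapreduce.flowsum.FlowsumDriver /input/inputflow /output1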