MapReduce serialization

1. Requirement

Count the total upstream traffic, total downstream traffic, and total traffic consumed by each phone number.

(1) Input data

(2) Input data format (fields are tab-separated):

7	13560436666	120.196.100.99	1116	954	200

id	phone number	network IP	upstream traffic	downstream traffic	network status code

 

(3) Desired output data format

13560436666	1116	954	2070

phone number	upstream traffic	downstream traffic	total traffic

2. MapReduce program

(1) Write the FlowBean class. The value emitted for each record has to carry three fields (upstream traffic, downstream traffic, total traffic), so a custom bean that implements Hadoop's Writable interface is needed.

package com.atguigu.mapreduce.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

// 1 Implement the Writable interface
public class FlowBean implements Writable {

    private long upFlow;
    private long downFlow;
    private long sumFlow;

    // 2 An empty constructor is required; it is invoked via reflection during deserialization
    public FlowBean() {
        super();
    }

    public FlowBean(long upFlow, long downFlow) {
        super();
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    // 3 Serialization method
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    // 4 Deserialization method
    // 5 Fields must be read in exactly the same order in which they were written
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    // 6 Override toString() so the result is easy to print as text later
    @Override
    public String toString() {
        return upFlow + "\t" + downFlow + "\t" + sumFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }
}
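To see the Writable contract in isolation, here is a minimal stand-alone sketch (the class FlowBeanRoundTrip is not part of the tutorial, it is only an illustration) that serializes a FlowBean to a byte buffer and deserializes it again. It makes visible why readFields() has to read the three longs in exactly the order write() produced them:

package com.atguigu.mapreduce.flowsum;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTrip {

    public static void main(String[] args) throws IOException {
        // Serialize: write() puts upFlow, downFlow, sumFlow onto the DataOutput in that order
        FlowBean original = new FlowBean(1116, 954);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));

        // Deserialize: the empty constructor is called first, then readFields() refills the
        // fields by reading the three longs back in the same order they were written
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));

        System.out.println(copy); // prints: 1116	954	2070
    }
}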

(2) Write the Mapper class

package com.atguigu.mapreduce.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    FlowBean v = new FlowBean();
    Text k = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

        // 1 Get one line
        String line = value.toString();

        // 2 Split the line into fields
        String[] fields = line.split("\t");

        // 3 Encapsulate the object
        // Take out the phone number
        String phoneNum = fields[1];

        // Take out the upstream and downstream traffic
        long upFlow = Long.parseLong(fields[fields.length - 3]);
        long downFlow = Long.parseLong(fields[fields.length - 2]);

        k.set(phoneNum);
        v.setUpFlow(upFlow);
        v.setDownFlow(downFlow);
        v.setSumFlow(upFlow + downFlow);

        // 4 Write out
        context.write(k, v);
    }
}
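Indexing from the end of the array (fields.length - 3 and fields.length - 2) rather than from the front presumably keeps the extraction correct even if some records carry extra middle columns. A throwaway check against the sample record (the class FieldIndexDemo is made up for illustration) shows which values those indexes pick out:

public class FieldIndexDemo {

    public static void main(String[] args) {
        // The sample record from the requirement, with tab-separated fields
        String line = "7\t13560436666\t120.196.100.99\t1116\t954\t200";
        String[] fields = line.split("\t");

        System.out.println(fields[1]);                 // 13560436666 (phone number)
        System.out.println(fields[fields.length - 3]); // 1116 (upstream traffic)
        System.out.println(fields[fields.length - 2]); // 954  (downstream traffic)
    }
}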

(3) Write the Reducer class

package com.atguigu.mapreduce.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {

        long sum_upFlow = 0;
        long sum_downFlow = 0;

        // 1 Traverse all the beans and accumulate the upstream and downstream traffic separately
        for (FlowBean flowBean : values) {
            sum_upFlow += flowBean.getUpFlow();
            sum_downFlow += flowBean.getDownFlow();
        }

        // 2 Encapsulate the result object
        FlowBean resultBean = new FlowBean(sum_upFlow, sum_downFlow);

        // 3 Write out
        context.write(key, resultBean);
    }
}
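The accumulation inside reduce() can also be tried as plain Java. In the sketch below, the class ReduceLogicDemo and the second record's values (300 upstream, 200 downstream) are invented purely for illustration, assuming two records for the same phone number reach a single reduce() call:

package com.atguigu.mapreduce.flowsum;

import java.util.Arrays;
import java.util.List;

public class ReduceLogicDemo {

    public static void main(String[] args) {
        // Hypothetical: two beans for the same phone number arriving at one reduce() call
        List<FlowBean> values = Arrays.asList(new FlowBean(1116, 954), new FlowBean(300, 200));

        long sum_upFlow = 0;
        long sum_downFlow = 0;
        for (FlowBean flowBean : values) {
            sum_upFlow += flowBean.getUpFlow();
            sum_downFlow += flowBean.getDownFlow();
        }

        FlowBean resultBean = new FlowBean(sum_upFlow, sum_downFlow);
        System.out.println(resultBean); // prints: 1416	1154	2570
    }
}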

(4) Write the Driver class

package com.atguigu.mapreduce.flowsum;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowsumDriver {

    public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {

        // Set the input and output paths to the actual paths on your own machine
        args = new String[] { "e:/input/inputflow", "e:/output1" };

        // 1 Get the configuration information and a job object instance
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);

        // 6 Specify the local path of the jar that contains this program
        job.setJarByClass(FlowsumDriver.class);

        // 2 Specify the Mapper/Reducer business classes this job should use
        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        // 3 Specify the kv types of the mapper's output data
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // 4 Specify the kv types of the final output data
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // 5 Specify the directory of the job's original input files and the output directory
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 7 Package the job's configured parameters and the jar of the classes it uses,
        // submit them to YARN (or the local runner), and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
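For a run on a real cluster instead of the local test above, the hard-coded args assignment would be removed, the project packaged into a jar, and the job submitted with the input and output paths passed on the command line, roughly like this (the jar name and HDFS paths here are hypothetical):

hadoop jar flowcount.jar com.atguigu.mapreduce.flowsum.FlowsumDriver /input/inputflow /output1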

 


Origin www.cnblogs.com/837634902why/p/11455590.html