1. Serialization Overview
1.1 What is serialization
Serialization is the process of converting objects in memory into a sequence of bytes (or another data transfer format) so that they can be stored on disk (persisted) and transmitted over the network.
Deserialization is the reverse: it takes a byte sequence (or other transfer format) received over the network or read from persistent disk storage and turns it back into objects in memory.
1.2 Why serialize
In general, "live" objects exist only in memory and are gone once the process ends or the machine is shut down; moreover, a "live" object can only be used by the local process and cannot be sent to another computer over the network. Serialization makes it possible to store a "live" object and to send it to a remote machine.
1.3 Why not use Java serialization
Java's built-in serialization (Serializable) is a heavyweight framework: a serialized object carries a lot of extra information (headers, check information, the inheritance hierarchy), which makes it inefficient to transmit across the network. Hadoop therefore provides its own serialization mechanism, Writable.
1.4 Hadoop serialization characteristics
1.4.1 Compact: efficient use of storage space;
1.4.2 Fast: low overhead when reading and writing data;
1.4.3 Extensible: can evolve as communication protocols are upgraded;
1.4.4 Interoperable: supports interaction across multiple languages;
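To make the "compact" point concrete, here is a minimal sketch (the class name SizeCompare and the sample value are only illustrative) that serializes one long value once with Hadoop's LongWritable and once with Java's built-in ObjectOutputStream, then prints the resulting sizes. The Writable form is just the 8 raw bytes of the long, while the Java-serialized form also carries a stream header and class metadata.
import org.apache.hadoop.io.LongWritable;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.ObjectOutputStream;
public class SizeCompare {
    public static void main(String[] args) throws Exception {
        // Hadoop Writable: writes only the 8 raw bytes of the long
        ByteArrayOutputStream writableBytes = new ByteArrayOutputStream();
        new LongWritable(13736230513L).write(new DataOutputStream(writableBytes));
        // Java serialization: ObjectOutputStream adds a stream header and class metadata
        ByteArrayOutputStream javaBytes = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(javaBytes);
        oos.writeObject(Long.valueOf(13736230513L));
        oos.flush();
        System.out.println("Writable size: " + writableBytes.size() + " bytes");          // 8
        System.out.println("Java serialization size: " + javaBytes.size() + " bytes");    // much larger
    }
}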
2. Implementing the serialization interface (Writable) for a custom bean
In enterprise development, the commonly used basic serialization types often cannot meet every need; for example, to pass a custom bean object around inside the Hadoop framework, that object must implement the serialization interface.
2.1 The class must implement the Writable interface;
2.2 During deserialization the framework uses reflection to call the no-argument constructor, so the bean must provide an empty constructor;
public FlowBean() {
super();
}
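The empty constructor is needed because the framework builds the bean itself, by reflection, before filling it in with readFields. A minimal sketch of what that instantiation looks like, using Hadoop's ReflectionUtils (the Configuration here is only for illustration):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
// The framework first creates an empty bean, then populates it by calling readFields;
// this only works if FlowBean provides a public no-argument constructor.
FlowBean bean = ReflectionUtils.newInstance(FlowBean.class, new Configuration());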
2.3 Override the serialization method
/* Serialization method
 * dataOutput: the output provided to us by the framework
 */
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeLong(upFlow);
dataOutput.writeLong(downFlow);
dataOutput.writeLong(sumFlow);
}
2.4 Override the deserialization method
/* Deserialization method
 * dataInput: the data source provided by the framework
 * Note: the fields must be read in exactly the same order they were written
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
upFlow = dataInput.readLong();
downFlow = dataInput.readLong();
sumFlow = dataInput.readLong();
}
3. Case study
3.1 Write FlowwBean
package com.wn.flow;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class FlowwBean implements Writable {
private long upFlow;
private long downFlow;
private long sumFlow;
public FlowwBean() {
}
@Override
public String toString() {
return "FlowwBean{" +
"upFlow=" + upFlow +
", downFlow=" + downFlow +
", sumFlow=" + sumFlow +
'}';
}
public void set(long upFlow, long downFlow){
this.upFlow=upFlow;
this.downFlow=downFlow;
this.sumFlow=upFlow+downFlow;
}
public long getDownFlow() {
return downFlow;
}
public void setDownFlow(long downFlow) {
this.downFlow = downFlow;
}
public long getSumFlow() {
return sumFlow;
}
public void setSumFlow(long sumFlow) {
this.sumFlow = sumFlow;
}
public long getUpFlow() {
return upFlow;
}
public void setUpFlow(long upFlow) {
this.upFlow = upFlow;
}
/* Serialization method
 * dataOutput: the output provided to us by the framework
 */
@Override
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeLong(upFlow);
dataOutput.writeLong(downFlow);
dataOutput.writeLong(sumFlow);
}
/* The read order must exactly match the write order */
/* Deserialization method
 * dataInput: the data source provided by the framework
 */
@Override
public void readFields(DataInput dataInput) throws IOException {
upFlow=dataInput.readLong();
downFlow=dataInput.readLong();
sumFlow=dataInput.readLong();
}
}
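A quick way to convince yourself that write and readFields are symmetric is a local round trip through a byte array, similar to what the framework does with the map output. The class name RoundTripTest and the sample values below are only illustrative:
package com.wn.flow;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
public class RoundTripTest {
    public static void main(String[] args) throws Exception {
        // Serialize a bean into a byte array
        FlowwBean original = new FlowwBean();
        original.set(1116, 954);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        original.write(new DataOutputStream(buffer));
        // Deserialize into a fresh, empty bean; readFields reads the fields in the write order
        FlowwBean copy = new FlowwBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(buffer.toByteArray())));
        System.out.println(copy);  // FlowwBean{upFlow=1116, downFlow=954, sumFlow=2070}
    }
}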
3.2 Write FlowMapper
package com.wn.flow;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FlowMapper extends Mapper<LongWritable, Text,Text,FlowwBean> {
private Text phone=new Text();
private FlowwBean flow=new FlowwBean();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] split = value.toString().split("\t");
phone.set(split[1]);
long upFlow = Long.parseLong(split[split.length - 3]);
long downFlow = Long.parseLong(split[split.length - 2]);
flow.set(upFlow,downFlow);
context.write(phone,flow);
}
}
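The mapper assumes tab-separated input in which the phone number is the second field and the up-flow and down-flow are the third- and second-to-last fields. The input file itself is not shown here, so the line in the sketch below is a made-up example used only to illustrate how the indices split[1], split[split.length - 3] and split[split.length - 2] are picked:
public class ParseExample {
    public static void main(String[] args) {
        // Hypothetical input line; the field layout and values are invented for illustration only
        String line = "1\t13736230513\t192.196.100.1\twww.wn.com\t2481\t24681\t200";
        String[] split = line.split("\t");
        String phone  = split[1];                                 // "13736230513"
        long upFlow   = Long.parseLong(split[split.length - 3]);  // 2481
        long downFlow = Long.parseLong(split[split.length - 2]);  // 24681
        System.out.println(phone + " up=" + upFlow + " down=" + downFlow);
    }
}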
3.3 Write FlowReducer
package com.wn.flow;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.io.Text;
import java.io.IOException;
public class FlowReducer extends Reducer<Text,FlowwBean,Text,FlowwBean> {
private FlowwBean sumFlow=new FlowwBean();
@Override
protected void reduce(Text key, Iterable<FlowwBean> values, Context context) throws IOException, InterruptedException {
long sumUpFlow=0;
long sumDownFlow=0;
for (FlowwBean value:values){
sumUpFlow+=value.getUpFlow();
sumDownFlow+=value.getDownFlow();
}
sumFlow.set(sumUpFlow,sumDownFlow);
context.write(key,sumFlow);
}
}
3.4 Write FlowDriver
package com.wn.flow;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FlowDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// Get an instance of Job
Job job = Job.getInstance(new Configuration());
// Set the classpath (the jar containing this driver)
job.setJarByClass(FlowDriver.class);
// Set the mapper and reducer
job.setMapperClass(FlowMapper.class);
job.setReducerClass(FlowReducer.class);
// Set the mapper and reducer output types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(FlowwBean.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(FlowwBean.class);
// Set the input and output paths
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Submit the job
boolean b = job.waitForCompletion(true);
System.exit(b ? 0 : 1);
}
}
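After packaging these classes into a jar, the job can be submitted in the usual way, for example with hadoop jar <your-jar> com.wn.flow.FlowDriver <input path> <output path>. With the default TextOutputFormat, each line of the reducer output will contain the phone number, a tab, and the text produced by FlowwBean's toString() method.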