1. The raw data
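The original data file is not reproduced here. Judging from the Mapper in section 3, each line is tab-separated, with the phone number in the second column and the upstream and downstream byte counts in the third-to-last and second-to-last columns. A made-up line in that shape (the values are illustrative, not from the source):

1363157985066	13726230503	...	2481	24681	200

(timestamp, phone number, other columns, upflow, downflow, status code)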
2. Set up the Java project
1) New Project
2) Add the dependency jars
hadoop-2.7.3\share\hadoop\mapreduce
plus the jars under hdfs
plus the jars under common
3. Write the project code
1) The entity class
package com.zy.flow;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class Flow implements Writable {

    private Text phone;
    private LongWritable upflow;
    private LongWritable downflow;
    private LongWritable sumflow;

    // This object is transmitted across the cluster later, so it must be
    // serializable, and serialization and deserialization must process the
    // fields in the same order.

    @Override // called during deserialization
    public void readFields(DataInput in) throws IOException {
        phone = new Text(in.readUTF());
        upflow = new LongWritable(in.readLong());
        downflow = new LongWritable(in.readLong());
        sumflow = new LongWritable(in.readLong());
    }

    @Override // called during serialization
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phone.toString());
        out.writeLong(upflow.get());
        out.writeLong(downflow.get());
        out.writeLong(sumflow.get());
    }

    public Text getPhone() {
        return phone;
    }

    public void setPhone(Text phone) {
        this.phone = phone;
    }

    public LongWritable getUpflow() {
        return upflow;
    }

    public void setUpflow(LongWritable upflow) {
        this.upflow = upflow;
    }

    public LongWritable getDownflow() {
        return downflow;
    }

    public void setDownflow(LongWritable downflow) {
        this.downflow = downflow;
    }

    public LongWritable getSumflow() {
        return sumflow;
    }

    public void setSumflow(LongWritable sumflow) {
        this.sumflow = sumflow;
    }

    public Flow() {
    }

    public Flow(Text phone, LongWritable upflow, LongWritable downflow, LongWritable sumflow) {
        super();
        this.phone = phone;
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = sumflow;
    }

    public Flow(LongWritable upflow, LongWritable downflow, LongWritable sumflow) {
        super();
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = sumflow;
    }

    @Override
    public String toString() {
        return upflow + "\t" + downflow + "\t" + sumflow;
    }
}
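Because the framework deserializes fields in exactly the order write() emitted them, a mismatch silently corrupts the record. A minimal standalone round-trip sketch of this, assuming the Flow class above is on the classpath (the class name FlowRoundTrip and the sample values are made up):

package com.zy.flow;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

public class FlowRoundTrip {
    public static void main(String[] args) throws IOException {
        Flow original = new Flow(new Text("13512345678"),
                new LongWritable(100), new LongWritable(200), new LongWritable(300));

        // Serialize: one UTF string, then three longs.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize: one UTF string, then three longs, in the same order.
        Flow copy = new Flow();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy); // prints: 100	200	300
    }
}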
2) FlowMap class
package com.zy.flow;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowMap extends Mapper<LongWritable, Text, Text, Flow> {

    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, Flow>.Context context)
            throws IOException, InterruptedException {
        // Split the input line and pick out the columns of interest:
        // split[1], split[length - 3] and split[length - 2].
        String[] split = value.toString().split("\t");
        int length = split.length;
        String phone = split[1];
        long upflow = Long.parseLong(split[length - 3]);
        long downflow = Long.parseLong(split[length - 2]);
        long sumflow = upflow + downflow;
        // Emit. The phone field is not printed by toString(), but it must
        // still be given a value, otherwise serialization throws a
        // NullPointerException.
        context.write(new Text(phone), new Flow(new Text(phone),
                new LongWritable(upflow), new LongWritable(downflow),
                new LongWritable(sumflow)));
    }
}
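For instance, given a hypothetical line whose second column is 13726230503 and whose last three columns are 2481, 24681 and 200, the mapper emits the key 13726230503 with a Flow value of upflow 2481, downflow 24681, sumflow 27162.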
3) Part (partitioner) class
package com.zy.flow;

import java.util.HashMap;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class Part extends Partitioner<Text, Flow> {
    // Custom partitioning; the logic is entirely up to us.
    HashMap<String, Integer> map = new HashMap<String, Integer>();

    public void setMap() {
        map.put("135", 0);
        map.put("136", 1);
        map.put("137", 2);
        map.put("138", 3);
        map.put("139", 4);
    }

    @Override
    public int getPartition(Text key, Flow value, int numPartitions) {
        setMap();
        // Compare the first three digits of the incoming phone number against
        // the map to decide which partition the record goes to, e.g. a number
        // starting with 135 goes to partition 0.
        String substring = key.toString().substring(0, 3);
        // Prefixes not in the map all go to partition 5.
        return map.get(substring) == null ? 5 : map.get(substring);
    }
    // This logic splits the data into 6 partitions, so 6 ReduceTasks must be
    // specified later.
}
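A quick way to sanity-check this logic is to call getPartition directly. A minimal sketch, assuming the Part class above (the class name PartCheck and the phone numbers are made up):

package com.zy.flow;

import org.apache.hadoop.io.Text;

public class PartCheck {
    public static void main(String[] args) {
        Part part = new Part();
        // "135" is in the map, so this record goes to partition 0
        // (output file part-r-00000).
        System.out.println(part.getPartition(new Text("13512345678"), null, 6)); // 0
        // "150" is not in the map, so the record falls through to partition 5
        // (output file part-r-00005).
        System.out.println(part.getPartition(new Text("15012345678"), null, 6)); // 5
    }
}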
4) FlowReduce class
package com.zy.flow;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowReduce extends Reducer<Text, Flow, Text, Flow> {

    @Override
    protected void reduce(Text key, Iterable<Flow> value,
            Reducer<Text, Flow, Text, Flow>.Context context)
            throws IOException, InterruptedException {
        // Accumulate the up and down flow of every record for this phone number.
        long allup = 0;
        long alldown = 0;
        for (Flow flow : value) {
            allup += Long.parseLong(flow.getUpflow().toString());
            alldown += Long.parseLong(flow.getDownflow().toString());
        }
        long allsum = allup + alldown;
        context.write(key, new Flow(new Text(key), new LongWritable(allup),
                new LongWritable(alldown), new LongWritable(allsum)));
    }
}
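For example, if the shuffle delivers two records for the same phone number with up/down flows of (2481, 24681) and (19, 2963) (made-up values), the reducer emits upflow 2500, downflow 27644, sumflow 30144.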
5) FlowApp class
package com.zy.flow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowApp {
    public static void main(String[] args) throws Exception {
        // Create the configuration object.
        Configuration configuration = new Configuration();
        // Get a job instance.
        Job job = Job.getInstance(configuration);
        // Specify the class that runs the job.
        job.setJarByClass(FlowApp.class);
        // Specify the Mapper used in the job.
        job.setMapperClass(FlowMap.class);
        // Specify the output key and value types of the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Flow.class);
        // Specify the Reducer used in the job.
        job.setReducerClass(FlowReduce.class);
        // Specify the output key and value types of the reducer.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flow.class);
        // -----
        // Specify the Partitioner class to use.
        job.setPartitionerClass(Part.class);
        // Specify the number of ReduceTasks, one per partition.
        job.setNumReduceTasks(6);
        // -----
        // Specify the input and output paths, filled in as runtime arguments.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Submit the job.
        job.waitForCompletion(true);
    }
}
4. Run
1) Packaging
2) Upload to Linux
3) Run
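A typical invocation, assuming the project was exported as flow.jar and the data file is flow.log (both names are placeholders):

hadoop fs -mkdir -p /flow/input
hadoop fs -put flow.log /flow/input
hadoop jar flow.jar com.zy.flow.FlowApp /flow/input /flow/output

The output directory must not exist beforehand, and because the job sets 6 ReduceTasks the results appear as part-r-00000 through part-r-00005, one file per partition.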