Using MapReduce to compute each user's total traffic

1, The raw data
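
The original post does not reproduce the raw data, so the record below is only a hypothetical example. It is tab-separated (tabs shown as spaces here) and consistent with the mapper further down: the phone number sits in the second column, and the upstream and downstream byte counts sit in the third-to-last and second-to-last columns.

1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200

Only those three columns are used; every other column is ignored.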

2, Create the Java project

  1) New Project

  2) Import the required jars (a Maven alternative is sketched below)

  hadoop-2.7.3\share\hadoop\mapreduce

  + the jars under hadoop-2.7.3\share\hadoop\hdfs

  + the jars under hadoop-2.7.3\share\hadoop\common
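
  If the project uses Maven instead of copying jars by hand, a single dependency on the Hadoop client libraries should cover the MapReduce, HDFS and common classes used below. This is an assumption about the build setup, not part of the original post; the version is matched to the hadoop-2.7.3 install above.

  <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.7.3</version>
  </dependency>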

3, Write the code

  1) The entity class (Flow)

package com.zy.flow;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class Flow implements Writable {
    private Text phone;
    private LongWritable upflow;
    private LongWritable downflow;
    private LongWritable sumflow;
    // This object is shipped across the cluster later, so it must be serializable.

    // Serialization and deserialization must read and write the fields in the same order.
    @Override // called during deserialization
    public void readFields(DataInput in) throws IOException {
        phone = new Text(in.readUTF());
        upflow = new LongWritable(in.readLong());
        downflow = new LongWritable(in.readLong());
        sumflow = new LongWritable(in.readLong());
    }

    @Override // called during serialization
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phone.toString());
        out.writeLong(upflow.get());
        out.writeLong(downflow.get());
        out.writeLong(sumflow.get());
    }
    public Text getPhone() {
        return phone;
    }
    public void setPhone(Text phone) {
        this.phone = phone;
    }
    public LongWritable getUpflow() {
        return upflow;
    }
    public void setUpflow(LongWritable upflow) {
        this.upflow = upflow;
    }
    public LongWritable getDownflow() {
        return downflow;
    }
    public void setDownflow(LongWritable downflow) {
        this.downflow = downflow;
    }
    public LongWritable getSumflow() {
        return sumflow;
    }
    public void setSumflow(LongWritable sumflow) {
        this.sumflow = sumflow;
    }
    public Flow() {
        
    }
    public Flow(Text phone, LongWritable upflow, LongWritable downflow, LongWritable sumflow) {
        super();
        this.phone = phone;
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = sumflow;
    }
    public Flow(LongWritable upflow, LongWritable downflow, LongWritable sumflow) {
        super();
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = sumflow;
    }
    
    @Override
    public String toString() {
        
        return upflow+"\t"+downflow+"\t"+sumflow;
    }
    

}

  2) FlowMap class

package com.zy.flow;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FlowMap extends Mapper<LongWritable, Text, Text, Flow>{

    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Flow>.Context context)
            throws IOException, InterruptedException {
        // value is one line of the raw log
        // split the line and pick out the useful columns
        String[] split = value.toString().split("\t");
        int length = split.length;
        // the columns we need: split[1] (phone), split[length-3] (upflow), split[length-2] (downflow)
        String phone = split[1];
        long upflow = Long.parseLong(split[length - 3]);
        long downflow = Long.parseLong(split[length - 2]);
        long sumflow = upflow + downflow;
        // emit: key = phone, value = the whole Flow record
        context.write(new Text(phone), new Flow(new Text(phone), new LongWritable(upflow), new LongWritable(downflow), new LongWritable(sumflow)));
        // phone is not used on the value side, but it must still be set; otherwise serialization throws a NullPointerException
    }
}
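
With the hypothetical record from section 1, this mapper would emit the key 13726230503 and a Flow value whose toString() is 2481, 24681 and 27162 (up, down and total traffic) separated by tabs.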

  3) Part (partition) class

package com.zy.flow;
import java.util.HashMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class Part extends Partitioner<Text, Flow> { // partitioner
    // the partitioning logic is written by hand

    HashMap<String, Integer> map = new HashMap<>();

    public void setMap() {
        map.put("135", 0);
        map.put("136", 1);
        map.put("137", 2);
        map.put("138", 3);
        map.put("139", 4);
    }

    @Override
    public int getPartition(Text key, Flow value, int numPartitions) {

        setMap();
        // compare the first three digits of the phone number with the map to decide the partition
        String substring = key.toString().substring(0, 3); // e.g. "135"

        return map.get(substring) == null ? 5 : map.get(substring); // e.g. prefix "135" goes to partition 0
        // any other prefix goes to partition 5
    }
    // this logic splits the data into 6 partitions, so 6 ReduceTasks are specified later

}
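
Continuing the hypothetical record from section 1: the key 13726230503 starts with "137", which the map sends to partition 2, so that record ends up in output file part-r-00002; any prefix other than 135-139 falls through to partition 5 (part-r-00005).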

  4) FlowReduce class

package com.zy.flow;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowReduce extends Reducer<Text, Flow, Text, Flow>{
    @Override
    protected void reduce(Text key, Iterable<Flow> value, Reducer<Text, Flow, Text, Flow>.Context context)
            throws IOException, InterruptedException {
        // accumulate the up and down traffic for this phone number
        long allup=0;
        long alldown=0;
        for (Flow flow : value) {
            allup+=Long.parseLong(flow.getUpflow().toString());
            alldown+=Long.parseLong(flow.getDownflow().toString());
            
        }
        long allsum=allup+alldown;
        context.write(key, new Flow(new Text(key), new LongWritable(allup),  new LongWritable(alldown),  new LongWritable(allsum)));
    }

}

  5) FlowApp class

package com.zy.flow;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowApp {

    public static void main(String[] args) throws Exception {
        // create the configuration object
        Configuration configuration = new Configuration();
        // get a Job instance
        Job job = Job.getInstance(configuration);
        // specify the class that runs the job
        job.setJarByClass(FlowApp.class);

        // specify the Mapper of the job
        job.setMapperClass(FlowMap.class);
        // specify the output key and value types of the mapper
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Flow.class);

        // specify the Reducer of the job and its output types
        job.setReducerClass(FlowReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flow.class);

        // -----
        // specify the Partitioner to use
        job.setPartitionerClass(Part.class);
        // specify the number of ReduceTasks (must match the 6 partitions above)
        job.setNumReduceTasks(6);
        // -----

        // specify the input path (passed as a runtime argument)
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // specify the output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // submit the job and wait for it to finish
        job.waitForCompletion(true);

    }

}

4, Run

  1) Package the project into a jar

  2) Upload the jar to Linux

  3) Run it on the cluster (an example is sketched below)
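
The original post does not list the exact commands. Assuming the jar is named flow.jar and the HDFS paths are placeholders, running the job and inspecting one partition's output would look roughly like this (the output directory must not exist beforehand):

hadoop jar flow.jar com.zy.flow.FlowApp /flow/input /flow/output
hdfs dfs -cat /flow/output/part-r-00002

Each output line is the phone number followed by the up, down and total traffic from Flow.toString().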

 


Origin www.cnblogs.com/qfdy123/p/11246635.html