MapReduce data processing: statistics and sorting

Continued from the previous article: https://www.cnblogs.com/sengzhao666/p/11850849.html

2. Data processing:

· Find the Top 10 most-visited videos/articles (grouped by the id field mentioned above)

· Find the Top 10 most popular courses by city (grouped by ip)

· Find the Top 10 most popular courses by traffic (grouped by traffic)

This is done in two steps:

First count the visits (statistics), then sort the counts.

The first few lines of the sample input file:

1.192.25.84    2016-11-10-00:01:14    10    54    video    5551    
1.194.144.222    2016-11-10-00:01:20    10    54    video    3589    
1.194.187.2    2016-11-10-00:01:05    10    54    video    2212    
1.203.177.243    2016-11-10-00:01:18    10    6050    video    7361    
1.203.177.243    2016-11-10-00:01:19    10    72    video    7361    
1.203.177.243    2016-11-10-00:01:22    10    6050    video    7361    
1.30.162.63    2016-11-10-00:01:46    10    54    video    3639    
1.84.205.195    2016-11-10-00:01:12    10    54    video    1412

Step 1 - statistics (count the visits per id):

package priv.tzk.mapreduce.dataProcess.visits;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public  class DataVisits {
     public  static String INPUT_PATH = "/ Home / Hadoop / OUT" ;  
     public  static String OUTPUT_PATH = "HDFS: // localhost: 9000 / The mapReduce / mymapreduce1 / OUT " ;     

    public  static  class the Map the extends Mapper <Object, Text, Text, IntWritable> {     // the output as the input string type, corresponding to the type Text 
            Private  static Text = newKey new new Text ();     // for each data row as a   
            public  void the Map (Object Key, Text value, context context)throws IOException, InterruptedException {    
                String Line = value.toString (); // it to a string
                 // System.out.println (Line); 
                IF (!. ( "" the equals (Line))) // increase control statement so that the line is "able to stop the" when. Otherwise, the data does not meet the acceptance is not performed reduce reduce 
                { 
                    String ARR [] = line.split ( "\ T"); // SPLite is split into an array by values entered 
                    newKey.set (ARR [. 5 ]);
                     int =. 1 the Click ; 
                    context.write (newKey, new new IntWritable (the Click)); 
                     //System.out.println(newKey+"  "+new IntWritable(click));
                }
             } 
         }   
         
    public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{   
        public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{      
                 int count=0;
                 for(IntWritable val:values) {
                     //Iterable迭代器
                     count++;
                 }         
                 context.write(key,new IntWritable(count));
                 //System.out.println("reduceStart");
             }   
        }    
        
        public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException{              
            Configuration conf=new Configuration();   
            System.out.println("start");
            Job job=Job.getInstance(conf); 
            job.setJobName("MyAverage");
            //Job job =new Job(conf,"MyAverage");
            job.setJarByClass(DataVisits.class);
            job.setMapperClass(Map.class);  
            job.setReducerClass(Reduce.class);
            job.setOutputKeyClass(Text.class);  
            job.setOutputValueClass(IntWritable.class);//设置map的输出格式
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            Path outputpath=new Path(OUTPUT_PATH); 
            Path inputpath=new Path(INPUT_PATH); 
            FileInputFormat.addInputPath(job,inputpath );  
            FileOutputFormat.setOutputPath(job,outputpath);  
            boolean flag = job.waitForCompletion(true);
            System.out.println(flag);
            System.exit(flag? 0 : 1);
         }
        
}

Example of the (partial) statistics output:

Step 2 - sort the statistics output:

package priv.tzk.mapreduce.dataProcess.visits;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public  class visitsSort {
     public  static INPUT_PATH = String "/ Home / Hadoop / visits_out" ;  
     public  static String OUTPUT_PATH = "HDFS: // localhost: 9000 / The mapReduce / mymapreduce1 / OUT1 of" ;     
        
    public  static  class the Sort the extends WritableComparator {
         public the Sort () {
         // this is to see you fill in the map output key is what type of data, give what type 
        super(IntWritable. Class , to true ); 
        } 
        @Override 
        public  int Compare (WritableComparable A, B WritableComparable) {
         return -a.compareTo (B); // add negative sign is reverse, the negative sign is positive sequence removed. 
        } 
    } 
    
    Public  static  class the Map the extends Mapper <Object, Text, IntWritable, Text> {     // the output as the input string type, corresponding to the type Text 
            Private  static Text = MID new new Text (); 
             Private  static IntWritable NUM = new new IntWritable ();
             public void Map (Object Key, the Text value, the Context context) throws IOException, InterruptedException {    
                String Line = value.toString (); // Switch string type 
                IF (( "" the equals (Line))!.) // increase control statement, making the line is "able to stop the" when. Otherwise, the data does not meet the acceptance is not performed reduce reduce 
                { 
                    String ARR [] = line.split ( "\ T"); // SPLite is split into an array by values entered 
                    mid.set (ARR [0 ]); 
                    NUM .set (the Integer.parseInt (ARR [ . 1 ])); 
                    context.write (NUM, MID); 
                } 
             } 
         }    
         //MapReduce framework default collation. It is ordered by key value 
    public  static  class the Reduce the extends the Reducer <IntWritable, the Text, IntWritable, the Text> { 
         Private  static  int I = 0 ;
         public  void the reduce (IntWritable key, the Iterable <the Text> values, the Context context) throws IOException, {InterruptedException       

                 for (the Text Val: values) {
                      // the Iterable iterator 
                     IF (I <10 ) { 
                         I ++ ; 
                         context.write (Key, Val); 
                     } 
                 }
                 //System.out.println("reduceStart");
             }   
        }    
        
        public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException{              
            Configuration conf=new Configuration();   
            System.out.println("start");
            Job job=Job.getInstance(conf); 
            //Job job =new Job(conf,"");
            job.setJarByClass(visitsSort.class);
            job.setMapperClass(Map.class);  
            job.setReducerClass(Reduce.class);
            job.setSortComparatorClass(Sort.class);
            //设置map的输出格式
            job.setOutputKeyClass(IntWritable.class);  
            job.setOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            Path outputpath=new Path(OUTPUT_PATH); 
            Path inputpath=new Path(INPUT_PATH); 
            FileInputFormat.addInputPath(job,inputpath );  
            FileOutputFormat.setOutputPath(job,outputpath);  
            boolean flag = job.waitForCompletion(true);
            System.out.println(flag);
            System.exit(flag? 0 : 1);
         }
        
}

Sort results:

31    2402
19    1309
18    3078
18    2801
16    5683
16    3369
16    1336
16    4018
15    11239
15    13098

mapreduce data processing - sort Statistics

Guess you like