Continued from the previous article: https://www.cnblogs.com/sengzhao666/p/11850849.html
2. Data processing tasks:
· Count the Top 10 most-visited videos/articles (by the id field)
· Count the Top 10 cities with the most visits (by the ip field)
· Count the Top 10 entries by traffic (by the traffic field)
Each task is done in two steps: counting, then sorting.
A sample of the beginning of the input file:
1.192.25.84 2016-11-10-00:01:14 10 54 video 5551 1.194.144.222 2016-11-10-00:01:20 10 54 video 3589 1.194.187.2 2016-11-10-00:01:05 10 54 video 2212 1.203.177.243 2016-11-10-00:01:18 10 6050 video 7361 1.203.177.243 2016-11-10-00:01:19 10 72 video 7361 1.203.177.243 2016-11-10-00:01:22 10 6050 video 7361 1.30.162.63 2016-11-10-00:01:46 10 54 video 3639 1.84.205.195 2016-11-10-00:01:12 10 54 video 1412
statistics:
package priv.tzk.mapreduce.dataProcess.visits; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; Import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; public class DataVisits { public static String INPUT_PATH = "/ Home / Hadoop / OUT" ; public static String OUTPUT_PATH = "HDFS: // localhost: 9000 / The mapReduce / mymapreduce1 / OUT " ; public static class the Map the extends Mapper <Object, Text, Text, IntWritable> { // the output as the input string type, corresponding to the type Text Private static Text = newKey new new Text (); // for each data row as a public void the Map (Object Key, Text value, context context)throws IOException, InterruptedException { String Line = value.toString (); // it to a string // System.out.println (Line); IF (!. ( "" the equals (Line))) // increase control statement so that the line is "able to stop the" when. Otherwise, the data does not meet the acceptance is not performed reduce reduce { String ARR [] = line.split ( "\ T"); // SPLite is split into an array by values entered newKey.set (ARR [. 5 ]); int =. 
1 the Click ; context.write (newKey, new new IntWritable (the Click)); //System.out.println(newKey+" "+new IntWritable(click)); } } } public static class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{ public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{ int count=0; for(IntWritable val:values) { //Iterable迭代器 count++; } context.write(key,new IntWritable(count)); //System.out.println("reduceStart"); } } public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException{ Configuration conf=new Configuration(); System.out.println("start"); Job job=Job.getInstance(conf); job.setJobName("MyAverage"); //Job job =new Job(conf,"MyAverage"); job.setJarByClass(DataVisits.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class);//设置map的输出格式 job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); Path outputpath=new Path(OUTPUT_PATH); Path inputpath=new Path(INPUT_PATH); FileInputFormat.addInputPath(job,inputpath ); FileOutputFormat.setOutputPath(job,outputpath); boolean flag = job.waitForCompletion(true); System.out.println(flag); System.exit(flag? 0 : 1); } }
Example of partial counting results:
10061 1 10077 1 10198 1 10290 1 10314 1 10324 1 1034 1 10400 1 10421 1 10427 1 10450 1 10505 1 10506 7 10511 1
Sort the statistical results:
package priv.tzk.mapreduce.dataProcess.visits; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; importorg.apache.hadoop.mapreduce.lib.input.TextInputFormat; Import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; Import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; public class visitsSort { public static INPUT_PATH = String "/ Home / Hadoop / visits_out" ; public static String OUTPUT_PATH = "HDFS: // localhost: 9000 / The mapReduce / mymapreduce1 / OUT1 of" ; public static class the Sort the extends WritableComparator { public the Sort () { // this is to see you fill in the map output key is what type of data, give what type super(IntWritable. Class , to true ); } @Override public int Compare (WritableComparable A, B WritableComparable) { return -a.compareTo (B); // add negative sign is reverse, the negative sign is positive sequence removed. } } Public static class the Map the extends Mapper <Object, Text, IntWritable, Text> { // the output as the input string type, corresponding to the type Text Private static Text = MID new new Text (); Private static IntWritable NUM = new new IntWritable (); public void Map (Object Key, the Text value, the Context context) throws IOException, InterruptedException { String Line = value.toString (); // Switch string type IF (( "" the equals (Line))!.) // increase control statement, making the line is "able to stop the" when. 
Otherwise, the data does not meet the acceptance is not performed reduce reduce { String ARR [] = line.split ( "\ T"); // SPLite is split into an array by values entered mid.set (ARR [0 ]); NUM .set (the Integer.parseInt (ARR [ . 1 ])); context.write (NUM, MID); } } } //MapReduce framework default collation. It is ordered by key value public static class the Reduce the extends the Reducer <IntWritable, the Text, IntWritable, the Text> { Private static int I = 0 ; public void the reduce (IntWritable key, the Iterable <the Text> values, the Context context) throws IOException, {InterruptedException for (the Text Val: values) { // the Iterable iterator IF (I <10 ) { I ++ ; context.write (Key, Val); } } //System.out.println("reduceStart"); } } public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException{ Configuration conf=new Configuration(); System.out.println("start"); Job job=Job.getInstance(conf); //Job job =new Job(conf,""); job.setJarByClass(visitsSort.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); job.setSortComparatorClass(Sort.class); //设置map的输出格式 job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); Path outputpath=new Path(OUTPUT_PATH); Path inputpath=new Path(INPUT_PATH); FileInputFormat.addInputPath(job,inputpath ); FileOutputFormat.setOutputPath(job,outputpath); boolean flag = job.waitForCompletion(true); System.out.println(flag); System.exit(flag? 0 : 1); } }
Sort results:
31 2402 19 1309 18 3078 18 2801 16 5683 16 3369 16 1336 16 4018 15 11239 15 13098