mapreduce data cleaning - the second stage

Description of the fields in the cleaned result file:

Ip: 106.39.41.166 (client IP address, used to derive the city)

Date: 10/Nov/2016:00:01:02 +0800 (access date)

Day: 10 (day of the month)

Traffic: 54 (traffic volume)

Type: video (content type: "video" or "article")

Id: 8701 (id of the video or article)

Testing requirements:

2. Data processing requirements:

· Top 10 most-visited videos/articles (by the type and id fields)

· Top 10 most popular cities by request count (by the ip field)

· Top 10 videos/articles by total traffic (by the traffic field)

1、

package test4;

import java.io.IOException;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.Reducer.Context;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class quchong {

public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException {

Job job = Job.getInstance();

job.setJobName("paixu");

job.setJarByClass(quchong.class);

job.setMapperClass(doMapper.class);

job.setReducerClass(doReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

Path in = new Path("hdfs://localhost:9000/test/in/result");

Path out = new Path("hdfs://localhost:9000/test/stage3/out1");

FileInputFormat.addInputPath(job,in);

FileOutputFormat.setOutputPath(job,out);

////

if(job.waitForCompletion(true)){

Job job2 = Job.getInstance();

job2.setJobName("paixu");

        job2.setJarByClass(quchong.class);  

        job2.setMapperClass(doMapper2.class);  

        job2.setReducerClass(doReduce2.class);  

        job2.setOutputKeyClass(IntWritable.class);  

        job2.setOutputValueClass(Text.class);  

        job2.setSortComparatorClass(IntWritableDecreasingComparator.class);

        job2.setInputFormatClass(TextInputFormat.class);  

        job2.setOutputFormatClass(TextOutputFormat.class);  

        Path in2=new Path("hdfs://localhost:9000/test/stage3/out1/part-r-00000");  

        Path out2=new Path("hdfs://localhost:9000/test/stage3/out2");

        FileInputFormat.addInputPath(job2,in2);  

        FileOutputFormat.setOutputPath(job2,out2);  

System.exit(job2.waitForCompletion(true) ? 0 : 1);

}

}

public static class doMapper extends Mapper<Object,Text,Text,IntWritable>{

public static final IntWritable one = new IntWritable(1);

public static Text word = new Text();

@Override

protected void map(Object key, Text value, Context context)

throws IOException,InterruptedException {

//StringTokenizer tokenizer = new StringTokenizer(value.toString(),"  ");

   String[] strNlist = value.toString().split(",");

  // String str=strNlist[3].trim();

   String str2=strNlist[4]+strNlist[5];

// Integer temp= Integer.valueOf(str);

word.set(str2);

//IntWritable abc = new IntWritable(temp);

context.write(word,one);

}

}

public static class doReducer extends Reducer<Text,IntWritable,Text,IntWritable>{

private IntWritable result = new IntWritable();

@Override

protected void reduce(Text key,Iterable<IntWritable> values,Context context)

throws IOException,InterruptedException{

int sum = 0;

for (IntWritable value : values){

sum += value.get();

}

result.set(sum);

context.write(key,result);

}

}

/////////////////

public static class doMapper2 extends Mapper<Object , Text , IntWritable,Text>{

private static Text goods=new Text();  

    private static IntWritable num=new IntWritable();  

@Override

protected void map(Object key, Text value, Context context)

throws IOException,InterruptedException {

 String line=value.toString();  

    String arr[]=line.split(" ");

    num.set(Integer.parseInt(arr[1]));  

    goods.set(arr[0]);

    context.write(num,goods);

}

}

public static class doReduce2 extends Reducer< IntWritable, Text, IntWritable, Text>{  

    private static IntWritable result= new IntWritable();  

    int i=0;

    public void reduce(IntWritable key,Iterable<Text> values,Context context) throws IOException, InterruptedException{  

        for(Text val:values){

         if(i<10)

         {

            context.write(key,val);

         i++;

         }

        }  

        }

        }

 private static class IntWritableDecreasingComparator extends IntWritable.Comparator {

 

     public int compare(WritableComparable a, WritableComparable b) {

         return -super.compare(a, b);

      }

     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {

                return -super.compare(b1, s1, l1, b2, s2, l2);

       }

}

}

  

(Job 1: deduplicate and output visit counts)

(Job 2: sort and output the Top 10)

2、

package test3;

 

import java.io.IOException;

 

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.Reducer.Context;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import test4.quchong.doMapper2;

import test4.quchong.doReduce2;

public class quchong {

public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException {

Job job = Job.getInstance();

job.setJobName("paixu");

job.setJarByClass(quchong.class);

job.setMapperClass(doMapper.class);

job.setReducerClass(doReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

Path in = new Path("hdfs://localhost:9000/test/in/result");

Path out = new Path("hdfs://localhost:9000/test/stage2/out1");

FileInputFormat.addInputPath(job,in);

FileOutputFormat.setOutputPath(job,out);

if(job.waitForCompletion(true)){

Job job2 = Job.getInstance();

job2.setJobName("paixu");

        job2.setJarByClass(quchong.class);  

        job2.setMapperClass(doMapper2.class);  

        job2.setReducerClass(doReduce2.class);  

        job2.setOutputKeyClass(IntWritable.class);  

        job2.setOutputValueClass(Text.class);  

        job2.setSortComparatorClass(IntWritableDecreasingComparator.class);

        job2.setInputFormatClass(TextInputFormat.class);  

        job2.setOutputFormatClass(TextOutputFormat.class);  

        Path in2=new Path("hdfs://localhost:9000/test/stage2/out1/part-r-00000");  

        Path out2=new Path("hdfs://localhost:9000/test/stage2/out2");

        FileInputFormat.addInputPath(job2,in2);  

        FileOutputFormat.setOutputPath(job2,out2);  

System.exit(job2.waitForCompletion(true) ? 0 : 1);

}

}

public static class doMapper extends Mapper<Object,Text,Text,IntWritable>{

public static final IntWritable one = new IntWritable(1);

public static Text word = new Text();

@Override

protected void map(Object key, Text value, Context context)

throws IOException,InterruptedException {

//StringTokenizer tokenizer = new StringTokenizer(value.toString(),"  ");

   String[] strNlist = value.toString().split(",");

  // String str=strNlist[3].trim();

   String str2=strNlist[0];

// Integer temp= Integer.valueOf(str);

word.set(str2);

//IntWritable abc = new IntWritable(temp);

context.write(word,one);

}

}

public static class doReducer extends Reducer<Text,IntWritable,Text,IntWritable>{

private IntWritable result = new IntWritable();

@Override

protected void reduce(Text key,Iterable<IntWritable> values,Context context)

throws IOException,InterruptedException{

int sum = 0;

for (IntWritable value : values){

sum += value.get();

}

result.set(sum);

context.write(key,result);

}

}

////////////////

public static class doMapper2 extends Mapper<Object , Text , IntWritable,Text>{

private static Text goods=new Text();  

    private static IntWritable num=new IntWritable();  

@Override

protected void map(Object key, Text value, Context context)

throws IOException,InterruptedException {

 String line=value.toString();  

    String arr[]=line.split(" ");

    num.set(Integer.parseInt(arr[1]));  

    goods.set(arr[0]);

    context.write(num,goods);

}

}

public static class doReduce2 extends Reducer< IntWritable, Text, IntWritable, Text>{  

    private static IntWritable result= new IntWritable();  

    int i=0;

    public void reduce(IntWritable key,Iterable<Text> values,Context context) throws IOException, InterruptedException{  

        for(Text val:values){

         if(i<10)

         {

            context.write(key,val);

         i++;

         }

        }  

        }

        }

 private static class IntWritableDecreasingComparator extends IntWritable.Comparator {

 

     public int compare(WritableComparable a, WritableComparable b) {

         return -super.compare(a, b);

      }

     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {

                return -super.compare(b1, s1, l1, b2, s2, l2);

       }

}

}

  

(Job 1: deduplicate and output the request count per ip)

(Job 2: sort and output the Top 10)

3、

package test2;

 

import java.io.IOException;

import java.text.SimpleDateFormat;

import java.util.Date;

import java.util.Locale;

import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.Reducer.Context;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

 

import test3.quchong;

import test3.quchong.doMapper2;

import test3.quchong.doReduce2;

public class paixu {

public static void main(String[] args) throws IOException,ClassNotFoundException,InterruptedException {

Job job = Job.getInstance();

job.setJobName("paixu");

job.setJarByClass(paixu.class);

job.setMapperClass(doMapper.class);

job.setReducerClass(doReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(IntWritable.class);

Path in = new Path("hdfs://localhost:9000/test/in/result");

Path out = new Path("hdfs://localhost:9000/test/stage1/out1");

FileInputFormat.addInputPath(job,in);

FileOutputFormat.setOutputPath(job,out);

if(job.waitForCompletion(true)){

Job job2 = Job.getInstance();

job2.setJobName("paixu");

        job2.setJarByClass(quchong.class);  

        job2.setMapperClass(doMapper2.class);  

        job2.setReducerClass(doReduce2.class);  

        job2.setOutputKeyClass(IntWritable.class);  

        job2.setOutputValueClass(Text.class);  

        job2.setSortComparatorClass(IntWritableDecreasingComparator.class);

        job2.setInputFormatClass(TextInputFormat.class);  

        job2.setOutputFormatClass(TextOutputFormat.class);  

        Path in2=new Path("hdfs://localhost:9000/test/stage1/out1/part-r-00000");  

        Path out2=new Path("hdfs://localhost:9000/test/stage1/out2");

        FileInputFormat.addInputPath(job2,in2);  

        FileOutputFormat.setOutputPath(job2,out2);  

System.exit(job2.waitForCompletion(true) ? 0 : 1);

}

}

public static class doMapper extends Mapper<Object,Text,Text,IntWritable>{

public static final IntWritable one = new IntWritable(1);

public static Text word = new Text();

@Override

protected void map(Object key, Text value, Context context)

throws IOException,InterruptedException {

//StringTokenizer tokenizer = new StringTokenizer(value.toString(),"  ");

   String[] strNlist = value.toString().split(",");

   String str=strNlist[3].trim();

   String str2=strNlist[4]+strNlist[5];

 Integer temp= Integer.valueOf(str);

word.set(str2);

IntWritable abc = new IntWritable(temp);

context.write(word,abc);

}

}

public static class doReducer extends Reducer<Text,IntWritable,Text,IntWritable>{

private IntWritable result = new IntWritable();

@Override

protected void reduce(Text key,Iterable<IntWritable> values,Context context)

throws IOException,InterruptedException{

int sum = 0;

for (IntWritable value : values){

sum += value.get();

}

result.set(sum);

context.write(key,result);

}

}

/////////////

public static class doMapper2 extends Mapper<Object , Text , IntWritable,Text>{

private static Text goods=new Text();  

    private static IntWritable num=new IntWritable();  

@Override

protected void map(Object key, Text value, Context context)

throws IOException,InterruptedException {

 String line=value.toString();  

    String arr[]=line.split(" ");

    num.set(Integer.parseInt(arr[1]));  

    goods.set(arr[0]);

    context.write(num,goods);

}

}

public static class doReduce2 extends Reducer< IntWritable, Text, IntWritable, Text>{  

    private static IntWritable result= new IntWritable();  

    int i=0;

    public void reduce(IntWritable key,Iterable<Text> values,Context context) throws IOException, InterruptedException{  

        for(Text val:values){

         if(i<10)

         {

            context.write(key,val);

         i++;

         }

        }  

        }

        }

 private static class IntWritableDecreasingComparator extends IntWritable.Comparator {

 

     public int compare(WritableComparable a, WritableComparable b) {

         return -super.compare(a, b);

      }

     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {

                return -super.compare(b1, s1, l1, b2, s2, l2);

       }

}

}

  

(Job 1: deduplicate and output the total traffic per key)

(Job 2: sort and output the Top 10)

 

To sum up:

Originally I implemented this with two separate driver classes; then I found that one driver can run multiple jobs, so I define two jobs: job1 deduplicates and outputs the totals, and job2 sorts and outputs the Top 10.

In the reduce loop `for (IntWritable val : values)`, records are traversed in ascending key order, but the result we need is descending, so a custom decreasing comparator has to be introduced.

 

 

Guess you like

Origin www.cnblogs.com/jccjcc/p/11862920.html