MapReduce: finding the average number of clicks on a product

The goal is to compute the average number of clicks per product. Following the MapReduce model, the Mapper converts each input record into a (product, clicks) key-value pair, the pairs are shuffled and grouped by product, and the Reducer then computes and outputs the average click count for each product.
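For example, assuming the input is tab-separated lines of product ID and click count (a hypothetical sample):

    product_1	10
    product_1	20
    product_2	5

the expected output would be product_1 with 15 ((10 + 20) / 2) and product_2 with 5.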
The code implementation:

package com.ks.clickCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.Iterator;

class CCMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static Text goods = new Text();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String val = value.toString();         // convert the Text value to a String
        String[] arr = val.split("\t");        // input format: product<TAB>clicks
        goods.set(arr[0]);                     // key: product ID
        int click = Integer.parseInt(arr[1]);  // value: click count
        context.write(goods, new IntWritable(click));
    }
}

class CCReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        Iterator<IntWritable> iterator = values.iterator();
        // sum the click counts for this product and count the records,
        // so the average can be computed
        int count = 0;
        int sum = 0;
        while (iterator.hasNext()) {
            int v = iterator.next().get();
            sum += v;
            count++;
        }
        int avg = sum / count; // integer division: the average is truncated
        context.write(key, new IntWritable(avg));
    }
}
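Note that sum / count is integer division, so the fractional part of the average is dropped. If decimal precision matters, a minimal sketch of an alternative reducer using DoubleWritable (not part of the original code; it also assumes importing org.apache.hadoop.io.DoubleWritable and setting the job's output value class to DoubleWritable):

class CCAvgReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        long sum = 0;
        for (IntWritable v : values) { // iterate the grouped click counts
            sum += v.get();
            count++;
        }
        // floating-point division keeps the fractional part of the average
        context.write(key, new DoubleWritable((double) sum / count));
    }
}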

public class ClickCountMR {

    public static void main(String[] args) throws Exception {
        // use the default configuration
        Configuration conf = new Configuration();
        //conf.set("fs.defaultFS","hdfs://hdoop01:9000");
        // create a Job
        Job job = Job.getInstance(conf);
        // set the job's main class
        job.setJarByClass(ClickCountMR.class);
        // set the custom Mapper and Reducer classes
        job.setMapperClass(CCMapper.class);
        job.setReducerClass(CCReducer.class);
        // Mapper output key and value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // final (Reducer) output key and value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);
        // set the job's input and output paths
        FileInputFormat.setInputPaths(job, args[0]);
        Path outputPath = new Path(args[1]);

        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outputPath)) {
            // delete any existing output directory recursively,
            // otherwise the job fails on startup
            fs.delete(outputPath, true);
        }

        FileOutputFormat.setOutputPath(job, outputPath);
        // submit the job and wait for it to finish
        boolean isDone = job.waitForCompletion(true);

        // exit with 0 on success
        System.exit(isDone ? 0 : -1);
    }

}
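To run the job, the classes would typically be packaged into a jar and submitted with the hadoop command; a sketch, where the jar name and HDFS paths are placeholders:

hadoop jar clickcount.jar com.ks.clickCount.ClickCountMR /input/clicks /output/clickavg

The first argument is the input directory read by FileInputFormat.setInputPaths, and the second is the output directory, which the driver deletes first if it already exists.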
