十、MapReduce 分析网站基本指标

1、理解【网站基本指标】的几个概念
PV即page view 浏览量,页面的浏览次数,用户每打开一个页面就记录1次,多次打开同一页面,则浏览量累计。
UV 即Unique Visitor,独立访客数,1天内同一访客的多次访问只计为1个访客。
VV 即Visit View,访客的访问次数,记录所有访客1天内访问了多少次您的网站。
独立ip数即指1天内用不同的ip地址的用户访问网站的数量。同一ip不管访问了几个页面,独立ip数均为1。

2、分析需求,依据MapReduce 编程模板编写PV 程序

package org.apache.hadoop.mr01;
 
import java.io.IOException;
 
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * MapReduce job that counts page views (PV) per province id.
 *
 * <p>Each input line is a tab-separated access-log record. The mapper emits
 * {@code (provinceId, 1)} for every record that has at least
 * {@link #MIN_FIELD_COUNT} columns, a non-blank URL and a province id that
 * parses as an integer; the reducer sums the ones per province id.
 *
 * <p>Usage: {@code PvMr <input path> <output path>}
 */
public class PvMr extends Configured implements Tool {

    /** Minimum number of tab-separated columns a record must have to be counted. */
    private static final int MIN_FIELD_COUNT = 30;

    /** Zero-based column index of the requested URL. */
    private static final int URL_INDEX = 1;

    /** Zero-based column index of the province id. */
    private static final int PROVINCE_ID_INDEX = 23;

    /** Exit code returned by {@link #run(String[])} when the arguments are unusable. */
    private static final int EXIT_USAGE = 2;

    /**
     * Job counters recording why input records were skipped, so that dropped
     * data is visible in the job report instead of disappearing silently.
     */
    public enum PvCounter {
        /** Record had fewer than {@link PvMr#MIN_FIELD_COUNT} columns. */
        TOO_FEW_FIELDS,
        /** Province id column was blank. */
        BLANK_PROVINCE_ID,
        /** URL column was blank. */
        BLANK_URL,
        /** Province id column could not be parsed as an integer. */
        INVALID_PROVINCE_ID
    }

    /** Emits {@code (provinceId, 1)} for every valid log record. */
    public static class WebPvMapper extends
            Mapper<LongWritable, Text, IntWritable, IntWritable> {

        /** Constant value written for every counted record. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key; Hadoop serializes it on each write. */
        private final IntWritable mapOutputKey = new IntWritable();

        /**
         * Validates one log line and, if valid, emits its province id with a count of one.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one tab-separated log line
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split on tab characters.
            String[] fields = value.toString().split("\\t");
            if (fields.length < MIN_FIELD_COUNT) {
                context.getCounter(PvCounter.TOO_FEW_FIELDS).increment(1L);
                return;
            }

            String provinceIdValue = fields[PROVINCE_ID_INDEX];
            // Skip records without a province id.
            if (StringUtils.isBlank(provinceIdValue)) {
                context.getCounter(PvCounter.BLANK_PROVINCE_ID).increment(1L);
                return;
            }
            // Skip records without a URL.
            if (StringUtils.isBlank(fields[URL_INDEX])) {
                context.getCounter(PvCounter.BLANK_URL).increment(1L);
                return;
            }

            int provinceId;
            try {
                provinceId = Integer.parseInt(provinceIdValue);
            } catch (NumberFormatException e) {
                // Skip records whose province id is not an integer.
                context.getCounter(PvCounter.INVALID_PROVINCE_ID).increment(1L);
                return;
            }

            mapOutputKey.set(provinceId);
            context.write(mapOutputKey, ONE);
        }
    }

    /**
     * Sums the counts of one province id. Because the operation is a plain sum
     * with identical input and output types, the class is also used as the combiner.
     */
    public static class WebPvReducer extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        /** Reused output value; Hadoop serializes it on each write. */
        private final IntWritable outputValue = new IntWritable();

        /**
         * Adds up all counts received for {@code key} and writes the total.
         *
         * @param key     province id
         * @param values  partial counts for that province id
         * @param context task context used to emit output
         * @throws IOException          if writing fails or the total does not fit in an int
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context)
                throws IOException, InterruptedException {
            // Accumulate in a long so that an int overflow is detected instead of
            // silently wrapping around to a wrong (possibly negative) count.
            long sum = 0L;
            for (IntWritable value : values) {
                sum += value.get();
            }
            if (sum > Integer.MAX_VALUE || sum < Integer.MIN_VALUE) {
                throw new IOException("PV count " + sum + " for province id "
                        + key.get() + " does not fit in an int");
            }
            outputValue.set((int) sum);
            context.write(key, outputValue);
        }
    }

    /**
     * Configures and runs the PV job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if the arguments are missing
     * @throws Exception if job setup or execution fails
     */
    @Override
    public int run(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + this.getClass().getSimpleName()
                    + " <input path> <output path>");
            return EXIT_USAGE;
        }

        Configuration configuration = super.getConf();
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        // Input.
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Map phase.
        job.setMapperClass(WebPvMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Pre-aggregate on the map side to reduce the data that is shuffled.
        job.setCombinerClass(WebPvReducer.class);

        // Reduce phase.
        job.setReducerClass(WebPvReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Output.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    /**
     * Command-line entry point; delegates to {@link ToolRunner} and exits with
     * the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments: input path and output path
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        int status = ToolRunner.run(configuration, new PvMr(), args);
        System.exit(status);
    }
}


3.自定义计数器,对MapReduce 进行DEBUG 测试
在要添加计数器的地方调用 context.getCounter(组名, 计数器名).increment(1),作业运行结束后即可在输出的计数器信息中查看对应的统计值。





猜你喜欢

转载自blog.csdn.net/zipo/article/details/54946312
今日推荐