package jishuqi;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Counts malformed input records by means of a global (job-level) counter.
 *
 * <p>The job is map-only: every mapper looks at one input line at a time and
 * increments {@link MissCounter#Miss_Field_Lines} when the line has fewer than
 * three comma-separated fields. No records are emitted; the job is run in
 * verbose mode so progress and results are reported on the console.
 */
public class MissFileds {

    /**
     * Mapper that inspects each line and records incomplete ones in a counter.
     * It never writes any key/value pair.
     */
    static class MyMapper extends Mapper<LongWritable, Text, NullWritable, NullWritable> {

        /** Separator between the fields of one input line. */
        private static final String FIELD_SEPARATOR = ",";

        /** A line with fewer fields than this is counted as incomplete. */
        private static final int EXPECTED_FIELDS = 3;

        /**
         * Checks one input line and bumps the counter if it is incomplete.
         *
         * @param key     byte offset of the line within the input
         * @param value   content of the line
         * @param context job context, used here only to reach the counter
         * @throws IOException          declared by the overridden method
         * @throws InterruptedException declared by the overridden method
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Note: String.split drops trailing empty strings, so a line such as
            // "a,b," yields two fields and is therefore counted as incomplete.
            String[] fields = value.toString().split(FIELD_SEPARATOR);

            // Lines with more than EXPECTED_FIELDS fields are not counted.
            if (fields.length < EXPECTED_FIELDS) {
                Counter missing = context.getCounter(MissCounter.Miss_Field_Lines);
                // increment(n) adds n to the counter, i.e. counter += n.
                missing.increment(1L);
            }
        }
    }

    /**
     * Driver: configures and runs the map-only counting job, then exits with
     * status 0 if the job succeeded and 1 otherwise.
     *
     * @param args command-line arguments (not used)
     * @throws IOException            if the job cannot be set up or submitted
     * @throws ClassNotFoundException declared by {@code Job.waitForCompletion}
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://hadoop02:9000");

        Job job = Job.getInstance(conf);
        job.setJarByClass(MissFileds.class);

        // A mapper alone is enough for this job.
        job.setMapperClass(MyMapper.class);

        // The mapper emits nothing, so both output types are NullWritable.
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // Must be set to 0 explicitly when no reduce phase is wanted;
        // otherwise one reduce task runs by default.
        job.setNumReduceTasks(0);

        // Several input paths may be given here.
        FileInputFormat.setInputPaths(job, new Path("/friendout_01"));

        // An output path is still required even though nothing is emitted:
        // without it the job fails with "Output directory not set."
        FileOutputFormat.setOutputPath(job, new Path("/counter_out01"));

        // Propagate the job result to the caller instead of always exiting 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
// Custom enum class (belongs in its own source file, MissCounter.java)
package jishuqi;
/**
 * Enum whose constants serve as names for global (job-wide) counters.
 *
 * @author Administrator
 */
public enum MissCounter {

    /** Number of input lines that are missing fields. */
    Miss_Field_Lines;
}