版权声明:个人 https://blog.csdn.net/csdnmrliu/article/details/82837622
1. MapReduce中自定义计数器(枚举方式)
1.1 首先定义并使用计数器
// Option 1: address the counter by group name + counter name
context.getCounter(groupName, counterName).increment(1);// both arguments are Strings
// Option 2: address the counter by an enum constant
context.getCounter(counterName).increment(1);// the argument is an Enum
// Usage: change the counter's value
increment(long incr)
/**
 * Custom MapReduce counters, addressed by enum constant.
 *
 * <p>Constant names follow the {@code GroupName_CounterName} convention.
 */
public enum LogProcessorCounter {

    /** Group {@code Test}, counter {@code Count}. */
    Test_Count
}
1.2 在ApplicationMain中Job完成后获取计数器的值
// All counters of this Job (the built-in MapReduce counters as well as the custom ones)
Counters counters=locatorJob.getCounters();
// Look up a specific counter, either by group name + counter name or by enum constant
Counter counter1 = counters.findCounter(groupName, counterName);
Counter counter2 = counters.findCounter(LogProcessorCounter.Test_Count);
// Read the counter's value
long value = counter1.getValue();
2. Spark中使用累加器
spark版本为2.3.1
注:在使用累加器的过程中如果对于spark的执行过程理解的不够深入就会遇到两类典型的错误:少加(或者没加)、多加。
少加:没有触发action算子
多加:累加器在 transformation 算子中被更新,而该 RDD 又被多个 action 重复计算,导致累加器被重复累加。应对被多次使用的 RDD 调用 cache(或 persist)方法把数据缓存下来,避免重复计算。
2.1 使用scala开发spark程序
2.1.1数值型累加器(spark内置提供的long和double累加器)
// Create the built-in long and double accumulators, named "count" and "ratio"
val longAccum = sc.longAccumulator("count")
val doubleAccum = sc.doubleAccumulator("ratio")
// Update the values
longAccum.add(1L)
doubleAccum.add(1D)
// Read the values
LOGGER.warn("longAccum value == " + longAccum.value)
LOGGER.warn("doubleAccum value == " + doubleAccum.value)
2.2 使用Java开发spark
// NOTE: Accumulator / AccumulatorParam are the legacy accumulator API (deprecated since
// Spark 2.0 in favour of AccumulatorV2); they are kept here to match the original example.

// Define a Double accumulator. It starts from 0 so that value() reports exactly what was
// added; Double.MIN_VALUE is the smallest positive double, not a neutral starting point.
Accumulator<Double> doubleAccumulator = sc.doubleAccumulator(0.0D, "doubleAccum");
// Define a Long accumulator with a custom AccumulatorParam, also starting from 0
// (starting from Long.MIN_VALUE would offset every reported total).
Accumulator<Long> longAccum = sc.accumulator(0L, "longAccum", new AccumulatorParam<Long>() {
    private static final long serialVersionUID = 770193566372144994L;

    /** Merges two partial results into one. */
    @Override
    public Long addInPlace(Long init, Long value) {
        return init + value;
    }

    /**
     * Returns the identity ("zero") value for this accumulator type. It must be 0 rather than
     * the initial value; otherwise a non-zero initial value would be counted once per
     * partial accumulator that gets merged.
     */
    @Override
    public Long zero(Long init) {
        return 0L;
    }

    /** Adds a single increment to the running value. */
    @Override
    public Long addAccumulator(Long value, Long step) {
        return value + step;
    }
});
// Update the values
doubleAccumulator.add(1D);
longAccum.add(1L);
// Read the values and the names (labels now say which one is being printed)
LOG.warn("doubleAccumulator value == " + doubleAccumulator.value());
LOG.warn("doubleAccumulator name == " + doubleAccumulator.name().get());
LOG.warn("longAccum value == " + longAccum.value());
LOG.warn("longAccum name == " + longAccum.name().get());
3.小工具(将属于同一组的计数器组装在JavaBean中)
package tools;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.log4j.Logger;
import enums.LogCounter;
/**
 * Health-monitoring utility that copies Hadoop MapReduce counter values into the fields of a
 * plain JavaBean via reflection.
 *
 * <p>Counters are identified by enum constants. Two naming schemes are supported:
 * <ul>
 * <li>a single enum holding every metric, with constants named {@code GROUP_fieldName}
 * (see {@link #generateLogBean(Class, Class, Counters, String...)});</li>
 * <li>one enum per category, with constants named exactly like the bean field
 * (see {@link #generateLogBean(Class, Counters, Class...)}).</li>
 * </ul>
 *
 * <p>Field/constant names are compared case-insensitively. Failures are logged through
 * {@link #LOGGER} and reported to the caller as a {@code null} return value.
 *
 * @author 15257
 */
public class MonitorUtil {

    private static final Logger LOGGER = Logger.getLogger(MonitorUtil.class);

    /**
     * Builds a log bean from an enum that keeps all metrics together, using the
     * {@code GROUP_fieldName} naming convention.
     *
     * <p>For every constant selected by {@code prefixs}, the part of its name after the first
     * underscore is matched against the declared fields of {@code clazz}; a matching field is
     * set to the counter's value. If no prefix is given, every constant is considered, so when
     * several groups share the same field suffix the last matching constant wins (which may
     * overwrite the intended value, e.g. with 0).
     *
     * @param clazz
     *            log bean class; needs a no-argument constructor, and matching fields must
     *            accept a {@code long} value
     * @param counterEnumType
     *            enum class containing the counters of all groups
     * @param counters
     *            all counters of the Job
     * @param prefixs
     *            group names to include (several allowed; specifying them is recommended)
     * @return the populated bean, or {@code null} if the enums could not be resolved or the
     *         bean could not be created/populated
     */
    public static <T, E extends Enum<E>> T generateLogBean(Class<T> clazz, Class<E> counterEnumType, Counters counters,
            String... prefixs) {
        return populateBean(clazz, counters, fetchCounterEnums(counterEnumType, prefixs), true);
    }

    /**
     * Builds a log bean from one or more enums whose constant names equal the bean's field
     * names (suited to enums that are grouped by category, e.g. nested in interfaces).
     *
     * @param clazz
     *            log bean class; needs a no-argument constructor, and matching fields must
     *            accept a {@code long} value
     * @param counters
     *            all counters of the Job
     * @param counterEnumTypes
     *            counter enum classes to read; must not be empty
     * @return the populated bean, or {@code null} if no enum class was given or the bean could
     *         not be created/populated
     */
    @SafeVarargs
    public static <T, E extends Enum<E>> T generateLogBean(Class<T> clazz, Counters counters,
            Class<E>... counterEnumTypes) {
        return populateBean(clazz, counters, fetchCounterEnums(counterEnumTypes), false);
    }

    /**
     * Selects the constants of {@code counterEnumType} that belong to the given groups. The
     * group of a constant is the part of its name before the first underscore.
     *
     * <p>Constants without an underscore have no group and are skipped when prefixes are given
     * (previously a single such constant made the whole lookup fail).
     *
     * @param counterEnumType
     *            counter enum class, e.g. {@code LogProcessorCounter.class}
     * @param prefixs
     *            counter groups (case-insensitive), e.g. {@code TEST}; when empty, all
     *            constants are returned and a warning is logged
     * @return the matching constants (one entry per matching prefix, in prefix order), or
     *         {@code null} if the lookup failed
     */
    public static <T extends Enum<T>> List<T> fetchCounterEnums(Class<T> counterEnumType, String... prefixs) {
        try {
            T[] enumConstants = counterEnumType.getEnumConstants();
            if (prefixs.length == 0) {
                LOGGER.warn("prefixs is empty... Bean的属性可能会在计数器中重复出现,可能导致计数结果为0");
                return Arrays.asList(enumConstants);
            }
            List<T> matched = new ArrayList<>();
            for (String prefix : prefixs) {
                for (T constant : enumConstants) {
                    String name = constant.name();
                    int separator = name.indexOf('_');
                    // No underscore means no group part: skip instead of calling substring(0, -1).
                    if (separator >= 0 && name.substring(0, separator).equalsIgnoreCase(prefix)) {
                        matched.add(constant);
                    }
                }
            }
            return matched;
        } catch (RuntimeException e) {
            LOGGER.error("Failed to collect counter enums from " + counterEnumType, e);
            return null;
        }
    }

    /**
     * Collects all constants of the given counter enum classes, in argument order.
     *
     * @param counterEnumTypes
     *            counter enum classes; must not be empty
     * @return all constants of all given classes, or {@code null} if no class was given or the
     *         lookup failed
     */
    @SafeVarargs
    public static <T extends Enum<T>> List<T> fetchCounterEnums(Class<T>... counterEnumTypes) {
        if (counterEnumTypes == null || counterEnumTypes.length == 0) {
            LOGGER.warn("counterEnumType cannot be empty...");
            return null;
        }
        try {
            List<T> counterEnumList = new ArrayList<>();
            for (Class<T> counterEnumType : counterEnumTypes) {
                counterEnumList.addAll(Arrays.asList(counterEnumType.getEnumConstants()));
            }
            return counterEnumList;
        } catch (RuntimeException e) {
            LOGGER.error("Failed to collect counter enums", e);
            return null;
        }
    }

    /**
     * Shared implementation of both {@code generateLogBean} overloads: instantiates the bean
     * and assigns each counter value to the declared field whose name matches the counter.
     *
     * @param stripGroupPrefix
     *            {@code true} to match on the part of the constant name after the first
     *            underscore, {@code false} to match on the full constant name
     */
    private static <T, E extends Enum<E>> T populateBean(Class<T> clazz, Counters counters, List<E> counterEnums,
            boolean stripGroupPrefix) {
        if (counterEnums == null) {
            // The enum lookup already logged the reason.
            return null;
        }
        try {
            T bean = clazz.getDeclaredConstructor().newInstance();
            for (Field field : clazz.getDeclaredFields()) {
                for (E counterEnum : counterEnums) {
                    String counterName = stripGroupPrefix ? stripGroup(counterEnum.name()) : counterEnum.name();
                    if (field.getName().equalsIgnoreCase(counterName)) {
                        // getDeclaredFields() hands out fresh Field objects, so the
                        // accessibility flag does not need to be restored afterwards.
                        field.setAccessible(true);
                        field.set(bean, counters.findCounter(counterEnum).getValue());
                    }
                }
            }
            return bean;
        } catch (ReflectiveOperationException | RuntimeException e) {
            LOGGER.error("Failed to populate log bean of type " + clazz, e);
            return null;
        }
    }

    /** Returns the part of {@code name} after the first underscore, or {@code name} itself if there is none. */
    private static String stripGroup(String name) {
        return name.substring(name.indexOf('_') + 1);
    }
}