Window processing - Flink basics by example

Windowed stream

WindowedStream is purely an API-level construct: at runtime, the WindowedStream is collapsed together with the KeyedStream and the operation applied to the window into one single operation.

 

aggregate

Aggregates the stream (or each keyed group) by field name or by position (for tuples).

  /**
   * Resolves `field` to its positional index within the input type and delegates to the
   * position-based `aggregate` overload.
   *
   * @param aggregationType kind of aggregation to apply
   * @param field name of the field to aggregate on
   * @return the stream returned by the position-based overload
   */
  private def aggregate(aggregationType: AggregationType, field: String): DataStream[T] = {
    // Only one name is looked up, so the single resulting index is at slot 0.
    val fieldIndices = fieldNames2Indices(getInputType(), Array(field))
    aggregate(aggregationType, fieldIndices(0))
  }

  /**
   * Reduces the windowed stream with an aggregator operating on the field at `position`.
   *
   * A `SumAggregator` is used for `AggregationType.SUM`; every other aggregation type is
   * handled by a `ComparableAggregator` that receives the type itself.
   *
   * @param aggregationType kind of aggregation to apply
   * @param position index of the field to aggregate on
   * @return the stream produced by reducing the window with the selected aggregator
   */
  def aggregate(aggregationType: AggregationType, position: Int): DataStream[T] = {

    // The aggregators are built against the Java stream viewed as a stream of Products.
    val windowed = javaStream.asInstanceOf[JavaWStream[Product, K, W]]
    val inputType = windowed.getInputType
    val config = windowed.getExecutionEnvironment.getConfig

    val aggregator =
      if (aggregationType == AggregationType.SUM) {
        new SumAggregator(position, inputType, config)
      } else {
        // The boolean argument is forwarded unchanged; its meaning is defined by
        // ComparableAggregator, which is not shown here.
        new ComparableAggregator(position, inputType, aggregationType, true, config)
      }

    // Cast back to the caller-visible element type after reducing as Product.
    val reduced = new DataStream[Product](windowed.reduce(aggregator))
    reduced.asInstanceOf[DataStream[T]]
  }

Aggregation function abstract class

/**
 * Common base for aggregations that are expressed as a {@link ReduceFunction}.
 *
 * @param <T> type of the values being aggregated
 */
public abstract class AggregationFunction<T> implements ReduceFunction<T> {

	private static final long serialVersionUID = 1L;

	/** Aggregation types that can be used on a windowed stream or keyed stream. */
	public enum AggregationType {
		SUM,
		MIN,
		MAX,
		MINBY,
		MAXBY
	}
}

The implementations of this abstract class are SumAggregator and ComparableAggregator.

For example, SumAggregator implements the ReduceFunction interface (by extending the abstract class AggregationFunction):

/**
 * {@link AggregationFunction} that adds up a single field of two values.
 *
 * <p>The result is a copy of the first value whose selected field holds the sum; the field is
 * selected either by position or by name, depending on the constructor used.
 *
 * @param <T> type of the values being reduced
 */
public class SumAggregator<T> extends AggregationFunction<T> {

	private static final long serialVersionUID = 1L;

	private final FieldAccessor<T, Object> fieldAccessor;
	private final SumFunction adder;
	private final TypeSerializer<T> serializer;
	private final boolean isTuple;

	/**
	 * Creates an aggregator summing the field at the given position.
	 *
	 * @param pos position of the field to sum
	 * @param typeInfo type information of the values being reduced
	 * @param config execution config used to build the field accessor and the serializer
	 */
	public SumAggregator(int pos, TypeInformation<T> typeInfo, ExecutionConfig config) {
		this.fieldAccessor = FieldAccessorFactory.getAccessor(typeInfo, pos, config);
		this.adder = SumFunction.getForClass(this.fieldAccessor.getFieldType().getTypeClass());
		this.isTuple = typeInfo instanceof TupleTypeInfo;
		// Tuples are copied through Tuple#copy(), so no serializer is kept for them.
		this.serializer = this.isTuple ? null : typeInfo.createSerializer(config);
	}

	/**
	 * Creates an aggregator summing the field with the given name.
	 *
	 * @param field name of the field to sum
	 * @param typeInfo type information of the values being reduced
	 * @param config execution config used to build the field accessor and the serializer
	 */
	public SumAggregator(String field, TypeInformation<T> typeInfo, ExecutionConfig config) {
		this.fieldAccessor = FieldAccessorFactory.getAccessor(typeInfo, field, config);
		this.adder = SumFunction.getForClass(this.fieldAccessor.getFieldType().getTypeClass());
		this.isTuple = typeInfo instanceof TupleTypeInfo;
		// Tuples are copied through Tuple#copy(), so no serializer is kept for them.
		this.serializer = this.isTuple ? null : typeInfo.createSerializer(config);
	}

	/**
	 * Copies {@code value1} and stores the sum of the selected field of both inputs in the copy.
	 *
	 * @param value1 value that is copied to form the result
	 * @param value2 value whose selected field is added
	 * @return the value returned by the field accessor after setting the summed field on the copy
	 */
	@Override
	@SuppressWarnings("unchecked")
	public T reduce(T value1, T value2) throws Exception {
		// Copy first so that neither input is modified by the field update below.
		final T copy = isTuple ? (T) ((Tuple) value1).copy() : serializer.copy(value1);
		final Object sum = adder.add(fieldAccessor.get(value1), fieldAccessor.get(value2));
		return fieldAccessor.set(copy, sum);
	}
}

Commonly used derived methods

sum

/**
 * Applies the `SUM` aggregation to the field at the given position.
 *
 * @param position index of the field to aggregate on
 * @return the stream returned by `aggregate`
 */
def sum(position: Int): DataStream[T] =
  aggregate(AggregationType.SUM, position)

maxBy

/**
 * Applies the `MAXBY` aggregation to the field at the given position.
 *
 * @param position index of the field to aggregate on
 * @return the stream returned by `aggregate`
 */
def maxBy(position: Int): DataStream[T] =
  aggregate(AggregationType.MAXBY, position)

Example code snippet

sum

// Lower-case each line, split it on non-word characters, pair every non-empty word with 1,
// key by the word (tuple field 0) and sum the counts (tuple field 1) per count window.
val counts: DataStream[(String, Int)] = text
  .flatMap(line => line.toLowerCase().split("\\W+"))
  .filter(word => word.nonEmpty)
  .map(word => (word, 1))
  .keyBy(0)
  .countWindow(windowSize, slideSize)
  .sum(1)

maxBy

// Same pipeline as the sum example, but the window is aggregated with maxBy on tuple field 1
// instead of sum.
val counts: DataStream[(String, Int)] = text
  .flatMap(line => line.toLowerCase().split("\\W+"))
  .filter(word => word.nonEmpty)
  .map(word => (word, 1))
  .keyBy(0)
  .countWindow(windowSize, slideSize)
  .maxBy(1)

 

reduce

Applies the given reduce function separately to each set of elements that share the same key.

/**
 * Reduces the stream with a Scala function by wrapping it in a `ScalaReduceFunction` and
 * delegating to the `reduce` overload that accepts that wrapper.
 *
 * @param function function combining two values into one; must not be null
 * @return the stream returned by the delegated `reduce` call
 * @throws NullPointerException if `function` is null
 */
def reduce(function: (T, T) => T): DataStream[T] = {
  if (function == null) {
    throw new NullPointerException("Reduce function must not be null.")
  }
  // The function is passed through `clean` before it is wrapped.
  val cleaned = clean(function)
  reduce(new ScalaReduceFunction[T](cleaned))
}

ScalaReduceFunction also implements the ReduceFunction interface:

/**
 * Adapter that exposes a Scala two-argument function as a `ReduceFunction`.
 *
 * @param function the Scala function that `reduce` delegates to
 * @tparam T type of the values being reduced
 */
final class ScalaReduceFunction[T](private[this] val function: (T, T) => T)
    extends ReduceFunction[T] {

  /** Returns the result of applying the wrapped function to `a` and `b`. */
  @throws(classOf[Exception])
  override def reduce(a: T, b: T): T = function(a, b)
}

Source code of the underlying call

// file: org.apache.flink.streaming.api.datastream.WindowedStream

/**
 * Builds a window operator from a {@code ReduceFunction} and a {@code WindowFunction} and
 * applies it to the input stream.
 *
 * <p>When an evictor is set, the operator is an {@code EvictingWindowOperator} backed by a
 * list state named "window-contents" holding stream records, and the reduce function is
 * combined with the window function through {@code ReduceApplyWindowFunction}. Without an
 * evictor, the operator is a {@code WindowOperator} backed by a reducing state named
 * "window-contents" that is created with the reduce function.
 *
 * @param reduceFunction the reduce function; must not be a {@code RichFunction}
 * @param function the window function applied together with the reduce function
 * @param resultType type information of the result, passed on to {@code input.transform}
 * @return the operator returned by {@code input.transform}
 * @throws UnsupportedOperationException if {@code reduceFunction} is a {@code RichFunction}
 */
public <R> SingleOutputStreamOperator<R> reduce(
		ReduceFunction<T> reduceFunction,
		WindowFunction<T, R, K, W> function,
		TypeInformation<R> resultType) {

	// Rich reduce functions are rejected up front.
	if (reduceFunction instanceof RichFunction) {
		throw new UnsupportedOperationException("ReduceFunction of reduce can not be a RichFunction.");
	}

	// Clean the closures; everything below uses the cleaned instances.
	function = input.getExecutionEnvironment().clean(function);
	reduceFunction = input.getExecutionEnvironment().clean(reduceFunction);

	final String opName = generateOperatorName(windowAssigner, trigger, evictor, reduceFunction, function);
	KeySelector<T, K> keySel = input.getKeySelector();

	OneInputStreamOperator<T, R> operator;

	if (evictor != null) {
		// Evictor path: the state is a list of stream records, serialized through a
		// StreamElementSerializer wrapping the input type's serializer.
		@SuppressWarnings({"unchecked", "rawtypes"})
		TypeSerializer<StreamRecord<T>> streamRecordSerializer =
			(TypeSerializer<StreamRecord<T>>) new StreamElementSerializer(input.getType().createSerializer(getExecutionEnvironment().getConfig()));

		ListStateDescriptor<StreamRecord<T>> stateDesc =
			new ListStateDescriptor<>("window-contents", streamRecordSerializer);

		// The reduce function is handed to ReduceApplyWindowFunction together with the window
		// function, and the pair is wrapped as an iterable window function.
		operator =
			new EvictingWindowOperator<>(windowAssigner,
				windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
				keySel,
				input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
				stateDesc,
				new InternalIterableWindowFunction<>(new ReduceApplyWindowFunction<>(reduceFunction, function)),
				trigger,
				evictor,
				allowedLateness,
				lateDataOutputTag);

	} else {
		// No evictor: the reduce function is part of the reducing state descriptor, and the
		// window function is wrapped as a single-value window function.
		ReducingStateDescriptor<T> stateDesc = new ReducingStateDescriptor<>("window-contents",
			reduceFunction,
			input.getType().createSerializer(getExecutionEnvironment().getConfig()));

		operator =
			new WindowOperator<>(windowAssigner,
				windowAssigner.getWindowSerializer(getExecutionEnvironment().getConfig()),
				keySel,
				input.getKeyType().createSerializer(getExecutionEnvironment().getConfig()),
				stateDesc,
				new InternalSingleValueWindowFunction<>(function),
				trigger,
				allowedLateness,
				lateDataOutputTag);
	}

	return input.transform(opName, resultType, operator);
}

Example code snippet

// Key by tuple field 0, window with the two given durations (2500 ms and 500 ms), and reduce
// each window by keeping the first field of the left value and adding up the second fields.
stream
  .keyBy(0)
  .timeWindow(Time.of(2500, TimeUnit.MILLISECONDS), Time.of(500, TimeUnit.MILLISECONDS))
  .reduce((left, right) => (left._1, left._2 + right._2))
  .addSink(new SinkFunction[(Long, Long)] {})

 

process

 233

 

apply

233

Guess you like

Origin www.cnblogs.com/lemos/p/12640664.html