Flink之程序员甩锅必杀技门清
java代码实现数据条数统计
package counts;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.configuration.Configuration;
import java.util.ArrayList;
/**
 * Counts the total number of records in a Flink DataSet using a
 * {@link LongCounter} accumulator registered inside a RichMapFunction.
 * The counter is incremented once per element on every parallel subtask;
 * the JobManager merges all subtask counters after the job finishes.
 */
public class JavaCounts {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Build sample data: the five characters 'a' through 'e'.
        ArrayList<String> words = new ArrayList<>();
        for (int i = 'a'; i <= 'e'; i++) {
            words.add(String.valueOf((char) i));
        }
        DataSource<String> data = env.fromCollection(words);
        MapOperator<String, String> result = data.map(new RichMapFunction<String, String>() {
            // One counter instance per subtask; merged by name after execution.
            private final LongCounter counter = new LongCounter();

            @Override
            public void open(Configuration parameters) throws Exception {
                // Register the accumulator once per subtask under a shared name.
                getRuntimeContext().addAccumulator("counterName", counter);
            }

            @Override
            public String map(String value) throws Exception {
                counter.add(1L);
                return value; // identity map — this stage only counts
            }
        }).setParallelism(4);
        result.writeAsText("./out");
        JobExecutionResult counts = env.execute("Counts");
        // getAccumulatorResult is generic, so bind the typed Long directly
        // instead of a raw Object (avoids an unchecked downcast later).
        Long total = counts.getAccumulatorResult("counterName");
        System.out.println("数据总共: " + total + "条");
    }
}
Scala代码实现数据条数统计
package counts
import org.apache.flink.api.common.JobExecutionResult
import org.apache.flink.api.common.accumulators.LongCounter
import org.apache.flink.api.common.functions.{IterationRuntimeContext, RichFunction, RichMapFunction, RuntimeContext}
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
/**
 * Counts the total number of records in a Flink DataSet using a
 * [[LongCounter]] accumulator registered inside a RichMapFunction.
 * Each parallel subtask increments its own counter; the JobManager
 * merges them by name once the job completes.
 */
object Counts {
  def main(args: Array[String]): Unit = {
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
    val words: DataSet[String] = env.fromElements("a", "b", "c", "d", "e")
    val out: DataSet[String] = words.map(new RichMapFunction[String, String]() {
      // One counter instance per subtask; merged by name after execution.
      private val counter: LongCounter = new LongCounter()

      override def open(parameters: Configuration): Unit = {
        // Register the accumulator once per subtask under a shared name.
        getRuntimeContext.addAccumulator("counterName", counter)
      }

      // Explicit return type on the override instead of relying on inference.
      override def map(value: String): String = {
        counter.add(1)
        value // identity map — this stage only counts
      }
    }).setParallelism(4)
    out.writeAsText("./out2")
    val result: JobExecutionResult = env.execute("Counts")
    val total: Long = result.getAccumulatorResult[Long]("counterName")
    // String interpolation rather than `+` concatenation.
    println(s"数据总共: ${total}条")
  }
}