从一个Socket端口中实时地读取数据,然后实时统计相同单词出现的次数,该程序会一直运行
注意:启动程序前先使用nc -lk 8888启动一个socket用来发送数据,否则会报错
java版本:
package cn.mydoit.day01;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
* @Author: Zhang
* @Description:
* @Date: Created in 20:12 2020/10/9
* @Modified By:
*/
public class StreamWordCount2 {
    public static void main(String[] args) throws Exception {
        // Set up the streaming execution environment (Flink's runtime context).
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: read text lines from a socket.
        // Start `nc -lk 8888` on the host first, or the job fails to connect.
        DataStreamSource<String> socketLines = env.socketTextStream("doitedu03", 8888);

        // Transformations chained into a single pipeline.
        // Anonymous classes (rather than lambdas) are kept on purpose: with
        // lambdas, Flink cannot recover the generic output types after type
        // erasure without extra returns() hints.
        socketLines
                .flatMap(new FlatMapFunction<String, String>() {
                    @Override
                    public void flatMap(String line, Collector<String> out) throws Exception {
                        // Tokenize each incoming line on single spaces.
                        for (String token : line.split(" ")) {
                            out.collect(token);
                        }
                    }
                })
                .map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String word) throws Exception {
                        // Pair every word with an initial count of 1.
                        return Tuple2.of(word, 1);
                    }
                })
                .keyBy(t -> t.f0)   // partition the stream by the word itself
                .sum(1)             // running sum of the counts per word
                .print();           // sink: print incremental results to stdout

        // Launch the job; exceptions are propagated rather than swallowed.
        env.execute("StreamWordCount2");
    }
}
Scala版本:
package cn._51doit.flink.day01
import org.apache.flink.streaming.api.scala._
/**
* @Author Zhang
* @Date 2020/10/9
*/
object StreamWordCount {
  def main(args: Array[String]): Unit = {
    // Execution environment — Flink's counterpart of Spark Streaming's
    // StreamingContext; everything in the job hangs off this handle.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Source: one DataStream element per line received on the socket.
    // Requires `nc -lk 8888` listening on the host before the job starts.
    // The fluent chain below relies on the implicit TypeInformation
    // conversions pulled in by `org.apache.flink.streaming.api.scala._`.
    env.socketTextStream("doitedu03", 8888)
      .flatMap(_.split(" ")) // split each line into individual words
      .map((_, 1))           // attach an initial count of 1 to every word
      .keyBy(_._1)           // partition the stream by the word
      .sum(1)                // running total of the counts per word
      .print()               // sink: emit incremental results to stdout

    // Submit the job; it keeps running until it is cancelled.
    env.execute("StreamWordCount")
  }
}