Flink实时处理的第一个程序

从一个Socket端口中实时地读取数据,然后实时统计相同单词出现的次数。该程序启动后会一直运行,直到被手动停止。

注意:启动程序前,先在代码中连接的主机(示例中为doitedu03)上使用nc -lk 8888启动一个Socket用来发送数据,否则程序连接不上该端口会报错

Java版本:

package cn.mydoit.day01;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;


/**
 * @Author: Zhang
 * @Description: Streaming word count. Reads lines of text from a socket, splits them
 *               into whitespace-separated words and prints a running count per word.
 *               A socket server must already be listening (for example started with
 *               {@code nc -lk 8888}) before the job is launched.
 * @Date: Created in 20:12 2020/10/9
 * @Modified By:
 */
public class StreamWordCount2 {

    // Socket host used when no host is passed on the command line.
    private static final String DEFAULT_HOST = "doitedu03";

    // Socket port used when no port is passed on the command line.
    private static final int DEFAULT_PORT = 8888;

    /**
     * Builds the word-count pipeline and submits it for execution.
     *
     * @param args optional arguments: {@code args[0]} is the socket host and
     *             {@code args[1]} is the socket port; when omitted, the defaults
     *             {@code doitedu03} and {@code 8888} are used
     * @throws Exception any failure raised while building or running the job;
     *                   it is deliberately propagated instead of being caught here
     */
    public static void main(String[] args) throws Exception {
        String host = args.length > 0 ? args[0] : DEFAULT_HOST;
        int port = args.length > 1 ? Integer.parseInt(args[1]) : DEFAULT_PORT;

        // Create the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Source: one record per line of text received on the socket.
        // DataStreamSource is a subclass of DataStream.
        DataStreamSource<String> lines = env.socketTextStream(host, port);

        // Transformation: split every line into words.
        // flatMap accepts a lambda or an anonymous class; an anonymous class is used here.
        SingleOutputStreamOperator<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String line, Collector<String> out) throws Exception {
                // Split on runs of whitespace. A leading separator still yields one
                // empty first token, so empty tokens are skipped explicitly; otherwise
                // the empty string would be counted as a word.
                for (String token : line.split("\\s+")) {
                    if (!token.isEmpty()) {
                        out.collect(token);
                    }
                }
            }
        });

        // Pair every word with an initial count of 1.
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordAndOne = words.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) throws Exception {
                return Tuple2.of(word, 1);
            }
        });

        // Group by the word (tuple field f0).
        KeyedStream<Tuple2<String, Integer>, String> keyed = wordAndOne.keyBy(t -> t.f0);

        // Aggregate: sum the tuple field at position 1 (the count) per key.
        SingleOutputStreamOperator<Tuple2<String, Integer>> summed = keyed.sum(1);

        // Sink: print the running counts.
        summed.print();

        // Start the job; let exceptions propagate rather than catching them here.
        env.execute("StreamWordCount2");
    }
}

Scala版本:

package cn._51doit.flink.day01

import org.apache.flink.streaming.api.scala._

/**
 * Streaming word count: reads lines of text from a socket, splits them into
 * whitespace-separated words and prints a running count per word.
 *
 * A socket server must already be listening (for example started with
 * `nc -lk 8888`) before the job is launched.
 *
 * @Author Zhang
 * @Date 2020/10/9
 */
object StreamWordCount {

  // Socket host used when no host is passed on the command line.
  private val DefaultHost = "doitedu03"

  // Socket port used when no port is passed on the command line.
  private val DefaultPort = 8888

  /**
   * Builds the word-count pipeline and submits it for execution.
   *
   * @param args optional arguments: `args(0)` is the socket host and `args(1)`
   *             is the socket port; when omitted, the defaults `doitedu03` and
   *             `8888` are used
   */
  def main(args: Array[String]): Unit = {
    val host = if (args.length > 0) args(0) else DefaultHost
    val port = if (args.length > 1) args(1).toInt else DefaultPort

    // Compared with Spark Streaming, which first needs a StreamingContext,
    // Flink starts from an execution environment (its context).
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Spark Streaming creates a DStream from the StreamingContext;
    // Flink creates a DataStream by calling a source method on the env.
    val lines: DataStream[String] = env.socketTextStream(host, port)

    // Transformation: split and flatten (relies on the implicits imported from
    // org.apache.flink.streaming.api.scala._). Splitting on runs of whitespace
    // and dropping empty tokens keeps the empty string from being counted as a
    // word when a line has leading or repeated separators.
    val words: DataStream[String] = lines.flatMap(_.split("\\s+").filter(_.nonEmpty))
    // Pair every word with an initial count of 1.
    val wordAndOne: DataStream[(String, Int)] = words.map((_, 1))
    // Group by the word.
    val keyed: KeyedStream[(String, Int), String] = wordAndOne.keyBy(_._1)
    // Aggregate: sum the tuple field at position 1 (the count) per key.
    val summed: DataStream[(String, Int)] = keyed.sum(1)

    // Sink: print the running counts.
    summed.print()

    // Start the job.
    env.execute("StreamWordCount")
  }
}

猜你喜欢

转载自blog.csdn.net/weixin_43648241/article/details/108985912