[Flink] #11: Application development quick start

Development steps

  1. Get an execution environment
  2. Load / create initialization data
  3. Specify the transformation operators to apply to the data
  4. Specify where the computed results are stored
  5. Call execute() to trigger execution of the program

Flink stream processing development


import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.utils.ParameterTool;

import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.runtime.state.memory.MemoryStateBackend;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;


public class SocketWindowWordCountJava {
    public static void main(String[] args) throws Exception{
        //获取需要的端口号
        int port;
        try {
            ParameterTool parameterTool = ParameterTool.fromArgs(args);
            port = parameterTool.getInt("port");
        }catch (Exception e){
            System.err.println("No port set. use default port 9000--Java");
            port = 9000;
        }

        //获取Flink的运行环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        String hostname = "localhost";
        String delimiter = "\n";
        //连接Socket获取输入的数据
        DataStreamSource<String> text = env.socketTextStream(hostname, port, delimiter);

        // a a c

        // a 1
        // a 1
        // c 1
        DataStream<WordWithCount> windowCounts = text.flatMap(new FlatMapFunction
                <String, WordWithCount>() {
            public void flatMap(String value, Collector<WordWithCount> out) throws
                    Exception {
                String[] splits = value.split("\\s");
                for (String word : splits) {
                    out.collect(new WordWithCount(word, 1L));
                }
            }
        }).keyBy("word")
                .timeWindow(Time.seconds(2), Time.seconds(1))//指定时间窗口大小为2s,指定时间间隔为1s
                .sum("count");//在这里使用sum或者reduce都可以
                /*.reduce(new ReduceFunction<WordWithCount>() {
                                    public WordWithCount reduce(WordWithCount a,
WordWithCount b) throws Exception {

                                        return new WordWithCount(a.word,a.count+b.count);
                                    }
                                })*/
        //把数据打印到控制台并且设置并行度
        windowCounts.print().setParallelism(1);
        //这一行代码一定要实现,否则程序不执行
        env.execute("Socket window count");

    }

    public static class WordWithCount{
        public String word;
        public long count;
        public  WordWithCount(){}
        public WordWithCount(String word,long count){
            this.word = word;
            this.count = count;
        }
        @Override
        public String toString() {
            return "WordWithCount{" +
                    "word='" + word + '\'' +
                    ", count=" + count +
                    '}';
        }
    }
}


import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.windowing.time.Time

/**
 * Word count computed over a sliding time window.
 *
 * Reads newline-delimited text from a socket on localhost and prints the
 * per-word counts of a 2-second window that slides every second.
 */
object SocketWindowWordCountScala {

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments; `--port <n>` selects the socket port
     */
    def main(args: Array[String]): Unit = {

        // Read the socket port; fall back to 9000 when the argument cannot be read.
        val port: Int =
            try {
                ParameterTool.fromArgs(args).getInt("port")
            } catch {
                case _: Exception =>
                    System.err.println("No port set. use default port 9000--Scala")
                    9000
            }

        // Obtain the execution environment.
        val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

        // Connect to the socket and read the input, one record per line.
        val text = env.socketTextStream("localhost", port, '\n')

        // Note: this implicit import is required, otherwise the flatMap call
        // below fails.
        import org.apache.flink.api.scala._

        // Flatten lines into words, group by word, window, and sum the counts.
        val windowCounts = text
          .flatMap(line => line.split("\\s")) // split every line into words
          // split yields "" for blank lines and between consecutive whitespace
          // characters; drop those so they are not counted as words
          .filter(_.nonEmpty)
          .map(w => WordWithCount(w, 1)) // turn each word into (word, 1)
          .keyBy("word") // group by word
          .timeWindow(Time.seconds(2), Time.seconds(1)) // window size 2s, slide 1s
          .sum("count") // sum or reduce both work here
        //.reduce((a, b) => WordWithCount(a.word, a.count + b.count))

        // Print to the console with a parallelism of 1.
        windowCounts.print().setParallelism(1)

        // Run the job.
        env.execute("Socket window count")

    }

    /** A word together with its count. */
    case class WordWithCount(word: String, count: Long)

}

Flink Batch Development


package batch;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DataSource;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

/**
 *单词计数之离线计算
 *
 */
public class BatchWordCountJava {

    public static void main(String[] args) throws Exception{
        String inputPath = "/Users/eric/Desktop/flink-train/FlinkTech/src/main/resources/data/input";
        String outPath = "/Users/eric/Desktop/flink-train/FlinkTech/src/main/resources/data/result";

        //获取运行环境
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        //获取文件中的内容
        DataSource<String> text = env.readTextFile(inputPath);

        DataSet<Tuple2<String, Integer>> counts = text.flatMap(new Tokenizer()).groupBy(0).sum(1);
        counts.writeAsCsv(outPath,"\n"," ").setParallelism(1);
        env.execute("batch word count");

    }

    public static class Tokenizer implements FlatMapFunction<String,Tuple2<String,
            Integer>>{
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out)
                throws Exception {
            String[] tokens = value.toLowerCase().split("\\W+");
            for (String token: tokens) {
                if(token.length()>0){
                    out.collect(new Tuple2<String, Integer>(token,1));
                }
            }
        }
    }
}
package batch

import org.apache.flink.api.scala.ExecutionEnvironment

/**
 * Offline (batch) word count.
 *
 * Reads text from the input path, counts how often each lower-cased word
 * occurs and writes the result as CSV to the output path.
 */
object BatchWordCountScala {

    /**
     * Builds and runs the batch job.
     *
     * @param args command-line arguments (not used)
     */
    def main(args: Array[String]): Unit = {
        val sourcePath = "/Users/eric/Desktop/flink-train/FlinkTech/src/main/resources/data/input"
        val resultPath = "/Users/eric/Desktop/flink-train/FlinkTech/src/main/resources/data/result"

        val environment = ExecutionEnvironment.getExecutionEnvironment
        val lines = environment.readTextFile(sourcePath)

        // Bring the implicit conversions into scope.
        import org.apache.flink.api.scala._

        // Lower-case each line, split on runs of non-word characters and
        // drop the empty tokens.
        val words = lines
          .flatMap(line => line.toLowerCase.split("\\W+"))
          .filter(word => word.nonEmpty)

        // Pair every word with 1, group on the word (field 0) and sum the
        // ones (field 1).
        val wordCounts = words
          .map(word => (word, 1))
          .groupBy(0)
          .sum(1)

        // Rows separated by "\n", fields by " "; written with a parallelism of 1.
        wordCounts.writeAsCsv(resultPath, "\n", " ").setParallelism(1)
        environment.execute("batch word count")
    }

}

Published 78 original articles · won praise 0 · Views 1418

Guess you like

Origin blog.csdn.net/qq_30782921/article/details/102819396
Recommended