Learning Flink from 0 to 1 - Chapter 2: Quick Start

1.1 Build the Maven project flink-scala

1.1.1 POM file

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>big-data-cloud</artifactId>
        <groupId>org.big.data</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>flink</artifactId>
    <packaging>pom</packaging>
    <modules>
        <module>flink-java</module>
        <module>flink-scala</module>
        <module>flink-cep</module>
        <module>kkb-pro</module>
    </modules>

    <properties>
        <compiler.version>1.8</compiler.version>
        <flink.version>1.10.0</flink.version>
        <java.version>1.8</java.version>
        <hive.version>1.2.1</hive.version>
        <scala.binary.version>2.11</scala.binary.version>

        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- utility dependencies -->
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>23.0</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.68</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.4</version>
        </dependency>
        <dependency>
            <groupId>com.jayway.jsonpath</groupId>
            <artifactId>json-path</artifactId>
            <version>2.4.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>2.9.9</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!--state backend-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-sql-parser</artifactId>
            <version>${flink.version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-avro</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- flink-connector-redis -->
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.0</version>
        </dependency>


        <!-- flink-connector-elasticsearch6 -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>


    </dependencies>
	<build>
        <plugins>
            <!-- This plugin compiles Scala code into class files -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.4.6</version>
                <executions>
                    <execution>
                        <!-- Bind the Scala compiler to Maven's compile and test-compile phases -->
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
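
With this configuration, running mvn clean package compiles the Scala sources, and the assembly plugin additionally produces a *-jar-with-dependencies.jar under target/, which can be submitted to a Flink cluster with flink run.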

1.1.2 Add the Scala framework and a scala source folder

(Screenshot omitted: in IntelliJ IDEA, right-click the module and choose "Add Framework Support..." to add Scala, then create a src/main/scala directory and mark it as Sources Root.)

1.2 Batch WordCount

package com.flink.scala

import org.apache.flink.api.scala._

object WordCount {

  def main(args: Array[String]): Unit = {
    // Create the batch execution environment
    val env = ExecutionEnvironment.getExecutionEnvironment
    // Read data from a file
    val inputPath = "/words.txt"
    val inputDS: DataSet[String] = env.readTextFile(inputPath)
    // Split each line into words, group by the word, then aggregate with sum
    val wordCountDS: AggregateDataSet[(String, Int)] = inputDS.flatMap(_.split(",")).map((_, 1)).groupBy(0).sum(1)
    // Print the result
    wordCountDS.print()
  }
}
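
For example, assuming /words.txt contains the two comma-separated lines shown first below, the job prints one (word, count) tuple per distinct word (the output order may vary):

flink,spark,flink
hive,flink

(spark,1)
(hive,1)
(flink,3)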

Note: Flink provides both a Java API and a Scala API, and Scala is the main language in this course. Many classes exist in both a java and a scala package, so when adding imports, take care to pick the scala one.
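
As a concrete illustration, these are the Scala imports used in this chapter; they also bring the implicit TypeInformation conversions into scope. The commented lines are the Java-API counterparts, shown only for contrast:

import org.apache.flink.api.scala._             // batch API (used in 1.2)
import org.apache.flink.streaming.api.scala._   // streaming API (used in 1.3)

// Java-API counterparts that are easy to import by mistake:
// import org.apache.flink.api.java.ExecutionEnvironment
// import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment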

1.3 Stream Processing WordCount

package com.flink.scala

import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._

object StreamWordCount {

  def main(args: Array[String]): Unit = {
    // Read the host and port from the command-line arguments
    val params: ParameterTool = ParameterTool.fromArgs(args)
    val host: String = params.get("host")
    val port: Int = params.getInt("port")
    // Create the stream execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Receive a socket text stream
    val textDstream: DataStream[String] = env.socketTextStream(host, port)
    // flatMap and map need the implicit conversions from org.apache.flink.api.scala._
    val dataStream: DataStream[(String, Int)] = textDstream.flatMap(_.split("\\s")).filter(_.nonEmpty).map((_, 1)).keyBy(0).sum(1)
    dataStream.print().setParallelism(1)
    // Trigger job execution
    env.execute("Socket stream word count")
  }
}
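
Note that ParameterTool.fromArgs parses arguments of the form --host localhost --port 8080, so pass both when launching the job.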

To send test data, run the following simple socket server:


import org.apache.commons.lang3.RandomUtils;

import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class NcServer implements Runnable {

    String[] words = {"java", "python", "spark", "flink", "hive", "hadoop", "hbase"};

    private Socket socket;

    public NcServer(Socket socket) {
        this.socket = socket;
    }

    public static void main(String[] args) throws Exception {
        ServerSocket serverSocket = new ServerSocket(8080);
        ExecutorService es = Executors.newSingleThreadExecutor();
        while (true) {
            Socket socket = serverSocket.accept();
            System.out.println("Accepted a connection");
            es.execute(new NcServer(socket));
        }
    }

    @Override
    public void run() {
        try {
            PrintWriter out = new PrintWriter(socket.getOutputStream(), true);
            while (true) {
                // Pick a random word and send it, one word per line
                String word = words[RandomUtils.nextInt(0, words.length)];
                System.out.println(word);
                out.write(word + "\n");
                out.flush();
                TimeUnit.MILLISECONDS.sleep(1000);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                socket.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
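
To test end to end: start NcServer first (it listens on port 8080), then launch StreamWordCount with --host localhost --port 8080. As words arrive once per second, the running counts are printed, e.g. (flink,1), (hive,1), (flink,2), and so on. On Linux or macOS, nc -lk 8080 plus manual typing works just as well.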

Origin: blog.csdn.net/dwjf321/article/details/109067904