Learning Flink from 0 to 1, Chapter 2: Quick Start

2.1 Setting up the Maven project flink-scala

2.1.1 The pom file

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>big-data-cloud</artifactId>
        <groupId>org.big.data</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>flink</artifactId>
    <packaging>pom</packaging>
    <modules>
        <module>flink-java</module>
        <module>flink-scala</module>
        <module>flink-cep</module>
        <module>kkb-pro</module>
    </modules>

    <properties>
        <compiler.version>1.8</compiler.version>
        <flink.version>1.10.0</flink.version>
        <java.version>1.8</java.version>
        <hive.version>1.2.1</hive.version>
        <scala.binary.version>2.11</scala.binary.version>

        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- utility dependencies -->
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>23.0</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.68</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.4</version>
        </dependency>
        <dependency>
            <groupId>com.jayway.jsonpath</groupId>
            <artifactId>json-path</artifactId>
            <version>2.4.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>2.9.9</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!--state backend-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-sql-parser</artifactId>
            <version>${flink.version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-avro</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- flink-connector-redis -->
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_2.11</artifactId>
            <version>1.0</version>
        </dependency>


        <!-- flink-connector-elasticsearch6 -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
            <version>1.10.0</version>
        </dependency>


    </dependencies>
    <build>
        <plugins>
            <!-- This plugin compiles the Scala sources to class files -->
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.4.6</version>
                <executions>
                    <execution>
                        <!-- Bind the compile and testCompile goals to Maven's compile phases.
                             Without the compile goal, src/main/scala would not be built by Maven. -->
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
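With this pom in place, running mvn clean package inside a module produces the regular jar plus, via the assembly plugin's jar-with-dependencies descriptor, a fat jar under target/ that bundles every dependency and can be submitted to a Flink cluster as-is. One caveat: the Flink dependencies above are not scoped as provided, so they are packed into the fat jar too; for cluster deployments you would usually mark them provided to keep the artifact small.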

2.1.2 Adding the Scala framework and the scala source folder

(Screenshot from the original post: adding the Scala framework and the scala source folder in the IDE.)
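If you work in IntelliJ IDEA, the usual steps are: right-click the module, choose Add Framework Support... and tick Scala, then create a src/main/scala directory and mark it as a Sources Root so that both the IDE and the scala-maven-plugin pick it up.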

2.2 Batch WordCount

package com.flink.scala

import org.apache.flink.api.scala._

object WordCount {

  def main(args: Array[String]): Unit = {
    // Create the batch execution environment
    val env = ExecutionEnvironment.getExecutionEnvironment
    // Read the input data from a file
    val inputPath = "/words.txt"
    val inputDS: DataSet[String] = env.readTextFile(inputPath)
    // Split each line into words, group by the word, then aggregate with sum
    val wordCountDS: AggregateDataSet[(String, Int)] = inputDS
      .flatMap(_.split(","))
      .map((_, 1))
      .groupBy(0)
      .sum(1)
    // Print the result
    wordCountDS.print()
  }
}
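If there is no words.txt on disk yet, the same pipeline can be verified with in-memory input via fromElements; a minimal sketch (the object name and the sample lines are illustrative):

import org.apache.flink.api.scala._

object WordCountInline {

  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    // Two comma-separated lines, the same format words.txt is expected to use
    val input: DataSet[String] = env.fromElements("hello,flink", "hello,scala")
    input.flatMap(_.split(","))
      .map((_, 1))
      .groupBy(0) // group by the word (tuple field 0)
      .sum(1)     // sum the counts (tuple field 1)
      .print()    // prints (hello,2), (flink,1), (scala,1) in some order
  }
}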

Note: Flink programs can be written in either Java or Scala; this series uses Scala. When adding imports, many classes exist in both a java and a scala package, so be careful to pick the scala variant.
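For example, when the IDE suggests auto-imports, the difference looks like this:

// Correct for the Scala examples in this chapter: brings in the Scala
// DataSet API plus the implicit TypeInformation that map/flatMap need.
import org.apache.flink.api.scala._

// Easy to pick by accident, but this is the Java API and lacks those implicits:
// import org.apache.flink.api.java.ExecutionEnvironment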

2.3 Streaming WordCount

package com.flink.scala

import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._

object StreamWordCount {

  def main(args: Array[String]): Unit = {
    // Read host and port from the command-line arguments
    val params: ParameterTool = ParameterTool.fromArgs(args)
    val host: String = params.get("host")
    val port: Int = params.getInt("port")
    // Create the streaming execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Consume the socket text stream
    val textDstream: DataStream[String] = env.socketTextStream(host, port)
    // flatMap and map rely on the implicit conversions imported above
    val dataStream: DataStream[(String, Int)] = textDstream
      .flatMap(_.split("\\s"))
      .filter(_.nonEmpty)
      .map((_, 1))
      .keyBy(0)
      .sum(1)
    dataStream.print().setParallelism(1)
    // Launch the job
    env.execute("Socket stream word count")
  }
}
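To run the job, a socket source must already be listening; start nc -lk 8080 (or the NcServer below, which binds port 8080) and then launch StreamWordCount with --host localhost --port 8080. Note that ParameterTool.get returns null and getInt fails when a flag is missing; both have overloads that accept a fallback value if you prefer defaults over a hard failure. A sketch, replacing the two parameter lines above (localhost:8080 is an assumption chosen to match the NcServer):

// Fall back to defaults when --host/--port are not supplied
// (localhost:8080 here is an assumption, matching the NcServer below).
val host: String = params.get("host", "localhost")
val port: Int = params.getInt("port", 8080)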

Sending test data (a small socket server that plays the role of nc):


import org.apache.commons.lang3.RandomUtils;

import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class NcServer implements Runnable {

    String[] words = {"java", "python", "spark", "flink", "hive", "hadoop", "hbase"};

    private Socket socket;

    public NcServer(Socket socket) {
        this.socket = socket;
    }

    public static void main(String[] args) throws Exception {
        ServerSocket serverSocket = new ServerSocket(8080);
        ExecutorService es = Executors.newSingleThreadExecutor();
        while (true) {
            Socket socket = serverSocket.accept();
            System.out.println("Accepted a connection");
            es.execute(new NcServer(socket));
        }
    }

    @Override
    public void run() {
        try {
            PrintWriter out = new PrintWriter(socket.getOutputStream(), true);
            while (true) {
                // Pick a random word; nextInt's upper bound is exclusive
                String word = words[RandomUtils.nextInt(0, words.length)];
                System.out.println(word);
                out.write(word + "\n");
                out.flush();
                // Emit one word per second
                TimeUnit.MILLISECONDS.sleep(1000);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                socket.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
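To test end to end: run NcServer first (it listens on port 8080 and writes one random word per second to each connected client), then start StreamWordCount with --host localhost --port 8080 and watch the counts grow. Since the server uses Executors.newSingleThreadExecutor, only one client is actually served at a time; further connections are accepted, but their writer tasks wait in the executor queue.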

Source: blog.csdn.net/dwjf321/article/details/109067904