Directorio de artículos
1.1 Crear proyecto maven flink-scala
1.1.1 archivo pom
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>big-data-cloud</artifactId>
<groupId>org.big.data</groupId>
<version>1.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>flink</artifactId>
<packaging>pom</packaging>
<modules>
<module>flink-java</module>
<module>flink-scala</module>
<module>flink-cep</module>
<module>kkb-pro</module>
</modules>
<properties>
<!-- Shared build properties; referenced below as ${...} placeholders. -->
<!-- NOTE(review): compiler.version, java.version and hive.version are not referenced in the visible part of this POM; presumably used by child modules - confirm. -->
<compiler.version>1.8</compiler.version>
<!-- Version applied to the org.apache.flink dependencies declared below. -->
<flink.version>1.10.0</flink.version>
<java.version>1.8</java.version>
<hive.version>1.2.1</hive.version>
<!-- Scala binary version, used as the artifactId suffix of Scala-dependent Flink artifacts. -->
<scala.binary.version>2.11</scala.binary.version>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!--工具包依赖-->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>23.0</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.68</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.2</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.4</version>
</dependency>
<dependency>
<groupId>com.jayway.jsonpath</groupId>
<artifactId>json-path</artifactId>
<version>2.4.0</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.9.9</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<!--state backend-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-sql-parser</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-avro</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka-0.11_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- flink-connector-redis -->
<dependency>
<!-- Redis sink connector (Apache Bahir). The Scala suffix follows the shared
     scala.binary.version property so it stays aligned with the Flink artifacts. -->
<groupId>org.apache.bahir</groupId>
<artifactId>flink-connector-redis_${scala.binary.version}</artifactId>
<version>1.0</version>
</dependency>
<!-- flink-connector-elasticsearch6 -->
<dependency>
<!-- Elasticsearch 6 connector. Uses the shared Scala/Flink version properties
     instead of hard-coded values so a version bump cannot leave it behind. -->
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- 该插件用于将Scala代码编译成class文件 -->
<plugin>
<!-- Compiles Scala sources into class files. -->
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.4.6</version>
<executions>
<execution>
<!-- Both goals are required: "compile" builds src/main/scala and
     "testCompile" builds src/test/scala. Declaring only testCompile
     leaves the main Scala sources uncompiled. -->
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
1.1.2 Agregar el soporte de Scala (framework) y la carpeta de fuentes scala
1.2 WordCount por lotes (batch)
package com.flink.scala
import org.apache.flink.api.scala._
object WordCount {

  /**
   * Batch word count: reads a text file, splits every line on commas and
   * prints how many times each token occurs.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Batch execution environment.
    val batchEnv = ExecutionEnvironment.getExecutionEnvironment

    // Source: one record per line of the input file.
    val lines: DataSet[String] = batchEnv.readTextFile("/words.txt")

    // Tokenize and pair each token with an initial count of 1.
    val pairs = lines
      .flatMap(line => line.split(","))
      .map(token => (token, 1))

    // Group by the token (tuple field 0) and sum the counts (tuple field 1).
    val counts: AggregateDataSet[(String, Int)] = pairs.groupBy(0).sum(1)

    // Print the result.
    counts.print()
  }
}
Nota: Flink admite dos lenguajes de programación, Java y Scala; en este curso se usa principalmente Scala. Al escribir los import, cuando una misma clase exista tanto en un paquete de Java como en uno de Scala, asegúrese de importar la del paquete de Scala.
1.3 WordCount con procesamiento de flujos (streaming)
package com.flink.scala
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala.{
DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
object StreamWordCount {

  /**
   * Streaming word count over a socket text stream.
   *
   * Reads the `host` and `port` options from the command line, splits the
   * incoming lines on whitespace, drops empty tokens and prints a running
   * count per word.
   *
   * @param args command-line arguments parsed by `ParameterTool.fromArgs`
   */
  def main(args: Array[String]): Unit = {
    // Read the connection settings from the command-line arguments.
    val cliArgs: ParameterTool = ParameterTool.fromArgs(args)
    val host: String = cliArgs.get("host")
    val port: Int = cliArgs.getInt("port")

    // Streaming execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    // Source: text lines received from the socket.
    val lines: DataStream[String] = streamEnv.socketTextStream(host, port)

    // flatMap/map rely on the implicits brought in by org.apache.flink.api.scala._
    val words: DataStream[String] = lines
      .flatMap(line => line.split("\\s"))
      .filter(word => word.nonEmpty)

    // Key by the word (tuple field 0) and keep a running sum of field 1.
    val counts: DataStream[(String, Int)] = words
      .map(word => (word, 1))
      .keyBy(0)
      .sum(1)

    counts.print().setParallelism(1)

    // Start the job.
    streamEnv.execute("Socket stream word count")
  }
}
Enviar datos (servidor de sockets de prueba, en Java):
import org.apache.commons.lang3.RandomUtils;
import java.io.*;
import java.net.ServerSocket;
import java.net.Socket;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Minimal netcat-style test server: for every accepted connection it writes
 * one randomly chosen word per second, each terminated by a newline.
 *
 * <p>Usage: {@code NcServer [port]} - the port defaults to 8080.
 */
public class NcServer implements Runnable{
    /** Vocabulary the server samples from. */
    String[] words = {
            "java","python","spark","flink","hive","hadoop","hbase"};

    /** Connection served by this handler. */
    private Socket socket;

    /**
     * @param socket accepted client connection; closed when {@link #run()} ends
     */
    public NcServer(Socket socket) {
        this.socket = socket;
    }

    /**
     * Accepts connections forever and hands each one to its own worker thread.
     *
     * @param args optional: {@code args[0]} is the listening port (default 8080)
     * @throws Exception if the server socket cannot be opened or accept fails
     */
    public static void main(String[] args) throws Exception{
        int port = args.length > 0 ? Integer.parseInt(args[0]) : 8080;
        // A cached pool gives every connection its own thread. With a
        // single-thread executor one long-running handler would starve all
        // later connections (e.g. a client that reconnects).
        ExecutorService es = Executors.newCachedThreadPool();
        try (ServerSocket serverSocket = new ServerSocket(port)) {
            while (true){
                Socket socket = serverSocket.accept();
                System.out.println("接受请求");
                es.execute(new NcServer(socket));
            }
        } finally {
            // Reached only when accept() fails; interrupt the handlers so they exit.
            es.shutdownNow();
        }
    }

    /**
     * Sends one random word per second until the peer goes away or the
     * thread is interrupted, then closes the socket.
     */
    @Override
    public void run() {
        try {
            PrintWriter out = new PrintWriter(socket.getOutputStream(), true);
            while (!Thread.currentThread().isInterrupted()) {
                String word = words[RandomUtils.nextInt(0,words.length)];
                System.out.println(word);
                out.write(word+"\n");
                // PrintWriter never throws IOException; checkError() flushes
                // and reports whether the stream has failed, which is the
                // only way to notice that the client disconnected.
                if (out.checkError()) {
                    break;
                }
                TimeUnit.MILLISECONDS.sleep(1000);
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt status for the executor.
            Thread.currentThread().interrupt();
        } catch (Exception e){
            e.printStackTrace();
        } finally {
            try {
                socket.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}