有不懂及不对的地方请大家提出,共同探讨与进步
1.搭建maven工程
这里就不多说了
2.导入依赖
<dependencies>
<!-- Flink Scala API for batch (DataSet) programs; the "_2.11" suffix is the Scala binary version the artifact was built for. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
<!-- Flink Scala API for streaming (DataStream) programs; keep its version and Scala suffix identical to flink-scala above. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.7.2</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiles the Scala sources into class files as part of the Maven build. -->
<plugin>
  <groupId>net.alchim31.maven</groupId>
  <artifactId>scala-maven-plugin</artifactId>
  <version>3.4.6</version>
  <executions>
    <execution>
      <!-- Both goals are required: `compile` builds the main Scala sources and
           `testCompile` builds the test sources. Listing only `testCompile`
           leaves the main Scala classes uncompiled. -->
      <goals>
        <goal>compile</goal>
        <goal>testCompile</goal>
      </goals>
    </execution>
  </executions>
</plugin>
<!-- Packages the project together with its dependencies into one archive using the predefined "jar-with-dependencies" descriptor. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<!-- Run the `single` goal automatically during the `package` phase (i.e. on `mvn package`). -->
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
3.添加scala框架和文件夹
将新建的 scala 文件夹标记为源码根目录(Sources Root)
4.批处理wordcount
package com.flink.study
import org.apache.flink.api.scala._
import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment}
/**
 * Batch example: reads a comma-separated text file, extracts the 1st and 4th
 * field of every line, prints the resulting pairs and writes them to a CSV file.
 *
 * Optional program arguments:
 *   - `args(0)`: input file path  (defaults to [[WordCount.DefaultInputPath]])
 *   - `args(1)`: output file path (defaults to [[WordCount.DefaultOutputPath]])
 *
 * @author star
 * @since 2020/7/3 12:12
 * @version 1.0
 */
object WordCount {

  /** Input file used when no first argument is supplied. */
  private val DefaultInputPath = "C:\\Users\\DELL\\Desktop\\1.txt"

  /** Output file used when no second argument is supplied. */
  private val DefaultOutputPath = "C:\\Users\\DELL\\Desktop\\1.csv"

  def main(args: Array[String]): Unit = {
    // 1. Create the Flink batch execution environment.
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // 2. Read the input file line by line.
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val outputPath = args.drop(1).headOption.getOrElse(DefaultOutputPath)
    val data: DataSet[String] = env.readTextFile(inputPath)

    // 3. Split each line on commas and keep fields 0 and 3.
    //    Lines with fewer than four fields (e.g. blank lines) are skipped;
    //    indexing them directly would fail the whole job with an
    //    ArrayIndexOutOfBoundsException.
    val value: DataSet[(String, String)] = data
      .map(_.split(","))
      .filter(_.length > 3)
      .map(fields => (fields(0), fields(3)))

    // print() is an eager action: it runs the job immediately.
    value.print()

    // writeAsCsv() only registers a lazy sink. Because it is declared after
    // print() has already run, it needs its own explicit execute() call,
    // otherwise the CSV file is never written.
    value.writeAsCsv(outputPath)
    env.execute("WordCount")
  }
}
5.流处理wordcount
在这里我使用的是网络测试工具 netcat 来生成数据(监听 8888 端口)
package com.flink.study
import org.apache.flink.streaming.api.scala._
/**
 * Streaming example: reads text lines from a socket, splits them into words
 * and continuously prints the running count of every word.
 *
 * @author star
 * @since 2020/7/3 13:37
 * @version 1.0
 */
object StreamWordCount {
  def main(args: Array[String]): Unit = {
    // 1. Create the Flink streaming execution environment.
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    // 2. Read the live text stream from the socket.
    val data: DataStream[String] = streamEnv.socketTextStream("Localhost", 8888)

    // 3. Tokenize and count.
    val dataCount: DataStream[(String, Int)] = data
      // Split on runs of whitespace so repeated spaces or tabs do not
      // produce spurious tokens.
      .flatMap(_.split("\\s+"))
      // A line with leading whitespace still yields one empty first token;
      // drop it so the empty string is not counted as a word.
      .filter(_.nonEmpty)
      .map((_, 1))
      // Key by the word itself using a type-safe selector instead of a
      // positional tuple index.
      .keyBy(_._1)
      .sum(1)
    dataCount.print()

    // 4. Start the job; nothing runs until execute() is called.
    streamEnv.execute("StreamWordCount")
  }
}