As before, we first need to add the dependency to pom.xml:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-planner_2.12</artifactId>
    <version>1.10.1</version>
</dependency>
Read data from file
Create a new TableApiFromFile object:
/**
 *
 * @author mafei
 * @date 2020/11/22
 *
 * Register the contents of a txt file as a table and print it in the expected format
 */
package com.mafei.apitest.tabletest
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{DataTypes, Table}
import org.apache.flink.table.api.scala._
import org.apache.flink.table.descriptors.{Csv, FileSystem, OldCsv, Schema}
object TableApiFromFile {
  def main(args: Array[String]): Unit = {
    // 1. Create the environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val tableEnv = StreamTableEnvironment.create(env)

    // 2. Read the file
    val filePath = "/opt/java2020_study/maven/flink1/src/main/resources/sensor.txt"
    tableEnv.connect(new FileSystem().path(filePath))
      .withFormat(new Csv()) // the txt file is comma-separated, just like CSV
      .withSchema(new Schema() // the schema must match the contents of the txt file
        .field("id", DataTypes.STRING())
        .field("timestamp", DataTypes.BIGINT())
        .field("temperature", DataTypes.DOUBLE())
      ).createTemporaryTable("inputTable")

    val inputTable: Table = tableEnv.from("inputTable") // the table name registered above
    inputTable.toAppendStream[(String, Long, Double)].print()
    tableEnv.execute("table api test from file")
  }
}
Note: the OldCsv format used previously has been deprecated. To use the new Csv format instead, add a separate dependency:
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-csv</artifactId>
    <version>1.10.1</version>
</dependency>
In the code, the only change needed is to replace OldCsv with Csv:
.withFormat(new Csv())
Code structure and operation effect:
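For reference, sensor.txt should contain comma-separated lines matching the schema declared above, e.g. (hypothetical values):

sensor1,1547718199,35.8
sensor2,1547718201,15.4
sensor3,1547718202,6.7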
Read data from Kafka
For Kafka basics and installation, see: https://blog.51cto.com/mapengfei/1926065
/**
*
* @author mafei
* @date 2020/11/22
*/
package com.mafei.apitest.tabletest
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{DataTypes, Table}
import org.apache.flink.table.api.scala._
import org.apache.flink.table.descriptors.{Csv, Kafka, Schema}
object TableApiFromKafka {
  def main(args: Array[String]): Unit = {
    // 1. Create the environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val tableEnv = StreamTableEnvironment.create(env)

    // 2. Read data from Kafka
    tableEnv.connect(
      new Kafka()
        .version("0.11")
        .topic("sensor1")
        .startFromLatest()
        .property("zookeeper.connect", "localhost:2181")
        .property("bootstrap.servers", "localhost:9092")
    ).withFormat(new Csv())
      .withSchema(new Schema() // the schema must match the messages in Kafka
        .field("id", DataTypes.STRING())
        .field("timestamp", DataTypes.BIGINT())
        .field("temperature", DataTypes.DOUBLE())
      )
      /**
       * The same applies to JSON: it needs the separate flink-json dependency
       * <dependency>
       *   <groupId>org.apache.flink</groupId>
       *   <artifactId>flink-json</artifactId>
       *   <version>1.10.1</version>
       * </dependency>
       */
      .createTemporaryTable("kafkaInputTable")

    val inputTable: Table = tableEnv.from("kafkaInputTable")
    inputTable.toAppendStream[(String, Long, Double)].print()
    env.execute("table api test from kafka")
  }
}
Start the Kafka service, open a console producer, and write a few records into it (note: they must match the expected format):
/opt/kafka_2.11-0.10.2.0/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic sensor1
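For example, lines like these (hypothetical values) match the id/timestamp/temperature schema; the running job should then print each record as a tuple such as (sensor1,1547718199,35.8):

sensor1,1547718199,35.8
sensor2,1547718201,15.4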
Various conversion operations
Goal: read data from the file and use the Table API and SQL, respectively, to filter it and convert its format.
/**
*
* @author mafei
* @date 2020/11/22
*/
package com.mafei.apitest.tabletest
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{DataTypes, Table}
import org.apache.flink.table.api.scala._
import org.apache.flink.table.descriptors.{Csv, FileSystem, Schema}
object TableApiTransform {
  def main(args: Array[String]): Unit = {
    // 1. Create the environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val tableEnv = StreamTableEnvironment.create(env)

    // 2. Read the file
    val filePath = "/opt/java2020_study/maven/flink1/src/main/resources/sensor.txt"
    tableEnv.connect(new FileSystem().path(filePath))
      .withFormat(new Csv()) // the txt file is comma-separated, just like CSV
      .withSchema(new Schema() // the schema must match the contents of the txt file
        .field("id", DataTypes.STRING())
        .field("timestamp", DataTypes.BIGINT())
        .field("temperature", DataTypes.DOUBLE())
      ).createTemporaryTable("inputTable")

    val inputTable: Table = tableEnv.from("inputTable") // the table name registered above

    // 3. Query and transform
    // Option 1: the Table API
    val sensorTable = tableEnv.from("inputTable")
    val resultTable = sensorTable
      // Scala expression syntax
      .select('id, 'temperature)
      .filter('id === "sensor1")
      // the string-based syntax also works:
      // .select("id,temperature")
      // .filter("id = 'sensor1'")

    // Option 2: SQL
    val resultSqlTable = tableEnv.sqlQuery(
      """
        |select id, temperature
        |from inputTable
        |where id = 'sensor1'
        |""".stripMargin
    )

    resultTable.toAppendStream[(String, Double)].print("table result:")
    resultSqlTable.toAppendStream[(String, Double)].print("sql result:")
    inputTable.toAppendStream[(String, Long, Double)].print("raw input, unprocessed:")
    tableEnv.execute("table api test from file")
  }
}
Code structure and operation effect
Convert DataStream to table
A DataStream can be converted directly into a Table, making it easy to apply Table API transformations:
val dataStream: DataStream[SensorReadingTest5] = ....
val sensorTable: Table = tableEnv.fromDataStream(dataStream)
By default, the schema of the resulting Table corresponds one-to-one to the field definitions in the DataStream; the fields can also be specified explicitly:
val dataStream: DataStream[SensorReadingTest5] = ...
val sensorTable = tableEnv.fromDataStream(dataStream,'id,'timestamp,'temperature)
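Putting the snippets above together, here is a minimal runnable sketch of the conversion. The SensorReadingTest5 case class is defined elsewhere in the original project; the field layout below is an assumption for illustration:

package com.mafei.apitest.tabletest

import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.scala._

// assumed field layout, matching the sensor data used throughout this post
case class SensorReadingTest5(id: String, timestamp: Long, temperature: Double)

object DataStreamToTableSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val tableEnv = StreamTableEnvironment.create(env)

    // a small in-memory stream stands in for a real source (hypothetical values)
    val dataStream: DataStream[SensorReadingTest5] = env.fromElements(
      SensorReadingTest5("sensor1", 1547718199L, 35.8),
      SensorReadingTest5("sensor2", 1547718201L, 15.4)
    )

    // default conversion: table fields correspond one-to-one to the case class fields
    val sensorTable: Table = tableEnv.fromDataStream(dataStream)
    sensorTable.toAppendStream[(String, Long, Double)].print()

    env.execute("datastream to table sketch")
  }
}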
Correspondence between data type and Schema
The data types in a DataStream can be mapped to the table schema in two ways: by field name or by field position.
Name-based:
val sensorTable = tableEnv.fromDataStream(dataStream,'timestamp as 'ts, 'id as 'myId, 'temperature)
Position-based:
val sensorTable = tableEnv.fromDataStream(dataStream,'myId,'ts)
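Continuing inside the main method of the sketch above, here are the two mappings side by side; the tuple stream is introduced only to illustrate position-based renaming, which applies to types without meaningful field names:

// name-based: case class fields are selected and renamed by name,
// so the listing order need not match the class definition
val byName: Table = tableEnv.fromDataStream(dataStream, 'timestamp as 'ts, 'id as 'myId, 'temperature)

// position-based: the new names are assigned in order of position
val tupleStream: DataStream[(String, Long)] = dataStream.map(r => (r.id, r.timestamp))
val byPosition: Table = tableEnv.fromDataStream(tupleStream, 'myId, 'ts)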
Create a temporary view
tableEnv.createTemporaryView("sensorView", dataStream)
tableEnv.createTemporaryView("sensorView",dataStream,'id, 'temperature, 'timestamp as 'ts)
Create a temporary view from a Table:
tableEnv.createTemporaryView("sensorView", sensorTable)
Output table
A table is output by writing its data to a TableSink.
TableSink is a generic interface that supports different file formats, storage systems, and message queues.
The most direct way to output a table is to write it into a registered TableSink via the Table.insertInto() method:
tableEnv.connect(....).createTemporaryTable("outputTable")
val resultSqlTable: Table = ....
resultTable.insertInto("outputTable")
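To make the elided steps concrete, here is a sketch that wires a file-based sink in the same descriptor style used for the sources above. The output path is hypothetical, and CSV support for the legacy filesystem connector varies by Flink version, so treat this as a sketch rather than a definitive setup:

package com.mafei.apitest.tabletest

import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.{DataTypes, Table}
import org.apache.flink.table.api.scala._
import org.apache.flink.table.descriptors.{Csv, FileSystem, Schema}

object TableSinkSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val tableEnv = StreamTableEnvironment.create(env)

    // source table, registered exactly as in the file example above
    tableEnv.connect(new FileSystem().path("/opt/java2020_study/maven/flink1/src/main/resources/sensor.txt"))
      .withFormat(new Csv())
      .withSchema(new Schema()
        .field("id", DataTypes.STRING())
        .field("timestamp", DataTypes.BIGINT())
        .field("temperature", DataTypes.DOUBLE()))
      .createTemporaryTable("inputTable")

    // sink table: same descriptor API, pointing at an output file (hypothetical path)
    tableEnv.connect(new FileSystem().path("/tmp/sensor_out.txt"))
      .withFormat(new Csv())
      .withSchema(new Schema()
        .field("id", DataTypes.STRING())
        .field("temperature", DataTypes.DOUBLE()))
      .createTemporaryTable("outputTable")

    // select a subset of fields and write them into the registered sink
    val resultTable: Table = tableEnv.from("inputTable").select('id, 'temperature)
    resultTable.insertInto("outputTable")

    tableEnv.execute("table sink sketch")
  }
}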