Running Flink on Windows to read a Hive catalog: hands-on code

1. Import the dependencies, mainly Hive and Hadoop.

<!-- Hive dependencies -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-connector-hive_2.11</artifactId>
    <version>1.10.0</version>
    <!--<scope>provided</scope>-->
</dependency>

<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>1.1.0</version>
    <!--<scope>provided</scope>-->
</dependency>

<!-- Hadoop dependencies -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0-cdh5.16.1</version>
</dependency>

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.0-cdh5.16.1</version>
</dependency>

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.0-cdh5.16.1</version>
</dependency>

2. On Windows it is best to have the Hadoop environment variables set up (HADOOP_HOME pointing at a local Hadoop distribution); there are plenty of guides online if you have not done this before. Alternatively, see the sketch below.
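If you would rather not touch system-wide environment variables, the same thing can be done in code. A minimal sketch, assuming Hadoop is unpacked at C:\hadoop with winutils.exe in C:\hadoop\bin (the path is an assumption, adjust it to your machine); put this at the very top of main, before anything touches Hadoop:

// Hadoop's Shell utilities check the hadoop.home.dir system property
// before falling back to the HADOOP_HOME environment variable.
System.setProperty("hadoop.home.dir", "C:\\hadoop") // hypothetical path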

3. Prepare a hive-site.xml configuration file. Refer to the official docs; only a few settings really matter.

1) Be sure to check your own Hive version: the hive-exec dependency and the version string passed to HiveCatalog later (1.1.0 here) must match it. The support matrix is in the official docs:

https://ci.apache.org/projects/flink/flink-docs-release-1.10/dev/table/hive/

2) Configuration file template

<configuration>
   <property>
      <name>javax.jdo.option.ConnectionURL</name>
      <value>jdbc:mysql://localhost/metastore?createDatabaseIfNotExist=true</value>
      <description>metadata is stored in a MySQL server</description>
   </property>

   <property>
      <name>javax.jdo.option.ConnectionDriverName</name>
      <value>com.mysql.jdbc.Driver</value>
      <description>MySQL JDBC driver class</description>
   </property>

   <property>
      <name>javax.jdo.option.ConnectionUserName</name>
      <value>...</value>
      <description>user name for connecting to mysql server</description>
   </property>

   <property>
      <name>javax.jdo.option.ConnectionPassword</name>
      <value>...</value>
      <description>password for connecting to mysql server</description>
   </property>

   <property>
       <name>hive.metastore.uris</name>
       <value>thrift://localhost:9083</value>
       <description>IP address (or fully-qualified domain name) and port of the metastore host</description>
   </property>

   <property>
       <name>hive.metastore.schema.verification</name>
       <value>true</value>
   </property>

</configuration>
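Before running the job, make sure the metastore service referenced by hive.metastore.uris is actually running. A quick reachability check, just a sketch assuming the thrift://localhost:9083 value from the template above:

import java.net.Socket

object MetastoreCheck {
  def main(args: Array[String]): Unit = {
    // host and port taken from hive.metastore.uris; adjust if yours differ
    val socket = new Socket("localhost", 9083)
    println("metastore port is reachable")
    socket.close()
  }
}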

4. Finally, the code. A simple read example:

package flink_sql


import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.scala.StreamTableEnvironment
import org.apache.flink.table.catalog.hive.HiveCatalog
import org.apache.flink.table.api.Table

object Sql_client_scala {
  def main(args: Array[String]): Unit = {
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setParallelism(5)
    streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val tableEnvSettings = EnvironmentSettings.newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()
    val tableEnv = StreamTableEnvironment.create(streamEnv, tableEnvSettings)

    val catalog = new HiveCatalog(
      "flink", // catalog name
      "default", // default database
      "G:\\Flink SQL开发文件", // Hive config (hive-site.xml) directory
      "1.1.0" // Hive version
    )

    // register the catalog
    tableEnv.registerCatalog("flink", catalog)

    // switch to it, so unqualified table names resolve against Hive
    // (otherwise they resolve against the default in-memory catalog)
    tableEnv.useCatalog("flink")

//    val createDbSql1 = "CREATE DATABASE IF NOT EXISTS flink"
//    tableEnv.sqlUpdate(createDbSql1)

    // test: create a table
//    val createtable = "CREATE TABLE flink_test (id STRING, product STRING, amount INT)"
//    tableEnv.sqlUpdate(createtable)

    // which catalogs exist
    val catalogs: Array[String] = tableEnv.listCatalogs()
    println(catalogs.toList)
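    // and which databases exist in the current catalog (listDatabases() is
    // part of the same TableEnvironment API; added here for illustration)
    println(tableEnv.listDatabases().toList)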

    // which tables exist in the current database
    val tables = tableEnv.listTables()
    println(tables.toList)

    // TODO: build a table from Kafka data (see the commented-out DDL at the end)

    // query an existing Hive table
    val querySql = "SELECT code, total_emp FROM sample_07"
    val rTable = tableEnv.sqlQuery(querySql)
    rTable.printSchema() // printSchema() prints directly; wrapping it in println would just print "()"

    // implicit TypeInformation needed for the tuple conversion below
    import org.apache.flink.api.scala._

    // convert the result table to an append stream and print it
    val result: DataStream[(String, Int)] = tableEnv.toAppendStream[(String, Int)](rTable)
    result.print()
    streamEnv.execute()
    // Sketch of a Kafka-backed table DDL (left commented out; the table name
    // after "rtdw.ods." is incomplete in the original):
    //    val sql = "CREATE TABLE rtdw.ods. (name String, age Int) WITH (\n" +
    //      "   'connector.type' = 'kafka',\n" +
    //      "   'connector.version' = 'universal',\n" +
    //      "   'connector.topic' = 'test1',\n" +
    //      "   'connector.properties.zookeeper.connect' = 'dev-ct6-dc-worker01:2181,dev-ct6-dc-worker02:2181,dev-ct6-dc-worker03:2181',\n" +
    //      "   'connector.properties.bootstrap.servers' = 'dev-ct6-dc-worker01:9092,dev-ct6-dc-worker02:9092,dev-ct6-dc-worker03:9092',\n" +
    //      "   'format.type' = 'json',\n" +
    //      "   'update-mode' = 'append'\n" +
    //      ");"
    //    tableEnv.sqlUpdate(sql)
  }

}
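Note that sample_07 above is resolved against the current catalog and database set by useCatalog. The fully-qualified catalog.database.table form works too; a sketch (flink is the catalog name registered above, default its database; backticks because default is a reserved word in Flink SQL):

val qualified: Table = tableEnv.sqlQuery(
  "SELECT code, total_emp FROM `flink`.`default`.`sample_07`")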


Reposted from blog.csdn.net/qq_31866793/article/details/106398363