flink1.13 table api 查询hive数据,及腾讯云cos存储

前言

在调试 flink table api 查询 hive 数据的时候,真的是遇到很多坑,特别是 hive 存储的数据在腾讯云的 cos 上,而且我是跨集群查询数据,要解决各种依赖和环境问题。下面的代码和 pom.xml 已经调试成功,在本地和集群 on yarn 都可以运行:本地运行时需要在 idea 里面加程序参数(args)为 dev,集群 on yarn 则不用加。

代码

package com.bigdata.etl

import org.apache.flink.table.api.{EnvironmentSettings, TableEnvironment}
import org.apache.flink.table.catalog.hive.HiveCatalog
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment

object FlinkTableTest extends App {

  // Running from a local IDE: impersonate the hadoop user so HDFS/Hive access works.
  System.setProperty("HADOOP_USER_NAME", "hadoop")

  val settings = EnvironmentSettings.newInstance().useBlinkPlanner().build()
  println(settings.isStreamingMode)
  val stenv = TableEnvironment.create(settings)

  // First inspect the default in-memory catalog to confirm the environment is alive.
  stenv.executeSql("show catalogs").print()
  stenv.useCatalog("default_catalog")
  stenv.executeSql("show databases").print()
  stenv.executeSql("select 1").print()
  println("-----fengexian--------------")

  val name = "hive"
  val defaultDatabase = "odl"
  var hiveConfDir = ""
  var hadoopConf = ""
  val hiveVersion = "2.3.6" // fixed typo: was `hiveVision`

  // Any program argument (e.g. "dev" in the IDE run configuration) selects the
  // local conf paths; on the cluster (no args) the cluster-side hive conf is used
  // and the Hadoop conf dir is left empty so it is resolved from the environment.
  if (args.nonEmpty) {
    hiveConfDir = "/Users/duzhixin/Documents/flink-hive-conf"
    hadoopConf = "/Users/duzhixin/Documents/flink-hive-conf"
  } else {
    hiveConfDir = "/usr/local/service/hive/conf"
  }

  // Local IDE runs must point at explicit hive and hadoop config files;
  // on the cluster the hadoop config does not need to be specified.
  val hive = new HiveCatalog(name, defaultDatabase, hiveConfDir, hadoopConf, hiveVersion)

  // NOTE(fix): the original keys carried a trailing space ("streaming-source.enable "),
  // which Flink treats as a different, unknown key — the settings were silently ignored.
  hive.getHiveConf.set("streaming-source.enable", "true")
  stenv.getConfig.getConfiguration.setString("streaming-source.enable", "true")
  stenv.getConfig.getConfiguration.setString("table.exec.hive.infer-source-parallelism.max", "10000")
  stenv.getConfig.getConfiguration.setString("table.exec.hive.infer-source-parallelism", "true")

  stenv.registerCatalog("hive", hive)

  // Set the HiveCatalog as the current catalog of the session.
  stenv.useCatalog("hive")

  // Query Hive metadata through the registered catalog.
  stenv.executeSql("show databases").print()

  stenv.executeSql("show tables").print()

  stenv.executeSql("select 1 from test.app limit 1").print()

  stenv.executeSql("select * from odl.tb_book where dt='2021-06-05' limit 10").print()

  stenv.executeSql("select *   from odl.dadian where dt='2021-06-05' limit 10").print()

}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <repositories>

        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>

        <repository>
            <id>spring-plugin</id>
            <url>https://repo.spring.io/plugins-release/</url>
        </repository>


    </repositories>
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.jiashu</groupId>
    <artifactId>flink-dw</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <spark.version>2.4.3</spark.version>
        <scala.version>2.12</scala.version>

        <jedis.version>2.8.2</jedis.version>
        <fastjson.version>1.2.14</fastjson.version>
        <jetty.version>9.2.5.v20141112</jetty.version>
        <container.version>2.17</container.version>
        <java.version>1.8</java.version>
        <hbase.version>1.2.0</hbase.version>
        <hive.version>2.3.6</hive.version>
        <flink.version>1.13.0</flink.version>
    </properties>
    <build>

        <sourceDirectory>src/main/scala</sourceDirectory>

        <finalName>flink-dw</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.12</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>


            <!--manven打包插件-->
            <!-- 用于打可执行jar包 -->
            <!-- 打包jar文件时,配置manifest文件,加入lib包的jar依赖 -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <classesDirectory>target/classes/</classesDirectory>
                    <archive>

                        <manifestEntries>
                            <Class-Path>.</Class-Path>
                        </manifestEntries>
                    </archive>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <type>jar</type>
                            <includeTypes>jar</includeTypes>
                            <!--<useUniqueVersions>false</useUniqueVersions> -->
                            <outputDirectory>
                                ${project.build.directory}/lib
                            </outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <dependencies>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.version}</artifactId>
            <version>${spark.version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>



        <!-- https://mvnrepository.com/artifact/redis.clients/jedis -->
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>




        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.6.0</version>
        </dependency>



        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java_2.12 -->


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_${scala.version}</artifactId>
            <version>${flink.version}</version>

        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.version}</artifactId>
            <version>${flink.version}</version>

        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.version}</artifactId>
            <version>${flink.version}</version>

        </dependency>


        <dependency>
            <groupId>org.codehaus.jackson</groupId>
            <artifactId>jackson-core-asl</artifactId>
            <version>1.9.13</version>
        </dependency>


        <!-- https://mvnrepository.com/artifact/com.alibaba/fastjson -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <!-- Flink Dependency -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_2.12</artifactId>
            <!-- Use the shared property so all Flink artifacts stay on one patch
                 version; the original hard-coded 1.13.1 alongside 1.13.0, which
                 risks NoSuchMethodError/linkage errors at runtime. -->
            <version>${flink.version}</version>
        </dependency>



        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.12</artifactId>
            <!-- Align with ${flink.version} (1.13.0); mixing 1.13.1 and 1.13.0
                 Flink jars on one classpath is a common source of runtime
                 linkage errors. -->
            <version>${flink.version}</version>
        </dependency>

        <!-- Hive Dependency -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>


        <!-- https://mvnrepository.com/artifact/org.mongodb.spark/mongo-spark-connector -->



        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-auth -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>2.8.5</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.8.5</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-auth -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.8.5</version>
        </dependency>




        <!-- https://mvnrepository.com/artifact/hadoop-util/hadoop-util -->
        <dependency>
            <groupId>hadoop-util</groupId>
            <artifactId>hadoop-util</artifactId>
            <version>0.3.0</version>
        </dependency>


        <dependency>
            <groupId>com.qcloud</groupId>
            <artifactId>cos_api</artifactId>
            <version>5.6.42</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.commons/org.apache.commons.codec -->
        <!-- https://mvnrepository.com/artifact/org.apache.commons/org.apache.commons.codec -->
        <dependency>
            <groupId>com.qcloud</groupId>
            <artifactId>qcloud-java-sdk</artifactId>
            <version>2.0.1</version>
        </dependency>

        <dependency>
            <groupId>commons-codec</groupId>
            <artifactId>commons-codec</artifactId>
            <version>1.11</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-compress -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-compress</artifactId>
            <version>1.19</version>
        </dependency>



        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.11</version>
        </dependency>
        <!-- NOTE(review): the four artifacts below are Maven *plugins*, not
             library dependencies. Declaring them under <dependencies> puts
             plugin jars on the application classpath and has no effect on the
             build itself; they belong under <build><plugins> (compiler, jar,
             and dependency plugins are in fact already configured there).
             Kept here unchanged to avoid altering the published build. -->
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-resources-plugin</artifactId>
            <version>2.4</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.maven.plugins/maven-compiler-plugin -->
        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.8.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <version>2.8</version>
        </dependency>

        <dependency>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.4</version>
        </dependency>
    </dependencies>

</project>

如果还不行,就再试试把 hadoop-cos-2.8.5-5.9.22.jar 这个依赖(腾讯云 COS 的 Hadoop 适配 jar)加入到 flink 的 lib 目录或 classpath 中。

Guess you like

Origin blog.csdn.net/qq_27474277/article/details/118086670