Spark SQL: reading data from HBase and saving it to Hive

Create a new SparkReadHbase object:

package sparksql
import java.util.Properties
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.SparkSession

import scala.collection.mutable.ArrayBuffer
/**
  * Reads the configured HBase tables and writes each one into Hive via Spark SQL.
  *
  *  yang
  *  2019/3/27 10:37
  *  1.0
  */
object SparkReadHbase {

  def main(args: Array[String]): Unit = {
    val properties = new Properties()
    properties.load(Thread.currentThread().getContextClassLoader.getResourceAsStream("hbase.properties"))
    val ta= properties.getProperty("t1")
    val TABLES: Array[String]= ta.split(",")
    TABLES.foreach(s => print(s + ' '))

    val spark = SparkSession
      .builder()
      .master("local[2]")
      .appName("Spark Read Hbase")
      .enableHiveSupport()    // required when reading or writing Hive tables
      .getOrCreate()
    // Reuse the session's SparkContext instead of building a second
    // SparkConf/SparkContext/SQLContext, which would shadow `spark` and not compile.
    val sc = spark.sparkContext
    import spark.implicits._

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum",properties.getProperty("zookeeper.quorum"))
    conf.set("hbase.zookeeper.property.clientPort", properties.getProperty("zookeeper.property.clientPort"))
    TABLES.foreach(s => {
      conf.set(TableInputFormat.INPUT_TABLE, s)
      // Each table name is also a property key whose value lists its column qualifiers
      val columnList = properties.getProperty(s)
      val columns: Array[String] = columnList.split(",")
      columns.foreach(c => print(c + " (HBase column) "))

      val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
      // Map the data to a table, i.e. convert the RDD into a DataFrame with a schema.
      // The column family is hardcoded to "f", and exactly two qualifiers are expected.
      val shop = hbaseRDD.map(r => {
        val arr = new ArrayBuffer[String]()
        columns.foreach(c => {
          val fieldVal = Bytes.toString(r._2.getValue(Bytes.toBytes("f"), Bytes.toBytes(c)))
          arr.append(fieldVal)
        })
        (Bytes.toString(r._2.getRow), arr(0), arr(1))
      }).toDF("row_key", columns(0), columns(1))
      shop.createOrReplaceTempView("shop")
      // Look up this table's insert statement (e.g. CDRsql) and complete it;
      // the leading space before "from" keeps the generated SQL valid.
      val sql = s + "sql"
      spark.sql(properties.getProperty(sql) + " from shop")
      spark.sql("select * from shop").show()
      print(s + ' ')
    })
    spark.stop()    // also stops the underlying SparkContext
  }
}
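
The insert overwrite statements above assume the target Hive tables cdr and cdr2 already exist. A minimal one-time setup sketch, run through the same Hive-enabled session, could look like this (the STRING column types are an assumption; the original post does not show the Hive DDL):

// Hypothetical one-time setup: create the Hive tables that CDRsql/CDR2sql
// write into. Column names follow the properties file; STRING types are assumed.
spark.sql("create table if not exists cdr  (row_key string, customer_id string, create_id string)")
spark.sql("create table if not exists cdr2 (row_key string, customer_id string, create_id string)")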
The hbase.properties file:
# HBase ZooKeeper connection settings
zookeeper.quorum=cdh-node02,cdh-node03,cdh-node04
zookeeper.property.clientPort=2181

# Comma-separated list of HBase tables to import
t1=CDR,CDR2
# Column qualifiers to read from each table (column family "f")
CDR=customer_id,create_id
CDR2=customer_id,create_id

# Hive insert statement for each table; " from shop" is appended at runtime
CDRsql=insert overwrite table cdr select *
CDR2sql=insert overwrite table cdr2 select *
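
Note that the mapper in SparkReadHbase packs each row into a fixed three-element tuple, so every table's column list here must contain exactly two qualifiers. If a table ever needs more, a sketch along these lines (an assumption, not part of the original code; it reuses the columns array and hbaseRDD from the loop body) builds the DataFrame schema dynamically:

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// Build the schema from the configured column list: row_key plus one
// string field per qualifier, so any number of columns works.
val schema = StructType(
  StructField("row_key", StringType) +:
    columns.map(c => StructField(c, StringType))
)
val rowRDD = hbaseRDD.map { r =>
  val values = columns.map(c =>
    Bytes.toString(r._2.getValue(Bytes.toBytes("f"), Bytes.toBytes(c))))
  Row.fromSeq(Bytes.toString(r._2.getRow) +: values)
}
val shop = spark.createDataFrame(rowRDD, schema)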

The pom.xml file:

<properties>
    <hbase.version>1.2.6</hbase.version>
    <spark.version>2.0.0</spark.version>
    <hadoop.version>2.7.7</hadoop.version>
    <hive.version>1.2.2</hive.version>
    <scala.version>2.10.6</scala.version>
    <scala.binary.version>2.10</scala.binary.version>
</properties>
<dependencies>
    <dependency>
         <groupId>org.springframework.boot</groupId>
         <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.50</version>
    </dependency>

    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>6.9.9</version>
        <scope>test</scope>
    </dependency>

    
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.codehaus.janino</groupId>
                <artifactId>commons-compiler</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-core</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-annotations</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.janino</groupId>
        <artifactId>commons-compiler</artifactId>
        <version>2.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.janino</groupId>
        <artifactId>janino</artifactId>
        <version>2.6.1</version>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.27</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!--hbase依赖-->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
    </dependency>
</dependencies>
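
With these dependencies in place, a quick way to check the result after a run is to query Hive from any Hive-enabled session (a verification sketch; the cdr table name comes from the properties file above):

import org.apache.spark.sql.SparkSession

// Verification sketch: count the rows the job wrote into the Hive table.
val spark = SparkSession.builder().enableHiveSupport().getOrCreate()
spark.sql("select count(*) from cdr").show()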