Connecting Spark SQL to Hive: Various Operations

1. The data file kv1.txt

Each line contains a key and a value; in the original file they are separated by Hive's default field delimiter (Ctrl-A, \u0001), which does not render visibly below:

238val_238
86val_86
311val_311
27val_27
165val_165
409val_409
255val_255
278val_278
...
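If you do not have kv1.txt at hand, a similar file can be generated locally. This is a minimal sketch, not the original data set; it assumes the key/value pairs are separated by Hive's default field delimiter \u0001:

import java.io.PrintWriter

object MakeKv1 {
  def main(args: Array[String]): Unit = {
    // Write a few key\u0001val_key lines; \u0001 is Hive's default field delimiter
    val out = new PrintWriter("kv1.txt")
    Seq(238, 86, 311, 27, 165, 409, 255, 278).foreach(k => out.println(s"$k\u0001val_$k"))
    out.close()
  }
}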

2. Test code

package com.cn.sparkSql

import java.io.File

import org.apache.spark.sql.{Row, SparkSession}


object SparkSql_Hive {
  case class Record(key: Int, value: String)
  def main(args: Array[String]): Unit = {
    val warehouseLocation = "/spark-warehouse01"
    //val warehouseLocation = new File("spark-warehouse").getAbsolutePath
    val spark = SparkSession
      .builder()
      .master("local[*]")
      .config("hive.metastore.uris","thrift://master:9083")
      .config("fs.defaultFS","hdfs://master:9000")
      .appName("Spark Hive Example")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    // Create a new table src in the db_hive_test database
    //spark.sql("CREATE TABLE IF NOT EXISTS db_hive_test.src (key INT, value STRING)")
    // Load the data into src
    //spark.sql("LOAD DATA LOCAL INPATH 'kv1.txt' INTO TABLE db_hive_test.src")
    // Query the data
    //spark.sql("select * from db_hive_test.src").show(100)
    // Count the total number of rows
    //spark.sql("select count(*) from db_hive_test.src").show()

    // More complex operations: map each Row of a query result to a string
    // (import spark.implicits._ supplies the Encoder for Dataset[String])
//    import spark.implicits._
//    val df = spark.sql("select key,value from db_hive_test.src where key<10 order by key")
//    val sdf = df.map {
//      case Row(key, value) => s"key:$key,value:$value"
//    }
//    sdf.show()
//    df.show()

    // Create a temporary view from a DataFrame and join it with the Hive table
    val ndf = spark.createDataFrame((1 to 10).map(i=>Record(i,s"val_$i")))
    ndf.createTempView("record")
    spark.sql("select * from record r join db_hive_test.src s where r.key=s.key").show(100)

  }
}
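Building on the join above, a query result can also be written back to Hive as a new table. A minimal sketch, assuming the same SparkSession as in the code above; the target table name db_hive_test.src_copy is hypothetical:

import org.apache.spark.sql.SaveMode

// Persist a query result back into Hive as a managed table
val small = spark.sql("select key, value from db_hive_test.src where key < 100")
small.write
  .mode(SaveMode.Overwrite)   // replace the table if it already exists
  .saveAsTable("db_hive_test.src_copy")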

Note: db_hive_test is a database created beforehand.
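If the database does not exist yet, it can be created from the same SparkSession before running the code above. A minimal sketch:

// Create the database once; the tables above then live under db_hive_test
spark.sql("CREATE DATABASE IF NOT EXISTS db_hive_test")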

Reposted from blog.csdn.net/hyj_king/article/details/104946369