1、测试数据文件 kv1.txt(节选,key 与 value 连在一行显示,分隔符在粘贴时可能丢失)
238val_238
86val_86
311val_311
27val_27
165val_165
409val_409
255val_255
278val_278
...
2、测试代码(Spark SQL 读写 Hive 示例)
package com.cn.sparkSql
import java.io.File
import org.apache.spark.sql.{Row, SparkSession}
object SparkSql_Hive {

  /** One row of the demo table: an integer key and its string value. */
  case class Record(key: Int, value: String)

  /**
   * Entry point: builds a local SparkSession connected to a remote Hive
   * metastore, creates a small in-memory DataFrame, registers it as a
   * temporary view and joins it against the Hive table db_hive_test.src.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    // Warehouse directory for managed tables, resolved against fs.defaultFS (HDFS).
    val warehouseLocation = "/spark-warehouse01"
    //val warehouseLocation = new File("spark-warehouse").getAbsolutePath

    val spark = SparkSession
      .builder()
      .master("local[*]")
      .config("hive.metastore.uris", "thrift://master:9083")
      .config("fs.defaultFS", "hdfs://master:9000")
      .appName("Spark Hive Example")
      .config("spark.sql.warehouse.dir", warehouseLocation)
      .enableHiveSupport()
      .getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    try {
      // Create a new table src in the db_hive_test database
      //spark.sql("CREATE TABLE IF NOT EXISTS db_hive_test.src (key INT, value STRING)")
      // Load data into src
      //spark.sql("LOAD DATA LOCAL INPATH 'kv1.txt' INTO TABLE db_hive_test.src")
      // Query the data
      //spark.sql("select * from db_hive_test.src").show(100)
      // Count total rows
      //spark.sql("select count(*) from db_hive_test.src").show()
      // More complex operations
      // import spark.implicits._
      // val df = spark.sql("select key,value from db_hive_test.src where key<10 order by key")
      // val sdf = df.map {
      //   case Row(key, value) => s"key:$key,value:$value"
      // }
      // sdf.show()
      // df.show()

      // Build a 10-row DataFrame, register it as a temp view, and join it
      // against the Hive table.
      val ndf = spark.createDataFrame((1 to 10).map(i => Record(i, s"val_$i")))
      // createOrReplaceTempView: unlike createTempView, this does not throw
      // AnalysisException when the view "record" already exists (e.g. on re-run).
      ndf.createOrReplaceTempView("record")
      // Use an explicit ON clause: JOIN ... WHERE expresses the condition as a
      // filter over an implicit cross join, which some Spark configurations
      // reject as a cartesian product.
      spark.sql("select * from record r join db_hive_test.src s on r.key = s.key").show(100)
    } finally {
      // Always release the SparkContext and its cluster resources.
      spark.stop()
    }
  }
}
注意:db_hive_test 为预先新建的 Hive database,运行前需保证其已存在。