Spark 向 HBase 写数据（Writing data from Spark to HBase）
package spark.hbase
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD
import org.apache.spark.{
SparkConf, SparkContext}
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat
object _02SparkWriteHBase {

  /**
   * Word-counts a text file with Spark Core, then writes each (word, count)
   * pair into the HBase table `htb_wordcount` (column family `info`,
   * qualifier `count`) through the new-API `TableOutputFormat`.
   *
   * @param args optional; args(0) is the input text file path.
   */
  def main(args: Array[String]): Unit = {
    val sc: SparkContext = new SparkContext(
      new SparkConf().setAppName("SougoRecord").setMaster("local[2]")
    )

    // FIX: the original passed an empty string to textFile, which fails at
    // runtime. Take the path from args; the fallback is a local sample file
    // (adjust to the real dataset location).
    val inputPath = args.headOption.getOrElse("datas/wordcount.data")
    val inputRDD = sc.textFile(inputPath, minPartitions = 2)

    // Classic word count: drop blank lines, split on whitespace, sum per word.
    val resultRDD: RDD[(String, Int)] = inputRDD
      .filter(line => line != null && line.trim.nonEmpty)
      .flatMap(_.trim.split("\\s+"))
      .map((_, 1))
      .reduceByKey(_ + _)
    resultRDD.foreach(println)

    // Convert each (word, count) into the (rowkey, Put) pair that
    // TableOutputFormat expects: rowkey = word bytes, value at info:count.
    val putsRDD: RDD[(ImmutableBytesWritable, Put)] = resultRDD.map {
      case (word, count) =>
        val rowKey = new ImmutableBytesWritable(Bytes.toBytes(word))
        val put = new Put(rowKey.get())
        // FIX: original stored `count + " "` — a trailing space in the cell
        // value. Store the plain string form of the count instead.
        put.addColumn(
          Bytes.toBytes("info"),
          Bytes.toBytes("count"),
          Bytes.toBytes(count.toString)
        )
        rowKey -> put
    }

    // Connection settings for the HBase cluster plus the target table name.
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "node1.itcast.cn")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    conf.set("zookeeper.znode.parent", "/hbase")
    conf.set(TableOutputFormat.OUTPUT_TABLE, "htb_wordcount")

    // FIX: use explicit dot notation rather than infix for this 5-argument
    // call. The path argument is required by the API but ignored by
    // TableOutputFormat (output goes to the table configured above).
    putsRDD.saveAsNewAPIHadoopFile(
      "datas/hbase/htb_wordcount/",
      classOf[ImmutableBytesWritable],
      classOf[Put],
      classOf[TableOutputFormat[ImmutableBytesWritable]],
      conf
    )

    sc.stop()
  }
}
Spark 从 HBase 中读数据（Reading data from HBase into Spark）
package cn.itcast.spark.hbase
import org.apache.spark.sql.{
DataFrame, SparkSession}
object _05SparkHBaseTest {

  /**
   * Loads the HBase table `stus` through the external "hbase" data source
   * (selecting `info:name` and `info:age`) and prints its schema plus the
   * first rows.
   */
  def main(args: Array[String]): Unit = {
    // Build the local SparkSession; 2 shuffle partitions keep local runs small.
    val builder = SparkSession.builder()
      .appName(this.getClass.getSimpleName.stripSuffix("$"))
      .master("local[2]")
      .config("spark.sql.shuffle.partitions", "2")
    val spark: SparkSession = builder.getOrCreate()
    import spark.implicits._

    // Configure the HBase connector: ZooKeeper quorum, table, column family,
    // and the qualifiers to project into DataFrame columns.
    val reader = spark.read
      .format("hbase")
      .option("zkHosts", "node1.itcast.cn")
      .option("zkPort", "2181")
      .option("hbaseTable", "stus")
      .option("family", "info")
      .option("selectFields", "name,age")
    val hbaseDF: DataFrame = reader.load()

    hbaseDF.printSchema()
    hbaseDF.show(10, truncate = false)

    spark.stop()
  }
}
Spark 从 Hive 读数据（Reading data from Hive into Spark）
package cn.itcast.spark.hive
import org.apache.spark.sql.SparkSession
object _04SparkSQLHiveTest {

  /**
   * Reads the Hive table `db_hive.emp` two ways — via the DataFrame reader
   * and via a SQL query — printing the schema and sample rows of each.
   */
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .appName(this.getClass.getSimpleName.stripSuffix("$"))
      .master("local[2]")
      // Hive support plus the metastore URI are required to resolve db_hive.emp.
      .enableHiveSupport()
      .config("hive.metastore.uris", "thrift://node1.itcast.cn:9083")
      .getOrCreate()
    import spark.implicits._

    // DSL style: load the table through the DataFrame reader.
    val employees = spark.read.table("db_hive.emp")
    employees.printSchema()
    employees.show(10, truncate = false)

    println("==================================================")

    // SQL style: the same table through a literal query.
    spark.sql("select * from db_hive.emp").show()

    spark.stop()
  }
}
Spark 向 Hive 写入数据（Writing data from Spark to Hive）
// Build a Hive-enabled session; nonstrict dynamic-partition mode lets the
// partitioned insert below run without a static partition value.
val spark: SparkSession = SparkSession
  .builder()
  .appName(this.getClass.getSimpleName.stripSuffix("$"))
  .master("local[2]")
  .enableHiveSupport()
  .config("hive.metastore.uris", "thrift://node1.itcast.cn:9083")
  .config("hive.exec.dynamic.partition.mode", "nonstrict")
  .config("spark.sql.shuffle.partitions", "4")
  .getOrCreate()

// Load the raw JSON data, letting Spark infer the schema.
val inputFrame: DataFrame = spark.read
  .option("inferSchema", "true")
  .json("dataset/pmt.json")

// FIX: the original wrote an undefined `resultDF`; the only DataFrame built
// here is the JSON input, so write that. If a transformation step was meant
// to sit between load and save, restore it here.
val resultDF: DataFrame = inputFrame

resultDF
  .coalesce(1) // one output file per partition directory
  .write
  .format("hive")
  .mode(SaveMode.Append)
  // NOTE(review): "data_str" looks like a typo for "date_str" — confirm
  // against the itcast_ads.pmt_ads_info table schema before changing.
  .partitionBy("data_str")
  .saveAsTable("itcast_ads.pmt_ads_info")