HBase in Practice

Reading data from Hive and writing it into HBase
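The job below writes to an HBase table named orders with two column families, id and num. HBase does not create tables on write, so the table must exist beforehand. A minimal pre-creation sketch using the same-generation client admin API as the job (HBaseAdmin and HTableDescriptor match the HTable usage below; the quorum address is taken from the job and is an assumption about your cluster):

import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.HBaseAdmin

object CreateOrdersTable {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "192.168.174.134,192.168.174.135,192.168.174.129")
    val admin = new HBaseAdmin(conf)
    val name = TableName.valueOf("orders")
    if (!admin.tableExists(name)) {
      val desc = new HTableDescriptor(name)
      desc.addFamily(new HColumnDescriptor("id"))  // family for id-type columns
      desc.addFamily(new HColumnDescriptor("num")) // family for numeric columns
      admin.createTable(desc)
    }
    admin.close()
  }
}

Equivalently, create 'orders', 'id', 'num' in the HBase shell.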

package hbase

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{HTable, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.sql.SparkSession

object SparkHbase {
  def main(args: Array[String]): Unit = {
    // ZooKeeper quorum the HBase client contacts to locate the cluster
    val ZOOKEEPER_QUORUM = "192.168.174.134,192.168.174.135,192.168.174.129"
    // Build a SparkSession with Hive support so the Hive table can be read
    val spark = SparkSession.builder()
      .appName("spark to hbase")
      .enableHiveSupport()
      .getOrCreate()

    val rdd = spark.sql("select order_id, user_id, order_dow from badou.orders limit 300").rdd

    /**
      * One Put object is one row; its constructor takes the rowkey
      * (user_id serves as the rowkey here).
      * Every value must be serialized with org.apache.hadoop.hbase.util.Bytes.
      */
    rdd.map { row =>
      val order_id = row(0).asInstanceOf[String]
      val user_id = row(1).asInstanceOf[Long]
      val order_dow = row(2).asInstanceOf[Int]

      // user_id is the rowkey
      val p = new Put(Bytes.toBytes(user_id))
      // column family "id" holds the id-type columns: qualifier "order" stores order_id
      p.addColumn(Bytes.toBytes("id"), Bytes.toBytes("order"), Bytes.toBytes(order_id))
      // column family "num" holds the numeric columns: qualifier "dow" stores order_dow
      p.addColumn(Bytes.toBytes("num"), Bytes.toBytes("dow"), Bytes.toBytes(order_dow))
      p // return the Put for this row
    }.foreachPartition { partition =>
      // One connection per partition, created on the executor side
      val jobConf = new JobConf(HBaseConfiguration.create())
      jobConf.set("hbase.zookeeper.quorum", ZOOKEEPER_QUORUM)
      jobConf.set("hbase.zookeeper.property.clientPort", "2181")
      jobConf.set("zookeeper.znode.parent", "/hbase")
      // target table to write into
      val table = new HTable(jobConf, TableName.valueOf("orders"))
      import scala.collection.JavaConversions._
      table.put(seqAsJavaList(partition.toSeq))
      table.close() // flushes buffered puts and releases the connection
    }

    spark.stop()
  }
}
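Writing through HTable in foreachPartition is one option; Spark can also hand the Puts to HBase's older mapred TableOutputFormat via saveAsHadoopDataset, which manages the record writer itself. A sketch under the same assumptions (badou.orders in Hive, an existing orders table in HBase):

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.sql.SparkSession

object SparkHbaseTOF {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("spark to hbase via TableOutputFormat")
      .enableHiveSupport()
      .getOrCreate()

    val jobConf = new JobConf(HBaseConfiguration.create())
    jobConf.set("hbase.zookeeper.quorum", "192.168.174.134,192.168.174.135,192.168.174.129")
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, "orders")
    jobConf.setOutputFormat(classOf[TableOutputFormat])

    spark.sql("select order_id, user_id, order_dow from badou.orders limit 300").rdd
      .map { row =>
        val p = new Put(Bytes.toBytes(row(1).asInstanceOf[Long]))
        p.addColumn(Bytes.toBytes("id"), Bytes.toBytes("order"), Bytes.toBytes(row(0).asInstanceOf[String]))
        p.addColumn(Bytes.toBytes("num"), Bytes.toBytes("dow"), Bytes.toBytes(row(2).asInstanceOf[Int]))
        (new ImmutableBytesWritable, p) // the key is ignored by TableOutputFormat
      }
      .saveAsHadoopDataset(jobConf)

    spark.stop()
  }
}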
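After either job runs, a single row can be spot-checked with a Get. A minimal sketch; the rowkey 1L is a hypothetical user_id, so substitute one that actually occurs in badou.orders. Note that Bytes.toBytes on a Long produces an 8-byte binary rowkey, so the keys will not display as readable numbers in the HBase shell.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{Get, HTable}
import org.apache.hadoop.hbase.util.Bytes

object CheckOrdersRow {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "192.168.174.134,192.168.174.135,192.168.174.129")
    val table = new HTable(conf, TableName.valueOf("orders"))
    val result = table.get(new Get(Bytes.toBytes(1L))) // hypothetical user_id
    // getValue returns null if the row or cell does not exist
    val orderId = Bytes.toString(result.getValue(Bytes.toBytes("id"), Bytes.toBytes("order")))
    val dow = Bytes.toInt(result.getValue(Bytes.toBytes("num"), Bytes.toBytes("dow")))
    println(s"order_id=$orderId, order_dow=$dow")
    table.close()
  }
}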

Creating a Hive table to analyze the data in an HBase table

CREATE EXTERNAL TABLE h_table(rowkey string, col1 string, col2 string, age int)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,f1:col1,f1:col2,f1:age")
TBLPROPERTIES("hbase.table.name" = "table1");
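In the mapping, :key binds the Hive column rowkey to the HBase rowkey, and each f1:colX entry binds a Hive column to a column family:qualifier pair; hbase.table.name names the existing HBase table (here a generic table1, not the orders table from the job above). Once mapped, h_table can be queried like any other Hive table. A minimal sketch from Spark, where the age > 30 filter is just an arbitrary example and the hive-hbase-handler and HBase client jars are assumed to be on the classpath:

import org.apache.spark.sql.SparkSession

object QueryHbaseViaHive {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("query hbase via hive")
      .enableHiveSupport()
      .getOrCreate()

    // each Hive row is materialized from one HBase row at query time
    spark.sql("select rowkey, col1, col2, age from h_table where age > 30").show()

    spark.stop()
  }
}

The same SELECT runs unchanged in the Hive CLI.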


Reposted from www.cnblogs.com/xumaomao/p/12743404.html