Two ways to operate HBase from Spark

Before inserting any data, create the table first:

create 'student','cf1','cf2','cf3'
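The table can also be created from code instead of the shell. Below is a minimal sketch (not from the original post) using the HBase Admin API; it assumes the HBase 1.x-style HTableDescriptor/HColumnDescriptor classes, which is consistent with the HTable cast used later on, and the object name CreateStudentTable is only for illustration:

import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory

object CreateStudentTable {
  def main(args: Array[String]): Unit = {
    // loads hbase-site.xml from the classpath (see the notes at the end)
    val conf = HBaseConfiguration.create()
    val connection = ConnectionFactory.createConnection(conf)
    val admin = connection.getAdmin
    val tableName = TableName.valueOf("student")
    if (!admin.tableExists(tableName)) {
      val desc = new HTableDescriptor(tableName)
      // same column families as the shell command above
      desc.addFamily(new HColumnDescriptor("cf1"))
      desc.addFamily(new HColumnDescriptor("cf2"))
      desc.addFamily(new HColumnDescriptor("cf3"))
      admin.createTable(desc)
    }
    admin.close()
    connection.close()
  }
}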

1. Inserting with an RDD[(String, String)]

package com.xtd.hbase

import org.apache.hadoop.hbase.client.{Put, Result}
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.{SparkConf, SparkContext}

object SparkHBase {

  def main(args: Array[String]): Unit = {

    val config = new SparkConf()
    config.setMaster("local[*]").setAppName("SparkHBase")
    // Spark context
    val sc = new SparkContext(config)
    // HBase configuration
    val conf = HBaseConfiguration.create()
//    conf.set("hbase.zookeeper.quorum","cdh01,cdh02,cdh03,cdh04,cdh05")
//    conf.set("hbase.zookeeper.property.clientPort","2181")
    conf.set(TableInputFormat.INPUT_TABLE,"student")

    // Read the HBase table into an RDD (equivalent to scan 'student' in the HBase shell)
    val hbaseRDD = sc.newAPIHadoopRDD(
      conf,classOf[TableInputFormat],classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    // Iterate over hbaseRDD and print every cell value
    hbaseRDD.foreach{
      case(rowkey,result) =>{
        val cells = result.rawCells()
        cells.foreach({cell =>
          val str = Bytes.toString(CellUtil.cloneValue(cell))
          println("str:"+str)
        })
      }
    }

    /** Write to HBase */
    // equivalent shell command: put 'student','1001','cf1:name','Kobe'
    val dataRDD = sc.makeRDD(List(("1005","张三1"),("1006","李四1"),("1007","王五1")))

    val putRDD = dataRDD.map{
      case(rowkey,name) =>{
        val put = new Put(Bytes.toBytes(rowkey))
        put.addColumn(Bytes.toBytes("cf1"),Bytes.toBytes("name"),Bytes.toBytes(name))
        (new ImmutableBytesWritable(Bytes.toBytes(rowkey)),put)
      }
    }
    val jobConf = new JobConf(conf)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE,"student")
    putRDD.saveAsHadoopDataset(jobConf)

    // Count the rows in the student table (the RDD is lazy, so this scan runs after the write above)
    val count = hbaseRDD.count()
    println(count)

    sc.stop()

  }
}
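For completeness: the write above goes through the old org.apache.hadoop.mapred API (JobConf plus saveAsHadoopDataset). The same putRDD could also be written through the newer mapreduce API. The following is a sketch rather than part of the original post; it assumes the conf object and putRDD defined above and the same student table:

import org.apache.hadoop.hbase.mapreduce.{TableOutputFormat => NewTableOutputFormat}
import org.apache.hadoop.mapreduce.Job

// a Job instance is used only to carry the output configuration
val job = Job.getInstance(conf)
job.setOutputFormatClass(classOf[NewTableOutputFormat[ImmutableBytesWritable]])
job.getConfiguration.set(NewTableOutputFormat.OUTPUT_TABLE, "student")

// putRDD is the RDD[(ImmutableBytesWritable, Put)] built above
putRDD.saveAsNewAPIHadoopDataset(job.getConfiguration)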

Execution result (screenshot)

2. Inserting via the HBase shell

put 'student','201500208408','cf1:name','LJ'
put 'student','201500208408','cf1:age','20'
put 'student','201500208409','cf1:name','JR'
put 'student','201500208409','cf1:age','20'
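The inserted rows can then be checked directly from the shell, for example:

scan 'student'
get 'student','201500208408'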

Execution result (screenshot)

3. Inserting with an RDD[Int]

package com.xtd.hbase

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, HTable, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}

object HBaseTablePut {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf()
    conf.setAppName("HBaseTablePut").setMaster("local")
    val sc = new SparkContext(conf)
    val list = List(1,2,3,4,5,6,7,8,9)
    val rdd = sc.parallelize(list,1)
    rdd.foreach(x =>{
      println(x)
      val hbaseConf = HBaseConfiguration.create()
      val connection = ConnectionFactory.createConnection(hbaseConf)
      val table:HTable = connection.getTable(TableName.valueOf("student")).asInstanceOf[HTable]
      // row key, e.g. spark_1, spark_2, ...
      val put = new Put(Bytes.toBytes("spark_" + x))
      // column cf1:count holds the Int value
      put.addColumn(Bytes.toBytes("cf1"),Bytes.toBytes("count"),Bytes.toBytes(x))
      table.put(put)
      table.close()
      connection.close()
    })
  }
}
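Note that the program above opens and closes an HBase connection for every single element, which keeps the example short but is expensive. A common variant (a sketch, not part of the original post) opens one connection per partition with foreachPartition instead:

    rdd.foreachPartition { iter =>
      // one connection and one Table instance per partition, reused for all elements
      val hbaseConf = HBaseConfiguration.create()
      val connection = ConnectionFactory.createConnection(hbaseConf)
      val table = connection.getTable(TableName.valueOf("student"))
      iter.foreach { x =>
        val put = new Put(Bytes.toBytes("spark_" + x))
        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(x))
        table.put(put)
      }
      table.close()
      connection.close()
    }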

Execution result (screenshot)

Notes

The cluster's configuration files (at minimum hbase-site.xml) need to be copied into the Maven project's src/main/resources directory so that HBaseConfiguration.create() can find them on the classpath.
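For reference, a minimal hbase-site.xml carrying only the ZooKeeper connection settings might look like the following; the hostnames are the example values from the commented-out lines in the first program, so substitute your own cluster:

<configuration>
  <!-- ZooKeeper quorum of the HBase cluster (example hosts, replace with your own) -->
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>cdh01,cdh02,cdh03,cdh04,cdh05</value>
  </property>
  <!-- ZooKeeper client port -->
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
</configuration>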


Source: blog.csdn.net/qq262593421/article/details/105969665