添加数据之前先 create table
create 'student','cf1','cf2','cf3'
1、RDD[(String,String)]类型添加
package com.xtd.hbase
import org.apache.hadoop.hbase.client.{Put, Result}
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.{SparkConf, SparkContext}
object SparkHBase {

  /**
   * Demo entry point: scans the HBase table "student" into an RDD, prints
   * every cell value, writes three new rows through the old mapred
   * TableOutputFormat API, then prints the resulting row count.
   */
  def main(args: Array[String]): Unit = {
    // Local Spark context using all available cores.
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkHBase")
    val sc = new SparkContext(sparkConf)

    // HBase configuration; zookeeper settings come from the classpath
    // hbase-site.xml (explicit overrides kept here for reference).
    val hbaseConf = HBaseConfiguration.create()
    // hbaseConf.set("hbase.zookeeper.quorum","cdh01,cdh02,cdh03,cdh04,cdh05")
    // hbaseConf.set("hbase.zookeeper.property.clientPort","2181")
    hbaseConf.set(TableInputFormat.INPUT_TABLE, "student")

    // Read the table as an RDD[(ImmutableBytesWritable, Result)]
    // (equivalent of the shell command: scan 'student').
    val studentRDD = sc.newAPIHadoopRDD(
      hbaseConf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    // Print the value of every cell of every row (runs on the executors).
    studentRDD.foreach { case (_, result) =>
      for (cell <- result.rawCells()) {
        println("str:" + Bytes.toString(CellUtil.cloneValue(cell)))
      }
    }

    /** HBase write. Shell equivalent: put 'student','1001','info:name','科比' */
    val rows = sc.makeRDD(List(("1005", "张三1"), ("1006", "李四1"), ("1007", "王五1")))
    val putRDD = rows.map { case (rowkey, name) =>
      val put = new Put(Bytes.toBytes(rowkey))
      put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("name"), Bytes.toBytes(name))
      (new ImmutableBytesWritable(Bytes.toBytes(rowkey)), put)
    }

    // Write through the legacy mapred API: JobConf + TableOutputFormat.
    val jobConf = new JobConf(hbaseConf)
    jobConf.setOutputFormat(classOf[TableOutputFormat])
    jobConf.set(TableOutputFormat.OUTPUT_TABLE, "student")
    putRDD.saveAsHadoopDataset(jobConf)

    // Count rows in 'student'. The RDD is not cached, so this triggers a
    // fresh scan and therefore includes the rows written just above.
    println(studentRDD.count())

    sc.stop()
  }
}
执行结果
2、hbase shell 添加
put 'student','201500208408','cf1:name','LJ'
put 'student','201500208408','cf1:age','20'
put 'student','201500208409','cf1:name','JR'
put 'student','201500208409','cf1:age','20'
执行结果
3、RDD[Int]类型添加
package com.xtd.hbase
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, HTable, Put}
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.{SparkConf, SparkContext}
object HBaseTablePut {

  /**
   * Demo entry point: writes the integers 1..9 into the HBase table
   * "student" as rows "spark_1" .. "spark_9", column cf1:count.
   *
   * Fixes over the original:
   *  - opens one HBase connection per partition (foreachPartition) instead
   *    of one per element — creating a Connection is heavyweight and the
   *    per-record version did it 9 times;
   *  - closes the table/connection in a finally block so a failed put does
   *    not leak the connection;
   *  - uses the generic Table interface returned by getTable instead of an
   *    asInstanceOf[HTable] downcast;
   *  - stops the SparkContext before exiting.
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("HBaseTablePut").setMaster("local")
    val sc = new SparkContext(conf)

    val list = List(1, 2, 3, 4, 5, 6, 7, 8, 9)
    val rdd = sc.parallelize(list, 1)

    // One connection per partition; the body runs on the executors, so the
    // HBase client objects are created there (they are not serializable).
    rdd.foreachPartition { iter =>
      val hbaseConf = HBaseConfiguration.create()
      val connection = ConnectionFactory.createConnection(hbaseConf)
      val table = connection.getTable(TableName.valueOf("student"))
      try {
        iter.foreach { x =>
          println(x)
          // rowkey: spark_<n>
          val put = new Put(Bytes.toBytes("spark_" + x))
          // column cf1:count holds the raw int bytes
          put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(x))
          table.put(put)
        }
      } finally {
        table.close()
        connection.close()
      }
    }

    sc.stop()
  }
}
执行结果
注意事项
Maven项目的 src/main/resources 目录下需要拷贝集群的配置文件（如 hbase-site.xml）过来