Create the SparkReadHbase class (the original post spelled it "SaprkReadHbase"; the typo is fixed consistently here):

package sparksql

import java.util.Properties

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.SparkSession

import scala.collection.mutable.ArrayBuffer

/**
 * yang
 * 2019/3/27 10:37
 * 1.0
 */
object SparkReadHbase {
  def main(args: Array[String]): Unit = {
    // Load table names, per-table column lists and per-table SQL
    // from hbase.properties on the classpath.
    val properties = new Properties()
    properties.load(Thread.currentThread().getContextClassLoader.getResourceAsStream("hbase.properties"))
    val tables: Array[String] = properties.getProperty("t1").split(",")
    tables.foreach(t => print(t + ' '))

    // A single SparkSession is enough. The original code also built a SparkConf,
    // SparkContext and SQLContext, which duplicated this session and redefined
    // `spark`, so it did not compile.
    val spark = SparkSession
      .builder()
      .master("local[2]")
      .appName("Spark Read Hbase")
      .enableHiveSupport() // required when reading or writing Hive tables
      .getOrCreate()
    val sc = spark.sparkContext
    import spark.implicits._

    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", properties.getProperty("zookeeper.quorum"))
    conf.set("hbase.zookeeper.property.clientPort", properties.getProperty("zookeeper.property.clientPort"))

    tables.foreach(t => {
      conf.set(TableInputFormat.INPUT_TABLE, t)
      val columns: Array[String] = properties.getProperty(t).split(",")
      columns.foreach(c => println("HBase column: " + c))

      val hbaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
        classOf[ImmutableBytesWritable], classOf[Result])

      // Map each Result to (rowkey, col1, col2) and turn the RDD into a DataFrame.
      // All columns are read from column family "f"; the tuple assumes exactly
      // two configured columns per table, matching hbase.properties.
      val shop = hbaseRDD.map(r => {
        val arr = new ArrayBuffer[String]()
        columns.foreach(c =>
          arr.append(Bytes.toString(r._2.getValue(Bytes.toBytes("f"), Bytes.toBytes(c)))))
        (Bytes.toString(r._2.getRow), arr(0), arr(1))
      }).toDF()
      shop.createOrReplaceTempView("shop")

      // Look up the per-table statement (e.g. key "CDRsql") and append the source
      // view. Note the leading space before "from": without it the concatenated
      // SQL is invalid. The target Hive table (cdr/cdr2) must already exist.
      spark.sql(properties.getProperty(t + "sql") + " from shop")
      spark.sql("select * from shop").show()
      print(t + ' ')
    })
    spark.stop()
  }
}
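The tuple mapping above hardcodes exactly two columns (arr(0), arr(1)), so adding a third column to hbase.properties would break it. A Row/StructType conversion removes that limit. This is a minimal sketch, not part of the original post: it reuses spark, hbaseRDD and columns from the loop body above and assumes the same column family "f".

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StringType, StructField, StructType}

// One string field per configured column, plus the rowkey.
val schema = StructType(
  StructField("rowkey", StringType) +: columns.map(c => StructField(c, StringType)))

val rowRDD = hbaseRDD.map { case (_, result) =>
  val values = columns.map(c =>
    Bytes.toString(result.getValue(Bytes.toBytes("f"), Bytes.toBytes(c))))
  Row.fromSeq(Bytes.toString(result.getRow) +: values.toSeq)
}

// createDataFrame accepts any number of columns, unlike a fixed-arity tuple.
val shopDF = spark.createDataFrame(rowRDD, schema)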
The hbase.properties file:
zookeeper.quorum=cdh-node02,cdh-node03,cdh-node04
zookeeper.property.clientPort=2181
t1=CDR,CDR2
CDR=customer_id,create_id
CDR2=customer_id,create_id
CDRsql=insert overwrite table cdr select *
CDR2sql=insert overwrite table cdr2 select *
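The naming convention ties the file together: t1 lists the HBase tables, each table name keys its own column list, and the table name plus "sql" keys the statement to run against it. A minimal standalone sketch of that lookup chain (not in the original post):

import java.util.Properties

val props = new Properties()
props.load(Thread.currentThread().getContextClassLoader.getResourceAsStream("hbase.properties"))

for (table <- props.getProperty("t1").split(",")) {
  val columns = props.getProperty(table).split(",")  // e.g. CDR -> customer_id,create_id
  val sql = props.getProperty(table + "sql")         // e.g. CDRsql -> insert overwrite ...
  println(s"$table columns=${columns.mkString(",")} sql=$sql")
}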
The pom.xml file:
<properties>
    <hbase.version>1.2.6</hbase.version>
    <spark.version>2.0.0</spark.version>
    <hadoop.version>2.7.7</hadoop.version>
    <hive.version>1.2.2</hive.version>
    <scala.version>2.10.6</scala.version>
    <scala.binary.version>2.10</scala.binary.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.50</version>
    </dependency>
    <dependency>
        <groupId>org.testng</groupId>
        <artifactId>testng</artifactId>
        <version>6.9.9</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.codehaus.janino</groupId>
                <artifactId>commons-compiler</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-core</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-annotations</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
        <version>2.6.5</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.janino</groupId>
        <artifactId>commons-compiler</artifactId>
        <version>2.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.janino</groupId>
        <artifactId>janino</artifactId>
        <version>2.6.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>5.1.27</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <!-- HBase dependencies -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
    </dependency>
</dependencies>
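With hbase-client on the classpath, it can be worth verifying ZooKeeper connectivity before running the full Spark job. Below is a minimal hypothetical smoke test, not part of the original post; it assumes the quorum settings and CDR table from hbase.properties, and the rowkey is a placeholder.

import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.hadoop.hbase.client.{ConnectionFactory, Get}
import org.apache.hadoop.hbase.util.Bytes

object HbaseSmokeTest {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "cdh-node02,cdh-node03,cdh-node04")
    conf.set("hbase.zookeeper.property.clientPort", "2181")

    val connection = ConnectionFactory.createConnection(conf)
    try {
      val table = connection.getTable(TableName.valueOf("CDR"))
      // Fetch a single placeholder rowkey just to prove the cluster is reachable.
      val result = table.get(new Get(Bytes.toBytes("some-rowkey")))
      println(if (result.isEmpty) "connected, row not found" else "connected, row found")
      table.close()
    } finally {
      connection.close()
    }
  }
}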