Reading and writing HBase with Spark SQL

    // Write to HBase (generate HFiles, then bulk load them)
    org.apache.hadoop.hbase.client.Connection conn = null;
    try {
        SparkLog.debug("Reading the HBase configuration...");
        if (StringUtils.isNotBlank(type) && type.equalsIgnoreCase("hbase")) {
            SparkLog.debug("==========================================");
            String hbasetable = dict.getStringItem("table", "");
            String hbase_site_path = dict.getStringItem("path_site", "");
            String hfile_path = dict.getStringItem("hfile_path", "");
            Configuration conf = new Configuration();

            if (StringUtils.isBlank(hbase_site_path)) {
                SparkLog.warn("Configuration error: no hbase-site information was provided!");
            } else {
                hbase_site_path = hbase_site_path + (hbase_site_path.contains("hbase-site.xml") ? "" : "/hbase-site.xml");
                conf.addResource(new Path(hbase_site_path));
            }

            SparkLog.debug("HBase configuration loaded");
            conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "60000");
            if (!P_Spark.delHDFSDir(hfile_path)) {
                return TCResult.newFailureResult("SPARK_ERROR", "Failed to delete the old HFile directory");
            }
            SparkLog.debug(conf);

            SparkLog.debug("Creating the HBase connection...");
            // Create the HBase connection; the configuration supplies the ZooKeeper quorum and master address
            conn = ConnectionFactory.createConnection(conf);

            SparkLog.debug("Generating HFiles...");
            data.flatMapToPair(new PairFlatMapFunction<Row, ImmutableBytesWritable, KeyValue>() {

                private static final long serialVersionUID = -8033772725296906227L;

                @Override
                public Iterator<Tuple2<ImmutableBytesWritable, KeyValue>> call(Row s) throws Exception {
                    // Timestamp-based rowkey; "SSS" (upper case) is the milliseconds pattern
                    byte[] rowkey = Bytes.toBytes(new SimpleDateFormat("yyyyMMddHHmmss.SSS").format(System.currentTimeMillis()));
                    List<Tuple2<ImmutableBytesWritable, KeyValue>> cols = new ArrayList<>();
                    byte[] family = Bytes.toBytes(fm);

                    /*String hostname = s.getString(0);
                    String request_date = s.getString(1);
                    String post_id = s.get(2) == null ? "" : Integer.toString(s.getInt(2));
                    String title = s.getString(3);
                    String author = s.getString(4);
                    String country = s.getString(5);
                    String category = s.getString(6);*/

                    String hostname = "======";
                    String request_date = "======";
                    String post_id = "======";
                    String title = "======";
                    String author = "======";
                    String country = "======";
                    String category = "======";

                    // HFiles require cells in sorted order, so the qualifiers of a rowkey
                    // are added in lexicographic order
                    cols.add(new Tuple2<>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, Bytes.toBytes("author"), Bytes.toBytes(author))));
                    cols.add(new Tuple2<>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, Bytes.toBytes("category"), Bytes.toBytes(category))));
                    cols.add(new Tuple2<>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, Bytes.toBytes("country"), Bytes.toBytes(country))));
                    cols.add(new Tuple2<>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, Bytes.toBytes("hostname"), Bytes.toBytes(hostname))));
                    cols.add(new Tuple2<>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, Bytes.toBytes("post_id"), Bytes.toBytes(post_id))));
                    cols.add(new Tuple2<>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, Bytes.toBytes("request_date"), Bytes.toBytes(request_date))));
                    cols.add(new Tuple2<>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, Bytes.toBytes("title"), Bytes.toBytes(title))));

                    return cols.iterator();
                }
            }).saveAsNewAPIHadoopFile(hfile_path, ImmutableBytesWritable.class, KeyValue.class, HFileOutputFormat2.class, conf);
            SparkLog.debug("HFiles generated successfully");
            LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
            // Look up the target table by name
            SparkLog.debug("Looking up the target table...");
            Table table = conn.getTable(TableName.valueOf(hbasetable));
            SparkLog.debug(table.toString());

            // Get the region distribution of the HBase table
            SparkLog.debug("Getting the table's region distribution...");
            RegionLocator regionLocator = conn.getRegionLocator(TableName.valueOf(hbasetable));
            // Create a Hadoop MapReduce job
            Job job = Job.getInstance(conf);
            // Set the job name
            job.setJobName("DumpFile");
            // This is the important part: because HFiles are being generated,
            // the map output key must be ImmutableBytesWritable
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            // The map output value is KeyValue
            job.setMapOutputValueClass(KeyValue.class);
            // Configure HFileOutputFormat2 for this table
            HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);

            // Start the bulk load
            SparkLog.debug("Starting the bulk load...");
            load.doBulkLoad(new Path(hfile_path), conn.getAdmin(), table, regionLocator);
            // load.doBulkLoad(new Path(path), new , table, regionLocator);
            // load.doBulkLoad(new Path(path), (HTable) table); // this overload also still works
            table.close();

        }

    } catch (Throwable e) {
        return TCResult.newFailureResult("SPARK_ERROR", e);
    } finally {
        try {
            if (conn != null) {
                conn.close();
            }
        } catch (Throwable e) {
            return TCResult.newFailureResult("SPARK_ERROR", e);
        }
    }
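
After the bulk load finishes it can be worth spot-checking that the rows actually landed in the table. The following is only a minimal sketch, assuming the same `conf` and `hbasetable` variables as in the code above; the `scanLimit` cap and the log messages are purely illustrative.

    // Spot-check sketch: scan a handful of rows from the freshly loaded table.
    // Reuses `conf` and `hbasetable` from above; `scanLimit` is illustrative only.
    // Needs org.apache.hadoop.hbase.client.{Scan, Result, ResultScanner} and java.io.IOException.
    try (org.apache.hadoop.hbase.client.Connection verifyConn = ConnectionFactory.createConnection(conf);
            Table verifyTable = verifyConn.getTable(TableName.valueOf(hbasetable));
            ResultScanner scanner = verifyTable.getScanner(new Scan())) {
        int scanLimit = 10;
        int seen = 0;
        for (Result result : scanner) {
            SparkLog.debug("rowkey=" + Bytes.toString(result.getRow())
                    + ", cells=" + result.rawCells().length);
            if (++seen >= scanLimit) {
                break;
            }
        }
    } catch (IOException e) {
        SparkLog.warn("Verification scan failed: " + e);
    }
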
    // Read from HBase
    SparkLog.info("Initializing HBase...");
    Configuration conf = new Configuration();
    if (null != hbase_site_path) {
        hbase_site_path = hbase_site_path.contains("hbase-site.xml") ? hbase_site_path : hbase_site_path + "/hbase-site.xml";
        conf.addResource(new Path(hbase_site_path));
    } else {
        if (zn_parent == null || zn_parent.equals("")) {
            zn_parent = "/hbase";
        }
        conf.set("hbase.zookeeper.quorum", quorum);
        conf.set("hbase.zookeeper.property.clientPort", zkport);
        conf.set("zookeeper.znode.parent", zn_parent);
    }
    conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "60000");
    JavaRDD<String> javardd = null;
    try {
        conf.set(TableInputFormat.INPUT_TABLE, tablename);
        JavaPairRDD<ImmutableBytesWritable, Result> hbRDD = sc
                .newAPIHadoopRDD(conf, TableInputFormat.class,
                        ImmutableBytesWritable.class, Result.class);

        javardd = hbRDD.values().map(new Function<Result, String>() {

            private static final long serialVersionUID = 1L;

            @Override
            public String call(Result r) throws Exception {
                // Concatenate every cell of the row into one readable string
                StringBuilder s = new StringBuilder();
                for (Cell cell : r.rawCells()) {
                    s.append("Rowkey:")
                            .append(Bytes.toString(CellUtil.cloneRow(cell)))
                            .append(",column=")
                            .append(Bytes.toString(CellUtil.cloneFamily(cell)))
                            .append(":")
                            .append(Bytes.toString(CellUtil.cloneQualifier(cell)))
                            .append(",timestamp=").append(cell.getTimestamp())
                            .append(",value:")
                            .append(Bytes.toString(CellUtil.cloneValue(cell)));
                }
                return s.toString();
            }

        });
        SparkLog.debug("hbase table records num = " + javardd.count());

    } catch (Throwable e) {
        return TCResult.newFailureResult("SPARK_ERROR", e);
    }
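
Since the post is about Spark SQL, the scanned rows can also be exposed as a DataFrame and queried with SQL instead of being flattened into strings. The following is only a sketch: it assumes a `SparkSession` named `spark` exists next to the `JavaSparkContext` `sc`, and the column family `cf` and qualifier `title` are placeholder names, not values taken from the code above.

    // Sketch: turn the scanned Results into a DataFrame and query it with Spark SQL.
    // Assumes a SparkSession `spark`; "cf" and "title" are placeholder names.
    // Needs org.apache.spark.sql.{Dataset, Row, RowFactory} and
    // org.apache.spark.sql.types.{DataTypes, StructField, StructType}.
    StructType schema = DataTypes.createStructType(new StructField[] {
            DataTypes.createStructField("rowkey", DataTypes.StringType, false),
            DataTypes.createStructField("title", DataTypes.StringType, true)
    });

    JavaRDD<Row> rowRDD = hbRDD.values().map(result -> RowFactory.create(
            Bytes.toString(result.getRow()),
            Bytes.toString(result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("title")))));

    Dataset<Row> df = spark.createDataFrame(rowRDD, schema);
    df.createOrReplaceTempView("hbase_rows");
    spark.sql("SELECT rowkey, title FROM hbase_rows LIMIT 10").show();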

Reposted from www.cnblogs.com/lyy-blog/p/9592704.html