// Write to HBase
org.apache.hadoop.hbase.client.Connection conn = null;
try {
    SparkLog.debug("Reading HBase configuration...");
    if (StringUtils.isNotBlank(type) && type.equalsIgnoreCase("hbase")) {
        SparkLog.debug("==========================================");
        String hbasetable = dict.getStringItem("table", "");
        String hbase_site_path = dict.getStringItem("path_site", "");
        String hfile_path = dict.getStringItem("hfile_path", "");
        Configuration conf = new Configuration();

        if (StringUtils.isBlank(hbase_site_path)) {
            SparkLog.warn("Bad parameter configuration: no hbase-site information was supplied!");
        } else {
            hbase_site_path = hbase_site_path + (hbase_site_path.contains("hbase-site.xml") ? "" : "/hbase-site.xml");
            conf.addResource(new Path(hbase_site_path));
        }

        SparkLog.debug("Finished reading HBase configuration");
        conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "60000");
        // Remove HFiles left over from a previous run.
        if (!P_Spark.delHDFSDir(hfile_path)) {
            return TCResult.newFailureResult("SPARK_ERROR", "Failed to delete old files");
        }
        SparkLog.debug(conf);

        SparkLog.debug("Creating HBase connection...");
        // Create the HBase connection; the configuration resolves the HBase master address.
        conn = ConnectionFactory.createConnection(conf);

        SparkLog.debug("Generating HFiles...");
        data.flatMapToPair(new PairFlatMapFunction<Row, ImmutableBytesWritable, KeyValue>() {

            private static final long serialVersionUID = -8033772725296906227L;

            @Override
            public Iterator<Tuple2<ImmutableBytesWritable, KeyValue>> call(Row s) throws Exception {
                // Rowkey derived from the current timestamp ("SSS" = milliseconds).
                // Note that timestamp-only rowkeys can collide and tend to hotspot a single region.
                byte[] rowkey = Bytes.toBytes(new SimpleDateFormat("yyyyMMddHHmmss.SSS").format(System.currentTimeMillis()));
                List<Tuple2<ImmutableBytesWritable, KeyValue>> cols = new ArrayList<>();
                byte[] family = Bytes.toBytes(fm);

                /*String hostname = s.getString(0);
                String request_date = s.getString(1);
                String post_id = s.get(2) == null ? "" : Integer.toString(s.getInt(2));
                String title = s.getString(3);
                String author = s.getString(4);
                String country = s.getString(5);
                String category = s.getString(6);*/

                // Placeholder values; the commented block above shows how to read them from the Row.
                String hostname = "======";
                String request_date = "======";
                String post_id = "======";
                String title = "======";
                String author = "======";
                String country = "======";
                String category = "======";

                // Cells within one row must be emitted in lexicographic qualifier order,
                // otherwise HFileOutputFormat2 rejects them when the HFiles are written.
                cols.add(new Tuple2<ImmutableBytesWritable, KeyValue>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, "author".getBytes(), Bytes.toBytes(author))));
                cols.add(new Tuple2<ImmutableBytesWritable, KeyValue>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, "category".getBytes(), Bytes.toBytes(category))));
                cols.add(new Tuple2<ImmutableBytesWritable, KeyValue>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, "country".getBytes(), Bytes.toBytes(country))));
                cols.add(new Tuple2<ImmutableBytesWritable, KeyValue>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, "hostname".getBytes(), Bytes.toBytes(hostname))));
                cols.add(new Tuple2<ImmutableBytesWritable, KeyValue>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, "post_id".getBytes(), Bytes.toBytes(post_id))));
                cols.add(new Tuple2<ImmutableBytesWritable, KeyValue>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, "request_date".getBytes(), Bytes.toBytes(request_date))));
                cols.add(new Tuple2<ImmutableBytesWritable, KeyValue>(new ImmutableBytesWritable(rowkey), new KeyValue(rowkey, family, "title".getBytes(), Bytes.toBytes(title))));

                return cols.iterator();
            }
        }).saveAsNewAPIHadoopFile(hfile_path, ImmutableBytesWritable.class, KeyValue.class, HFileOutputFormat2.class, conf);
        SparkLog.debug("HFiles generated successfully");
        LoadIncrementalHFiles load = new LoadIncrementalHFiles(conf);
        // Look up the table by name.
        SparkLog.debug("Getting the table by name...");
        Table table = conn.getTable(TableName.valueOf(hbasetable));
        SparkLog.debug(table.toString());

        // Get the region distribution of the HBase table.
        SparkLog.debug("Getting the region distribution of the table...");
        RegionLocator regionLocator = conn.getRegionLocator(TableName.valueOf(hbasetable));
        // Create a Hadoop MapReduce job.
        Job job = Job.getInstance(conf);
        // Set the job name.
        job.setJobName("DumpFile");
        // The crucial part: since we are generating HFiles, the output key must be ImmutableBytesWritable.
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        // The output value type is KeyValue.
        job.setMapOutputValueClass(KeyValue.class);
        // Configure HFileOutputFormat2 against the target table.
        HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);

        // Start the bulk load.
        SparkLog.debug("Starting bulk load...");
        load.doBulkLoad(new Path(hfile_path), conn.getAdmin(), table, regionLocator);
        // load.doBulkLoad(new Path(path), (HTable) table); // also works at the moment
        table.close();
    }
} catch (Throwable e) {
    return TCResult.newFailureResult("SPARK_ERROR", e);
} finally {
    try {
        if (conn != null) {
            conn.close();
        }
    } catch (Throwable e) {
        return TCResult.newFailureResult("SPARK_ERROR", e);
    }
}
// Read from HBase
SparkLog.info("Initializing HBase...");
Configuration conf = new Configuration();
if (null != hbase_site_path) {
    // Load connection settings from hbase-site.xml if a path was supplied.
    hbase_site_path = hbase_site_path.contains("hbase-site.xml") ? hbase_site_path : hbase_site_path + "/hbase-site.xml";
    conf.addResource(new Path(hbase_site_path));
} else {
    // Otherwise fall back to explicit ZooKeeper settings.
    if (zn_parent == null || zn_parent.equals("")) {
        zn_parent = "/hbase";
    }
    conf.set("hbase.zookeeper.quorum", quorum);
    conf.set("hbase.zookeeper.property.clientPort", zkport);
    conf.set("zookeeper.znode.parent", zn_parent);
}
conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, "60000");
JavaRDD<String> javardd = null;
try {
    conf.set(TableInputFormat.INPUT_TABLE, tablename);
    JavaPairRDD<ImmutableBytesWritable, Result> hbRDD = sc
            .newAPIHadoopRDD(conf, TableInputFormat.class,
                    ImmutableBytesWritable.class, Result.class);

    // Render each Result as a readable string, one "Rowkey:...,column=...,value:..." entry per cell.
    javardd = hbRDD.values().map(new Function<Result, String>() {

        private static final long serialVersionUID = 1L;

        @Override
        public String call(Result r) throws Exception {
            StringBuilder s = new StringBuilder();
            for (Cell cell : r.rawCells()) {
                s.append("Rowkey:")
                        .append(Bytes.toString(CellUtil.cloneRow(cell)))
                        .append(",column=")
                        .append(Bytes.toString(CellUtil.cloneFamily(cell)))
                        .append(":")
                        .append(Bytes.toString(CellUtil.cloneQualifier(cell)))
                        .append(",timestamp=").append(cell.getTimestamp())
                        .append(",value:")
                        .append(Bytes.toString(CellUtil.cloneValue(cell)));
            }
            return s.toString();
        }

    });
    SparkLog.debug("hbase table records num = " + javardd.count());

} catch (Throwable e) {
    return TCResult.newFailureResult("SPARK_ERROR", e);
}
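The read above scans the whole table. When only part of the table is needed, TableInputFormat also honors a few optional scan properties that can be set on the same Configuration before newAPIHadoopRDD is called. A minimal sketch with placeholder values, assuming the same `conf` and `tablename` as above:

// Sketch only: narrow the scan before creating the RDD (family and rowkey values are placeholders).
conf.set(TableInputFormat.INPUT_TABLE, tablename);
conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "cf");    // read a single column family
conf.set(TableInputFormat.SCAN_ROW_START, "20190101");  // start rowkey (inclusive)
conf.set(TableInputFormat.SCAN_ROW_STOP, "20190201");   // stop rowkey (exclusive)
conf.set(TableInputFormat.SCAN_CACHEDROWS, "500");      // rows fetched per scanner RPC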