1 Java api 操作hbase代码写法如下:
package hbase; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTableInterface; import org.apache.hadoop.hbase.client.HTablePool; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.util.Bytes; // hbase shell 和对应 java api操作大全 http://blog.csdn.net/wulantian/article/details/41011297 /** * 如果在执行 build-hbase.xml时遇到 connect的错误,请多次重复执行 虚拟机第一次连接基本都连接不上 * @author Administrator * */ public class MyHbaseAPI { /** * @param args */ public static void main(String[] args) { Configuration conf = getConf(); // 查询所有表 /*try { HBaseAdmin hBaseAdmin = new HBaseAdmin(conf); HTableDescriptor[] desors = hBaseAdmin.listTables(); for(int i=0; i<desors.length; i++){ HTableDescriptor desor = desors[i]; TableName tableName = desor.getTableName(); System.out.println("tablename is: " + tableName.getNameAsString()); } } catch (Exception e) { e.printStackTrace(); } */ // 删除表 /* try { HBaseAdmin hBaseAdmin = new HBaseAdmin(conf); hBaseAdmin.disableTable("tea"); // 先disable 否则会报还没有disable 异常 hBaseAdmin.deleteTable("tea"); } catch (Exception e) { e.printStackTrace(); } */ // 创建表 /*try { HBaseAdmin hBaseAdmin = new HBaseAdmin(conf); // 设置表名 
这个类是表名和列族的容器 HTableDescriptor hTableDescriptor = new HTableDescriptor("teacher"); // 设置两个列族名 HColumnDescriptor basecolumnDescriptor = new HColumnDescriptor("base"); HColumnDescriptor morecolumnDescriptor = new HColumnDescriptor("more"); // 列族加入到表中 hTableDescriptor.addFamily(basecolumnDescriptor); hTableDescriptor.addFamily(morecolumnDescriptor); // 创建之 hBaseAdmin.createTable(hTableDescriptor); boolean exist = hBaseAdmin.tableExists("teacher"); System.out.println("创建的新表teacher成功了吗? " + exist); } catch (Exception e) { e.printStackTrace(); } */ /* * boolean b = admin.isTableDisabled("stu"); * if(!b){ hBaseAdmin.disableTable("emp"); System.out.println("Table disabled"); 停用此表 }esle{ hBaseAdmin.enableTable("emp"); // 启动此表 }*/ // 向hbase表插入/更新 数据 /*try { HTablePool htablepool = new HTablePool(conf, 1000); HTableInterface htable = htablepool.getTable("stu"); //HTable htable = new HTable(conf, "stu"); 建议用pool方式获得表 List puts = new ArrayList<Put>(); Put put1 = new Put("2".getBytes()); put1.add("base".getBytes(), "name".getBytes(), "baobao".getBytes()); Put put2 = new Put("2".getBytes()); put2.add("base".getBytes(), "age".getBytes(), "1".getBytes()); //Put put3 = new Put("2".getBytes()); //put3.add("more".getBytes(), "weigth".getBytes(), "3.5kg".getBytes()); //Put put4 = new Put("2".getBytes()); //put4.add("more".getBytes(), "height".getBytes(), "50cm".getBytes()); puts.add(put1); puts.add(put2); //puts.add(put3); //puts.add(put4); htable.put(puts); htable.close(); } catch (Exception e) { e.printStackTrace(); } */ // hbase删除一条数据的指定列 /* try { HTable htable = new HTable(conf, "stu"); Delete delete = new Delete("3".getBytes()); // 删除整行数据 //delete.deleteFamily("base".getBytes()); // 删除整个列族 delete.deleteColumn("base".getBytes(), "sex".getBytes()); // 删除某个列 htable.delete(delete); htable.close(); } catch (Exception e) { e.printStackTrace(); }*/ // 查询单条记录所有数据 /*try { HTable htable = new HTable(conf, "stu"); Get get = new Get("2".getBytes()); Result result = htable.get(get); //String 
name = Bytes.toString(result.getValue("base".getBytes(), "name".getBytes())); //String age = Bytes.toString(result.getValue("base".getBytes(), "age".getBytes())); //String weight = Bytes.toString(result.getValue("more".getBytes(), "weight".getBytes())); //String high = Bytes.toString(result.getValue("more".getBytes(), "high".getBytes())); for(KeyValue kv : result.raw()){ // 得到组成result的各个单元cell, 这些cell是由row, family, qualifier,value等组成, 如果id为2下有多条则遍历后将每一行的列输出 System.out.println("rowkey: " + Bytes.toString(kv.getRow())); System.out.println("family: " + Bytes.toString(kv.getFamily())); System.out.println("Qualifier: " + Bytes.toString(kv.getQualifier())); System.out.println("value: " + Bytes.toString(kv.getValue())); } //System.out.println("name: " + name + " age: " + age + " weight: " + weight + " high: " + high); htable.close(); } catch (Exception e) { e.printStackTrace(); }*/ // 查询单条记录某一列数值 /*try { HTable htable = new HTable(conf, "stu"); Get get = new Get("2".getBytes()); get.addColumn("base".getBytes(), "name".getBytes()); Result result = htable.get(get); String name = Bytes.toString(result.getValue("base".getBytes(), "name".getBytes())); String age = Bytes.toString(result.getValue("base".getBytes(), "age".getBytes())); // 得到 null System.out.println("name: " + name + " age: " + age); htable.close(); } catch (Exception e) { e.printStackTrace(); }*/ // 增加和删除表中的列族 /* try { HBaseAdmin hBaseAdmin = new HBaseAdmin(conf); //hBaseAdmin.deleteColumn("stu", "more"); //Column family 删除表列族 HColumnDescriptor morecolumnDescriptor = new HColumnDescriptor("more"); // 增加列族 hBaseAdmin.addColumn("stu", morecolumnDescriptor); } catch (Exception e) { e.printStackTrace(); }*/ // 关闭hbase集群 /*try { HBaseAdmin hBaseAdmin = new HBaseAdmin(conf); //hBaseAdmin.stopMaster() //hBaseAdmin.stopRegionServer(hostnamePort) hBaseAdmin.shutdown(); // 关闭hbase } catch (Exception e) { e.printStackTrace(); }*/ // 插入列 /*try { HTable htable = new HTable(conf, "stu"); List puts = new ArrayList<Put>(); Put 
put1 = new Put("3".getBytes()); put1.add("base".getBytes(), "sex".getBytes(), "man".getBytes()); puts.add(put1); htable.put(puts); htable.close(); } catch (Exception e) { e.printStackTrace(); } */ // scan过滤器 /* try { HBaseAdmin hBaseAdmin = new HBaseAdmin(conf); HTable htable = new HTable(conf, "stu"); Scan scan = new Scan(); scan.setStartRow("1".getBytes()); // 增加开始rowkey 包含 scan.setStopRow("3".getBytes()); //增加结束rowkey 不包含 scan.addColumn("base".getBytes(), "name".getBytes());// 扫描中增加 列族 列名 ResultScanner resultScanner = htable.getScanner(scan); for(Result result : resultScanner){ //result: HBase cell defined by the row, family, qualifier, timestamp, and value if(result.raw().length==0){ System.out.println("stu 表数据为空"); }else{ for(KeyValue kv : result.raw()){ // 得到组成result的各个单元cell, 这些cell是由row, family, qualifier,value等组成 System.out.println(Bytes.toString(kv.getRow())); System.out.println(Bytes.toString(kv.getFamily())); System.out.println(Bytes.toString(kv.getQualifier())); System.out.println(Bytes.toString(kv.getValue())); } } } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); }*/ // scan + filter过滤器 try { HBaseAdmin hBaseAdmin = new HBaseAdmin(conf); HTable htable = new HTable(conf, "stu"); Scan scan = new Scan(); scan.setStartRow("1".getBytes()); // 增加开始rowkey 包含 scan.setStopRow("3".getBytes()); //增加结束rowkey 不包含 scan.addColumn("base".getBytes(), "name".getBytes());// 扫描中增加 列族 列名 ResultScanner resultScanner = htable.getScanner(scan); for(Result result : resultScanner){ //result: HBase cell defined by the row, family, qualifier, timestamp, and value if(result.raw().length==0){ System.out.println("stu 表数据为空"); }else{ for(KeyValue kv : result.raw()){ // 得到组成result的各个单元cell, 这些cell是由row, family, qualifier,value等组成 System.out.println(Bytes.toString(kv.getRow())); System.out.println(Bytes.toString(kv.getFamily())); System.out.println(Bytes.toString(kv.getQualifier())); System.out.println(Bytes.toString(kv.getValue())); } } } } catch 
(Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } // filter过滤器 见别的链接 } private static Configuration getConf() { Configuration conf = null; conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "hadoop3"); conf.set("hbase.rootdir", "hdfs://hadoop3:9000/hbase"); return conf; } }
filter过滤器, 实际上定义好你过滤规则filter后, 直接丢给 scan即可然后让scan扫描:
scan.setFilter(filter); // attach the filter to the scan
关于过滤器,见: hbase scan filter以及hbase逻辑存储图和hbase打印数据效果
2 在 hbase + hadoop2 + zk 构建的集群的时候注意事项:
hbase: h2master主 h2sliver113 从 h2sliver114从
hadoop: h2single
zookeeper: h2master h2sliver113 h2sliver114
将上面代码打包后 放在hadoop上通过执行:
bin/hadoop jar XX.jar 包.类执行的时候,报错如下:
错误原因: hadoop2节点无法加载到hbase包
处理方式1:
将hbase jar拷贝到hadoop 节点上,注意请将hbase/lib下的所有包都拷贝过去,比如 /usr/local/hbaselib。
曾经尝试过只将
hbase client, hbase common, hbase server拷贝过去,但是在build-hbase.xml执行时报错:
NoClassDefFoundError: org/cloudera/htrace/Trace
然后在 hadoop/etc/hadoop/hadoop-env.sh 中增加如下内容:
# Append every jar under /usr/local/hbaselib to HADOOP_CLASSPATH,
# starting a fresh value when the variable is still empty.
for f in /usr/local/hbaselib/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done
最后,可以执行 hadoop/bin/mapred classpath 查看是否已经将 hbase lib 加载进来。
处理方式2:
或者直接不需要拷贝 hbase jar 包,在 hadoop/etc/hadoop/hadoop-env.sh 中增加:
# Point HADOOP_CLASSPATH at the HBase installation's lib directory (no jars copied).
export HADOOP_CLASSPATH=/opt/hbase-0.98.12/lib/*
这两种方式都测试过,现在用方式2, 下面截图是这两种方式的设置:
处理方式3: 下面写法是执行 bulkload引入外部jar的写法
使用shell脚本执行jar程序
# Run a jar program from a shell script:
# build a ':'-separated classpath in pt, starting with the current directory.
pt='.'
for jar in `ls /某路径/*.jar /某路径/*.jar /某路径/*.jar`
do
  pt=$pt:$jar
done
echo $pt
贴上以前这种使用方式写法:
# Add every file under $PROJECT_HOME/lib to CLASSPATH.
jars=`ls $PROJECT_HOME/lib`
for jar in $jars
do
  CLASSPATH="$CLASSPATH:$PROJECT_HOME/lib/$jar"
done
# Put the HDFS / Hadoop / HBase configuration directories in front of the jars.
# This must be its own statement so that $CLASSPATH below already includes them.
CLASSPATH=/etc/hdfs1/conf:/etc/hadoop/conf:/etc/hbase/conf:$CLASSPATH
java -Djava.library.path=/usr/lib/hadoop/lib/native -cp $CLASSPATH com.transwarp.hbase.bulkload.ImportTextFile2HBase $CONF_FILE -test