1 hbase依赖zookeeper
保存Hmaster的地址和backup-master地址
- 管理HregionServer
- 做增删改查表的节点
- 管理HregionServer中的表分配
保存表-ROOT-的地址
-ROOT- 是hbase默认的根表,用于检索其他表。
保存HRegionServer列表
HRegionServer负责表数据的增删改查;和hdfs交互,存取数据。
2 hbase API
2.1 配置
HBaseConfiguration
包:org.apache.hadoop.hbase.HBaseConfiguration
作用:通过此类可以对HBase进行配置
用法实例:
Configuration config = HBaseConfiguration.create();
说明: HBaseConfiguration.create() 默认会从classpath 中查找 hbase-site.xml 中的配置信息,初始化 Configuration。
这里要特别注意:端口 2181 必须和 zookeeper 配置文件 zoo.cfg 中配置的端口(clientPort)一致。
使用方法:
static Configuration config = null;
static {
    // Assemble the client settings on a local object, then publish it to the shared field.
    Configuration conf = HBaseConfiguration.create();
    // ZooKeeper ensemble hosts and the client port the HBase client connects to.
    conf.set("hbase.zookeeper.quorum", "slave1,slave2,slave3");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    config = conf;
}
2.1.1 测试代码
首先集群要启动 hadoop,zookeeper,hbase
package demo1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Table;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
/**
 * JUnit example that connects to an HBase cluster and creates a table.
 *
 * <p>Each test gets a fresh {@link Connection} and a handle to the table "user" in
 * {@link #init()}; both are released again in {@link #close()}.
 */
public class HbaseTest {
    static Configuration config = null;
    private Connection connection = null;
    private Table table = null;

    /**
     * Builds the client configuration, opens the connection and looks up the table "user".
     *
     * @throws IOException if the connection or the table handle cannot be created
     */
    @Before
    public void init() throws IOException {
        config = HBaseConfiguration.create();
        config.set("hbase.master", "node1:60000");
        config.set("hbase.zookeeper.quorum", "node1,node2,node3");
        config.set("hbase.zookeeper.property.clientPort", "2181");
        connection = ConnectionFactory.createConnection(config);
        table = connection.getTable(TableName.valueOf("user"));
    }

    /**
     * Creates the table "test3" with the two column families "info" and "info2".
     *
     * @throws IOException if the admin connection or the table creation fails
     */
    @Test
    public void createTable() throws IOException {
        // try-with-resources closes the admin handle even when createTable throws.
        try (HBaseAdmin admin = new HBaseAdmin(config)) {
            // Table descriptor: carries the table name and its column families.
            TableName tableName = TableName.valueOf("test3");
            HTableDescriptor desc = new HTableDescriptor(tableName);

            // Column family descriptors, registered on the table descriptor.
            HColumnDescriptor family = new HColumnDescriptor("info");
            desc.addFamily(family);
            HColumnDescriptor family2 = new HColumnDescriptor("info2");
            desc.addFamily(family2);

            admin.createTable(desc);
        }
    }

    /**
     * Releases the table handle and the connection opened by {@link #init()}.
     *
     * @throws Exception if closing either resource fails
     */
    @After
    public void close() throws Exception {
        // Both fields stay null when init() failed part-way; skip them instead of
        // throwing a NullPointerException that would hide the original failure.
        try {
            if (table != null) {
                table.close();
            }
        } finally {
            // Close the connection even if closing the table threw.
            if (connection != null) {
                connection.close();
            }
        }
    }
}
2.2 表管理类
HBaseAdmin
包:org.apache.hadoop.hbase.client.HBaseAdmin
作用:提供管理 HBase 数据库中表信息的接口(如创建表、删除表、启用/禁用表)
用法:
HBaseAdmin admin = new HBaseAdmin(config);
2.3 表描述类
HTableDescriptor
包:org.apache.hadoop.hbase.HTableDescriptor
作用:HTableDescriptor 类包含了表的名字以及表的列族信息
表的schema(设计)
用法:
// Describe the table, then register a column family named "myFamily" on it.
// (String literals need plain ASCII double quotes; typographic quotes do not compile.)
HTableDescriptor htd = new HTableDescriptor(tablename);
htd.addFamily(new HColumnDescriptor("myFamily"));
2.4 列族的描述类
HColumnDescriptor
包:org.apache.hadoop.hbase.HColumnDescriptor
作用:HColumnDescriptor 维护列族的信息
用法:
htd.addFamily(new HColumnDescriptor(“myFamily”));
2.5 创建表操作
static Configuration config = null;
static {
    // Assemble the client settings on a local object, then publish it to the shared field.
    Configuration conf = HBaseConfiguration.create();
    // ZooKeeper ensemble hosts and the client port the HBase client connects to.
    conf.set("hbase.zookeeper.quorum", "slave1,slave2,slave3");
    conf.set("hbase.zookeeper.property.clientPort", "2181");
    config = conf;
}
// try-with-resources closes the admin handle even when createTable throws.
try (HBaseAdmin admin = new HBaseAdmin(config)) {
    // Table descriptor with two column families, "f1" and "f2".
    HTableDescriptor desc = new HTableDescriptor(tableName);
    HColumnDescriptor family1 = new HColumnDescriptor("f1");
    HColumnDescriptor family2 = new HColumnDescriptor("f2");
    desc.addFamily(family1);
    desc.addFamily(family2);
    admin.createTable(desc);
}
2.6 删除表
// try-with-resources closes the admin handle even when disable/delete throws.
try (HBaseAdmin admin = new HBaseAdmin(config)) {
    // The table is disabled first and deleted afterwards.
    admin.disableTable(tableName);
    admin.deleteTable(tableName);
}
2.6.1 测试
/**
 * Disables and then deletes the table "test3".
 *
 * @throws IOException if the admin connection, the disable or the delete fails
 */
@Test
public void deleteTable() throws IOException {
    TableName tableName = TableName.valueOf("test3");
    // try-with-resources closes the admin handle even when disable/delete throws;
    // the original only reached admin.close() on the success path.
    try (HBaseAdmin admin = new HBaseAdmin(config)) {
        // The table is disabled first and deleted afterwards.
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
    }
}
2.7 插入数据
/**
 * Writes two rows ("zhangsanfen_1234" and "John_1234") into column family "info1"
 * with a single batched put.
 *
 * <p>"name" and "address" are stored as strings; "age" and "sex" are stored as
 * 4-byte ints, so they have to be read back with {@code Bytes.toInt}.
 *
 * @throws IOException if the put fails
 */
@Test
public void insertData() throws IOException {
    byte[] family = Bytes.toBytes("info1");
    ArrayList<Put> list = new ArrayList<>();

    // One Put per row key; addColumn replaces the deprecated Put.add(family, qualifier, value).
    Put put = new Put(Bytes.toBytes("zhangsanfen_1234"));
    put.addColumn(family, Bytes.toBytes("name"), Bytes.toBytes("zhangsanfeng"));
    put.addColumn(family, Bytes.toBytes("age"), Bytes.toBytes(23));
    put.addColumn(family, Bytes.toBytes("sex"), Bytes.toBytes(0));
    put.addColumn(family, Bytes.toBytes("address"), Bytes.toBytes("Shanghai"));

    Put put1 = new Put(Bytes.toBytes("John_1234"));
    put1.addColumn(family, Bytes.toBytes("name"), Bytes.toBytes("John"));
    put1.addColumn(family, Bytes.toBytes("age"), Bytes.toBytes(20));
    put1.addColumn(family, Bytes.toBytes("sex"), Bytes.toBytes(1));
    put1.addColumn(family, Bytes.toBytes("address"), Bytes.toBytes("USA"));

    list.add(put);
    list.add(put1);

    // Send both rows to the table in one call.
    table.put(list);
}
2.8 查询
2.8.1 单条查询
/**
 * Single-row query: fetches row "John_1234" and prints its info1:name, info1:sex,
 * info1:address and info1:age cells, in that order.
 *
 * @throws IOException if the get fails
 */
@Test
public void queryData() throws IOException {
    final byte[] family = Bytes.toBytes("info1");

    // A Get addresses exactly one row by its row key.
    Result row = table.get(new Get(Bytes.toBytes("John_1234")));

    // name/address were written as strings, sex/age as ints.
    System.out.println(Bytes.toString(row.getValue(family, Bytes.toBytes("name"))));
    System.out.println(Bytes.toInt(row.getValue(family, Bytes.toBytes("sex"))));
    System.out.println(Bytes.toString(row.getValue(family, Bytes.toBytes("address"))));
    System.out.println(Bytes.toInt(row.getValue(family, Bytes.toBytes("age"))));
}
2.8.2 全表扫描
/**
 * Full table scan: prints the info1:name, info1:sex, info1:address and info1:age
 * cells of every row returned by an unrestricted {@code Scan}.
 *
 * @throws IOException if the scan fails
 */
@Test
public void scanData() throws IOException {
    byte[] family = Bytes.toBytes("info1");
    Scan scan = new Scan();
    // The scanner must be closed to release what it holds; try-with-resources
    // does that even when reading a row throws.
    try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result result : scanner) {
            byte[] value = result.getValue(family, Bytes.toBytes("name"));
            byte[] sex = result.getValue(family, Bytes.toBytes("sex"));
            byte[] address = result.getValue(family, Bytes.toBytes("address"));
            byte[] age = result.getValue(family, Bytes.toBytes("age"));
            System.out.println(Bytes.toString(value));
            System.out.println(Bytes.toInt(sex));
            System.out.println(Bytes.toString(address));
            System.out.println(Bytes.toInt(age));
        }
    }
}
3 过滤器
3.1 过滤器种类
- 列值过滤器—SingleColumnValueFilter:过滤列值的相等、不等、范围等;
- 列名前缀过滤器—ColumnPrefixFilter:过滤指定前缀的列名;
- 多个列名前缀过滤器—MultipleColumnPrefixFilter:过滤多个指定前缀的列名;
- rowKey过滤器—RowFilter:通过正则,过滤rowKey值。
3.2 列值过滤器—SingleColumnValueFilter
/**
 * Scans the table with a {@code SingleColumnValueFilter} that keeps rows whose
 * info1:name equals "John", and prints name, sex, address and age of each match.
 *
 * @throws IOException if the scan fails
 */
@Test
public void scanDataByFilter1() throws IOException {
    byte[] family = Bytes.toBytes("info1");
    SingleColumnValueFilter singleColumnValueFilter =
            new SingleColumnValueFilter(family, Bytes.toBytes("name"),
                    CompareFilter.CompareOp.EQUAL, Bytes.toBytes("John"));
    // NOTE(review): rows that have no info1:name cell are presumably still returned
    // unless setFilterIfMissing(true) is called — confirm against the filter docs.
    Scan scan = new Scan();
    scan.setFilter(singleColumnValueFilter);
    // The scanner must be closed to release what it holds; try-with-resources
    // does that even when reading a row throws.
    try (ResultScanner scanner = table.getScanner(scan)) {
        for (Result result : scanner) {
            byte[] value = result.getValue(family, Bytes.toBytes("name"));
            byte[] sex = result.getValue(family, Bytes.toBytes("sex"));
            byte[] address = result.getValue(family, Bytes.toBytes("address"));
            byte[] age = result.getValue(family, Bytes.toBytes("age"));
            System.out.println(Bytes.toString(value));
            System.out.println(Bytes.toInt(sex));
            System.out.println(Bytes.toString(address));
            System.out.println(Bytes.toInt(age));
        }
    }
}
3.3 列名前缀过滤器—ColumnPrefixFilter
ColumnPrefixFilter 用于筛选列名以指定前缀开头的列
// Keep only columns whose qualifier starts with "values".
// (String literals need plain ASCII double quotes; typographic quotes do not compile.)
ColumnPrefixFilter f = new ColumnPrefixFilter(Bytes.toBytes("values"));
s1.setFilter(f);
3.4 多个列名前缀过滤器—MultipleColumnPrefixFilter
MultipleColumnPrefixFilter 和 ColumnPrefixFilter 行为差不多,但可以指定多个前缀
// Keep columns whose qualifier starts with either "value1" or "value2".
// (String literals need plain ASCII double quotes; typographic quotes do not compile.)
byte[][] prefixes = new byte[][] {Bytes.toBytes("value1"), Bytes.toBytes("value2")};
Filter f = new MultipleColumnPrefixFilter(prefixes);
s1.setFilter(f);
3.5 rowKey过滤器—RowFilter
RowFilter 是rowkey过滤器
通常根据rowkey来指定范围时,使用scan扫描器的StartRow和StopRow方法比较好。
// Matches row keys that start with "1234" (regex anchored with ^).
// (String literals need plain ASCII double quotes; typographic quotes do not compile.)
Filter f = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^1234"));
s1.setFilter(f);
3.6 FilterList
FilterList 代表一个过滤器列表,可以添加多个过滤器进行查询,多个过滤器之间的关系有:
与关系(符合所有):FilterList.Operator.MUST_PASS_ALL
或关系(符合任一):FilterList.Operator.MUST_PASS_ONE
// MUST_PASS_ONE: a row is kept when at least one of the filters accepts it (logical OR).
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);
Scan s1 = new Scan();
// CompareOp is spelled CompareFilter.CompareOp, as in the other examples of this document.
filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes("f1"), Bytes.toBytes("c1"), CompareFilter.CompareOp.EQUAL, Bytes.toBytes("v1")));
filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes("f1"), Bytes.toBytes("c2"), CompareFilter.CompareOp.EQUAL, Bytes.toBytes("v2")));
// With the following line only the specified cell is returned; the other cells of the same row are not.
// NOTE(review): restricting the scan to f1:c1 presumably also hides f1:c2 from the second filter — confirm.
s1.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("c1"));
s1.setFilter(filterList); // attach the filter list to the scan
ResultScanner ResultScannerFilterList = table.getScanner(s1); // scanner over the matching rows