Hbase中主要的数据读取函数是get()和scan(),它们都支持直接访问数据和通过指定起止行键访问数据的功能。我们可以在查询中添加更多的限制条件来减少查询得到的数据量,这些限制可以是指定列族、列、时间戳以及版本号。这些方法可以帮助我们控制哪些数据在查询时被包含其中,但是它们缺少一些细粒度的筛选功能,比如基于正则表达式对行键或是值进行筛选。Get和Scan两个类都支持过滤器,理由如下:这类对象提供的基本API不能对行键、列名或列值进行过滤,但是通过过滤器可以达到这个目的。过滤器最基本的接口叫Filter,除此之外,还有一些由Hbase提供的无需编程就可以直接使用的类。
同时,我们还可以通过继承Filter类来实现自己的需求。所有的过滤器都在服务器端生效,叫做谓词下推。这样可以保证被过滤掉的数据不会被传送到客户端。我们可以在客户端代买中实现过滤的功能(但会影响系统性能),因为在这种情况下服务器端需要传输更多的数据到客户端,我们应当尽量避免这种情况。
如下图所示,过滤器在客户端创建,通过RPC传送到服务器端,然后在服务器端执行过滤操作。
在过滤器层级结构的最底层是Filter接口和FilterBase抽象类,它们实现了过滤器的空壳和骨架,这使得实际的过滤器类可以许多重复的结构代码。大部分实体过滤器类一般都直接继承自FilterBase,也有一些间接继承自该类。不过它们的使用流程都是相同的,用户定义一个需要的过滤器实例,同时把定义好的过滤器实例传递给Get或Scan实例:setFilter(filter)。在实例化过滤器的时候,用户需要提供一些参数来设定过滤器的用途。其基本格式是:所需类型的过滤器(例如:行过滤器,值过滤器,依赖列过滤器等等),在过滤器中包含三个基本的参数,第一个是要过滤什么(比如说过滤id,过滤name等等),第二个是要比较过滤的方式(例如等于,不等于,大于等于等等),第三个就是要比较过滤的值
代码实例:
/**
* 测试RowFilter过滤器
*/
@Test
public void testRowFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t1");
Scan scan = new Scan();
RowFilter rowFilter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes("row0100")));
scan.setFilter(rowFilter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
System.out.println(Bytes.toString(r.getRow()));
}
}
/**
* 测试FamilyFilter过滤器
*/
@Test
public void testFamilyFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
FamilyFilter filter = new FamilyFilter(CompareFilter.CompareOp.LESS, new BinaryComparator(Bytes.toBytes("f2")));
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
System.out.println(f1id + " : " + f2id);
}
}
/**
* 测试QualifierFilter(列过滤器)
*/
@Test
public void testColFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
QualifierFilter colfilter = new QualifierFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("id")));
scan.setFilter(colfilter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + f2name);
}
}
/**
* 测试ValueFilter(值过滤器)
* 过滤value的值,含有指定的字符子串
*/
@Test
public void testValueFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
ValueFilter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("to"));
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
* 依赖列过滤器
*/
@Test
public void testDepFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
DependentColumnFilter filter = new DependentColumnFilter(Bytes.toBytes("f2"),
Bytes.toBytes("name"),
false,
CompareFilter.CompareOp.EQUAL,
new BinaryComparator(Bytes.toBytes("tom2.2"))
);
//ValueFilter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("to"));
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
* 单列值value过滤,对列上的value进行过滤,不符合整行删除。
*/
@Test
public void testSingleColumValueFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("f2"),
Bytes.toBytes("name"),
CompareFilter.CompareOp.NOT_EQUAL,
new BinaryComparator(Bytes.toBytes("tom2.2")));
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
* 单列值排除过滤器,去掉过滤使用的列,对列的值进行过滤
*/
@Test
public void testSingleColumValueExcludeFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
SingleColumnValueExcludeFilter filter = new SingleColumnValueExcludeFilter(Bytes.toBytes("f2"),
Bytes.toBytes("name"),
CompareFilter.CompareOp.EQUAL,
new BinaryComparator(Bytes.toBytes("tom2.2")));
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
* 前缀过滤,是rowkey过滤. where rowkey like 'row22%'
*/
@Test
public void testPrefixFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t1");
Scan scan = new Scan();
PrefixFilter filter = new PrefixFilter(Bytes.toBytes("row222"));
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
* 分页过滤,是rowkey过滤,在region上扫描时,对每次page设置的大小。
* 返回到到client,设计到每个Region结果的合并。
*/
@Test
public void testPageFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t1");
Scan scan = new Scan();
PageFilter filter = new PageFilter(10);
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
* keyOnly过滤器,只提取key,丢弃value.
* @throws IOException
*/
@Test
public void testKeyOnlyFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t1");
Scan scan = new Scan();
KeyOnlyFilter filter = new KeyOnlyFilter();
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
* ColumnPageFilter,列分页过滤器,过滤指定范围列,
* select ,,a,b from ns1:t7
*/
@Test
public void testColumnPageFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
ColumnPaginationFilter filter = new ColumnPaginationFilter(2,2);
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
/**
*
*/
@Test
public void testLike() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
ValueFilter filter = new ValueFilter(CompareFilter.CompareOp.EQUAL,
new RegexStringComparator("^tom2")
);
scan.setFilter(filter);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
@Test
//select * from t7 where ((age <= 13) and (name like '%t') or (age > 13) and (name like 't%'))
public void testComboFilter() throws IOException {
Configuration conf = HBaseConfiguration.create();
Connection conn = ConnectionFactory.createConnection(conf);
TableName tname = TableName.valueOf("ns1:t7");
Scan scan = new Scan();
//where ... f2:age <= 13
SingleColumnValueFilter ftl = new SingleColumnValueFilter(
Bytes.toBytes("f2"),
Bytes.toBytes("age"),
CompareFilter.CompareOp.LESS_OR_EQUAL,
new BinaryComparator(Bytes.toBytes("13"))
);
//where ... f2:name like %t
SingleColumnValueFilter ftr = new SingleColumnValueFilter(
Bytes.toBytes("f2"),
Bytes.toBytes("name"),
CompareFilter.CompareOp.EQUAL,
new RegexStringComparator("^t")
);
//ft
FilterList ft = new FilterList(FilterList.Operator.MUST_PASS_ALL);
ft.addFilter(ftl);
ft.addFilter(ftr);
//where ... f2:age > 13
SingleColumnValueFilter fbl = new SingleColumnValueFilter(
Bytes.toBytes("f2"),
Bytes.toBytes("age"),
CompareFilter.CompareOp.GREATER,
new BinaryComparator(Bytes.toBytes("13"))
);
//where ... f2:name like %t
SingleColumnValueFilter fbr = new SingleColumnValueFilter(
Bytes.toBytes("f2"),
Bytes.toBytes("name"),
CompareFilter.CompareOp.EQUAL,
new RegexStringComparator("t$")
);
//ft
FilterList fb = new FilterList(FilterList.Operator.MUST_PASS_ALL);
fb.addFilter(fbl);
fb.addFilter(fbr);
FilterList fall = new FilterList(FilterList.Operator.MUST_PASS_ONE);
fall.addFilter(ft);
fall.addFilter(fb);
scan.setFilter(fall);
Table t = conn.getTable(tname);
ResultScanner rs = t.getScanner(scan);
Iterator<Result> it = rs.iterator();
while (it.hasNext()) {
Result r = it.next();
byte[] f1id = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("id"));
byte[] f2id = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("id"));
byte[] f1name = r.getValue(Bytes.toBytes("f1"), Bytes.toBytes("name"));
byte[] f2name = r.getValue(Bytes.toBytes("f2"), Bytes.toBytes("name"));
System.out.println(f1id + " : " + f2id + " : " + Bytes.toString(f1name) + " : " + Bytes.toString(f2name));
}
}
有时候我们需要按照各自的需求来实现自定义过滤器。我们需要实现Filter接口或者直接继承FilterBase类,后者已经为接口中的所有成员方法提供了默认的实现
下图展示了处理一行数据时,过滤器中各个方法的逻辑流程。