Hbase过滤器(一):比较过滤器API

过滤器(filter)

目录

一:行过滤器(rowFilter)

解析:行过滤器基于rowkey来过滤数据。使用多种运算符返回符合条件的行键,同时过滤掉不符合条件的rowkey。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * Examples of HBase {@code RowFilter}, which keeps or drops whole rows by comparing
 * their row key against a comparator.
 *
 * <p>Each test scans columns {@code cf1:name} and {@code cf1:age} of table
 * {@code ns1:t1} and prints the rows that pass the filter. A reachable HBase cluster
 * containing that table is required.
 *
 * <p>NOTE(review): the class name and the {@link #rowfilter()} method name are kept
 * as-is for compatibility even though they do not follow Java naming conventions.
 */
public class rowfilter {

    /** Table scanned by every example in this class. */
    private static final TableName TABLE_NAME = TableName.valueOf("ns1:t1");
    /** Column family read by every example. */
    private static final byte[] FAMILY = Bytes.toBytes("cf1");
    /** Qualifier of the name column. */
    private static final byte[] NAME = Bytes.toBytes("name");
    /** Qualifier of the age column. */
    private static final byte[] AGE = Bytes.toBytes("age");

    /**
     * Selects rows whose key is less than or equal to {@code "row3"} using a binary
     * (byte-wise) comparison, printing each row together with its name and age.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void rowfilter() throws IOException {
        System.out.print("begin\n");
        Filter filter = new RowFilter(
                CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("row3")));
        scanAndPrint(filter, true);
        System.out.print("end\n");
    }

    /**
     * Selects rows whose key matches the regular expression {@code ".3"}, printing each
     * row together with its name.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void rowRegexfilter() throws IOException {
        System.out.print("begin\n");
        Filter filter = new RowFilter(
                CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(".3"));
        scanAndPrint(filter, false);
        System.out.print("end\n");
    }

    /**
     * Selects rows whose key contains the substring {@code "3"}, printing each row
     * together with its name.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void rowSubStringfilter() throws IOException {
        System.out.print("begin\n");
        Filter filter = new RowFilter(
                CompareFilter.CompareOp.EQUAL,
                new SubstringComparator("3"));
        scanAndPrint(filter, false);
        System.out.print("end\n");
    }

    /**
     * Scans {@code cf1:name} and {@code cf1:age} with the given filter and prints every
     * returned row.
     *
     * <p>The connection, table and scanner are opened with try-with-resources so that
     * all three are released even when the scan throws; the previous code never closed
     * the connection at all.
     *
     * @param filter   the row filter to apply to the scan
     * @param printAge whether to also print the decoded {@code cf1:age} value
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    private void scanAndPrint(Filter filter, boolean printAge) throws IOException {
        Configuration configuration = HBaseConfiguration.create();

        Scan scan = new Scan();
        scan.addColumn(FAMILY, NAME);
        scan.addColumn(FAMILY, AGE);
        scan.setFilter(filter);

        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TABLE_NAME);
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                System.out.println(result);
                System.out.println(Bytes.toString(result.getValue(FAMILY, NAME)));
                if (printAge) {
                    System.out.println(decodeAge(result.getValue(FAMILY, AGE)));
                }
            }
        }
    }

    /**
     * Decodes an age cell without failing on rows that lack the column or that store
     * the age as text.
     *
     * @param raw the raw cell value, or {@code null} when the row has no age column
     * @return the age as an {@code Integer} when the cell is exactly 4 bytes, the
     *         string form of the bytes for any other length, or {@code null} when
     *         {@code raw} is {@code null}
     */
    private static Object decodeAge(byte[] raw) {
        if (raw == null) {
            return null;
        }
        // Bytes.toInt only accepts a 4-byte value; anything else (for example an age
        // written as text from the HBase shell) is shown as a string instead.
        if (raw.length == Bytes.SIZEOF_INT) {
            return Bytes.toInt(raw);
        }
        return Bytes.toString(raw);
    }
}

二:列族过滤器(FamilyFilter)

解析:列族过滤器与行过滤器相似,不过它是通过比较列族而不是比较rowkey来返回结果的。通过使用不同组合的运算符和比较器,用户可以在列族一级筛选所需的数据。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * Example of HBase {@code FamilyFilter}, which keeps or drops cells by comparing their
 * column family name against a comparator.
 *
 * <p>A reachable HBase cluster containing table {@code ns1:t1} is required.
 *
 * <p>NOTE(review): the class name and the test method name are kept as-is for
 * compatibility even though they do not follow Java naming conventions.
 */
public class familyfilter {

    /**
     * Applies a {@code FamilyFilter} that keeps only column families sorting before
     * {@code "cf2"}, first to a full-table scan and then to a single-row {@code Get}
     * on {@code "row1"}, printing what each returns.
     *
     * <p>All HBase resources are opened with try-with-resources so they are released
     * even when a call throws; the previous code never closed the connection.
     *
     * @throws IOException if the cluster cannot be reached or a read fails
     */
    @Test
    public void familyfilter() throws IOException {
        System.out.print("begin\n");

        Configuration configuration = HBaseConfiguration.create();
        Filter filter = new FamilyFilter(
                CompareFilter.CompareOp.LESS,
                new BinaryComparator(Bytes.toBytes("cf2")));

        Scan scan = new Scan();
        scan.setFilter(filter);

        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {

            // Scan: every row, restricted to the families that pass the filter.
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result row : resultScanner) {
                    System.out.println(row);
                    String name = Bytes.toString(
                            row.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // Get: the same filter applied to a single row.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }

        System.out.print("end\n");
    }
}

三:列名过滤器(QualifierFilter)

解析:使用列名进行筛选的类似逻辑,这种操作可以帮助用户筛选特定的列。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.QualifierFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * Example of HBase {@code QualifierFilter}, which keeps or drops cells by comparing
 * their column qualifier against a comparator.
 *
 * <p>A reachable HBase cluster containing table {@code ns1:t1} is required.
 *
 * <p>NOTE(review): the class name and the test method name ({@code familynamefilter},
 * although it filters on the qualifier) are kept as-is for compatibility.
 */
public class qualifierfilter {

    /**
     * Applies a {@code QualifierFilter} that keeps only columns whose qualifier is less
     * than or equal to {@code "age"}, first to a full-table scan and then to a
     * single-row {@code Get} on {@code "row1"}, printing what each returns.
     *
     * <p>Because {@code "name"} sorts after {@code "age"}, the {@code cf1:name} lookup
     * inside the loop is expected to print {@code null}; it is kept to show that the
     * column was filtered out.
     *
     * <p>All HBase resources are opened with try-with-resources so they are released
     * even when a call throws; the previous code never closed the connection.
     *
     * @throws IOException if the cluster cannot be reached or a read fails
     */
    @Test
    public void familynamefilter() throws IOException {
        System.out.print("begin\n");

        Configuration configuration = HBaseConfiguration.create();
        Filter filter = new QualifierFilter(
                CompareFilter.CompareOp.LESS_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("age")));

        Scan scan = new Scan();
        scan.setFilter(filter);

        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"))) {

            // Scan: every row, restricted to the qualifiers that pass the filter.
            try (ResultScanner resultScanner = table.getScanner(scan)) {
                for (Result row : resultScanner) {
                    System.out.println(row);
                    String name = Bytes.toString(
                            row.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("name")));
                    System.out.println(name);
                }
            }

            // Get: the same filter applied to a single row.
            Get get = new Get(Bytes.toBytes("row1"));
            get.setFilter(filter);
            Result result = table.get(get);
            System.out.println("result:" + result);
        }

        System.out.print("end\n");
    }
}

四:值过滤器(ValueFilter)

解析:这个过滤器可以帮助用户筛选具有某个特定值的单元格。与RegexStringComparator配合使用时,可以使用功能强大的正则表达式来进行筛选。需要注意的是,在使用特定比较器的时候,只能与部分运算符配合使用。

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.junit.Test;

import java.io.IOException;

/**
 * Example of HBase {@code ValueFilter}, which keeps or drops individual cells by
 * comparing their value against a comparator.
 *
 * <p>A reachable HBase cluster containing table {@code ns1:t1} is required.
 *
 * <p>NOTE(review): the class name is kept as-is for compatibility even though it does
 * not follow Java naming conventions.
 */
public class valuefilter {

    /**
     * Scans the whole table and prints every cell whose value contains the substring
     * {@code ".4"}.
     *
     * <p>Fixes relative to the previous version:
     * <ul>
     *   <li>the filter is now attached to the scan; it used to be created and then
     *       ignored, so the scan returned every cell;</li>
     *   <li>the connection, table and scanner are closed via try-with-resources, even
     *       on failure; the connection was never closed before;</li>
     *   <li>the cell value is printed as text rather than as a {@code byte[]} identity
     *       string.</li>
     * </ul>
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void valueFilter() throws IOException {
        Configuration configuration = HBaseConfiguration.create();

        // SubstringComparator does plain substring matching, so ".4" is matched
        // literally; the dot is not a regex wildcard here.
        Filter filter = new ValueFilter(
                CompareFilter.CompareOp.EQUAL,
                new SubstringComparator(".4"));

        Scan scan = new Scan();
        scan.setFilter(filter);

        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"));
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                for (KeyValue kv : result.raw()) {
                    System.out.println(kv);
                    // Decode the bytes; printing the array directly only shows its
                    // identity hash. Assumes the stored values are UTF-8 text.
                    System.out.println(
                            new String(kv.getValue(), java.nio.charset.StandardCharsets.UTF_8));
                }
            }
        }
    }
}

五:参考列过滤器(DependentColumnFilter)

解析:DependentColumnFilter以指定的参考列为基准进行过滤:只返回时间戳与参考列相同的单元格,不包含参考列的行会被整行过滤掉。

此过滤器提供了四种构造函数:

(1)DependentColumnFilter()

(2)DependentColumnFilter(byte[] family, byte[] qualifier)

(3)DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn)

(4)DependentColumnFilter(byte[] family, byte[] qualifier, boolean dropDependentColumn, CompareOp valueCompareOp, WritableByteArrayComparable valueComparator)

相关参数:

boolean dropDependentColumn -- 决定参考列被返回还是丢弃,为true时表示参考列被丢弃(不出现在结果中),为false时表示参考列被返回

CompareOp valueCompareOp --  比较运算符

WritableByteArrayComparable valueComparator --  比较器(在较新版本的HBase中,该类型已更名为ByteArrayComparable)

package compareFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

/**
 * Example of HBase {@code DependentColumnFilter}, which filters cells relative to a
 * chosen reference column.
 *
 * <p>A reachable HBase cluster containing table {@code ns1:t1} is required.
 *
 * <p>NOTE(review): the class name {@code valuefilter} and the method name
 * {@code valueFilter} look copy-pasted from the ValueFilter example and clash with it
 * if both live in the same package. They are kept unchanged for compatibility;
 * consider renaming to something like {@code DependentColumnFilterExample}.
 */
public class valuefilter {

    /**
     * Scans the whole table using {@code cf1:name} as the reference column and prints
     * every returned row. The third constructor argument
     * ({@code dropDependentColumn = false}) keeps the reference column in the results.
     *
     * <p>The connection, table and scanner are opened with try-with-resources so that
     * all three are released even when the scan throws; the previous code never closed
     * the connection.
     *
     * @throws IOException if the cluster cannot be reached or the scan fails
     */
    @Test
    public void valueFilter() throws IOException {
        Configuration configuration = HBaseConfiguration.create();

        Filter filter = new DependentColumnFilter(
                Bytes.toBytes("cf1"), Bytes.toBytes("name"), false);

        Scan scan = new Scan();
        scan.setFilter(filter);

        try (Connection connection = ConnectionFactory.createConnection(configuration);
             Table table = connection.getTable(TableName.valueOf("ns1:t1"));
             ResultScanner resultScanner = table.getScanner(scan)) {
            for (Result result : resultScanner) {
                System.out.println(result);
            }
        }
    }
}

猜你喜欢

转载自blog.csdn.net/weixin_41925975/article/details/81362492