HBase Filters (2): Dedicated Filter API


1. Single-column value filter (SingleColumnValueFilter)

Key methods:

SingleColumnValueFilter(byte[] family, byte[] qualifier, CompareFilter.CompareOp compareOp, byte[] value)

SingleColumnValueFilter(byte[] family, byte[] qualifier, CompareFilter.CompareOp compareOp, ByteArrayComparable comparator)

boolean getFilterIfMissing()

void setFilterIfMissing(boolean filterIfMissing) ---- when set to true, rows that do not contain the reference column are filtered out entirely; when false, they are included in the result

boolean getLatestVersionOnly()

void setLatestVersionOnly(boolean latestVersionOnly) ---- when set to true, only the latest version of the reference column is checked
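
A minimal sketch showing both flags together, assuming the same imports and the same placeholder column (cf1:name) as the full example below:

SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("cf1"),
        Bytes.toBytes("name"), CompareFilter.CompareOp.EQUAL, Bytes.toBytes("csn"));
// Drop rows that do not contain the reference column cf1:name at all
filter.setFilterIfMissing(true);
// Compare only against the newest version of cf1:name
filter.setLatestVersionOnly(true);
Scan scan = new Scan();
scan.setFilter(filter);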

Java API:

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class SingleColumnValueFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;

    @Test
    public void singleColumnValueFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        /* Set up the single-column value filter
         * cf1  ---- column family
         * name ---- column qualifier
         */
        SingleColumnValueFilter singleColumnValueFilter = new SingleColumnValueFilter(Bytes.toBytes("cf1"),Bytes.toBytes("name"),
                CompareFilter.CompareOp.NOT_EQUAL,new SubstringComparator("csn"));
        // Filter out rows that do not contain the reference column at all
        singleColumnValueFilter.setFilterIfMissing(true);
        Scan scan = new Scan();
        scan.setFilter(singleColumnValueFilter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
//            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

2. Single-column value exclude filter (SingleColumnValueExcludeFilter)

The single-column value exclude filter extends the single-column value filter (SingleColumnValueFilter); the only difference is that the Result instances never contain the reference column that the filter checks.

Java API:

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueExcludeFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class SingleColumnValueExcludeFilterTest {

    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void singleColumnValueExcludeFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        /* Set up the single-column value exclude filter
         * cf1  ---- column family
         * name ---- column qualifier (the reference column, excluded from results)
         */
        SingleColumnValueExcludeFilter singleColumnValueExcludeFilter = new SingleColumnValueExcludeFilter(Bytes.toBytes("cf1"),Bytes.toBytes("name"),
                CompareFilter.CompareOp.NOT_EQUAL,new SubstringComparator("csn"));
        Scan scan = new Scan();
        scan.setFilter(singleColumnValueExcludeFilter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            // cf1:name is excluded from the result, so read cf1:age instead;
            // guard against rows that do not have cf1:age
            byte[] ageBytes = result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("age"));
            if (ageBytes != null) {
                System.out.println(Bytes.toInt(ageBytes));
            }
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

3. Prefix filter (PrefixFilter)

The prefix filter matches the rows whose row key starts with the given prefix ---- prefix matching is only useful in a scan; it adds little for a get.

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class PrefixFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void prefixFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        // "row3" is the row key prefix to match
        Filter filter = new PrefixFilter(Bytes.toBytes("row3"));
        Scan scan = new Scan();
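        // Tip: PrefixFilter does not skip ahead on its own ---- the scan still starts
        // at the first row of the table, so also set the start row to the prefix:
        scan.setStartRow(Bytes.toBytes("row3"));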
        scan.setFilter(filter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

4. Page filter (PageFilter)

The page filter breaks the rows of an HBase table into pages with the configured number of rows per page.

Note: if the PageFilter is specified first and the SingleColumnValueFilter after it, the condition can be correct yet no data comes back (see the FilterList sketch after the example below).

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class PageFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void pageFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        Filter filter = new PageFilter(2); // at most 2 rows per page
        int totalRows =0;
        // Row key of the last row seen on the previous page
        byte[] lastRow = null;
        // A single zero byte; appending it to a row key yields the smallest row key that sorts after it
        byte[] POSTFIX = new byte[] { 0x00 };
        while(true){
            Scan scan = new Scan();
            scan.setFilter(filter);
            // If there was a previous page, append the zero byte to its last row key
            // and use the result as the start row of the next page
            if(lastRow!=null){
                byte[] startRow = Bytes.add(lastRow,POSTFIX);
                System.out.println("start row: " + Bytes.toStringBinary(startRow));
                scan.setStartRow(startRow);
            }
            ResultScanner resultScanner = table.getScanner(scan);
            int localRows = 0;
            Result result;
            while((result = resultScanner.next())!=null){
                System.out.println(localRows++ +":"+result);
                totalRows++;
                lastRow = result.getRow();
            }
            resultScanner.close();
            if(localRows==0)
                break;
        }
        System.out.println("total rows:" + totalRows);
    }
}
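
The note above warns about filter ordering. A minimal sketch of the safe combination, assuming the same imports as the earlier examples plus org.apache.hadoop.hbase.filter.FilterList, and the same placeholder column and value (cf1:name, substring "csn"): in a FilterList, add the SingleColumnValueFilter first and the PageFilter second.

// Order matters: apply the value check first, then page the surviving rows
FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes("cf1"), Bytes.toBytes("name"),
        CompareFilter.CompareOp.EQUAL, new SubstringComparator("csn")));
filterList.addFilter(new PageFilter(2));
Scan scan = new Scan();
scan.setFilter(filterList);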

5. Key-only filter (KeyOnlyFilter)

Use this filter in applications that only need the keys of the KeyValue instances in the result, not the actual cell data.

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class KeyOnlyFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void keyOnlyFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        // lenAsVal = true: each value is replaced by the length of the original value (a 4-byte int);
        // lenAsVal = false: values come back empty ---- only the keys are returned
        KeyOnlyFilter filter = new KeyOnlyFilter(false);
        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name"))); // empty string here: KeyOnlyFilter strips the values
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

6. First-key-only filter (FirstKeyOnlyFilter)

If you only need to access the first column of each row (columns are implicitly sorted by HBase), this filter meets that need. It is typically used in row-counting (row counter) scenarios, where it is enough to check whether a row exists at all (see the sketch after the example below).

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class FirstKeyOnlyFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void firstKeyOnlyFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        FirstKeyOnlyFilter filter = new FirstKeyOnlyFilter();
        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
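            // The result holds only the first cell of each row, so cf1:name is
            // null here unless "name" happens to be the row's first column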
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}
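
For the row-counter use case mentioned above, a minimal sketch (reusing the Table handle and imports from the example, plus FilterList and KeyOnlyFilter): combining FirstKeyOnlyFilter with KeyOnlyFilter returns a single, value-free cell per row, which is the cheapest way to count rows client-side.

// One cell per row and no cell values ---- just enough to count rows
Scan scan = new Scan();
scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter()));
ResultScanner scanner = table.getScanner(scan);
long rowCount = 0;
for (Result r : scanner) {
    rowCount++;
}
scanner.close();
System.out.println("rows: " + rowCount);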

7. Inclusive stop filter (InclusiveStopFilter)

In a scan, the start row is included in the result while the stop row is excluded; with this filter, the stop row can be included as well.

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.InclusiveStopFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class InclusiveStopFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void inclusiveStopFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
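        // Scan from the start of the table up to and including row key "row3"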
        Filter filter = new InclusiveStopFilter(Bytes.toBytes("row3"));
        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

8. Timestamps filter (TimestampsFilter)

Use this filter when you need fine-grained control over which versions appear in the scan result ---- TimestampsFilter(List&lt;Long&gt; timestamps)

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.TimestampsFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class TimestampsFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void timestampsFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));

        List<Long> ts = new ArrayList<Long>();
        // Match cells with these exact timestamps; an empty list would filter out every cell
        ts.add(1522469029503L);
        TimestampsFilter filter = new TimestampsFilter(ts);
        Scan scan = new Scan();
        scan.setFilter(filter);
        // Additionally restrict the scan to the timestamp range [min, max)
        scan.setTimeRange(1522469029503L, 1522479029503L);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

9. Column count get filter (ColumnCountGetFilter)

This filter lets you limit how many columns are returned per row at most ---- ColumnCountGetFilter(int n)

Once a row's column count reaches the configured maximum, the filter stops the entire scan ---- so it is better suited to use with get() (see the Get sketch after the example below).

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.ColumnCountGetFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class ColumnCountGetFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void columnCountGetFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        // 2 is the maximum number of columns to return per row
        ColumnCountGetFilter filter = new ColumnCountGetFilter(2);
        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}
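
The scan above stops as soon as any row reaches the two-column limit, which is why this filter is better suited to get(). A minimal sketch with Get, reusing the Table handle from the example and assuming a row with key "row1" exists in ns1:t1:

// Fetch a single row, returning at most 2 of its columns
Get get = new Get(Bytes.toBytes("row1"));
get.setFilter(new ColumnCountGetFilter(2));
Result result = table.get(get);
System.out.println(result);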

10. Column pagination filter (ColumnPaginationFilter)

ColumnPaginationFilter(int limit, int offset) ---- the two constructor arguments set the starting position and the number of columns to return.

limit ---- return at most limit columns per row

offset ---- the offset: skip the first offset columns of each row before returning any

For example, for a row with columns c1 through c5, new ColumnPaginationFilter(3, 2) skips c1 and c2 and returns c3, c4, and c5.

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.ColumnPaginationFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class ColumnPaginationFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void columnPaginationFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
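        // offset = 2: skip the first two columns of each row; limit = 3: then return at most three columns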
        ColumnPaginationFilter columnPaginationFilter = new ColumnPaginationFilter(3,2);
        Scan scan = new Scan();
        scan.setFilter(columnPaginationFilter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

11. Column prefix filter (ColumnPrefixFilter)

Similar to PrefixFilter, but this filter matches by prefix on the column name (qualifier) instead of on the row key

ColumnPrefixFilter(byte[] prefix)

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class ColumnPrefixFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void columnPrefixFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        ColumnPrefixFilter columnPrefixFilter = new ColumnPrefixFilter(Bytes.toBytes("na"));
        Scan scan = new Scan();
        scan.setFilter(columnPrefixFilter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

12. Random row filter (RandomRowFilter)

RandomRowFilter(float chance)

chance: a value between 0.0 and 1.0

1. A negative chance causes every row to be filtered out

2. A chance greater than 1.0 causes every row to be included in the result

package specialFilter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.RandomRowFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;

import java.io.IOException;

public class RandomRowFilterTest {
    private Configuration configuration = null;
    private Connection connection = null;
    @Test
    public void randomRowFilter() throws IOException {
        System.out.println("begin");
        configuration = HBaseConfiguration.create();
        connection = ConnectionFactory.createConnection(configuration);
        Table table = connection.getTable(TableName.valueOf("ns1:t1"));
        RandomRowFilter randomRowFilter = new RandomRowFilter(0.4f); // each row has a 40% chance of being kept
        Scan scan = new Scan();
        scan.setFilter(randomRowFilter);
        ResultScanner resultScanner = table.getScanner(scan);
        for(Result result:resultScanner){
            System.out.println(result);
            String name = Bytes.toString(result.getValue(Bytes.toBytes("cf1"),Bytes.toBytes("name")));
            System.out.println(name);
        }
        resultScanner.close();
        table.close();
        System.out.println("end");
    }
}

Reprinted from blog.csdn.net/weixin_41925975/article/details/81383849