1. HBase query methods
HBase provides only two ways to query data:
- Get a unique record by the specified rowkey: get method.
- Get a batch of records according to specified conditions: scan method.
Conditional queries are implemented with the scan method. When using scan, keep the following points in mind:
- Scan speed can be improved with the setCaching and setBatch methods (trading memory for time).
- The scan range can be limited with setStartRow and setStopRow. The smaller the range, the higher the performance.
- Scan can add filters through the setFilter method, which is also the basis for paging and multi-condition queries.
2. Using RowFilter
Operator (CompareFilter.CompareOp) | Description |
LESS | less than |
LESS_OR_EQUAL | less than or equal to |
EQUAL | equal to |
NOT_EQUAL | not equal to |
GREATER_OR_EQUAL | greater than or equal to |
GREATER | greater than |
NO_OP | exclude all |
Comparator | Description |
BinaryComparator | Compares the complete byte arrays using Bytes.compareTo() |
BinaryPrefixComparator | Similar to BinaryComparator, but compares only the leading bytes (prefix match) |
NullComparator | Checks whether the value is null |
BitComparator | Performs a bitwise comparison (AND, OR, XOR) |
RegexStringComparator | Matches the value against a regular expression |
SubstringComparator | Treats the value as a string and matches it using contains() |
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.BinaryPrefixComparator; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.filter.SubstringComparator; public class TestHbaseRowFilter { String tableName = "test_row_filter"; Configuration config = HBaseConfiguration.create(); /** * Part of the code comes from the hbase authoritative guide * @throws IOException */ public void testRowFilter() throws IOException { HTable table = new HTable(config, tableName); Scan scan = new Scan(); System.out.println("Row less than or equal to row010"); Filter filter1 = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL, new BinaryComparator("row010".getBytes())); scan.setFilter(filter1); ResultScanner scanner1 = table.getScanner(scan); for (Result res : scanner1) { System.out.println(res); } scanner1.close(); System.out.println("Regularly get the line ending with 5"); Filter filter2 = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*5[ DISCUZ_CODE_0 ]quot;)); scan.setFilter(filter2); ResultScanner scanner2 = table.getScanner(scan); for (Result res : scanner2) { System.out.println(res); } scanner2.close(); System.out.println("Line containing 5"); Filter filter3 = new RowFilter(CompareFilter.CompareOp.EQUAL, new 
SubstringComparator("5")); scan.setFilter(filter3); ResultScanner scanner3 = table.getScanner(scan); for (Result res : scanner3) { System.out.println(res); } scanner3.close(); System.out.println("The beginning is row01"); Filter filter4 = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator("row01".getBytes())); scan.setFilter(filter4); ResultScanner scanner4 = table.getScanner(scan); for (Result res : scanner4) { System.out.println(res); } scanner3.close(); } /** * Initialization data */ public void init() { // create table and initialize data try { HBaseAdmin admin = new HBaseAdmin(config); if (!admin.tableExists(tableName)) { HTableDescriptor htd = new HTableDescriptor(tableName); HColumnDescriptor hcd1 = new HColumnDescriptor("data"); htd.addFamily (hcd1); HColumnDescriptor hcd2 = new HColumnDescriptor("url"); htd.addFamily (hcd2); admin.createTable(htd); } HTable table = new HTable(config, tableName); table.setAutoFlush(false); int count = 50; for (int i = 1; i <= count; ++i) { Put p = new Put(String.format("row%03d", i).getBytes()); p.add("data".getBytes(), String.format("col%01d", i % 10) .getBytes(), String.format("data%03d", i).getBytes()); p.add("url".getBytes(), String.format("col%01d", i % 10) .getBytes(), String.format("url%03d", i).getBytes()); table.put(p); } table.close(); } catch (IOException e) { e.printStackTrace (); } } /** * @param args * @throws IOException */ public static void main(String[] args) throws IOException { TestHbaseRowFilter test = new TestHbaseRowFilter(); test.init(); test.testRowFilter(); } }
Row less than or equal to row010
keyvalues = {row001 / data: col1 / 1364133382268 / Put / vlen = 7, row001 / url: col1 / 1364133382268 / Put / vlen = 6}
keyvalues = {row002 / data: col2 / 1364133382268 / Put / vlen = 7, row002 / url: col2 / 1364133382268 / Put / vlen = 6}
keyvalues = {row003 / data: col3 / 1364133382268 / Put / vlen = 7, row003 / url: col3 / 1364133382268 / Put / vlen = 6}
keyvalues = {row004 / data: col4 / 1364133382268 / Put / vlen = 7, row004 / url: col4 / 1364133382268 / Put / vlen = 6}
keyvalues = {row005 / data: col5 / 1364133382268 / Put / vlen = 7, row005 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row006 / data: col6 / 1364133382268 / Put / vlen = 7, row006 / url: col6 / 1364133382268 / Put / vlen = 6}
keyvalues = {row007 / data: col7 / 1364133382268 / Put / vlen = 7, row007 / url: col7 / 1364133382268 / Put / vlen = 6}
keyvalues = {row008 / data: col8 / 1364133382268 / Put / vlen = 7, row008 / url: col8 / 1364133382268 / Put / vlen = 6}
keyvalues = {row009 / data: col9 / 1364133382268 / Put / vlen = 7, row009 / url: col9 / 1364133382268 / Put / vlen = 6}
keyvalues = {row010 / data: col0 / 1364133382268 / Put / vlen = 7, row010 / url: col0 / 1364133382268 / Put / vlen = 6}
Regularly get the line ending with 5
keyvalues = {row005 / data: col5 / 1364133382268 / Put / vlen = 7, row005 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row015 / data: col5 / 1364133382268 / Put / vlen = 7, row015 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row025 / data: col5 / 1364133382268 / Put / vlen = 7, row025 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row035 / data: col5 / 1364133382268 / Put / vlen = 7, row035 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row045 / data: col5 / 1364133382268 / Put / vlen = 7, row045 / url: col5 / 1364133382268 / Put / vlen = 6}
Line containing 5
keyvalues = {row005 / data: col5 / 1364133382268 / Put / vlen = 7, row005 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row015 / data: col5 / 1364133382268 / Put / vlen = 7, row015 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row025 / data: col5 / 1364133382268 / Put / vlen = 7, row025 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row035 / data: col5 / 1364133382268 / Put / vlen = 7, row035 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row045 / data: col5 / 1364133382268 / Put / vlen = 7, row045 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row050 / data: col0 / 1364133382268 / Put / vlen = 7, row050 / url: col0 / 1364133382268 / Put / vlen = 6}
The beginning is row01
keyvalues = {row010 / data: col0 / 1364133382268 / Put / vlen = 7, row010 / url: col0 / 1364133382268 / Put / vlen = 6}
keyvalues = {row011 / data: col1 / 1364133382268 / Put / vlen = 7, row011 / url: col1 / 1364133382268 / Put / vlen = 6}
keyvalues = {row012 / data: col2 / 1364133382268 / Put / vlen = 7, row012 / url: col2 / 1364133382268 / Put / vlen = 6}
keyvalues = {row013 / data: col3 / 1364133382268 / Put / vlen = 7, row013 / url: col3 / 1364133382268 / Put / vlen = 6}
keyvalues = {row014 / data: col4 / 1364133382268 / Put / vlen = 7, row014 / url: col4 / 1364133382268 / Put / vlen = 6}
keyvalues = {row015 / data: col5 / 1364133382268 / Put / vlen = 7, row015 / url: col5 / 1364133382268 / Put / vlen = 6}
keyvalues = {row016 / data: col6 / 1364133382268 / Put / vlen = 7, row016 / url: col6 / 1364133382268 / Put / vlen = 6}
keyvalues = {row017 / data: col7 / 1364133382268 / Put / vlen = 7, row017 / url: col7 / 1364133382268 / Put / vlen = 6}
keyvalues = {row018 / data: col8 / 1364133382268 / Put / vlen = 7, row018 / url: col8 / 1364133382268 / Put / vlen = 6}
keyvalues = {row019 / data: col9 / 1364133382268 / Put / vlen = 7, row019 / url: col9 / 1364133382268 / Put / vlen = 6}