Reading HBase data with Spark

1, pom.xml

Version numbers

<properties>
    <!-- Component versions shared by the dependency declarations below. -->
    <hbase.version>2.2.2</hbase.version>
    <hadoop.version>2.10.0</hadoop.version>
    <spark.version>2.4.2</spark.version>
</properties>

Dependencies

<dependencies>
        <!-- Spark Core (Scala 2.11 build). commons-beanutils-core is excluded
             to avoid a duplicate-class conflict with commons-beanutils. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>commons-beanutils</groupId>
                    <artifactId>commons-beanutils-core</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- Spark SQL -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
        </dependency>

        <!-- Hadoop -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <!-- HBase -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-hadoop2-compat</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <!-- Use the shared ${hbase.version} property instead of a hard-coded
             version so all HBase artifacts stay in sync. -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-mapreduce</artifactId>
            <version>${hbase.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.8.1</version>
        </dependency>

        <dependency>
            <groupId>org.scalaj</groupId>
            <artifactId>scalaj-http_2.11</artifactId>
            <version>2.2.2</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.62</version>
        </dependency>
</dependencies>

 

2. Java HBase CRUD operations (create, read, update, delete)

  (1) We implement the operations as JUnit tests; first we need to connect to HBase.

Connection connection = null
Admin admin = null
Table table = null

@Before
public void setUp() throw IOException {
    Configuration conf = new Configuration();
    conf.set("hbase.rootdir", "hdfs://localhost:8020/hbase");
    conf.set("hbase.zookeeper.quorum", "localhost:2181");

    try {
        connection = ConnectionFactory.createConnection(conf);
        admin = connection.getAdmin();
    } catch(IOException e) {
        if(null != admin) {
            e.printStackTrace();
        }
    }
}

 

    First, the HBase Configuration needs two settings: hbase.rootdir and hbase.zookeeper.quorum. hbase.rootdir is the HDFS location where HBase stores its files, and hbase.zookeeper.quorum is the ZooKeeper address. The HBase Connection is obtained through ConnectionFactory, which takes the conf as a parameter.

      Then we obtain an Admin from the connection; the Admin is used for administrative work such as creating and deleting tables.

  

  (2) Create an HBase table

@Test
 public  void the createTable () throws Exception { 
    the TableName Table = TableName.valueOf (tableName); 
    
    // first determines whether there is a table of pre-created HBase 
    IF (admin.tableExists (Table)) { 
        System.out.println (tableName + "already exists ..." ) 
    } the else {
         // set table 
        HTableDescriptor tableDescriptor = new new HTableDescriptor (table);
         // column family setting table, the column is set in the group need to table creation 
        tableDescriptor.addFamily ( new new HColumnDescriptor ( "info" )); 
        tableDescriptor.addFamily ( new newHColumnDescriptor ( "address" )); 

        // use admin create a table 
        admin.createTable (tableDescriptor); 

        System.out.println (tableName + "successfully created ..." ); 
    } 
}

 

  

  (3) Insert data into the created table

 

@Test
public void testPut() throws Exception {
    table = connection.getTable(TableName.valueOf(tableName));

    // 插入单条诗句
    // 添加 rowKey
    Put put = new Put(Bytes.toBytes("hunter"));
    // 通过Put设置要添加的数据的CF、qualifier、value
    put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes("23"));
    put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("birthday"), Bytes.toBytes("1997-12-12"));
    put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("company"), Bytes.toBytes("HaHa"));

    put.addColumn (Bytes.toBytes ( "address"), Bytes.toBytes ( "Country"), Bytes.toBytes ( "the CN" )); 
    put.addColumn (Bytes.toBytes ( "address"), Bytes.toBytes ( " provimce "), Bytes.toBytes (" LN " )); 
    put.addColumn (Bytes.toBytes ( " address "), Bytes.toBytes (" City "), Bytes.toBytes (" the DL " )); 

    // data HBase put to go 
    table.put (put); 

    // insert multiple verses
     // Create a Put the List, as the plurality of pieces of data into which put 
    List <Put> = the puts new new the ArrayList <Put> (); 
    
    // The first of Put 
    of Put put1 = new new of Put (Bytes.toBytes ( "Jepson"));
    put1.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes("15"));
    put1.addColumn(Bytes.toBytes("info"), Bytes.toBytes("birthday"), Bytes.toBytes("xxxx-xx-xx"));
    put1.addColumn(Bytes.toBytes("info"), Bytes.toBytes("company"), Bytes.toBytes("WaWa"));
    put1.addColumn(Bytes.toBytes("address"), Bytes.toBytes("country"), Bytes.toBytes("CN"));
    put1.addColumn(Bytes.toBytes("address"), Bytes.toBytes("provimce"), Bytes.toBytes("BJ"));
    put1.addColumn(Bytes.toBytes("address"), Bytes.toBytes("city"), Bytes.toBytes("BJ"));
      
    // 第二个 Put
    Put put2 = new Put(Bytes.toBytes("okey"));
    put2.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes("19"));
    put2.addColumn(Bytes.toBytes("info"), Bytes.toBytes("birthday"), Bytes.toBytes("yy-yy-yy"));
    put2.addColumn(Bytes.toBytes("info"), Bytes.toBytes("company"), Bytes.toBytes("DuoDuo"));
    put2.addColumn(Bytes.toBytes("address"), Bytes.toBytes("country"), Bytes.toBytes("CN"));
    put2.addColumn(Bytes.toBytes("address"), Bytes.toBytes("provimce"), Bytes.toBytes("SH"));
    put2.addColumn(Bytes.toBytes("address"), Bytes.toBytes("city"), Bytes.toBytes("SH"));

    // 全部 add 到 puts list 中
    puts.add(put1);
    puts.add(put2);

    // 保存到 HBase 中
    table.put(puts);
}    

 

 

  (4) Query data from the HBase table

 

@Test
 // acquired data Hbase the RowKey 
public  void testGetByRowkey () throws IOException {
     // Get Table 
    Table = Connection.GetTable (TableName.valueOf (tableName));
     //  
    the Get GET = new new the Get ( "20200107_1003629081" .getBytes ( ));
     // to set the return of the column. Parameter Group :( column, column name)
     // get.addColumn (Bytes.toBytes ( "info"), Bytes.toBytes ( "Age")); 

    the Result Result = table.get (GET); 
    printResult (Result); 
    } 

    Private  void printResult (the Result Result) {
         for(Cell cell: result.rawCells()) {
            System.out.println(Bytes.toString(result.getRow())+ "\t"
                    + Bytes.toString(CellUtil.cloneFamily(cell)) + "\t"
                    + Bytes.toString(CellUtil.cloneQualifier(cell)) + "\t"
                    + Bytes.toString(CellUtil.cloneValue(cell)) + "\t"
                    + cell.getTimestamp()
            );
        }
    }

    @Test
    
    public void testScan() throws IOException {
        table = connection.getTable(TableName.valueOf(tableName));

      Scan scan = new Scan();
      scan.addFamily(Bytes.toBytes("info"));
     // scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("company"));
     // Scan scan = new Scan(Bytes.toBytes("jepson")); // >=
     // Scan scan = new Scan(new Get(Bytes.toBytes("jepson"))); 
     // 明确查询
     // Scan scan = new Scan(Bytes.toBytes("jepson"), 
    Bytes.toBytes("okey")); // 包含起始,不包含结束

     // ResultScanner results = table.getScanner(Bytes.toBytes("info"), Bytes.toBytes("company"));
    ResultScanner results =  table.getScanner(scan);

        
            printResult (result);for(Result result: results) {
   System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        }
    }

 

 

 

 

 

 

 

    

Guess you like

Origin www.cnblogs.com/Lyh1997/p/12228300.html