HBase Programming in Practice (Writing an HBase Utility Class with the Java API)

1. Preparing and Setting Up the Development Environment

(1) Create a Maven project

       The Maven configuration is as follows:

      1) Dependencies

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <!-- groupId is a placeholder (the original POM omits it); substitute your own -->
    <groupId>com.example</groupId>
    <artifactId>shadowsync</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.3.1</version>
        </dependency>
   </dependencies>
</project>

    2) Add the packaging and compiler plugins

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <!-- fill in the fully qualified name of the class to launch -->
                                    <mainClass>name.of.the.class.to.package</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
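
With both plugins in place, building and running is one command each. A sketch of the expected invocation (the jar name assumes Maven's default finalName of artifactId-version, and running it assumes mainClass was filled in above):

mvn clean package
java -jar target/shadowsync-1.0-SNAPSHOT.jar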

(2) Configure the Windows hosts file at C:\Windows\System32\drivers\etc (this step is critical: the HBase client resolves region servers by hostname, and without these mappings it will hang in a connecting state indefinitely).

         When the client cannot connect, the log keeps cycling like this:

DEBUG hconnection-0xb7dd107-metaLookup-shared--pool2-t32 org.apache.hadoop.hbase.ipc.RpcClientImpl - Use SIMPLE authentication for service ClientService, sasl=false
DEBUG hconnection-0xb7dd107-metaLookup-shared--pool2-t32 org.apache.hadoop.hbase.ipc.RpcClientImpl - Not trying to connect to bigdata4.jiaxun.com/10.0.5.14:16020 this server is in the failed servers list
DEBUG main-SendThread(10.9.1.43:2181) org.apache.zookeeper.ClientCnxn - Reading reply sessionid:0x370878cbdc400f1, packet:: clientPath:null serverPath:null finished:false header:: 795,4  replyHeader:: 795,536871130376,0  request:: '/hbase-unsecure/meta-region-server,F  response:: #ffffffff0001a726567696f6e7365727665723a31363032306affffff8344ffffffca6cffffffd4ffffffaf3c50425546a1fa1362696764617461342e6a696178756e2e636f6d10ffffff947d18ffffffdaffffff99ffffffeeffffff99ffffff882e100183,s{532575944989,532575945628,1582749573464,1582749802674,1,0,0,0,72,0,532575944989} 
DEBUG main-SendThread(10.9.1.43:2181) org.apache.zookeeper.ClientCnxn - Reading reply sessionid:0x370878cbdc400f1, packet:: clientPath:null serverPath:null finished:false header:: 796,8  replyHeader:: 796,536871130376,0  request:: '/hbase-unsecure,F  response:: v{'replication,'meta-region-server,'rs,'splitWAL,'backup-masters,'table-lock,'flush-table-proc,'master-maintenance,'region-in-transition,'online-snapshot,'switch,'master,'running,'recovering-regions,'draining,'namespace,'hbaseid,'table} 
....... (this block repeats indefinitely: the client reads the hbase:meta location from ZooKeeper, gets back a region server hostname it cannot reach, puts that server on the failed-servers list, and retries)

          Map each VM's IP to its hostname. Note: bind the real IP addresses here, not virtual IPs. The names must also match what the cluster itself reports; the log above shows the client trying bigdata4.jiaxun.com, so the fully qualified names may need to be listed as well.

        

# Copyright (c) 1993-2009 Microsoft Corp.
#
# This is a sample HOSTS file used by Microsoft TCP/IP for Windows.
#
# This file contains the mappings of IP addresses to host names. Each
# entry should be kept on an individual line. The IP address should
# be placed in the first column followed by the corresponding host name.
# The IP address and the host name should be separated by at least one
# space.
#
# Additionally, comments (such as these) may be inserted on individual
# lines or following the machine name denoted by a '#' symbol.
#
# For example:
#
#      102.54.94.97     rhino.acme.com          # source server
#       38.25.63.10     x.acme.com              # x client host
10.9.1.50  bigdata6
10.9.1.49  bigdata5
10.9.1.48  bigdata4
10.9.1.47  bigdata3
10.9.1.46  bigdata2
10.9.1.45  bigdata1
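
With the mappings saved, confirm from a Windows command prompt that the names resolve before running any client code, for example:

ping bigdata4
ping bigdata1

Each should at least resolve to the mapped 10.9.1.x address (even if ICMP itself is blocked), rather than fail name resolution.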

(3) Download HBase's hbase-site.xml and log4j.properties files from the cluster (for example via your remote/SSH tool's FTP feature).

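log4j.properties is an ordinary Log4j 1.x configuration; a minimal sketch (the copy downloaded from your cluster will likely differ) that keeps client logging readable:

# Minimal Log4j 1.x configuration; a sketch, your cluster's file may differ
log4j.rootLogger=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2}: %m%n
# Raise to DEBUG when diagnosing connection problems like the loop shown above
log4j.logger.org.apache.hadoop.hbase=INFO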

(4) Create the HBase client project in your IDE, and put the hbase-site.xml and log4j.properties files under the project's resources directory.


          With hbase-site.xml in that directory (and thus on the classpath), the project code can pick up the hostnames and other settings from its configuration directly (provided the IP mappings are already set up in the Windows hosts file).
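
For reference, a minimal hbase-site.xml mirrors the keys the utility class sets in code below; this is a sketch using this article's cluster values, so substitute your own:

<configuration>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>bigdata1.jiaxun.com,bigdata2.jiaxun.com,bigdata3.jiaxun.com</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
    </property>
    <property>
        <name>zookeeper.znode.parent</name>
        <value>/hbase-unsecure</value>
    </property>
</configuration>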

2. Writing the HbaseUtils Utility Class

     (1) Create a utils package; the code is as follows:

package HbaseUtils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
import org.apache.hadoop.hbase.client.coprocessor.LongColumnInterpreter;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class HbaseUtils {

    /* Shared configuration and connection */
    private static Configuration conf = null;
    private static Connection conn = null;

    static {
        conf = HBaseConfiguration.create();
        // ZooKeeper quorum and related HBase client settings
        conf.set("hbase.zookeeper.quorum", "bigdata1.jiaxun.com,bigdata2.jiaxun.com,bigdata3.jiaxun.com");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.master", "10.9.1.16:16000");
        conf.set("zookeeper.znode.parent", "/hbase-unsecure");
        // Create the shared, heavyweight Connection once per JVM
        try {
            conn = ConnectionFactory.createConnection(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    //insert a single cell
    public static void addOneData(String tablename, String rowkey, String cf, String column, String value) throws Exception {
        //obtain a (lightweight) table handle from the shared connection
        Table htable = conn.getTable(TableName.valueOf(tablename));
        try {
            //build the Put for the given row key
            Put put = new Put(Bytes.toBytes(rowkey));
            put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value));
            //send the Put to the table
            htable.put(put);
            System.out.println("===========insert succeeded===========");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            //close the table handle; keep the shared connection open for later calls
            htable.close();
        }
    }

    //insert a batch of Puts
    public static void addBatchData(String tablename, List<Put> puts) throws Exception {

        Table htable = conn.getTable(TableName.valueOf(tablename));

        try {
            htable.put(puts);
            System.out.println("===========batch insert succeeded===========");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            //close the table handle only; the shared connection stays open
            htable.close();
        }
    }

    //generate and insert num rows in batches
    public static void putBatchData(String tablename, String rowKey, String cf, String column, String value, int num) throws IOException {

        Table htable = conn.getTable(TableName.valueOf(tablename));
        List<Put> puts = new ArrayList<Put>(2048);
        int count = 0;
        try {
            for (int i = 0; i < num; i++) {
                Put put = new Put(Bytes.toBytes(rowKey + "_" + i));
                put.addColumn(Bytes.toBytes(cf), Bytes.toBytes(column), Bytes.toBytes(value + i));
                puts.add(put);
                count++;
                //flush every 3000 Puts to bound client-side memory
                if (count % 3000 == 0) {
                    System.out.println("count:" + count);
                    htable.put(puts);
                    puts = new ArrayList<Put>(2048);
                }
            }
            //flush the remaining Puts
            System.out.println("Total count:" + count);
            htable.put(puts);
            System.out.println("===========batch insert succeeded===========");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            htable.close();
        }
    }

    //print the table descriptor (column families and their settings)
    public static void getTableDesc(String tableName) throws Exception {
        Table htable = conn.getTable(TableName.valueOf(tableName));           //table handle
        try {
            HTableDescriptor htabledesc = htable.getTableDescriptor();        //table descriptor
            HColumnDescriptor[] hcd = htabledesc.getColumnFamilies();         //column family descriptors
            for (HColumnDescriptor hd : hcd) {
                String name = hd.getNameAsString();               //column family name
                int bs = hd.getBlocksize();                       //block size
                int minVers = hd.getMinVersions();                //minimum versions
                int maxVers = hd.getMaxVersions();                //maximum versions
                int defVers = HColumnDescriptor.DEFAULT_VERSIONS; //default versions
                System.out.println("name : " + name +
                        " blocksize : " + bs +
                        " minVers : " + minVers +
                        " maxVers : " + maxVers + " defVers : " + defVers);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            //release the table handle
            htable.close();
        }
    }

    //scan and print every cell in the table
    public static void scanTable(String tableName) throws Exception {
        Table htable = conn.getTable(TableName.valueOf(tableName));
        try {
            Scan scan = new Scan();
            //ResultScanner is the client-side iterator over scan results
            ResultScanner scanner = htable.getScanner(scan);
            //each Result holds the cells of a single row, whether from GET or SCAN
            for (Result res : scanner) {
                for (Cell cell : res.listCells()) {
                    System.out.println("================================================");
                    System.out.println("rowkey ===========" + Bytes.toString(res.getRow()));
                    System.out.println("columnFam ========" + Bytes.toString(CellUtil.cloneFamily(cell)));
                    System.out.println("column ===========" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                    System.out.println("timestamp ========" + cell.getTimestamp());
                    System.out.println("value ============" + Bytes.toString(CellUtil.cloneValue(cell)));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            htable.close();
        }
    }
    //scan a row-key range [rowKey1, rowKey2) and print one column's values
    public static void scanForRange(String tableName, String rowKey1, String rowKey2, String cf, String column) throws Exception {
        Table htable = conn.getTable(TableName.valueOf(tableName));
        try {
            Scan scan = new Scan(Bytes.toBytes(rowKey1), Bytes.toBytes(rowKey2));
            ResultScanner scanner = htable.getScanner(scan);
            for (Result rs : scanner) {
                //read the value of one specific column
                String res = Bytes.toString(rs.getValue(Bytes.toBytes(cf), Bytes.toBytes(column)));
                System.out.println(res);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            htable.close();
        }
    }

    //get and print a single row
    public static void getForRowKey(String tableName, String rowkey) throws Exception {
        Table htable = conn.getTable(TableName.valueOf(tableName));           //table handle
        try {
            Get get = new Get(Bytes.toBytes(rowkey));
            Result result = htable.get(get);
            if (result == null || result.size() == 0) {
                System.out.println("no such rowkey");
                return;   //the finally block closes the table handle
            }
            for (Cell cell : result.listCells()) {
                System.out.println("================================================");
                System.out.println("rowkey ===========" + Bytes.toString(result.getRow()));
                System.out.println("columnFam ========" + Bytes.toString(CellUtil.cloneFamily(cell)));
                System.out.println("column ===========" + Bytes.toString(CellUtil.cloneQualifier(cell)));
                System.out.println("timestamp ========" + cell.getTimestamp());
                System.out.println("value ============" + Bytes.toString(CellUtil.cloneValue(cell)));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            htable.close();
        }
    }

    //delete a row
    public static void deleteRow(String tableName, String rowkey) throws Exception {
        Table htable = conn.getTable(TableName.valueOf(tableName));           //table handle
        try {
            Delete delete = new Delete(Bytes.toBytes(rowkey));
            htable.delete(delete);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            htable.close();
        }
    }

    //alter the table to add a column family (an admin operation)
    public static void alterTableAddCls(String tableName, String cf) throws Exception {
        //obtain the cluster admin
        Admin admin = conn.getAdmin();
        try {
            admin.disableTable(TableName.valueOf(tableName));
            HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cf));

            //fetch the current table descriptor and add the new family to it
            HTableDescriptor htd = admin.getTableDescriptor(TableName.valueOf(tableName));
            htd.addFamily(hcd);

            //apply the modification and bring the table back online
            admin.modifyTable(TableName.valueOf(tableName), htd);
            admin.enableTable(TableName.valueOf(tableName));
            System.out.println("====column family added====");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            admin.close();
        }
    }

    //drop the table
    public static void deleteTable(String tableName) throws Exception {
        Admin admin = conn.getAdmin();
        try {
            if (admin.tableExists(TableName.valueOf(tableName))) {
                admin.disableTable(TableName.valueOf(tableName));
                admin.deleteTable(TableName.valueOf(tableName));
                System.out.println("table deleted");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            admin.close();
        }
    }

    //count rows via the Aggregation coprocessor (requires
    //org.apache.hadoop.hbase.coprocessor.AggregateImplementation to be
    //enabled on the cluster/table, otherwise this call fails)
    public static long rowCount(String tableName) {
        long rowCount = 0;
        @SuppressWarnings("resource")
        AggregationClient aggregationClient = new AggregationClient(conf);
        Scan scan = new Scan();
        try {
            rowCount = aggregationClient.rowCount(TableName.valueOf(tableName),
                    new LongColumnInterpreter(), scan);
        } catch (Throwable e) {
            e.printStackTrace();
        }
        return rowCount;
    }

    //close the shared connection once, when the application shuts down
    public static void closeConnection() throws IOException {
        if (conn != null) {
            conn.close();
        }
    }

}
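
A note on the design: the Connection is created once in the static initializer and shared across all methods, because HBase Connection objects are heavyweight and thread-safe, while Table and Admin handles are lightweight and meant to be opened and closed per operation. Each method therefore closes only its own handle in its finally block, and the shared connection is closed exactly once via closeConnection() when the application exits.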

(2) Write a test class

package testHbase;

import HbaseUtils.HbaseUtils;

import java.util.Date;
import java.util.Random;
public class TestHbase {

    public static void main(String[] args) throws Exception {
        long startTime = System.currentTimeMillis();   //start time
        String tableName = "test_dd";
        Random r = new Random();
        int num = r.nextInt(11);
        Date date = new Date();
        String timestamp = String.valueOf(date.getTime() / 1000);
        System.out.println(timestamp);
        String rowkey = "138873118" + num + "_" + timestamp;
        System.out.println(rowkey);
        String cf = "cf";
        String[] columnArray = {"name", "subject", "score"};
        String[] valuesArray = {"guozixuezhang", "math", "80"};
        String column = columnArray[2];
        String value = valuesArray[2];
        //print the table descriptor
//      HbaseUtils.getTableDesc(tableName);
        //batch-insert ten million rows
        HbaseUtils.putBatchData(tableName, rowkey, cf, column, value, 10000000);
        HbaseUtils.closeConnection();
        long endTime = System.currentTimeMillis(); //end time
        System.out.println("elapsed: " + (endTime - startTime) / 1000 + "s");
    }
}

3. Summary

     This post walked through setting up an environment for the HBase Java API and the concrete methods for operating on the database. During setup, pay special attention to the hosts-file mappings and to the HBase master/ZooKeeper addresses used when connecting; these two spots are the easiest to overlook. When using the API, mind each call's semantics (a shared Connection versus per-call Table handles, batched writes, closing resources) and keep testing until the methods are second nature.
