docker-compose deploys hbase cluster - the road to dream building

1. Brief introduction

HBase is an open-source, column-oriented, distributed NoSQL database that runs mainly on top of the Hadoop Distributed File System (HDFS). It was originally developed at Powerset, with substantial later contributions from companies such as Facebook, and its design is based on Google's Bigtable model. It provides storage with strong horizontal scalability and high availability.

HBase The main features are as follows:

  • Columnar storage: HBase stores data in a column-oriented layout. It uses HDFS as the underlying file system and splits the data into multiple Regions stored on HDFS, each of which can hold many rows. This storage model lets HBase handle very large data volumes and gives it good write performance.

  • Distributed architecture: HBase is a distributed system that spreads data across multiple machines and increases storage and computing capacity by scaling out horizontally, which meets the needs of large-scale data storage and processing. When a RegionServer process crashes, its Regions are automatically migrated to other RegionServers, which provides high availability.

  • High reliability: When storing data, HBase persists it across multiple RegionServers, so that even if one RegionServer crashes or fails, the data is not lost and does not become inaccessible. This ensures the high reliability of the system.

  • Linear scalability: HBase scales almost linearly; storage and computing capacity can be expanded simply by adding new nodes to meet the needs of large-scale data storage and processing.

All in all, HBase is a NoSQL database that is well suited to processing unstructured and massive data. It offers high availability, high reliability, and high performance, and can serve as a solution for a wide range of large-scale data storage and processing scenarios.

References:

Distributed database for columnar storage - HBase (environmental deployment) 

Distributed database with columnar storage - HBase Shell and SQL actual operation (HBase Master high availability implementation)

[Cloud native] HBase on k8s orchestration and deployment explanation and actual operation 

2. docker create network

# Create the "hadoop-network" Docker network; the docker-compose.yml in this
# guide declares it as an external network, so it must exist beforehand.
docker network create hadoop-network

# List the networks to confirm it was created
docker network ls

3. Download the binaries

# Download the HBase 2.5.4 binary tarball.
# Superseded releases are served from archive.apache.org rather than the
# dlcdn mirror, and TLS certificate verification is left enabled so the
# binary cannot be silently replaced in transit.
wget https://archive.apache.org/dist/hbase/2.5.4/hbase-2.5.4-bin.tar.gz

4. Prepare configuration files

# Generate conf/hbase-env.sh: sets the JDK location, puts the HBase conf
# directory on the classpath, and sets HBASE_MANAGES_ZK=false (hbase-site.xml
# in this guide points at a separate zookeeper-node1..3 quorum).
cat > conf/hbase-env.sh << EOF

export JAVA_HOME=/opt/apache/jdk
export HBASE_CLASSPATH=/opt/apache/hbase/conf
export HBASE_MANAGES_ZK=false
EOF

cat > conf/hbase-site.xml << EOF

<!-- HBase site configuration: HDFS root directory, distributed mode, ZooKeeper quorum, and the master / regionserver RPC and web (info) ports. -->
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://hadoop-hdfs-nn:9000/hbase</value>
        <!-- hdfs://ns1/hbase: here ns1 corresponds to the value of the dfs.nameservices property in hdfs-site.xml -->
    </property>

    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>zookeeper-node1,zookeeper-node2,zookeeper-node3</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
    </property>

    <!-- NOTE(review): hbase.master.port below is 16000, so this 60000 value looks like a leftover; confirm it is still needed. -->
    <!-- Translation of the description text: standalone mode needs host name/IP and port; HA mode only needs the port. -->
    <property>
        <name>hbase.master</name>
        <value>60000</value>
        <description>单机版需要配主机名/IP和端口,HA方式只需要配端口</description>
    </property>
    <property>
        <name>hbase.master.info.bindAddress</name>
        <value>0.0.0.0</value>
    </property>
    <property>
        <name>hbase.master.port</name>
        <value>16000</value>
    </property>
    <property>
        <name>hbase.master.info.port</name>
        <value>16010</value>
    </property>
    <property>
        <name>hbase.regionserver.port</name>
        <value>16020</value>
    </property>
    <property>
        <name>hbase.regionserver.info.port</name>
        <value>16030</value>
    </property>

    <property>
        <name>hbase.wal.provider</name>
        <value>filesystem</value> <!-- multiwal can also be used -->
    </property>
</configuration>
EOF
# conf/backup-masters: host name of the standby master; it matches the
# hbase-master-2 service hostname in docker-compose.yml.
cat > conf/backup-masters << EOF
hbase-master-2
EOF
# conf/regionservers: one region server host name per line; these match the
# three hbase-regionserver-N service hostnames in docker-compose.yml.
cat > conf/regionservers << EOF
hbase-regionserver-1
hbase-regionserver-2
hbase-regionserver-3
EOF
# Generate the Hadoop core-site.xml that is mounted into the HBase containers.
# The heredoc delimiter is quoted so the XML is written out verbatim.
cat > conf/hadoop/core-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
        <!-- NameNode address -->
        <property>
                <name>fs.defaultFS</name>
                <value>hdfs://hadoop-hdfs-nn:9000</value>
        </property>

        <!-- File buffer size (128 KB); the default is 4 KB -->
        <property>
                <name>io.file.buffer.size</name>
                <value>131072</value>
        </property>

        <!-- Retention time of the file system trash -->
        <property>
                <name>fs.trash.interval</name>
                <value>1440</value>
        </property>

        <!-- Hadoop temporary directory, used for metadata storage. Make sure /opt/apache/hadoop/data/hdfs/ has been created manually; the tmp directory is created automatically -->
        <property>
                <name>hadoop.tmp.dir</name>
                <value>/opt/apache/hadoop/data/hdfs/tmp</value>
        </property>

        <!-- Static user used by the HDFS web UI: root -->
        <property>
                <name>hadoop.http.staticuser.user</name>
                <value>root</value>
        </property>

        <!-- Hosts from which root (superuser) may act as a proxy user -->
        <property>
                <name>hadoop.proxyuser.root.hosts</name>
                <value>*</value>
        </property>

        <!-- Groups whose members root (superuser) may impersonate -->
        <property>
                <name>hadoop.proxyuser.root.groups</name>
                <value>*</value>
        </property>

        <!-- Users that root (superuser) may impersonate.
             The property key is "users" (plural); the original "hadoop.proxyuser.root.user" is not a key Hadoop reads. -->
        <property>
                <name>hadoop.proxyuser.root.users</name>
                <value>*</value>
        </property>

        <!-- Hosts from which the hive user may act as a proxy user -->
        <property>
                <name>hadoop.proxyuser.hive.hosts</name>
                <value>*</value>
        </property>

        <!-- Groups whose members the hive user may impersonate -->
        <property>
                <name>hadoop.proxyuser.hive.groups</name>
                <value>*</value>
        </property>

        <!-- Hosts from which the hadoop user may act as a proxy user -->
        <property>
                <name>hadoop.proxyuser.hadoop.hosts</name>
                <value>*</value>
        </property>

        <!-- Groups whose members the hadoop user may impersonate -->
        <property>
                <name>hadoop.proxyuser.hadoop.groups</name>
                <value>*</value>
        </property>

</configuration>

EOF
# Generate the Hadoop hdfs-site.xml that is mounted into the HBase containers.
# The XML declaration must be the very first line of the generated file (no
# blank line before it), otherwise the file is not well-formed XML.
# The heredoc delimiter is quoted so the XML is written out verbatim.
cat > conf/hadoop/hdfs-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
        <!-- NameNode web UI address -->
        <property>
                <name>dfs.namenode.http-address</name>
                <value>0.0.0.0:9870</value>
        </property>

        <!-- dfs.webhdfs.enabled must be true; otherwise WebHDFS commands that list file and directory status (LISTSTATUS, LISTFILESTATUS, ...) cannot be used, because that information is kept by the NameNode. -->
        <property>
                <name>dfs.webhdfs.enabled</name>
                <value>true</value>
        </property>

        <property>
                <name>dfs.namenode.name.dir</name>
                <value>/opt/apache/hadoop/data/hdfs/namenode</value>
        </property>
        <property>
                <name>dfs.datanode.data.dir</name>
                <value>/opt/apache/hadoop/data/hdfs/datanode/data1,/opt/apache/hadoop/data/hdfs/datanode/data2,/opt/apache/hadoop/data/hdfs/datanode/data3</value>
        </property>

        <property>
                <name>dfs.replication</name>
                <value>3</value>
        </property>

        <!-- Host and port where the SecondaryNameNode process runs -->
        <property>
                <name>dfs.namenode.secondary.http-address</name>
                <value>hadoop-hdfs-nn2:9868</value>
        </property>

        <property>
                <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
                <value>false</value>
        </property>

        <!-- Whitelist (include file) -->
        <property>
                <name>dfs.hosts</name>
                <value>/opt/apache/hadoop/etc/hadoop/dfs.hosts</value>
        </property>

        <!-- Blacklist (exclude file) -->
        <property>
                <name>dfs.hosts.exclude</name>
                <value>/opt/apache/hadoop/etc/hadoop/dfs.hosts.exclude</value>
        </property>

</configuration>

EOF
# Generate bootstrap.sh, the start-up script copied into the image and invoked
# by each compose service as: bootstrap.sh <role> [wait_host wait_port].
#
# The heredoc delimiter is quoted so that $1, $2 and ${HBASE_HOME} are written
# literally and expanded when the script runs inside the container, not by the
# shell that creates the file. The shebang is kept on the first line of the
# generated file so the kernel honours it.
cat > bootstrap.sh << 'EOF'
#!/usr/bin/env sh

# Block until host $1 accepts TCP connections on port $2 (polled once a second).
wait_for() {
        echo Waiting for $1 to listen on $2...
        while ! nc -z "$1" "$2"; do echo waiting...; sleep 1s; done
}

# Start an HBase master. When a host ($1) and port ($2) are both given, wait
# for that endpoint first; the compose file passes them for the second master.
start_hbase_master() {
        if [ -n "$1" ] && [ -n "$2" ]; then
                wait_for "$1" "$2"
        fi

        ${HBASE_HOME}/bin/hbase-daemon.sh start master

        # Follow the daemon output so the container stays in the foreground.
        tail -f ${HBASE_HOME}/logs/*master*.out
}

# Start an HBase region server after host $1 is listening on port $2.
start_hbase_regionserver() {

        wait_for "$1" "$2"

        ${HBASE_HOME}/bin/hbase-daemon.sh start regionserver

        # Follow the region server log so the container stays in the foreground.
        tail -f ${HBASE_HOME}/logs/*regionserver*.log
}


# Dispatch on the role given as the first argument.
case $1 in
        hbase-master)
                start_hbase_master "$2" "$3"
                ;;
        hbase-regionserver)
                start_hbase_regionserver "$2" "$3"
                ;;
        *)
                echo "请输入正确的服务启动命令~"
                # Unknown role: report failure instead of exiting with status 0.
                exit 1
        ;;
esac

EOF

5. Build Dockerfile

# Image used for both HBase master and region server containers.
# The build context must contain jdk-8u212-linux-x64.tar.gz,
# hbase-2.5.4-bin.tar.gz and bootstrap.sh.
# Use the fully qualified 7.9 tag: the official image is published as
# 7.9.2009, not 7.9.
FROM centos:7.9.2009

# Set the container time zone to Asia/Shanghai
RUN rm -f /etc/localtime && ln -sv /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone

# ENV persists into later layers and the running container; a variable
# exported inside a RUN step is lost as soon as that step finishes.
# NOTE(review): the zh_CN.UTF-8 locale must exist in the base image for this to
# take full effect; confirm, or generate it with localedef.
ENV LANG=zh_CN.UTF-8

# Create the user and group; uid/gid 10000 is the identity the compose services run as (user: hadoop:hadoop)
RUN groupadd --system --gid=10000 hadoop && useradd --system --home-dir /home/hadoop --uid=10000 --gid=hadoop hadoop -m

# Install sudo and basic network tools.
# CentOS 7 is end-of-life, so the default mirrorlist no longer resolves; point
# yum at vault.centos.org first. Steps are chained with && so a failed install
# fails the build instead of being masked by the trailing chmod.
RUN sed -i -e 's|^mirrorlist=|#mirrorlist=|' -e 's|^#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|' /etc/yum.repos.d/CentOS-*.repo \
    && yum -y install sudo net-tools telnet wget nc less tree \
    && yum clean all \
    && chmod 640 /etc/sudoers

# Grant the hadoop user passwordless sudo
RUN echo "hadoop ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers

RUN mkdir -p /opt/apache/

# Add and configure the JDK (ADD unpacks the local tarball into /opt/apache/)
ADD jdk-8u212-linux-x64.tar.gz /opt/apache/
ENV JAVA_HOME=/opt/apache/jdk
ENV PATH=$JAVA_HOME/bin:$PATH
RUN ln -s /opt/apache/jdk1.8.0_212 $JAVA_HOME

# HBase
ENV HBASE_VERSION=2.5.4
ADD hbase-${HBASE_VERSION}-bin.tar.gz /opt/apache/
ENV HBASE_HOME=/opt/apache/hbase
ENV PATH=$HBASE_HOME/bin:$PATH
RUN ln -s /opt/apache/hbase-${HBASE_VERSION} $HBASE_HOME

# Copy bootstrap.sh, the start-up script the compose services invoke
COPY bootstrap.sh /opt/apache/
RUN chmod +x /opt/apache/bootstrap.sh

# Hand the whole tree to the hadoop user the containers run as
RUN chown -R hadoop:hadoop /opt/apache

WORKDIR $HBASE_HOME
# Build the image from the Dockerfile in the current directory
docker build -t hbase:2.5.4 . --no-cache

### Parameter explanation
# -t: image name (and tag)
# . : use the Dockerfile in the current directory
# -f: path to the Dockerfile
#  --no-cache: do not use the build cache

6. docker-compose.yml

version: '3'
# HBase cluster: two masters (hbase-master-2 waits for hbase-master-1) and
# three region servers, all attached to the pre-created hadoop-network.
#
# Values are interpolated from the .env file next to this compose file (it is
# also listed under env_file, so it must exist). Each variable has a fallback
# that matches the image and hbase-site.xml, so an unset variable no longer
# yields an empty mount path or port:
#   HBASE_HOME               (default /opt/apache/hbase)
#   HBASE_MASTER_PORT        (default 16000, master RPC port)
#   HBASE_MASTER_INFO_PORT   (default 16010, master web UI port)
#   HBASE_REGIONSERVER_PORT  (default 16020, region server RPC port)
#
# Host ports 36010/36011 are mapped to the master web UI port, because that is
# the port the guide tells you to open in a browser (http://ip:36010/).
# NOTE(review): host ports 3603x still map to HBASE_REGIONSERVER_PORT as in the
# original; if the region server web UI (16030) was intended, confirm and adjust.
# NOTE(review): privileged: true is kept from the original; confirm it is needed.
services:
  hbase-master-1:
    image: hbase:2.5.4
    user: "hadoop:hadoop"
    container_name: hbase-master-1
    hostname: hbase-master-1
    restart: always
    privileged: true
    env_file:
      - .env
    volumes:
      - ./conf/hbase-env.sh:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-env.sh
      - ./conf/hbase-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-site.xml
      - ./conf/backup-masters:${HBASE_HOME:-/opt/apache/hbase}/conf/backup-masters
      - ./conf/regionservers:${HBASE_HOME:-/opt/apache/hbase}/conf/regionservers
      - ./conf/hadoop/core-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/core-site.xml
      - ./conf/hadoop/hdfs-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hdfs-site.xml
    ports:
      - "36010:${HBASE_MASTER_INFO_PORT:-16010}"
    command: ["sh","-c","/opt/apache/bootstrap.sh hbase-master"]
    networks:
      - hadoop-network
    healthcheck:
      test: ["CMD-SHELL", "netstat -tnlp|grep :${HBASE_MASTER_PORT:-16000} || exit 1"]
      interval: 10s
      timeout: 20s
      retries: 3
  hbase-master-2:
    image: hbase:2.5.4
    user: "hadoop:hadoop"
    container_name: hbase-master-2
    hostname: hbase-master-2
    restart: always
    privileged: true
    env_file:
      - .env
    volumes:
      - ./conf/hbase-env.sh:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-env.sh
      - ./conf/hbase-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-site.xml
      - ./conf/backup-masters:${HBASE_HOME:-/opt/apache/hbase}/conf/backup-masters
      - ./conf/regionservers:${HBASE_HOME:-/opt/apache/hbase}/conf/regionservers
      - ./conf/hadoop/core-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/core-site.xml
      - ./conf/hadoop/hdfs-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hdfs-site.xml
    ports:
      - "36011:${HBASE_MASTER_INFO_PORT:-16010}"
    command: ["sh","-c","/opt/apache/bootstrap.sh hbase-master hbase-master-1 ${HBASE_MASTER_PORT:-16000}"]
    networks:
      - hadoop-network
    healthcheck:
      test: ["CMD-SHELL", "netstat -tnlp|grep :${HBASE_MASTER_PORT:-16000} || exit 1"]
      interval: 10s
      timeout: 20s
      retries: 3
  hbase-regionserver-1:
    image: hbase:2.5.4
    user: "hadoop:hadoop"
    container_name: hbase-regionserver-1
    hostname: hbase-regionserver-1
    restart: always
    privileged: true
    env_file:
      - .env
    volumes:
      - ./conf/hbase-env.sh:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-env.sh
      - ./conf/hbase-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-site.xml
      - ./conf/backup-masters:${HBASE_HOME:-/opt/apache/hbase}/conf/backup-masters
      - ./conf/regionservers:${HBASE_HOME:-/opt/apache/hbase}/conf/regionservers
      - ./conf/hadoop/core-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/core-site.xml
      - ./conf/hadoop/hdfs-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hdfs-site.xml
    ports:
      - "36030:${HBASE_REGIONSERVER_PORT:-16020}"
    command: ["sh","-c","/opt/apache/bootstrap.sh hbase-regionserver hbase-master-1 ${HBASE_MASTER_PORT:-16000}"]
    networks:
      - hadoop-network
    healthcheck:
      test: ["CMD-SHELL", "netstat -tnlp|grep :${HBASE_REGIONSERVER_PORT:-16020} || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 3
  hbase-regionserver-2:
    image: hbase:2.5.4
    user: "hadoop:hadoop"
    container_name: hbase-regionserver-2
    hostname: hbase-regionserver-2
    restart: always
    privileged: true
    env_file:
      - .env
    volumes:
      - ./conf/hbase-env.sh:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-env.sh
      - ./conf/hbase-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-site.xml
      - ./conf/backup-masters:${HBASE_HOME:-/opt/apache/hbase}/conf/backup-masters
      - ./conf/regionservers:${HBASE_HOME:-/opt/apache/hbase}/conf/regionservers
      - ./conf/hadoop/core-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/core-site.xml
      - ./conf/hadoop/hdfs-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hdfs-site.xml
    ports:
      - "36031:${HBASE_REGIONSERVER_PORT:-16020}"
    command: ["sh","-c","/opt/apache/bootstrap.sh hbase-regionserver hbase-master-1 ${HBASE_MASTER_PORT:-16000}"]
    networks:
      - hadoop-network
    healthcheck:
      test: ["CMD-SHELL", "netstat -tnlp|grep :${HBASE_REGIONSERVER_PORT:-16020} || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 3
  hbase-regionserver-3:
    image: hbase:2.5.4
    user: "hadoop:hadoop"
    container_name: hbase-regionserver-3
    hostname: hbase-regionserver-3
    restart: always
    privileged: true
    env_file:
      - .env
    volumes:
      - ./conf/hbase-env.sh:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-env.sh
      - ./conf/hbase-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hbase-site.xml
      - ./conf/backup-masters:${HBASE_HOME:-/opt/apache/hbase}/conf/backup-masters
      - ./conf/regionservers:${HBASE_HOME:-/opt/apache/hbase}/conf/regionservers
      - ./conf/hadoop/core-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/core-site.xml
      - ./conf/hadoop/hdfs-site.xml:${HBASE_HOME:-/opt/apache/hbase}/conf/hdfs-site.xml
    ports:
      - "36032:${HBASE_REGIONSERVER_PORT:-16020}"
    command: ["sh","-c","/opt/apache/bootstrap.sh hbase-regionserver hbase-master-1 ${HBASE_MASTER_PORT:-16000}"]
    networks:
      - hadoop-network
    healthcheck:
      test: ["CMD-SHELL", "netstat -tnlp|grep :${HBASE_REGIONSERVER_PORT:-16020} || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 3

# Attach to the externally created network (docker network create hadoop-network)
networks:
  hadoop-network:
    external: true

7. Access check verification

Access the web:http://ip:36010/

# Open a shell inside the hbase-master-1 container
docker exec -it hbase-master-1 bash

# Start the interactive HBase shell; the commands below are typed into it
hbase shell

### Check the cluster status
status

### Create a simple table
create 'user', 'info', 'data'
# 'user' is the table name
# 'info' is the name of column family 1
# 'data' is the name of column family 2

### Show the table information
desc 'user'

8. Common commands

# Connect to the HBase shell

hbase shell

# Create a table

create 'table_name', 'column_family1', 'column_family2', ...

# List existing tables

list

# Show the table structure

describe 'table_name'

# Insert data

put 'table_name', 'row_key', 'column_family:column', 'value'

# Get data

get 'table_name', 'row_key'

# Scan the table data

scan 'table_name'

# Delete data

delete 'table_name', 'row_key', 'column_family:column', 'timestamp'

# Disable a table

disable 'table_name'

# Enable a table

enable 'table_name'

# Drop a table (the commands below disable it first, then drop it)

disable 'table_name'
drop 'table_name'

# Alter a table

alter 'table_name', {NAME => 'column_family', VERSIONS => 'new_version'}

Guess you like

Origin blog.csdn.net/qq_34777982/article/details/131046834