CK (ClickHouse) Cluster Setup and Deployment

I. Planning

1. Server configuration

Item            Details
Server spec     4C8G
OS version      CentOS Linux release 7.8.2003 (Core)
Kernel version  3.10.0-1127.el7.x86_64
ZK version      ZooKeeper-3.6.2
CK version      ClickHouse-20.9.5.5

2. ZK

1) Server plan

Host  IP             Port
sdw1 172.16.104.12 2181
sdw2 172.16.104.13 2181
sdw3 172.16.104.14 2181

2) Directory plan

Item         Directory
Install dir  /usr/local/zookeeper
Data dir     /data/zookeeper/data
Config file  /usr/local/zookeeper/conf/zoo.cfg

3. CK

1) Server plan

Host  IP             http_port  tcp_port  interserver_http_port  CK cluster
sdw1 172.16.104.12 8123 9000 9009 cluster01-01-1(shard1_repl1)
sdw2 172.16.104.13 8123 9000 9009 cluster01-01-2(shard1_repl1)
sdw1 172.16.104.12 8123 9000 9009 cluster02-01(shard2_repl0)
sdw2 172.16.104.13 8123 9000 9009 cluster02-02(shard2_repl0)

2) Directory plan

Item                      Directory
clickhouse-server config  /etc/clickhouse-server
clickhouse-client config  /etc/clickhouse-client
Config files              /etc/clickhouse-server
Data dir                  /data/clickhouse-server/data
Log dir                   /data/clickhouse-server/logs

II. Install the JDK

# yum install java
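
ZooKeeper 3.6 requires Java 8 or newer; verify the runtime that yum installed before continuing:

# java -version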

III. Install ZooKeeper

1. Download the package

ZooKeeper ships as both a source package and a binary package; the steps below install from the binary package:

# wget -c https://mirror.bit.edu.cn/apache/zookeeper/zookeeper-3.6.2/apache-zookeeper-3.6.2-bin.tar.gz
# tar xf apache-zookeeper-3.6.2-bin.tar.gz -C /usr/local/
# mv /usr/local/apache-zookeeper-3.6.2-bin /usr/local/zookeeper
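
The data and log directories referenced by zoo.cfg below do not exist yet; create them on every node first:

# mkdir -p /data/zookeeper/{data,logs}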

2. Edit the configuration

1) Edit zoo.cfg

# cd /usr/local/zookeeper/conf
# cp -r ./zoo_sample.cfg zoo.cfg
# cat zoo.cfg  | grep -v "^#"           //zoo.cfg after editing
tickTime=2000                           //basic time unit in ms; heartbeats and client session timeouts are multiples of it
initLimit=10                            //ticks a follower may take to connect to and sync with the leader
syncLimit=5                             //ticks a follower may lag behind the leader before being dropped
dataDir=/data/zookeeper/data            //snapshot data directory
dataLogDir=/data/zookeeper/logs         //transaction log directory
clientPort=2181                         //TCP port the server listens on for clients
server.1=sdw1:2888:3888                 //server.${myid}=host:quorum_port:leader_election_port
server.2=sdw2:2888:3888
server.3=sdw3:2888:3888

# scp -r ./zoo.cfg  sdw2:/usr/local/zookeeper/conf/zoo.cfg
# scp -r ./zoo.cfg  sdw3:/usr/local/zookeeper/conf/zoo.cfg

2) Create the myid file

# vi  /data/zookeeper/data/myid
1                                       //must match server.N in zoo.cfg; sdw1 is server.1, so write 1 here
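
Each node writes its own ID; run the matching line on each host:

# echo 1 > /data/zookeeper/data/myid    //on sdw1
# echo 2 > /data/zookeeper/data/myid    //on sdw2
# echo 3 > /data/zookeeper/data/myid    //on sdw3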

3) Configure environment variables

#Zookeeper Environment
export ZK_HOME=/usr/local/zookeeper
export PATH=$PATH:$ZK_HOME/bin
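
Append the two lines above to a shell profile on every node (which file is an assumption of this guide; /etc/profile is one option), then reload it so zkServer.sh resolves on the PATH:

# source /etc/profile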

3. Start ZK

-- start zk in the background
# zkServer.sh start

-- check zk status
# zkServer.sh status

-- start zk in the foreground with errors printed to the terminal (useful when startup fails)
# zkServer.sh start-foreground
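
With all three nodes started, zkServer.sh status should report "leader" on exactly one node and "follower" on the other two. A simple end-to-end connectivity check from any host:

# zkCli.sh -server sdw1:2181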

IV. Install and Deploy CK

1. Download the RPM packages

Reference: https://clickhouse.tech/docs/zh/getting-started/install/#from-rpm-packages

-- download
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-client-20.9.5.5-2.noarch.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-common-static-20.9.5.5-2.x86_64.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-common-static-dbg-20.9.5.5-2.x86_64.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-server-20.9.5.5-2.noarch.rpm
# wget -c https://repo.yandex.ru/clickhouse/rpm/stable/x86_64/clickhouse-test-20.9.5.5-2.noarch.rpm

-- install
# rpm -ihv clickhouse-common-static-20.9.5.5-2.x86_64.rpm
# rpm -ihv clickhouse-common-static-dbg-20.9.5.5-2.x86_64.rpm
# rpm -ihv clickhouse-server-20.9.5.5-2.noarch.rpm
# rpm -ihv clickhouse-client-20.9.5.5-2.noarch.rpm

-- after a successful installation the following two directories exist
/etc/clickhouse-client/ /etc/clickhouse-server/

2. Create data directories and set ownership

# mkdir -p /data/clickhouse-server/{access,data,format_schemas,logs,tmp,user_files}
# chown -R clickhouse:clickhouse /data/clickhouse-server

3. Edit the configuration files

1) config.xml

<yandex>
<!-- logging -->
<logger>
    <!-- Possible levels: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105 -->
    <level>trace</level>
    <log>/data/clickhouse-server/logs/clickhouse-server.log</log>
    <errorlog>/data/clickhouse-server/logs/clickhouse-server.err.log</errorlog>
    <size>1000M</size>
    <count>10</count>
</logger>    

<!-- ports -->
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<interserver_http_port>9009</interserver_http_port>
<interserver_http_host>sdw1</interserver_http_host>    <!-- set to each node's own hostname -->

<!-- listen address -->
<listen_host>0.0.0.0</listen_host>

<!-- connection settings -->
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<max_concurrent_queries>100</max_concurrent_queries>

<!-- memory limits -->
<max_server_memory_usage>0</max_server_memory_usage>
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
<total_memory_profiler_step>4194304</total_memory_profiler_step>
<total_memory_tracker_sample_probability>0</total_memory_tracker_sample_probability>
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<mark_cache_size>5368709120</mark_cache_size>

<!-- paths -->
<path>/data/clickhouse-server/data/</path>
<tmp_path>/data/clickhouse-server/tmp/</tmp_path>
<user_files_path>/data/clickhouse-server/user_files/</user_files_path>
<access_control_path>/data/clickhouse-server/access/</access_control_path>

<!-- cluster configuration -->
<include_from>/etc/clickhouse-server/config.d/metrika.xml</include_from>
<remote_servers incl="clickhouse_remote_servers" />
<zookeeper incl="zookeeper-servers" optional="true" />
<macros incl="macros" optional="true" />
</yandex>
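
After editing, it is worth confirming that the file still parses and that the intended value is picked up; the clickhouse-extract-from-config utility ships with the server package:

# clickhouse-extract-from-config --config-file /etc/clickhouse-server/config.xml --key=http_port
8123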

2) metrika.xml (cluster configuration file)

<?xml version="1.0"?>
<yandex>
<!-- zookeeper settings -->
<zookeeper-servers>     <!-- must match the zookeeper incl name in config.xml -->
	<node index="1">    <!-- one <node> entry per zk server -->
		<host>sdw1</host>   <!--zk_host-->
		<port>2181</port>   <!--zk_port-->
	</node>
	<node index="2">
		<host>sdw2</host>
		<port>2181</port>
	</node>
	<node index="3">
		<host>sdw3</host>
		<port>2181</port>
	</node>
</zookeeper-servers>

<!-- clickhouse cluster definitions -->
<clickhouse_remote_servers>
    <!-- Test only shard config for testing distributed storage -->
    <shard1_repl1>          <!-- 1 shard, 1 replica -->
        <shard>
            <!-- Optional. Shard weight when writing data. Default: 1. -->
            <weight>1</weight>
            <!-- Optional. Whether to write data to just one of the replicas. Default: false (write data to all replicas). -->
            <internal_replication>true</internal_replication>
            <replica>
                <host>sdw1</host>   <!-- replica host -->
                <port>9000</port>   <!-- replica port -->
            </replica>
            <replica>               <!-- add more <replica> entries for additional replicas -->
                <host>sdw2</host>
                <port>9000</port>
            </replica>
        </shard>
    </shard1_repl1>
    
    <shard2_repl0>      <!-- 2 shards, 0 replicas -->
        <shard>
            <replica>
                <host>sdw1</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <replica>
                <host>sdw2</host>
                <port>9000</port>
            </replica>
        </shard>
    </shard2_repl0>
    
    <!--
    <shard2_repl1>      2 shards, 1 replica
        <shard>
            <replica>
                <host>sdw1</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>sdw2</host>
                <port>9000</port>
            </replica>            
        </shard>
        <shard>
            <replica>
                <host>sdw3</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>sdw4</host>
                <port>9000</port>
            </replica>            
        </shard>
    </shard2_repl1>
    -->
</clickhouse_remote_servers>

<!-- replication macros; the three blocks below are alternatives for the three cluster layouts. Keep only the block that applies, and adjust its values on each node. -->
<macros>                    <!-- shard1_repl1 layout -->
    <layer>01</layer>       <!-- cluster id -->
    <shard>01</shard>       <!-- shard id -->
    <replica>cluster01-01-1</replica>       <!-- replica id; cluster01-01-2 on sdw2 -->
</macros>

<macros>                    <!-- shard2_repl0 layout -->
    <layer>02</layer>       <!-- cluster id -->
    <shard>01</shard>       <!-- shard id -->
    <replica>cluster02-01-1</replica>       <!-- replica id; for a shardN-replica0 cluster, using the same naming pattern on every node keeps {replica} usable in distributed DDL -->
</macros>

<macros>                    <!-- shard2_repl1 layout -->
    <layer>03</layer>       <!-- cluster id -->
    <shard>01</shard>       <!-- shard id -->
    <replica>cluster03-01-1</replica>       <!-- replica id -->
</macros>

</yandex>
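
For example, under the shard1_repl1 layout the macros block on sdw2 (following the planning table above) would read:

<macros>
    <layer>01</layer>
    <shard>01</shard>
    <replica>cluster01-01-2</replica>
</macros>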

3)users.xml

<?xml version="1.0"?>
<yandex>
    <!-- Profiles of settings. -->
    <profiles>
        <!-- Default settings. -->
        <default>           <!-- default profile -->
            <max_memory_usage>10000000000</max_memory_usage>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
        </default>

    	<rw_role>           <!-- full read-write profile -->
	        <distributed_product_mode>allow</distributed_product_mode>
	        <constraints>
		        <max_memory_usage>
		            <min>5000000000</min>
		            <max>20000000000</max>
		        </max_memory_usage>
	        </constraints>

            <readonly>0</readonly>
            <allow_ddl>1</allow_ddl>
	    </rw_role>

        <ro_role>       <!-- read-only profile that may still change settings (readonly=2) -->
            <distributed_product_mode>allow</distributed_product_mode>
            <constraints>
                <max_memory_usage>
                    <min>5000000000</min>
                    <max>10000000000</max>
                </max_memory_usage>
            </constraints>

            <readonly>2</readonly>
            <allow_ddl>1</allow_ddl>
        </ro_role>

    </profiles>

    <!-- Users and ACL. -->
    <users>
        <!-- If user name was not specified, 'default' user is used. -->
        <default>               <!-- default account -->
            <password></password>
            <networks incl="networks" replace="replace">
                <ip>::/0</ip>
            </networks>

            <!-- Settings profile for user. -->
            <profile>default</profile>

            <!-- Quota for user. -->
            <quota>default</quota>

            <!-- User can create other users and grant rights to them. -->
            <!-- <access_management>1</access_management> -->
        </default>

	<sansi_rw>                  <!-- full read-write account -->
	    <!--echo -n 123 | openssl dgst -sha256-->
	    <password_sha256_hex>a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3</password_sha256_hex>
            <networks incl="networks" replace="replace">
                <ip>::/0</ip>
            </networks>
	    <profile>rw_role</profile>
	    <quota>default</quota>
	</sansi_rw>


	<sansi_ro>                  <!-- read-only account that may still SET settings -->
	    <!--echo -n 123 | openssl dgst -sha256-->
	    <password_sha256_hex>a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3</password_sha256_hex>
            <networks incl="networks" replace="replace">
                <ip>::/0</ip>
            </networks>
	    <profile>ro_role</profile>
	    <quota>default</quota>
	</sansi_ro>

    </users>

    <!-- Quotas. -->
    <quotas>
        <!-- Name of quota. -->
        <default>
            <!-- Limits for time interval. You could specify many intervals with different limits. -->
            <interval>
                <!-- Length of interval. -->
                <duration>3600</duration>

                <!-- No limits. Just calculate resource usage for time interval. -->
                <queries>0</queries>
                <errors>0</errors>
                <result_rows>0</result_rows>
                <read_rows>0</read_rows>
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
</yandex>
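
To hash a different password, follow the pattern shown in the comments above (the sample password here is arbitrary):

# echo -n 'S3cret!' | openssl dgst -sha256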

4. Start the service

# systemctl start clickhouse-server         //start the ck service
# systemctl status clickhouse-server        //check ck service status
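
Two quick smoke tests once the service is up (suggested checks, not from the original article): the HTTP port answers GET / with Ok., and the accounts from users.xml should authenticate:

# curl http://sdw1:8123/
Ok.
# clickhouse-client -u sansi_rw --password 123 --query "select 1"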

5. Inspect basic cluster information

-- output below is from the deployment configured above (shard1_repl1 plus shard2_repl0)
# clickhouse-client --host 172.16.104.12 --port 9000
ClickHouse client version 20.9.5.5 (official build).
Connecting to 172.16.104.12:9000 as user default.
Connected to ClickHouse server version 20.9.5 revision 54439.

sdw2 :) SELECT * FROM system.clusters;

SELECT *
FROM system.clusters

┌─cluster──────┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name─┬─host_address──┬─port─┬─is_local─┬─user────┬─default_database─┬─errors_count─┬─estimated_recovery_time─┐
│ shard1_repl1 │         1 │            1 │           1 │ sdw1      │ 172.16.104.12 │ 9000 │        0 │ default │                  │            0 │                       0 │
│ shard1_repl1 │         1 │            1 │           2 │ sdw2      │ 172.16.104.13 │ 9000 │        1 │ default │                  │            0 │                       0 │
│ shard2_repl0 │         1 │            1 │           1 │ sdw1      │ 172.16.104.12 │ 9000 │        0 │ default │                  │            0 │                       0 │
│ shard2_repl0 │         2 │            1 │           1 │ sdw2      │ 172.16.104.13 │ 9000 │        1 │ default │                  │            0 │                       0 │
└──────────────┴───────────┴──────────────┴─────────────┴───────────┴───────────────┴──────┴──────────┴─────────┴──────────────────┴──────────────┴─────────────────────────┘

4 rows in set. Elapsed: 0.009 sec.

sdw2 :) select * from system.zookeeper where path='/clickhouse'

SELECT *
FROM system.zookeeper
WHERE path = '/clickhouse'

┌─name───────┬─value─┬──────czxid─┬──────mzxid─┬───────────────ctime─┬───────────────mtime─┬─version─┬─cversion─┬─aversion─┬─ephemeralOwner─┬─dataLength─┬─numChildren─┬──────pzxid─┬─path────────┐
│ tables     │       │ 8589938637 │ 8589938637 │ 2020-12-05 18:15:55 │ 2020-12-05 18:15:55 │       0 │        4 │        0 │              0 │          0 │           4 │ 8589939065 │ /clickhouse │
│ task_queue │       │ 8589934595 │ 8589934595 │ 2020-12-05 16:24:39 │ 2020-12-05 16:24:39 │       0 │        1 │        0 │              0 │          0 │           1 │ 8589934596 │ /clickhouse │
└────────────┴───────┴────────────┴────────────┴─────────────────────┴─────────────────────┴─────────┴──────────┴──────────┴────────────────┴────────────┴─────────────┴────────────┴─────────────┘

2 rows in set. Elapsed: 0.016 sec.

V. Using the Cluster for High Availability

Replication in a CK cluster needs no additional configuration files: the Replicated* table engines, working together with ZooKeeper, provide all of the required functionality.

1. Basic syntax

1) ReplicatedMergeTree basics

engine = ReplicatedMergeTree('zk_path','replica_name')
  • zk_path : path in zk under which the table is registered; the usual template is /clickhouse/tables/{shard}/{table_name}, where {shard} is the shard identifier and {table_name} is the table name
  • replica_name : name of this replica
  • replicas of the same shard must define the same zk_path but different replica_name values
  • tables on different shards must define different zk_path values as well as different replica_name values

2) Distributed table basics

engine = Distributed(cluster,database,table [,sharding_key])
  • cluster : cluster name, as defined under <clickhouse_remote_servers> in metrika.xml
  • database, table : the database and local table the distributed table maps onto
  • sharding_key : sharding key, optional (a combined sketch follows below)
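
Combining the two engines with the macros defined in metrika.xml, a single ON CLUSTER statement can create the replicated local table on every node of a cluster at once. The following is a sketch rather than the original article's method, assuming the shard1_repl1 cluster and the macros configured earlier:

CREATE TABLE db2.t1 ON CLUSTER shard1_repl1
(
    `id` Int32,
    `name` String
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/t1', '{replica}')
ORDER BY id;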

2. Examples

1) Creating tables with multiple replicas and no sharding

Create the tables:

-- shard1-repl1: create the replicated table t1
-- /clickhouse/tables/01-01/t1 : 01-01 is derived from cluster id-shard id
-- cluster01-01-1 : cluster id-shard id-replica id
sdw1 :) create database db2;
sdw1 :) use db2;
sdw1 :) CREATE TABLE t1(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-01/t1','cluster01-01-1') ORDER BY id ;

-- shard1-repl2: create the replicated table t1
sdw2 :) create database db2;
sdw2 :) use db2;
sdw2 :) CREATE TABLE t1(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/01-01/t1','cluster01-01-2') ORDER BY id ;

Insert data:

-- shard1-repl1
sdw1 :) insert into t1 values(1,'aa'),(2,'bb'),(3,'cc');

-- shard1-repl2
sdw2 :) insert into t1 values(4,'dd');
sdw2 :) insert into t1 values(5,'ee');

Query the data (every replica returns all five rows, regardless of which replica received each insert):

-- shard1-repl1
sdw1 :) select * from t1;

SELECT *
FROM t1

┌─id─┬─name─┐
│  1 │ aa   │
│  2 │ bb   │
│  3 │ cc   │
└────┴──────┘
┌─id─┬─name─┐
│  4 │ dd   │
└────┴──────┘
┌─id─┬─name─┐
│  5 │ ee   │
└────┴──────┘

5 rows in set. Elapsed: 0.009 sec.

-- shard1-repl2
sdw2 :) select * from t1;

SELECT *
FROM t1

┌─id─┬─name─┐
│  5 │ ee   │
└────┴──────┘
┌─id─┬─name─┐
│  4 │ dd   │
└────┴──────┘
┌─id─┬─name─┐
│  1 │ aa   │
│  2 │ bb   │
│  3 │ cc   │
└────┴──────┘

5 rows in set. Elapsed: 0.008 sec.
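
Replication health can also be inspected from system.replicas; a suggested query, not from the original article:

sdw1 :) SELECT database, table, is_leader, replica_name, total_replicas, active_replicas FROM system.replicas;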

2) Creating a distributed table with 2 shards, no replicas

Create the tables:

-- shard1
sdw1 :) use db2;
sdw1 :) CREATE TABLE t3(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/02-01/t3','cluster02-01') ORDER BY id ;
sdw1 :) CREATE TABLE t3_all(`id` Int32,`name` String) ENGINE = Distributed(shard2_repl0,db2,t3,rand());

-- shard2
sdw2 :) use db2;
sdw2 :) CREATE TABLE t3(`id` Int32,`name` String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/02-02/t3','cluster02-02') ORDER BY id ;
sdw2 :) CREATE TABLE t3_all(`id` Int32,`name` String) ENGINE = Distributed(shard2_repl0,db2,t3,rand());

Insert data:

-- shard1
sdw1 :) insert into t3 values(1,'aa'),(2,'bb'),(3,'cc');

-- shard2
sdw2 :) insert into t3 values(4,'dd'),(5,'ee'),(6,'ff');

Query the data (the local table returns only its own shard's rows; the distributed table merges both shards):

-- shard1
sdw1 :) select * from t3;

SELECT *
FROM t3

┌─id─┬─name─┐
│  1 │ aa   │
│  2 │ bb   │
│  3 │ cc   │
└────┴──────┘

3 rows in set. Elapsed: 0.006 sec.

sdw1 :) select * from t3_all;

SELECT *
FROM t3_all

┌─id─┬─name─┐
│  1 │ aa   │
│  2 │ bb   │
│  3 │ cc   │
└────┴──────┘
┌─id─┬─name─┐
│  4 │ dd   │
│  5 │ ee   │
│  6 │ ff   │
└────┴──────┘

6 rows in set. Elapsed: 0.019 sec.

-- shard2
sdw2 :) select * from t3;

SELECT *
FROM t3

┌─id─┬─name─┐
│  4 │ dd   │
│  5 │ ee   │
│  6 │ ff   │
└────┴──────┘

3 rows in set. Elapsed: 0.006 sec.

sdw2 :) select * from t3_all;

SELECT *
FROM t3_all

┌─id─┬─name─┐
│  4 │ dd   │
│  5 │ ee   │
│  6 │ ff   │
└────┴──────┘
┌─id─┬─name─┐
│  1 │ aa   │
│  2 │ bb   │
│  3 │ cc   │
└────┴──────┘

6 rows in set. Elapsed: 0.017 sec.
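
Writes can also go through the distributed table itself: with rand() as the sharding key, ClickHouse routes each inserted row to one of the two shards. A brief sketch with illustrative values:

sdw1 :) insert into t3_all values(7,'gg'),(8,'hh');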

Reposted from blog.csdn.net/weixin_37692493/article/details/110817397