一、哨兵机制的概述:
哨兵(sentinel)机制是Redis的一种特殊运行模式,Redis自身提供了哨兵的管理命令,哨兵作为一个独立的进程独立运行。它的实现原理是:哨兵定期向Redis服务器发送命令并等待其响应,从而监控多个Redis实例。当一个主服务器宕机时,sentinel哨兵监控服务器会探测到主服务器的状态,并根据多个sentinel监控服务器的监控报告情况进行投票协商,判断此主服务器是否确实宕机。下线状态又分为主观下线和客观下线:主观下线是指单个sentinel实例自己判断出某节点为下线状态(仅代表该sentinel自身的判断,还不是最终状态);客观下线是指多个sentinel节点协商后(判定下线的sentinel数量达到quorum设定值),认定某节点的最终状态为下线状态。
二、sentinel机制配置
首先,我们需要配置redis数据库集群,此实验是配置一个主服务器和两个从服务器。
1)创建多实例目录并拷贝配置文件
[root@server redis]# mkdir /redis/db{1,2,3}
[root@server ~]# cp /etc/redis.conf /redis/
[root@server ~]#
[root@server ~]# cd /redis/
[root@server redis]# ls
db1 db2 db3 redis.conf
[root@server redis]#
[root@server redis]# cp redis.conf db1/redis.conf.1
[root@server redis]# cp redis.conf db2/redis.conf.2
[root@server redis]# cp redis.conf db3/redis.conf.3
[root@server redis]# ls db1/
redis.conf.1
[root@server redis]# ls db2/
redis.conf.2
[root@server redis]# ls db3/
redis.conf.3
[root@server redis]#
2)修改各配置文件的配置信息(port,pidfile,logfile,dir)
实例一:
[root@server redis]# cd db1/
[root@server db1]# ls
redis.conf.1
[root@server db1]# vim redis.conf.1
port 6379
logfile /var/log/redis/redis.log
pidfile /var/run/redis_6379.pid
dir /redis/db1 //把主库的dir目录位置改为多实例的目录位置;
[root@server db1]# redis-se
redis-sentinel redis-server
[root@server db1]# redis-server redis.conf.1
[root@server db1]#
[root@server db1]# ss -tunlp | grep 6379 //主库已然配置完成;
tcp LISTEN 0 128 192.168.126.141:6379 *:* users:(("redis-server",pid=6543,fd=5))
tcp LISTEN 0 128 127.0.0.1:6379 *:* users:(("redis-server",pid=6543,fd=4))
实例二:
[root@server db1]# cd ../db2/
[root@server db2]# ls
redis.conf.2
[root@server db2]# vim redis.conf.2
port 6380 //将默认端口6379修改为6380;
pidfile /var/run/redis_6380.pid //修改pid文件;
logfile /var/log/redis/redis2.log //修改实例2的日志保存文件;
dir /redis/db2 //修改实例2的dir目录;
daemonize yes //在后台以守护进程启动;
[root@server db2]# chown -R redis.redis /redis/*
[root@server db2]# redis-server redis.conf.2
[root@server db2]#
[root@server db2]#
[root@server db2]# ss -tunlp | grep 6380
tcp LISTEN 0 128 192.168.126.141:6380 *:* users:(("redis-server",pid=15590,fd=5))
tcp LISTEN 0 128 127.0.0.1:6380 *:* users:(("redis-server",pid=15590,fd=4))
[root@server db2]#
实例三:
[root@server db2]# cd ../db3/
[root@server db3]# ls
redis.conf.3
[root@server db3]# vim redis.conf.3
port 6381
pidfile /var/run/redis_6381.pid
logfile /var/log/redis/redis3.log
dir /redis/db3
daemonize yes
[root@server db3]# redis-server redis.conf.3
[root@server db3]#
[root@server db3]# ss -tunlp | grep 6381
tcp LISTEN 0 128 192.168.126.141:6381 *:* users:(("redis-server",pid=15637,fd=5))
tcp LISTEN 0 128 127.0.0.1:6381 *:* users:(("redis-server",pid=15637,fd=4))
[root@server db3]#
3)配置数据库主从(6379为主服务器,6380/6381为从服务器)
[root@server ~]# redis-cli -p 6379 -h 192.168.126.141
192.168.126.141:6379> keys *
(empty list or set)
192.168.126.141:6379>
192.168.126.141:6379> SET name jiangyiyang
OK
192.168.126.141:6379> set age 20
OK
192.168.126.141:6379> KEYS *
1) "age"
2) "name"
192.168.126.141:6379>
[root@server ~]# redis-cli -p 6380 -h 192.168.126.141
192.168.126.141:6380> KEYS *
(empty list or set)
192.168.126.141:6380> SLAVEOF 192.168.126.141 6379
OK
192.168.126.141:6380> KEYS *
1) "age"
2) "name"
192.168.126.141:6380>
[root@server ~]# redis-cli -p 6381 -h 192.168.126.141
192.168.126.141:6381>
192.168.126.141:6381> KEYS *
(empty list or set)
192.168.126.141:6381> SLAVEOF 192.168.126.141 6379
OK
192.168.126.141:6381> KEYS *
1) "name"
2) "age"
192.168.126.141:6381>
192.168.126.141:6379> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=127.0.0.1,port=6380,state=online,offset=645,lag=0 //两个从节点已经开始复制;
slave1:ip=127.0.0.1,port=6381,state=online,offset=645,lag=0
master_repl_offset:645
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2
repl_backlog_histlen:644
192.168.126.141:6379>
4)修改sentinel配置文件并启动sentinel监控
[root@server redis]# cp /etc/redis-sentinel.conf /redis/
[root@server redis]#
[root@server redis]# cd /redis/
[root@server redis]# ls
db1 db2 db3 redis.conf redis-sentinel.conf
[root@server redis]# vim redis-sentinel.conf
sentinel monitor mymaster 192.168.126.141 6379 1 //修改监控主服务器的地址和端口信息;最后的1为quorum(判定主服务器下线所需的sentinel票数),
即只要有一个sentinel节点判断该服务器宕机,那么此服务器的最终状态就为宕机状态
sentinel down-after-milliseconds mymaster 5000 //多少毫秒连接不上此服务器就判定为宕机状态,默认为30秒;
sentinel parallel-syncs mymaster 1 //规定故障转移后,最多允许多少个从服务器同时向新指定的主服务器同步数据;
sentinel failover-timeout mymaster 180000 //故障转移的超时时间,超过此时间就认定故障转移失败了(单位为毫秒);
[root@server redis]# redis-sentinel /redis/redis-sentinel.conf &
[root@server ~]# ss -tunlp | grep 26379
tcp LISTEN 0 128 *:26379 *:* users:(("redis-sentinel",pid=16245,fd=5))
tcp LISTEN 0 128 :::26379 :::* users:(("redis-sentinel",pid=16245,fd=4))
[root@server ~]#
[root@server redis]# redis-cli -h 127.0.0.1 -p 26379 //登录sentinel时服务器地址使用127.0.0.1本地回环地址,而不要使用网络地址;
127.0.0.1:26379>
127.0.0.1:26379>
127.0.0.1:26379>
127.0.0.1:26379> info
# Server
redis_version:3.2.12
redis_git_sha1:00000000
redis_git_dirty:0
redis_build_id:7897e7d0e13773f
redis_mode:sentinel
os:Linux 3.10.0-957.el7.x86_64 x86_64
arch_bits:64
multiplexing_api:epoll
gcc_version:4.8.5
process_id:17253
run_id:fe470872980fa80498d4a28cec28d7223da6ef42
tcp_port:26379
uptime_in_seconds:13
uptime_in_days:0
hz:16
lru_clock:15301075
executable:/redis/redis-sentinel
config_file:/redis/redis-sentinel.conf
# Clients
connected_clients:1
client_longest_output_list:0
client_biggest_input_buf:0
blocked_clients:0
# CPU
used_cpu_sys:0.04
used_cpu_user:0.01
used_cpu_sys_children:0.00
used_cpu_user_children:0.00
# Stats
total_connections_received:1
total_commands_processed:0
instantaneous_ops_per_sec:0
total_net_input_bytes:31
total_net_output_bytes:32
instantaneous_input_kbps:0.00
instantaneous_output_kbps:0.00
rejected_connections:1
sync_full:0
sync_partial_ok:0
sync_partial_err:0
expired_keys:0
evicted_keys:0
keyspace_hits:0
keyspace_misses:0
pubsub_channels:0
pubsub_patterns:0
latest_fork_usec:0
migrate_cached_sockets:0
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=192.168.126.141:6379,slaves=2,sentinels=1
127.0.0.1:26379> SENTINEL masters //查看主服务器信息;
1) 1) "name"
2) "mymaster"
3) "ip"
4) "192.168.126.141"
5) "port"
6) "6379"
7) "runid"
8) "d2f6d470563d0dd6333caf4049c7af20da7948f2"
9) "flags"
10) "master"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "719"
19) "last-ping-reply"
20) "719"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "1982"
25) "role-reported"
26) "master"
27) "role-reported-time"
28) "343282"
29) "config-epoch"
30) "0"
31) "num-slaves"
32) "2"
33) "num-other-sentinels"
34) "0"
35) "quorum"
36) "1"
37) "failover-timeout"
38) "180000"
39) "parallel-syncs"
40) "1"
127.0.0.1:26379>
127.0.0.1:26379> SENTINEL slaves mymaster //查看从服务器信息;
1) 1) "name"
2) "127.0.0.1:6381"
3) "ip"
4) "127.0.0.1"
5) "port"
6) "6381"
7) "runid"
8) "ceac9aa8506a5d7eed90499ae19f3310e14a1d47"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "331"
19) "last-ping-reply"
20) "331"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "2585"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "494365"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "192.168.126.141"
35) "master-port"
36) "6379"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "232383"
2) 1) "name"
2) "127.0.0.1:6380"
3) "ip"
4) "127.0.0.1"
5) "port"
6) "6380"
7) "runid"
8) "c2f588662d0416eae47caf263eb7271ab47e4cb0"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "934"
19) "last-ping-reply"
20) "934"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "2585"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "494365"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "192.168.126.141"
35) "master-port"
36) "6379"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "232383"
127.0.0.1:26379>
5)停止主库服务,实现主从切换
[root@server db3]# kill -9 17532 //杀死主服务器进程;
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=127.0.0.1:6381,slaves=2,sentinels=1
//此时查看主库已经切换为6381端口了;
127.0.0.1:26379> SENTINEL masters //查看主库信息;
1) 1) "name"
2) "mymaster"
3) "ip"
4) "127.0.0.1"
5) "port"
6) "6381"
7) "runid"
8) "2f2274122ea56ec25ba1a08fcfd94e7b3aed00fc"
9) "flags"
10) "master"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "611"
19) "last-ping-reply"
20) "611"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "6302"
25) "role-reported"
26) "master"
27) "role-reported-time"
28) "96797"
29) "config-epoch"
30) "3"
31) "num-slaves"
32) "2"
33) "num-other-sentinels"
34) "0"
35) "quorum"
36) "1"
37) "failover-timeout"
38) "180000"
39) "parallel-syncs"
40) "1"
127.0.0.1:26379>
6)再把主库服务启动,再次查看主从状态
[root@server db1]# redis-server redis.conf.1
127.0.0.1:26379> SENTINEL slaves mymaster //再次查看从服务器信息,6379端口的实例已经变为从服务器,主从切换实现!
1) 1) "name"
2) "192.168.126.141:6379"
3) "ip"
4) "192.168.126.141"
5) "port"
6) "6379"
7) "runid"
8) "a5669a4200a811282fc141331a85122b227bc02b"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "469"
19) "last-ping-reply"
20) "469"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "5348"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "5348"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "127.0.0.1"
35) "master-port"
36) "6381"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "18114"
2) 1) "name"
2) "127.0.0.1:6380"
3) "ip"
4) "127.0.0.1"
5) "port"
6) "6380"
7) "runid"
8) "7a4065a8bd5323efd186fafd43432c22c1e8e726"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "470"
19) "last-ping-reply"
20) "470"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "7397"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "278657"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "127.0.0.1"
35) "master-port"
36) "6381"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "17967"