Redis的Sentinel哨兵模式

我们知道在主从复制模式下,如果主节点因故障不能正常提供服务,需要人为将从节点晋升为主节点,而且同时要通知应用方更新主节点地址,在实际场景中,这种故障处理方式显然是不合理的。Redis从2.8开始提供了Redis Sentinel(哨兵)架构来实现故障自动转移,Sentinel哨兵是Redis官方提供的高可用方案,可以用它来监控多个Redis服务实例的运行情况。

监控

1)Sentinel会不断检查master和slave是否正常
2)如果Sentinel挂了,就无法监控,所以需要多个哨兵,组成Sentinel网络
3)监控同一个master的Sentinel会自动连接,组成一个分布式的Sentinel网络,互相通信并交换彼此关于被监控服务器的信息
4)当一个Sentinel认为被监控的服务器已经下线时,它会向网络中的其它Sentinel进行确认,判断该服务器是否真的已经下线
5)如果下线的服务器为主服务器,那么Sentinel网络将对下线主服务器进行自动故障转移:将下线主服务器的某个从服务器提升为新的主服务器,并让其余从服务器转移到新的主服务器下,以此让系统重新回到正常状态。在主从切换的过程中,存在访问瞬断的情况。
6)下线的旧主服务器重新上线后,Sentinel会让它成为从服务器,挂到新的主服务器下

sentinel哨兵配置

配置3个哨兵,组成Sentinel网络,具体配置信息如下:

[root@k8smaster config]# grep -v "#" sentinel26380.conf |grep -v "^$"
daemonize yes #添加守护进程模式
logfile "/data/redis/redis/log/sentinel26380.log" #指定日志文件名
bind 192.168.23.100 #监听的ip地址,根据节点不同进行调整
port 26380 #端口
dir "/data/redis/redis/data/26380"  #哨兵sentinel的工作目录
sentinel monitor mymaster 192.168.23.100 6381 2 #哨兵监控的master;最后的2为quorum,即至少需要2个哨兵认为master不可达,才会判定其客观下线并触发故障转移
sentinel auth-pass mymaster 123456  #如果redis配置了密码,那这里必须配置认证,否则不能自动切换
sentinel down-after-milliseconds mymaster 30000  #master持续无有效响应超过该时间(毫秒)后,sentinel判定其主观下线,默认30秒
sentinel parallel-syncs mymaster 1 #指在failover过程中,能够被sentinel并行配置的从节点的数量
sentinel failover-timeout mymaster 180000 #故障转移超时时间(毫秒),默认180秒。

[root@k8smaster config]# 
[root@k8smaster config]# grep -v "#" sentinel26381.conf |grep -v "^$" 
daemonize yes
logfile "/data/redis/redis/log/sentinel26381.log"
bind 192.168.23.100
port 26381
dir "/data/redis/redis/data/26381"
sentinel monitor mymaster 192.168.23.100 6381 2
sentinel auth-pass mymaster 123456
sentinel down-after-milliseconds mymaster 30000
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 180000
[root@k8smaster config]# 
[root@k8smaster config]# grep -v "#" sentinel26382.conf |grep -v "^$"
daemonize yes
logfile "/data/redis/redis/log/sentinel26382.log"
bind 192.168.23.100
port 26382
dir "/data/redis/redis/data/26382"
sentinel monitor mymaster 192.168.23.100 6381 2
sentinel auth-pass mymaster 123456
sentinel down-after-milliseconds mymaster 30000
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 180000
[root@k8smaster config]# 

哨兵日志
哨兵启动后,会监控主redis和从redis
5500:X 12 Feb 05:32:57.837 # Sentinel runid is 4b1bc5f719e3698fecdfd70074075fb91534d5d3
5500:X 12 Feb 05:32:57.837 # +monitor master mymaster 192.168.23.100 6381 quorum 2
5500:X 12 Feb 05:32:57.837 * +slave slave 192.168.23.100:6382 192.168.23.100 6382 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:32:57.843 * +slave slave 192.168.23.100:6380 192.168.23.100 6380 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:32:58.650 * +sentinel sentinel 192.168.23.100:26380 192.168.23.100 26380 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:33:02.214 * +sentinel sentinel 192.168.23.100:26382 192.168.23.100 26382 @ mymaster 192.168.23.100 6381

[root@k8smaster bin]# ./redis-cli -h 192.168.23.100 -p 6381 -a 123456 #当前的主从角色情况
192.168.23.100:6381> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=192.168.23.100,port=6382,state=online,offset=164399,lag=1
slave1:ip=192.168.23.100,port=6380,state=online,offset=164256,lag=1

master_repl_offset:164542
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2
repl_backlog_histlen:164541
192.168.23.100:6381>

查看哨兵状态
[root@k8smaster bin]# ./redis-cli -h 192.168.23.100 -p 26381
192.168.23.100:26381> info
# Server
redis_version:3.0.7
redis_git_sha1:00000000
redis_git_dirty:0
redis_build_id:ab5e1e9c6570ed79
redis_mode:sentinel
os:Linux 3.10.0-123.el7.x86_64 x86_64
arch_bits:64
multiplexing_api:epoll
gcc_version:4.8.5
process_id:5500
run_id:4b1bc5f719e3698fecdfd70074075fb91534d5d3
tcp_port:26381
uptime_in_seconds:212
uptime_in_days:0
hz:17
lru_clock:4445485
config_file:/data/redis/redis/config/sentinel26381.conf
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
master0:name=mymaster,status=ok,address=192.168.23.100:6381,slaves=2,sentinels=3
192.168.23.100:26381> 
192.168.23.100:26381> sentinel master mymaster  #查看主redis
 1) "name"
 2) "mymaster"
 3) "ip"
 4) "192.168.23.100"
 5) "port"
 6) "6381"

 7) "runid"
 8) "0f47b0f0665bca13cea1299ccf4be0ffca162d18"
 9) "flags"
10) "master"
11) "pending-commands"
12) "0"
13) "last-ping-sent"
14) "0"
15) "last-ok-ping-reply"
16) "482"
17) "last-ping-reply"
18) "482"
19) "down-after-milliseconds"
20) "30000"
21) "info-refresh"
22) "8570"
23) "role-reported"
24) "master"
25) "role-reported-time"
26) "299561"
27) "config-epoch"
28) "0"
29) "num-slaves"  #从redis数量
30) "2"

31) "num-other-sentinels"
32) "2"
33) "quorum"
34) "2"
35) "failover-timeout"
36) "180000"
37) "parallel-syncs"
38) "1"
192.168.23.100:26381>
192.168.23.100:26381> sentinel slaves mymaster #redis的从节点信息
1)  1) "name"
    2) "192.168.23.100:6380"
    3) "ip"
    4) "192.168.23.100"
    5) "port"
    6) "6380"

    7) "runid"
    8) "91f858d501a66e63b5fe10932ebd412fd075e784"
    9) "flags"
   10) "slave"
   11) "pending-commands"
   12) "0"
   13) "last-ping-sent"
   14) "0"
   15) "last-ok-ping-reply"
   16) "813"
   17) "last-ping-reply"
   18) "813"
   19) "down-after-milliseconds"
   20) "30000"
   21) "info-refresh"
   22) "2388"
   23) "role-reported"
   24) "slave"
   25) "role-reported-time"
   26) "323882"
   27) "master-link-down-time"
   28) "0"
   29) "master-link-status"
   30) "ok"
   31) "master-host"
   32) "192.168.23.100"
   33) "master-port"
   34) "6381"
   35) "slave-priority"
   36) "100"
   37) "slave-repl-offset"
   38) "192037"
2)  1) "name"
    2) "192.168.23.100:6382"
    3) "ip"
    4) "192.168.23.100"
    5) "port"
    6) "6382"

    7) "runid"
    8) "696f02bd78782e2518bf67fb7771fe5c6d8248df"
    9) "flags"
   10) "slave"
   11) "pending-commands"
   12) "0"
   13) "last-ping-sent"
   14) "0"
   15) "last-ok-ping-reply"
   16) "813"
   17) "last-ping-reply"
   18) "813"
   19) "down-after-milliseconds"
   20) "30000"
   21) "info-refresh"
   22) "2388"
   23) "role-reported"
   24) "slave"
   25) "role-reported-time"
   26) "323888"
   27) "master-link-down-time"
   28) "0"
   29) "master-link-status"
   30) "ok"
   31) "master-host"
   32) "192.168.23.100"
   33) "master-port"
   34) "6381"
   35) "slave-priority"
   36) "100"
   37) "slave-repl-offset"
   38) "192037"
192.168.23.100:26381>
192.168.23.100:26381> sentinel sentinels mymaster  #其他哨兵
1)  1) "name"
    2) "192.168.23.100:26380"
    3) "ip"
    4) "192.168.23.100"
    5) "port"
    6) "26380"

    7) "runid"
    8) "28397c385ecd7fbcd1b570ac879511943fecc086"
    9) "flags"
   10) "sentinel"
   11) "pending-commands"
   12) "0"
   13) "last-ping-sent"
   14) "0"
   15) "last-ok-ping-reply"
   16) "132"
   17) "last-ping-reply"
   18) "132"
   19) "down-after-milliseconds"
   20) "30000"
   21) "last-hello-message"
   22) "867"
   23) "voted-leader"
   24) "?"
   25) "voted-leader-epoch"
   26) "0"
2)  1) "name"
    2) "192.168.23.100:26382"
    3) "ip"
    4) "192.168.23.100"
    5) "port"
    6) "26382"

    7) "runid"
    8) "6cdfe14263030ce126184c4f6bdec8696dcc9e57"
    9) "flags"
   10) "sentinel"
   11) "pending-commands"
   12) "0"
   13) "last-ping-sent"
   14) "0"
   15) "last-ok-ping-reply"
   16) "968"
   17) "last-ping-reply"
   18) "968"
   19) "down-after-milliseconds"
   20) "30000"
   21) "last-hello-message"
   22) "163"
   23) "voted-leader"
   24) "?"
   25) "voted-leader-epoch"
   26) "0"
192.168.23.100:26381>

命令切换redis主从

192.168.23.100:26381> sentinel failover  mymaster #通过命令手动触发failover,让当前主redis 6381切换为从节点
OK
192.168.23.100:26381> sentinel get-master-addr-by-name mymaster #redis的主由6381切换为6382
1) "192.168.23.100"
2) "6382"

192.168.23.100:26381>

[root@k8smaster bin]# ./redis-cli -h 192.168.23.100 -p 6382 -a 123456
192.168.23.100:6382> info replication
# Replication
role:master
connected_slaves:2
slave0:ip=192.168.23.100,port=6380,state=online,offset=239310,lag=1
slave1:ip=192.168.23.100,port=6381,state=online,offset=239310,lag=1

master_repl_offset:239453
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:211913
repl_backlog_histlen:27541
192.168.23.100:6382>

哨兵切换日志
5500:X 12 Feb 05:39:53.790 # Executing user requested FAILOVER of 'mymaster' #redis主failover
5500:X 12 Feb 05:39:53.790 # +new-epoch 1
5500:X 12 Feb 05:39:53.790 # +try-failover master mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:53.838 # +vote-for-leader 4b1bc5f719e3698fecdfd70074075fb91534d5d3 1
5500:X 12 Feb 05:39:53.838 # +elected-leader master mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:53.838 # +failover-state-select-slave master mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:53.891 # +selected-slave slave 192.168.23.100:6382 192.168.23.100 6382 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:53.891 * +failover-state-send-slaveof-noone slave 192.168.23.100:6382 192.168.23.100 6382 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:53.974 * +failover-state-wait-promotion slave 192.168.23.100:6382 192.168.23.100 6382 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:54.851 # +promoted-slave slave 192.168.23.100:6382 192.168.23.100 6382 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:54.851 # +failover-state-reconf-slaves master mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:54.928 * +slave-reconf-sent slave 192.168.23.100:6380 192.168.23.100 6380 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:55.910 * +slave-reconf-inprog slave 192.168.23.100:6380 192.168.23.100 6380 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:55.911 * +slave-reconf-done slave 192.168.23.100:6380 192.168.23.100 6380 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:55.992 # +failover-end master mymaster 192.168.23.100 6381
5500:X 12 Feb 05:39:55.993 # +switch-master mymaster 192.168.23.100 6381 192.168.23.100 6382 #redis主切换到6382
5500:X 12 Feb 05:39:55.993 * +slave slave 192.168.23.100:6380 192.168.23.100 6380 @ mymaster 192.168.23.100 6382
5500:X 12 Feb 05:39:55.993 * +slave slave 192.168.23.100:6381 192.168.23.100 6381 @ mymaster 192.168.23.100 6382

扫描二维码关注公众号,回复: 10162217 查看本文章

模拟主redis宕机故障

192.168.23.100:6382> shutdown #关闭redis主6382服务
not connected>

5500:X 12 Feb 05:45:29.251 # +sdown master mymaster 192.168.23.100 6382 #redis主6382下线
5500:X 12 Feb 05:45:29.685 # +new-epoch 2
5500:X 12 Feb 05:45:29.690 # +vote-for-leader 6cdfe14263030ce126184c4f6bdec8696dcc9e57 2
5500:X 12 Feb 05:45:30.370 # +odown master mymaster 192.168.23.100 6382 #quorum 3/2
5500:X 12 Feb 05:45:30.370 # Next failover delay: I will not start a failover before Wed Feb 12 05:51:29 2020
5500:X 12 Feb 05:45:30.520 # +config-update-from sentinel 192.168.23.100:26382 192.168.23.100 26382 @ mymaster 192.168.23.100 6382
5500:X 12 Feb 05:45:30.520 # +switch-master mymaster 192.168.23.100 6382 192.168.23.100 6381 #redis的主切换到6381
5500:X 12 Feb 05:45:30.520 * +slave slave 192.168.23.100:6380 192.168.23.100 6380 @ mymaster 192.168.23.100 6381
5500:X 12 Feb 05:45:30.521 * +slave slave 192.168.23.100:6382 192.168.23.100 6382 @ mymaster 192.168.23.100 6381

192.168.23.100:26381> sentinel get-master-addr-by-name mymaster #redis的主切换到6381
1) "192.168.23.100"
2) "6381"

192.168.23.100:26381> 

192.168.23.100:6381> info replication
# Replication
role:master
connected_slaves:1
slave0:ip=192.168.23.100,port=6380,state=online,offset=16152,lag=0

master_repl_offset:16152
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2
repl_backlog_histlen:16151
192.168.23.100:6381>

[root@k8smaster bin]# ./redis-server ../config/redis6382.conf  #恢复6382的redis服务
[root@k8smaster bin]# 

192.168.23.100:6381> info replication #redis6382自动加入redis集群的从节点
# Replication
role:master
connected_slaves:2
slave0:ip=192.168.23.100,port=6380,state=online,offset=29821,lag=1
slave1:ip=192.168.23.100,port=6382,state=online,offset=0,lag=1

master_repl_offset:29987
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2
repl_backlog_histlen:29986
192.168.23.100:6381> 

发布了60 篇原创文章 · 获赞 20 · 访问量 4576

猜你喜欢

转载自blog.csdn.net/zhaikaiyun/article/details/105027093