【Redis】Sentinel 哨兵机制

发布时间 2023-04-19 21:01:09作者: Janzen_Q
一、sentinel 工作原理

在Redis2.6+引入哨兵机制,在2.8版本后趋于稳定状态,在生产环境中建议使用2.8版本以上的sentinel服务。sentinel集群用于监控redis集群中Master节点工作状态,在Master节点发生故障时,可以实现主从切换,保证系统的高可用。

  • 哨兵(Sentinel)是一个分布式集群架构,可在一个架构中运行多个Sentinel进程,这些进程使用流言协议(gossip protocols)来接收关于master节点是否故障的信息,并使用投票协议(vote protocols)来决定是否执行故障切换,以及选择使用哪一个slave节点提升为master。哨兵机制可以解决故障发生时master与slave的自动切换,但无法解决单机性能瓶颈。
  • 每个哨兵(Sentinel)进程会定时向其他节点(Sentinel、Master、Slave)发送消息,当对方节点在指定配置时间内没有进行回应,则判定该节点故障,这个过程就是“主观认为宕机”(Subjective down),简称sdown。每个节点判定sdown状态的结果可能相同或不同。
  • 当同一个哨兵集群中的多个(数量达到 sentinel monitor 配置的 quorum 值,通常设置为超过集群节点数的一半)sentinel进程判定Master节点发生sdown,并通过 SENTINEL is-master-down-by-addr 命令进行信息同步交流后,最终判定Master节点发生故障,这个通常称之为“客观宕机”(Objectively down),简称odown。
  • 当哨兵集群判定Master发生odown后,通过一定vote算法从其余正常的slave节点中选举出一台服务器提升为Master节点,然后自动修改相关配置,并开启故障转移(failover)
  • 客户端初始连接配置,应该选择连接Sentinel节点集合,而不再是单独连接redis节点,并声明选择连接的redis主节点或集群名称(Master name),Sentinel接收到客户端请求将会返回消息通知客户端Master节点和Slave节点信息,由客户端程序执行相应请求(Sentinel节点仅作为配置中心,而非proxy代理)。
  • 哨兵集群节点个数应该要配置为 大于或等于3的奇数 

 

 

 

二、sentinel哨兵配置文件详解

三、sentinel哨兵机制部署

1、哨兵机制主从架构准备

#master节点redis配置文件修改内容
[root@Redis-Ubuntu-1804-p20:~]# cat /app/redis/etc/redis_6380.conf | grep -e "^bind\|^masterauth\|^replicaof\|^requirepass"
bind 0.0.0.0
masterauth redis
requirepass redis
[root@Redis-Ubuntu-1804-p20:~]# 

#slave节点redis配置文件修改内容
[root@Redis-Ubuntu-1804-p21:~]# cat /redis/etc/redis_6380.conf | grep -e "^bind\|^masterauth\|^replicaof\|^requirepass"
cat: /redis/etc/redis_6380.conf: No such file or directory
[root@Redis-Ubuntu-1804-p21:~]# cat /app/redis/etc/redis_6380.conf | grep -e "^bind\|^masterauth\|^replicaof\|^requirepass"
bind 0.0.0.0
replicaof 10.0.0.20 6380
masterauth redis
requirepass redis
[root@Redis-Ubuntu-1804-p21:~]#


[root@Redis-Ubuntu-1804-p22:~]# cat /redis/etc/redis_6380.conf | grep -e "^bind\|^masterauth\|^replicaof\|^requirepass"
cat: /redis/etc/redis_6380.conf: No such file or directory
[root@Redis-Ubuntu-1804-p22:~]# cat /app/redis/etc/redis_6380.conf | grep -e "^bind\|^masterauth\|^replicaof\|^requirepass"
bind 0.0.0.0
replicaof 10.0.0.20 6380
masterauth redis
requirepass redis
[root@Redis-Ubuntu-1804-p22:~]# 

 

2、修改配置启动sentinel

#在每个节点上创建sentinel配置文件
[root@Redis-Ubuntu-1804-p20:~]# cat > /app/redis/etc/sentinel.conf <<EOF
port 26379
daemonize yes
pidfile /app/redis/run/redis-sentinel.pid
logfile "/app/redis/log/sentinel.log"
dir /app/redis
sentinel monitor mymaster 10.0.0.20 6380 2
sentinel auth-pass mymaster redis
sentinel parallel-syncs mymaster 1
sentinel failover-timeout mymaster 180000
sentinel deny-scripts-reconfig yes
EOF

#启动sentinel
[root@Redis-Ubuntu-1804-p20:~]# redis-sentinel /app/redis/etc/sentinel.conf
[root@Redis-Ubuntu-1804-p20:~]# ss -ntlp
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 511 0.0.0.0:26379 0.0.0.0:* users:(("redis-sentinel",pid=2702,fd=7))
LISTEN 0 511 0.0.0.0:6379 0.0.0.0:* users:(("redis-server",pid=851,fd=6))
LISTEN 0 511 0.0.0.0:6380 0.0.0.0:* users:(("redis-server",pid=2101,fd=6))
LISTEN 0 511 0.0.0.0:6381 0.0.0.0:* users:(("redis-server",pid=881,fd=6))
LISTEN 0 128 127.0.0.53%lo:53 0.0.0.0:* users:(("systemd-resolve",pid=801,fd=13))
LISTEN 0 128 0.0.0.0:22 0.0.0.0:* users:(("sshd",pid=885,fd=3))
LISTEN 0 128 127.0.0.1:6010 0.0.0.0:* users:(("sshd",pid=1467,fd=10))
LISTEN 0 511 [::]:26379 [::]:* users:(("redis-sentinel",pid=2702,fd=6))
LISTEN 0 128 [::]:22 [::]:* users:(("sshd",pid=885,fd=4))
LISTEN 0 128 [::1]:6010 [::]:* users:(("sshd",pid=1467,fd=9))

  

3、观察sentinel配置文件变化

##启动后对比配置文件变化
[root@Redis-Ubuntu-1804-p20:~]# cat /app/redis/etc/sentinel.conf 
port 26379
daemonize yes
pidfile "/app/redis/run/redis-sentinel.pid"
logfile "/app/redis/log/sentinel.log"
dir "/app/redis"
sentinel myid 4e21cfb28d765283301f98584961adc0d755ac21  ##每个sentinel节点的id都需确保不同
sentinel deny-scripts-reconfig yes
sentinel monitor mymaster 10.0.0.20 6380 2
sentinel auth-pass mymaster redis
sentinel config-epoch mymaster 0
# Generated by CONFIG REWRITE
protected-mode no
sentinel leader-epoch mymaster 0
sentinel known-replica mymaster 10.0.0.22 6380
sentinel known-replica mymaster 10.0.0.21 6380
sentinel known-sentinel mymaster 10.0.0.22 26379 f41684db3a063d4468b5023f57c62a7a5bb3130e
sentinel known-sentinel mymaster 10.0.0.21 26379 49b72e4d02964472051d55fd2980dd598332ac9f
sentinel current-epoch 0

[root@Redis-Ubuntu-1804-p21:~]# cat /app/redis/etc/sentinel.conf
port 26379
daemonize yes
pidfile "/app/redis/run/redis-sentinel.pid"
logfile "/app/redis/log/sentinel.log"
dir "/app/redis"
sentinel myid 49b72e4d02964472051d55fd2980dd598332ac9f   ##每个sentinel节点的id都需确保不同
sentinel deny-scripts-reconfig yes
sentinel monitor mymaster 10.0.0.20 6380 2
sentinel auth-pass mymaster redis
sentinel config-epoch mymaster 0
# Generated by CONFIG REWRITE
protected-mode no
sentinel leader-epoch mymaster 0
sentinel known-replica mymaster 10.0.0.22 6380
sentinel known-replica mymaster 10.0.0.21 6380
sentinel known-sentinel mymaster 10.0.0.20 26379 4e21cfb28d765283301f98584961adc0d755ac21
sentinel known-sentinel mymaster 10.0.0.22 26379 f41684db3a063d4468b5023f57c62a7a5bb3130e
sentinel current-epoch 0


[root@Redis-Ubuntu-1804-p22:~]# cat /app/redis/etc/sentinel.conf 
port 26379
daemonize yes
pidfile "/app/redis/run/redis-sentinel.pid"
logfile "/app/redis/log/sentinel.log"
dir "/app/redis"
sentinel myid f41684db3a063d4468b5023f57c62a7a5bb3130e  ##每个sentinel节点的id都需确保不同
sentinel deny-scripts-reconfig yes
sentinel monitor mymaster 10.0.0.20 6380 2
sentinel auth-pass mymaster redis
sentinel config-epoch mymaster 0
# Generated by CONFIG REWRITE
protected-mode no
sentinel leader-epoch mymaster 0
sentinel known-replica mymaster 10.0.0.21 6380
sentinel known-replica mymaster 10.0.0.22 6380
sentinel known-sentinel mymaster 10.0.0.21 26379 49b72e4d02964472051d55fd2980dd598332ac9f
sentinel known-sentinel mymaster 10.0.0.20 26379 4e21cfb28d765283301f98584961adc0d755ac21
sentinel current-epoch 0

  

4、查看sentinel状态

[root@Redis-Ubuntu-1804-p20:~]# redis-cli -a redis -p 26379 --no-auth-warning info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=10.0.0.20:6380,slaves=2,sentinels=3

[root@Redis-Ubuntu-1804-p21:~]# redis-cli -a redis -p 26379 --no-auth-warning info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=10.0.0.20:6380,slaves=2,sentinels=3


[root@Redis-Ubuntu-1804-p22:~]# redis-cli -a redis -p 26379 --no-auth-warning info sentinel
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=10.0.0.20:6380,slaves=2,sentinels=3

  

python 脚本连接sentinel进行数据写入

#!/usr/bin/python3
# -*- coding: UTF-8 -*-
#********************************************************************
#Author:                janzen
#Date:                  2023-04-18
#FileName:             redis_sentinel_newData.py
#Description:          Write/read test keys through Redis Sentinel so
#                      that a master failover can be observed live.
#Usage:                redis_sentinel_newData.py <key_prefix> <value_prefix>
#Copyright (C):        2023 All rights reserved
#********************************************************************
import redis
from redis.sentinel import Sentinel
import sys,time

# Fail with a usage hint instead of an IndexError when arguments are missing.
if len(sys.argv) < 3:
    sys.exit("Usage: %s <key_prefix> <value_prefix>" % sys.argv[0])

key=sys.argv[1]      # key prefix; the loop counter is appended to it
value=sys.argv[2]    # value prefix; the loop counter is appended to it
num=1                # counter, advanced only after a successful round
redis_auth_pass='redis'
mastername='mymaster'

# Connect to the Sentinel nodes, not to a Redis data node; the current
# master/replica addresses are resolved through Sentinel on each round.
sentinel = Sentinel([
        ('10.0.0.20',26379),
        ('10.0.0.21',26379),
        ('10.0.0.22',26379)],
        socket_timeout=0.5
        )

while True:
    time.sleep(0.5)
    try:
        # Ask Sentinel for the current topology (addresses only).
        master_addr = sentinel.discover_master(mastername)
        # Call the method; the previous "discover_slaves=(...)" was an
        # assignment that replaced the method with the master name string.
        slave_addrs = sentinel.discover_slaves(mastername)
        print("Master:%s ; Slave:%s" % (master_addr,slave_addrs))

        current_key = key+str(num)

        # Write through a connection bound to the current master.
        master = sentinel.master_for(mastername,socket_timeout=0.5,password=redis_auth_pass,db=0)
        master.set(current_key,value+str(num))

        # Read the key that was just written, via a replica connection.
        # Replication is asynchronous, so the value may briefly be None.
        slave = sentinel.slave_for(mastername,socket_timeout=0.5,password=redis_auth_pass,db=0)
        r_ret=slave.get(current_key)
        print("get data %s:%s" % (current_key,r_ret))
    except Exception as err:
        # Best effort on purpose: during a failover errors are expected, so
        # report them and retry the same counter value on the next round.
        print(err)
        continue
    num+=1

 

模拟数据库故障观察结果

[root@Redis-Ubuntu-1804-p20:~]# systemctl stop redis_6380.service 

 三个节点日志

##10.0.0.20:26379
2702:X 18 Apr 2023 05:23:31.362 # +sdown master mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:31.464 # +odown master mymaster 10.0.0.20 6380 #quorum 2/2
2702:X 18 Apr 2023 05:23:31.464 # +new-epoch 11
2702:X 18 Apr 2023 05:23:31.464 # +try-failover master mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:31.465 # +vote-for-leader 4e21cfb28d765283301f98584961adc0d755ac21 11
2702:X 18 Apr 2023 05:23:31.477 # 49b72e4d02964472051d55fd2980dd598332ac9f voted for 4e21cfb28d765283301f98584961adc0d755ac21 11
2702:X 18 Apr 2023 05:23:31.477 # f41684db3a063d4468b5023f57c62a7a5bb3130e voted for 4e21cfb28d765283301f98584961adc0d755ac21 11
2702:X 18 Apr 2023 05:23:31.556 # +elected-leader master mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:31.557 # +failover-state-select-slave master mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:31.609 # +selected-slave slave 10.0.0.22:6380 10.0.0.22 6380 @ mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:31.610 * +failover-state-send-slaveof-noone slave 10.0.0.22:6380 10.0.0.22 6380 @ mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:31.711 * +failover-state-wait-promotion slave 10.0.0.22:6380 10.0.0.22 6380 @ mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:32.236 # +promoted-slave slave 10.0.0.22:6380 10.0.0.22 6380 @ mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:32.237 # +failover-state-reconf-slaves master mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:32.289 * +slave-reconf-sent slave 10.0.0.21:6380 10.0.0.21 6380 @ mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:32.499 * +slave-reconf-inprog slave 10.0.0.21:6380 10.0.0.21 6380 @ mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:32.551 # -odown master mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:33.516 * +slave-reconf-done slave 10.0.0.21:6380 10.0.0.21 6380 @ mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:33.598 # +failover-end master mymaster 10.0.0.20 6380
2702:X 18 Apr 2023 05:23:33.598 # +switch-master mymaster 10.0.0.20 6380 10.0.0.22 6380
2702:X 18 Apr 2023 05:23:33.599 * +slave slave 10.0.0.21:6380 10.0.0.21 6380 @ mymaster 10.0.0.22 6380
2702:X 18 Apr 2023 05:23:33.599 * +slave slave 10.0.0.20:6380 10.0.0.20 6380 @ mymaster 10.0.0.22 6380
2702:X 18 Apr 2023 05:24:03.600 # +sdown slave 10.0.0.20:6380 10.0.0.20 6380 @ mymaster 10.0.0.22 6380

##10.0.0.21:26379
2553:X 18 Apr 2023 05:23:31.416 # +sdown master mymaster 10.0.0.20 6380
2553:X 18 Apr 2023 05:23:31.474 # +new-epoch 11
2553:X 18 Apr 2023 05:23:31.482 # +vote-for-leader 4e21cfb28d765283301f98584961adc0d755ac21 11
2553:X 18 Apr 2023 05:23:31.507 # +odown master mymaster 10.0.0.20 6380 #quorum 3/2
2553:X 18 Apr 2023 05:23:31.507 # Next failover delay: I will not start a failover before Tue Apr 18 05:29:32 2023
2553:X 18 Apr 2023 05:23:32.296 # +config-update-from sentinel 4e21cfb28d765283301f98584961adc0d755ac21 10.0.0.20 26379 @ mymaster 10.0.0.20 6380
2553:X 18 Apr 2023 05:23:32.297 # +switch-master mymaster 10.0.0.20 6380 10.0.0.22 6380
2553:X 18 Apr 2023 05:23:32.297 * +slave slave 10.0.0.21:6380 10.0.0.21 6380 @ mymaster 10.0.0.22 6380
2553:X 18 Apr 2023 05:23:32.297 * +slave slave 10.0.0.20:6380 10.0.0.20 6380 @ mymaster 10.0.0.22 6380
2553:X 18 Apr 2023 05:24:02.315 # +sdown slave 10.0.0.20:6380 10.0.0.20 6380 @ mymaster 10.0.0.22 6380

##10.0.0.22:26379
5545:X 18 Apr 2023 05:23:31.326 # +sdown master mymaster 10.0.0.20 6380
5545:X 18 Apr 2023 05:23:31.465 # +new-epoch 11
5545:X 18 Apr 2023 05:23:31.474 # +vote-for-leader 4e21cfb28d765283301f98584961adc0d755ac21 11
5545:X 18 Apr 2023 05:23:32.287 # +config-update-from sentinel 4e21cfb28d765283301f98584961adc0d755ac21 10.0.0.20 26379 @ mymaster 10.0.0.20 6380
5545:X 18 Apr 2023 05:23:32.288 # +switch-master mymaster 10.0.0.20 6380 10.0.0.22 6380
5545:X 18 Apr 2023 05:23:32.288 * +slave slave 10.0.0.21:6380 10.0.0.21 6380 @ mymaster 10.0.0.22 6380
5545:X 18 Apr 2023 05:23:32.288 * +slave slave 10.0.0.20:6380 10.0.0.20 6380 @ mymaster 10.0.0.22 6380
5545:X 18 Apr 2023 05:24:02.317 # +sdown slave 10.0.0.20:6380 10.0.0.20 6380 @ mymaster 10.0.0.22 6380

##写程序

Master:('10.0.0.20', 6380) ; Slave:mymaster
set data key_229:b'value_229'
Master:('10.0.0.20', 6380) ; Slave:mymaster
set data key_230:b'value_230'
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
Master:('10.0.0.20', 6380) ; Slave:mymaster
Error 111 connecting to 10.0.0.20:6380. Connection refused.
No master found for 'mymaster'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_231:b'value_231'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_232:b'value_232'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_233:b'value_233'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_234:b'value_234'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_235:b'value_235'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_236:b'value_236'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_237:b'value_237'
Master:('10.0.0.22', 6380) ; Slave:mymaster
set data key_238:b'value_238'

 

##读程序

Master:('10.0.0.20', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.20', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.20', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.20', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.20', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.20', 6380) ; Slave:mymaster
get data key1:b'value1'
No master found for 'mymaster'
No master found for 'mymaster'
Master:('10.0.0.22', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.22', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.22', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.22', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.22', 6380) ; Slave:mymaster
get data key1:b'value1'
Master:('10.0.0.22', 6380) ; Slave:mymaster
get data key1:b'value1'