推荐阅读《渐进式解析 Redis 源码 - 哨兵 sentinel》,那篇文章讲得比较全面,本文也参考了它。
文章目录
- sentinelFailoverStateMachine(故障转移的主流程)
- sentinelFailoverWaitStart(第一步故障转移开始)
- sentinelFailoverSelectSlave(第二步,选择晋升的服务器节点函数)
- sentinelSelectSlave(从符合条件的从服务器中挑选要晋升的节点)
- sentinelFailoverSendSlaveOfNoOne(第三步发送 slaveof no one 命令,从服务器变主服务器 )
- sentinelFailoverWaitPromotion(等待检查)
- sentinelFailoverReconfNextSlave(把新主服务器的地址发送给还没有更新配置的从服务器)
sentinelFailoverStateMachine(故障转移的主流程)
sentinelHandleRedisInstance
方法下的sentinelFailoverStateMachine
方法是故障转移主流程方法,想看sentinelFailoverStateMachine位于什么位置,可以前往Redis 6.2 哨兵定时检查源码找到
/* Run one step of the failover state machine for the master 'ri'.
 *
 * 'ri' must carry the SRI_MASTER flag (enforced with serverAssert). When
 * SRI_FAILOVER_IN_PROGRESS is not set the call is a no-op. Otherwise the
 * handler matching the current ri->failover_state is invoked; a single call
 * handles a single state, so the failover advances across repeated calls.
 * States without a handler below are ignored. */
void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
    serverAssert(ri->flags & SRI_MASTER);

    /* No failover is running for this master: nothing to do. */
    if ((ri->flags & SRI_FAILOVER_IN_PROGRESS) == 0) return;

    if (ri->failover_state == SENTINEL_FAILOVER_STATE_WAIT_START) {
        /* Failover start: wait to be the leader or for a forced failover. */
        sentinelFailoverWaitStart(ri);
    } else if (ri->failover_state == SENTINEL_FAILOVER_STATE_SELECT_SLAVE) {
        /* Pick the replica that will be promoted. */
        sentinelFailoverSelectSlave(ri);
    } else if (ri->failover_state ==
               SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE) {
        /* Send SLAVEOF NO ONE to turn the selected replica into a master. */
        sentinelFailoverSendSlaveOfNoOne(ri);
    } else if (ri->failover_state == SENTINEL_FAILOVER_STATE_WAIT_PROMOTION) {
        /* Wait for the selected replica to become a master; the handler
         * aborts the failover on timeout. */
        sentinelFailoverWaitPromotion(ri);
    } else if (ri->failover_state == SENTINEL_FAILOVER_STATE_RECONF_SLAVES) {
        /* Tell the remaining replicas to replicate from the new master. */
        sentinelFailoverReconfNextSlave(ri);
    }
}
故障转移是分步骤、连续进行的:成功执行完一个状态后,failover_state
就会变成下一个状态。虽然用的是 switch,但它更像一个步骤条;当然,某一步也可能因为某个原因失败,使整个流程中止。
sentinelFailoverWaitStart(第一步故障转移开始)
/* ---------------- Failover state machine implementation ------------------- */

/* First failover step: proceed only if this Sentinel leads the failover.
 *
 * The leader for ri->failover_epoch is obtained with sentinelGetLeader() and
 * compared, case-insensitively, with our own id. If we are not the leader
 * and the failover was not forced (SRI_FORCE_FAILOVER), the function returns
 * without changing state, aborting the failover once the election timeout
 * has elapsed since ri->failover_start_time. Otherwise the state moves to
 * SENTINEL_FAILOVER_STATE_SELECT_SLAVE. */
void sentinelFailoverWaitStart(sentinelRedisInstance *ri) {
    /* Check if we are the leader for the failover epoch. The string
     * returned by sentinelGetLeader() is released right after the compare. */
    char *leader = sentinelGetLeader(ri, ri->failover_epoch);
    int isleader = (leader != NULL) && (strcasecmp(leader,sentinel.myid) == 0);
    int may_proceed;

    sdsfree(leader);

    /* We can continue if we are the leader, or if this is a forced failover
     * requested via SENTINEL FAILOVER. */
    may_proceed = isleader || (ri->flags & SRI_FORCE_FAILOVER);
    if (!may_proceed) {
        /* The election timeout is the MIN between SENTINEL_ELECTION_TIMEOUT
         * and the configured failover timeout. */
        int election_timeout = SENTINEL_ELECTION_TIMEOUT;

        if (ri->failover_timeout < election_timeout) {
            election_timeout = ri->failover_timeout;
        }

        /* Abort the failover if we are still not the leader after the
         * election timeout; otherwise just wait for the next call. */
        if (mstime() - ri->failover_start_time > election_timeout) {
            sentinelEvent(LL_WARNING,"-failover-abort-not-elected",ri,"%@");
            sentinelAbortFailover(ri);
        }
        return;
    }

    sentinelEvent(LL_WARNING,"+elected-leader",ri,"%@");

    /* Failure simulation hook: crash right after the election if the
     * corresponding simulation flag is set. */
    if (sentinel.simfailure_flags & SENTINEL_SIMFAILURE_CRASH_AFTER_ELECTION) {
        sentinelSimFailureCrash();
    }

    /* Advance to the replica selection step and record when we did so. */
    ri->failover_state = SENTINEL_FAILOVER_STATE_SELECT_SLAVE;
    ri->failover_state_change_time = mstime();
    sentinelEvent(LL_WARNING,"+failover-state-select-slave",ri,"%@");
}
sentinelFailoverSelectSlave(第二步,选择晋升的服务器节点函数)
/* Second failover step: choose the replica to promote.
 *
 * Calls sentinelSelectSlave() on the master 'ri'. When no replica is
 * returned the failover is aborted. Otherwise the chosen replica is flagged
 * SRI_PROMOTED, stored in ri->promoted_slave, and the state moves to
 * SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE.
 *
 * No timeout is handled in this state, since the function either aborts the
 * failover or moves forward to the next state. */
void sentinelFailoverSelectSlave(sentinelRedisInstance *ri) {
    sentinelRedisInstance *candidate = sentinelSelectSlave(ri);

    /* No suitable replica: give up on this failover. */
    if (candidate == NULL) {
        sentinelEvent(LL_WARNING,"-failover-abort-no-good-slave",ri,"%@");
        sentinelAbortFailover(ri);
        return;
    }

    /* A replica was selected: mark it as promoted and remember it. */
    sentinelEvent(LL_WARNING,"+selected-slave",candidate,"%@");
    candidate->flags |= SRI_PROMOTED;
    ri->promoted_slave = candidate;

    /* Advance to the next step and record when the state changed. */
    ri->failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE;
    ri->failover_state_change_time = mstime();
    sentinelEvent(LL_NOTICE,"+failover-state-send-slaveof-noone",
        candidate, "%@");
}
sentinelSelectSlave(从符合条件的从服务器中挑选要晋升的节点)
/* Select the replica of 'master' that should be promoted.
 *
 * Every replica in master->slaves is filtered; a replica is discarded when:
 *   - it is flagged SRI_S_DOWN or SRI_O_DOWN;
 *   - its link is disconnected;
 *   - its link was last available more than five ping periods ago;
 *   - its priority is 0;
 *   - its INFO data is older than the validity window computed below;
 *   - its master link down time exceeds the computed limit.
 * The survivors are sorted with compareSlavesForPromotion() and the first
 * element is picked.
 *
 * Returns the selected instance, or NULL if no replica passes the filters.
 * The temporary array is released before returning. */
sentinelRedisInstance *sentinelSelectSlave(sentinelRedisInstance *master) {
    /* Room for one pointer per known replica. */
    sentinelRedisInstance **candidates =
        zmalloc(sizeof(candidates[0])*dictSize(master->slaves));
    sentinelRedisInstance *best = NULL;
    int count = 0;
    dictIterator *iter;
    dictEntry *entry;
    /* Limit for the replica's master link down time: ten times the
     * down-after period, plus the time the master has been subjectively
     * down, if it is. */
    mstime_t down_time_limit = master->down_after_period * 10;
    mstime_t info_validity_time;

    if (master->flags & SRI_S_DOWN) {
        down_time_limit += mstime() - master->s_down_since_time;
    }

    /* If the master is in SDOWN state we get INFO for slaves every second.
     * Otherwise we get it with the usual period so we need to account for
     * a larger delay. */
    info_validity_time = (master->flags & SRI_S_DOWN) ?
        SENTINEL_PING_PERIOD*5 : SENTINEL_INFO_PERIOD*3;

    iter = dictGetIterator(master->slaves);
    for (entry = dictNext(iter); entry != NULL; entry = dictNext(iter)) {
        sentinelRedisInstance *slave = dictGetVal(entry);

        /* Replica considered down, subjectively or objectively. */
        if (slave->flags & (SRI_S_DOWN|SRI_O_DOWN)) continue;
        /* Link to the replica is disconnected. */
        if (slave->link->disconnected) continue;
        /* Link was last available too long ago. */
        if (mstime() - slave->link->last_avail_time > SENTINEL_PING_PERIOD*5)
            continue;
        /* Priority 0 excludes the replica from promotion. */
        if (slave->slave_priority == 0) continue;
        /* INFO data is too old to be relied upon. */
        if (mstime() - slave->info_refresh > info_validity_time) continue;
        /* Replica has been disconnected from its master for too long. */
        if (slave->master_link_down_time > down_time_limit) continue;

        candidates[count++] = slave;
    }
    dictReleaseIterator(iter);

    /* Sort the survivors and take the first one as the promotion target. */
    if (count > 0) {
        qsort(candidates,count,sizeof(candidates[0]),
            compareSlavesForPromotion);
        best = candidates[0];
    }
    zfree(candidates);
    return best;
}
sentinelFailoverSendSlaveOfNoOne(第三步发送 slaveof no one 命令,从服务器变主服务器 )
/* Third failover step: send SLAVEOF NO ONE to the promoted replica.
 *
 * If the link to the promoted replica is disconnected the command cannot be
 * sent: the function returns and will retry on the next call, aborting the
 * failover once ri->failover_timeout has elapsed since the last state
 * change. If sending fails the state is left untouched as well. On success
 * the state moves to SENTINEL_FAILOVER_STATE_WAIT_PROMOTION. */
void sentinelFailoverSendSlaveOfNoOne(sentinelRedisInstance *ri) {
    /* We can't send the command to the promoted slave if it is now
     * disconnected. Retry again and again with this state until the timeout
     * is reached, then abort the failover. */
    if (ri->promoted_slave->link->disconnected) {
        if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
            sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
            sentinelAbortFailover(ri);
        }
        return;
    }

    /* Send SLAVEOF NO ONE command to turn the slave into a master.
     * We actually register a generic callback for this command as we don't
     * really care about the reply. We check if it worked indirectly observing
     * if INFO returns a different role (master instead of slave). */
    if (sentinelSendSlaveOf(ri->promoted_slave,NULL) != C_OK) return;

    /* Command sent: announce it, then advance the state and its timestamp. */
    sentinelEvent(LL_NOTICE, "+failover-state-wait-promotion",
        ri->promoted_slave,"%@");
    ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
    ri->failover_state_change_time = mstime();
}
sentinelFailoverWaitPromotion(等待检查)
/* Fourth failover step: wait for the promoted replica to become a master.
 *
 * The promotion itself is detected indirectly, through INFO, so this
 * function only handles the timeout: when more than ri->failover_timeout
 * has elapsed since the last state change, the failover is aborted.
 * Switching to the next state is handled by the function parsing the INFO
 * output of the promoted slave. */
void sentinelFailoverWaitPromotion(sentinelRedisInstance *ri) {
    /* Still within the allowed time: keep waiting. */
    if (mstime() - ri->failover_state_change_time <= ri->failover_timeout)
        return;

    sentinelEvent(LL_WARNING,"-failover-abort-slave-timeout",ri,"%@");
    sentinelAbortFailover(ri);
}
sentinelFailoverReconfNextSlave(把新主服务器的地址发送给还没有更新配置的从服务器)
/* Send SLAVEOF with the new master address to the remaining slaves that
 * still don't appear to have the configuration updated.
 *
 * At most master->parallel_syncs replicas are kept in the "command sent" or
 * "reconfiguration in progress" states at the same time: the replicas
 * already in those states are counted first, then new commands are sent
 * until the limit is reached or there are no more replicas to visit.
 * The function ends by calling sentinelFailoverDetectEnd(). */
void sentinelFailoverReconfNextSlave(sentinelRedisInstance *master) {
    dictIterator *iter;
    dictEntry *entry;
    int in_progress = 0;

    /* First pass: count the replicas that were already sent the command or
     * are currently being reconfigured. */
    iter = dictGetIterator(master->slaves);
    for (entry = dictNext(iter); entry != NULL; entry = dictNext(iter)) {
        sentinelRedisInstance *slave = dictGetVal(entry);

        if (slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)) in_progress++;
    }
    dictReleaseIterator(iter);

    /* Second pass: reconfigure more replicas while we are below the
     * parallel syncs limit. The limit is checked before fetching the next
     * entry, so the iterator is not advanced once the limit is reached. */
    iter = dictGetIterator(master->slaves);
    for (;;) {
        sentinelRedisInstance *slave;

        if (in_progress >= master->parallel_syncs) break;
        if ((entry = dictNext(iter)) == NULL) break;
        slave = dictGetVal(entry);

        /* Skip the promoted slave, and already configured slaves. */
        if (slave->flags & (SRI_PROMOTED|SRI_RECONF_DONE)) continue;

        /* If too much time elapsed without the slave moving forward to
         * the next state, consider it reconfigured even if it is not.
         * Sentinels will detect the slave as misconfigured and fix its
         * configuration later.
         *
         * The event is emitted before the flags are updated. Since the
         * RECONF_SENT flag is cleared here, the checks below do not skip
         * this instance because of it. */
        if ((slave->flags & SRI_RECONF_SENT) &&
            (mstime() - slave->slave_reconf_sent_time) >
            SENTINEL_SLAVE_RECONF_TIMEOUT)
        {
            sentinelEvent(LL_NOTICE,"-slave-reconf-sent-timeout",slave,"%@");
            slave->flags = (slave->flags & ~SRI_RECONF_SENT) | SRI_RECONF_DONE;
        }

        /* Nothing to do for instances that are disconnected or already
         * in RECONF_SENT (or RECONF_INPROG) state. */
        if ((slave->flags & (SRI_RECONF_SENT|SRI_RECONF_INPROG)) ||
            slave->link->disconnected)
        {
            continue;
        }

        /* Send SLAVEOF with the address of the promoted replica. If the
         * command could not be sent, leave this replica as it is. */
        if (sentinelSendSlaveOf(slave,master->promoted_slave->addr) != C_OK)
            continue;

        /* Command sent: flag the replica, remember when, and count it. */
        slave->flags |= SRI_RECONF_SENT;
        slave->slave_reconf_sent_time = mstime();
        sentinelEvent(LL_NOTICE,"+slave-reconf-sent",slave,"%@");
        in_progress++;
    }
    dictReleaseIterator(iter);

    /* Check if all the slaves are reconfigured and handle timeout. */
    sentinelFailoverDetectEnd(master);
}