-- 故障检测
orch使用了一种整体性的方法去探测主库和中间主库是否正常。
一种比较天真的方法,比如,监控工具探测到主库无法连接或者查询,就发出报警。这种方法容易受到网络故障而造成误报。为了降低误报,会通过运行n次每次间隔t时间的方式。在某些情况下,这减少了误报的机会,但是增加了真正故障的响应时间。
orchestrator会利用复制拓扑。orch不仅会监测主库,也会检测从库。比如,要诊断出主库挂了的情况,orch必须满足以下两个条件:
    - 联系不到主库。
    - 可以联系到主库对应的从库,并且这些从库也连不上主库。
orch没有将错误按时间来进行分类,而是按复制拓扑服务器(也就是所谓的multiple observers)本身进行分类。实际上,当所有的从库都连不上主库的时候,说明复制拓扑实际上就被破坏了,有理由需要进行故障转移。
orch的这种整体故障检测方式在生产环境是非常可靠的。

-- 探测机制
orch会每隔InstancePollSeconds(默认5s)时间去被监控的实例上拉取实例状态,并将这些状态信息存入orch的元数据库的orchestrator.database_instance表中,然后
orch会每隔InstancePollSeconds秒从元数据库中获取每个instance的状态,展示在web界面上。
拉取实例状态的语句如下:
show variables like "maxscale%";
show global status like "Uptime";
select @@global.hostname, ifnull(@@global.report_host, ""), @@global.server_id, @@global.version, @@global.version_comment, @@global.read_only, @@global.binlog_format, @@global.log_bin, @@global.log_slave_updates;
show master status;
show global status like "rpl_semi_sync_%_status";
select @@global.gtid_mode, @@global.server_uuid, @@global.gtid_executed, @@global.gtid_purged, @@global.master_info_repository = "TABLE", @@global.binlog_row_image;
show slave status;
select count(*) > 0 and MAX(User_name) != "" from mysql.slave_master_info;
show slave hosts;
select substring_index(host, ":", 1) as slave_hostname from information_schema.processlist where command IN ("Binlog Dump", "Binlog Dump GTID");
SELECT SUBSTRING_INDEX(@@hostname, ".", 1);

拉取得到实例状态之后,通过下面语句将状态值存入到orch的元数据库中:
注:values后面的值就是上面拉取到的实例状态值。
INSERT INTO database_instance
                (hostname, port, last_checked, last_attempted_check, last_check_partial_success, uptime, server_id, server_uuid, version, major_version, version_comment, binlog_server, read_only, binlog_format, binlog_row_image, log_bin, log_slave_updates, binary_log_file, binary_log_pos, master_host, master_port, slave_sql_running, slave_io_running, replication_sql_thread_state, replication_io_thread_state, has_replication_filters, supports_oracle_gtid, oracle_gtid, master_uuid, ancestry_uuid, executed_gtid_set, gtid_mode, gtid_purged, gtid_errant, mariadb_gtid, pseudo_gtid, master_log_file, read_master_log_pos, relay_master_log_file, exec_master_log_pos, relay_log_file, relay_log_pos, last_sql_error, last_io_error, seconds_behind_master, slave_lag_seconds, sql_delay, num_slave_hosts, slave_hosts, cluster_name, suggested_cluster_alias, data_center, region, physical_environment, replication_depth, is_co_master, replication_credentials_available, has_replication_credentials, allow_tls, semi_sync_enforced, semi_sync_master_enabled, semi_sync_replica_enabled, instance_alias, last_discovery_latency, last_seen)
        VALUES
                ("192.168.56.12", 3306, NOW(), NOW(), 1, 322504, 1521, "e2685a0f-d8f8-11e9-a2c9-002590e95c3c", "5.7.22-log", "5.7", "MySQL Community Server (GPL)", 0, 1, "ROW", "FULL", 1, 1, "mysql-bin.000016", 129186924,"192.168.56.11", 3306, 1, 1, 1, 1, 0, 1, 1,"6bf30525-d8f8-11e9-808c-0cc47a74fca8","6bf30525-d8f8-11e9-808c-0cc47a74fca8,e2685a0f-d8f8-11e9-a2c9-002590e95c3c","6bf30525-d8f8-11e9-808c-0cc47a74fca8:1-1554568,e2685a0f-d8f8-11e9-a2c9-002590e95c3c:1-632541","ON", "", "", 0, 0, "mysql-bin.000017", 150703414, "mysql-bin.000017", 150703414, "mysql-relay-bin.000052", 137056344, "", "", 0, 0, 0, 1,"[{"Hostname":"192.168.56.11","Port":3306}]", "192.168.56.11:3306", "repl3306", "", "", "", 1, 1, 1, 1, 0, 0, 0, 0, "", 8083748, NOW())
        ON DUPLICATE KEY UPDATE
                hostname=VALUES(hostname), port=VALUES(port), last_checked=VALUES(last_checked), last_attempted_check=VALUES(last_attempted_check), last_check_partial_success=VALUES(last_check_partial_success), uptime=VALUES(uptime), server_id=VALUES(server_id), server_uuid=VALUES(server_uuid), version=VALUES(version), major_version=VALUES(major_version), version_comment=VALUES(version_comment), binlog_server=VALUES(binlog_server), read_only=VALUES(read_only), binlog_format=VALUES(binlog_format), binlog_row_image=VALUES(binlog_row_image), log_bin=VALUES(log_bin), log_slave_updates=VALUES(log_slave_updates), binary_log_file=VALUES(binary_log_file), binary_log_pos=VALUES(binary_log_pos), master_host=VALUES(master_host), master_port=VALUES(master_port), slave_sql_running=VALUES(slave_sql_running), slave_io_running=VALUES(slave_io_running), replication_sql_thread_state=VALUES(replication_sql_thread_state), replication_io_thread_state=VALUES(replication_io_thread_state), has_replication_filters=VALUES(has_replication_filters), supports_oracle_gtid=VALUES(supports_oracle_gtid), oracle_gtid=VALUES(oracle_gtid), master_uuid=VALUES(master_uuid), ancestry_uuid=VALUES(ancestry_uuid), executed_gtid_set=VALUES(executed_gtid_set), gtid_mode=VALUES(gtid_mode), gtid_purged=VALUES(gtid_purged), gtid_errant=VALUES(gtid_errant), mariadb_gtid=VALUES(mariadb_gtid), pseudo_gtid=VALUES(pseudo_gtid), master_log_file=VALUES(master_log_file), read_master_log_pos=VALUES(read_master_log_pos), relay_master_log_file=VALUES(relay_master_log_file), exec_master_log_pos=VALUES(exec_master_log_pos), relay_log_file=VALUES(relay_log_file), relay_log_pos=VALUES(relay_log_pos), last_sql_error=VALUES(last_sql_error), last_io_error=VALUES(last_io_error), seconds_behind_master=VALUES(seconds_behind_master), slave_lag_seconds=VALUES(slave_lag_seconds), sql_delay=VALUES(sql_delay), num_slave_hosts=VALUES(num_slave_hosts), slave_hosts=VALUES(slave_hosts), cluster_name=VALUES(cluster_name), suggested_cluster_alias=VALUES(suggested_cluster_alias), data_center=VALUES(data_center), region=VALUES(region), physical_environment=VALUES(physical_environment), replication_depth=VALUES(replication_depth), is_co_master=VALUES(is_co_master), replication_credentials_available=VALUES(replication_credentials_available), has_replication_credentials=VALUES(has_replication_credentials), allow_tls=VALUES(allow_tls), semi_sync_enforced=VALUES(semi_sync_enforced), semi_sync_master_enabled=VALUES(semi_sync_master_enabled), semi_sync_replica_enabled=VALUES(semi_sync_replica_enabled), instance_alias=VALUES(instance_alias), last_discovery_latency=VALUES(last_discovery_latency), last_seen=VALUES(last_seen)
然后orch会每隔InstancePollSeconds秒从元数据库中获取每个被监控实例的状态,通过web端展示到页面上。

-- 探测实例失败
如果某个instance挂了,orch每隔InstancePollSeconds时间拉取实例状态失败,无法获取到最新的实例状态,也就无法用上面这条insert将实例状态存入到元数据库中,
那么orch会按下面的方式更新元数据库:
// 每隔InstancePollSeconds时间更新database_instance表的last_checked和last_check_partial_success字段
update database_instance set last_checked = NOW(), last_check_partial_success = 0 where hostname = "192.168.56.12" and port = 3306
// 每隔InstancePollSeconds+1s时间更新database_instance表的last_attempted_check字段
update database_instance set last_attempted_check = NOW() where hostname = "192.168.56.12" and port = 3306

-- 判断实例是否存活
被orch监控的实例是否正常,通过如下方式进行判断:
// instance每隔InstancePollSeconds时间通过如下方式判断某个instance是否正常
select
  ifnull(last_checked <= last_seen, 0) as is_last_check_valid
from
  database_instance
where
  hostname = "10.10.30.170"
  and port = 3306
order by
  hostname,
  port;  -- master instance是否存活的判断比较复杂,首先orch会每秒通过类似于如下方式判断master instance是否正常.
Empty set (0.00 sec)


SELECT
  MIN(
    master_instance.last_checked <= master_instance.last_seen
    and master_instance.last_attempted_check <= master_instance.last_seen + interval 6 second
  ) = 1 AS is_last_check_valid
from
  database_instance master_instance
GROUP BY
  master_instance.hostname,
  master_instance.port;  --如果is_last_check_valid为0,然后需要判断master instance对应的从库是否连得上,并且这些从库是否也连不上主库.
+---------------------+
| is_last_check_valid |
+---------------------+
|                   1 |
|                   1 |
|                   1 |
+---------------------+
3 rows in set (0.00 sec)


-- 整个SQL如下:
SELECT
  master_instance.hostname,
  master_instance.port,
  master_instance.read_only AS read_only,
  MIN(master_instance.data_center) AS data_center,
  MIN(master_instance.region) AS region,
  MIN(master_instance.physical_environment) AS physical_environment,
  MIN(master_instance.master_host) AS master_host,
  MIN(master_instance.master_port) AS master_port,
  MIN(master_instance.cluster_name) AS cluster_name,
  MIN(
    IFNULL(
      cluster_alias.alias,
      master_instance.cluster_name
    )
  ) AS cluster_alias,
  MIN(
    master_instance.last_checked <= master_instance.last_seen
    and master_instance.last_attempted_check <= master_instance.last_seen + interval 6 second
  ) = 1 AS is_last_check_valid,
  MIN(master_instance.last_check_partial_success) as last_check_partial_success,
  MIN(
    master_instance.master_host IN ("", "_")
    OR master_instance.master_port = 0
    OR substr(master_instance.master_host, 1, 2) = "//"
  ) AS is_master,
  MIN(master_instance.is_co_master) AS is_co_master,
  MIN(
    CONCAT(
      master_instance.hostname,
      ":",
      master_instance.port
    ) = master_instance.cluster_name
  ) AS is_cluster_master,
  MIN(master_instance.gtid_mode) AS gtid_mode,
  COUNT(replica_instance.server_id) AS count_replicas,
  IFNULL(
    SUM(
      replica_instance.last_checked <= replica_instance.last_seen
    ),
    0
  ) AS count_valid_slaves,
  IFNULL(
    SUM(
      replica_instance.last_checked <= replica_instance.last_seen
      AND replica_instance.slave_io_running != 0
      AND replica_instance.slave_sql_running != 0
    ),
    0
  ) AS count_valid_replicating_slaves,
  IFNULL(
    SUM(
      replica_instance.last_checked <= replica_instance.last_seen
      AND replica_instance.slave_io_running = 0
      AND replica_instance.last_io_error like "%error %connecting to master%"
      AND replica_instance.slave_sql_running = 1
    ),
    0
  ) AS count_replicas_failing_to_connect_to_master,
  MIN(master_instance.replication_depth) AS replication_depth,
  GROUP_CONCAT(
    concat(
      replica_instance.Hostname,
      ":",
      replica_instance.Port
    )
  ) as slave_hosts,
  MIN(
    master_instance.slave_sql_running = 1
    AND master_instance.slave_io_running = 0
    AND master_instance.last_io_error like "%error %connecting to master%"
  ) AS is_failing_to_connect_to_master,
  MIN(
    master_downtime.downtime_active is not null
    and ifnull(master_downtime.end_timestamp, now()) > now()
  ) AS is_downtimed,
  MIN(IFNULL(master_downtime.end_timestamp, "")) AS downtime_end_timestamp,
  MIN(
    IFNULL(
      unix_timestamp() - unix_timestamp(master_downtime.end_timestamp),
      0
    )
  ) AS downtime_remaining_seconds,
  MIN(master_instance.binlog_server) AS is_binlog_server,
  MIN(master_instance.pseudo_gtid) AS is_pseudo_gtid,
  MIN(master_instance.supports_oracle_gtid) AS supports_oracle_gtid,
  SUM(replica_instance.oracle_gtid) AS count_oracle_gtid_slaves,
  IFNULL(
    SUM(
      replica_instance.last_checked <= replica_instance.last_seen
      AND replica_instance.oracle_gtid != 0
    ),
    0
  ) AS count_valid_oracle_gtid_slaves,
  SUM(replica_instance.binlog_server) AS count_binlog_server_slaves,
  IFNULL(
    SUM(
      replica_instance.last_checked <= replica_instance.last_seen
      AND replica_instance.binlog_server != 0
    ),
    0
  ) AS count_valid_binlog_server_slaves,
  MIN(master_instance.mariadb_gtid) AS is_mariadb_gtid,
  SUM(replica_instance.mariadb_gtid) AS count_mariadb_gtid_slaves,
  IFNULL(
    SUM(
      replica_instance.last_checked <= replica_instance.last_seen
      AND replica_instance.mariadb_gtid != 0
    ),
    0
  ) AS count_valid_mariadb_gtid_slaves,
  IFNULL(
    SUM(
      replica_instance.log_bin
      AND replica_instance.log_slave_updates
    ),
    0
  ) AS count_logging_replicas,
  IFNULL(
    SUM(
      replica_instance.log_bin
      AND replica_instance.log_slave_updates
      AND replica_instance.binlog_format = "STATEMENT"
    ),
    0
  ) AS count_statement_based_loggin_slaves,
  IFNULL(
    SUM(
      replica_instance.log_bin
      AND replica_instance.log_slave_updates
      AND replica_instance.binlog_format = "MIXED"
    ),
    0
  ) AS count_mixed_based_loggin_slaves,
  IFNULL(
    SUM(
      replica_instance.log_bin
      AND replica_instance.log_slave_updates
      AND replica_instance.binlog_format = "ROW"
    ),
    0
  ) AS count_row_based_loggin_slaves,
  IFNULL(SUM(replica_instance.sql_delay > 0), 0) AS count_delayed_replicas,
  IFNULL(SUM(replica_instance.slave_lag_seconds > 10), 0) AS count_lagging_replicas,
  IFNULL(MIN(replica_instance.gtid_mode), "") AS min_replica_gtid_mode,
  IFNULL(MAX(replica_instance.gtid_mode), "") AS max_replica_gtid_mode,
  IFNULL(
    MAX(
      case
        when replica_downtime.downtime_active is not null
        and ifnull(replica_downtime.end_timestamp, now()) > now() then ""
        else replica_instance.gtid_errant
      end
    ),
    ""
  ) AS max_replica_gtid_errant,
  IFNULL(
    SUM(
      replica_downtime.downtime_active is not null
      and ifnull(replica_downtime.end_timestamp, now()) > now()
    ),
    0
  ) AS count_downtimed_replicas,
  COUNT(
    DISTINCT case
      when replica_instance.log_bin
      AND replica_instance.log_slave_updates then replica_instance.major_version
      else NULL
    end
  ) AS count_distinct_logging_major_versions
FROM
  database_instance master_instance
  LEFT JOIN hostname_resolve ON (
    master_instance.hostname = hostname_resolve.hostname
  )
  LEFT JOIN database_instance replica_instance ON (
    COALESCE(
      hostname_resolve.resolved_hostname,
      master_instance.hostname
    ) = replica_instance.master_host
    AND master_instance.port = replica_instance.master_port
  )
  LEFT JOIN database_instance_maintenance ON (
    master_instance.hostname = database_instance_maintenance.hostname
    AND master_instance.port = database_instance_maintenance.port
    AND database_instance_maintenance.maintenance_active = 1
  )
  LEFT JOIN database_instance_downtime as master_downtime ON (
    master_instance.hostname = master_downtime.hostname
    AND master_instance.port = master_downtime.port
    AND master_downtime.downtime_active = 1
  )
  LEFT JOIN database_instance_downtime as replica_downtime ON (
    replica_instance.hostname = replica_downtime.hostname
    AND replica_instance.port = replica_downtime.port
    AND replica_downtime.downtime_active = 1
  )
  LEFT JOIN cluster_alias ON (
    cluster_alias.cluster_name = master_instance.cluster_name
  )
WHERE
  database_instance_maintenance.database_instance_maintenance_id IS NULL
  AND "" IN ("", master_instance.cluster_name)
GROUP BY
  master_instance.hostname,
  master_instance.port
HAVING
  (
    MIN(
      master_instance.last_checked <= master_instance.last_seen
      and master_instance.last_attempted_check <= master_instance.last_seen + interval 6 second
    ) = 1
  ) = 0
  OR (
    IFNULL(
      SUM(
        replica_instance.last_checked <= replica_instance.last_seen
        AND replica_instance.slave_io_running = 0
        AND replica_instance.last_io_error like "%error %connecting to master%"
        AND replica_instance.slave_sql_running = 1
      ),
      0
    ) > 0
  )
  OR (
    IFNULL(
      SUM(
        replica_instance.last_checked <= replica_instance.last_seen
      ),
      0
    ) < COUNT(replica_instance.server_id)
  )
  OR (
    IFNULL(
      SUM(
        replica_instance.last_checked <= replica_instance.last_seen
        AND replica_instance.slave_io_running != 0
        AND replica_instance.slave_sql_running != 0
      ),
      0
    ) < COUNT(replica_instance.server_id)
  )
  OR (
    MIN(
      master_instance.slave_sql_running = 1
      AND master_instance.slave_io_running = 0
      AND master_instance.last_io_error like "%error %connecting to master%"
    )
  )
  OR (COUNT(replica_instance.server_id) > 0)
ORDER BY
  is_master DESC,
  is_cluster_master DESC,
  count_replicas DESC\G
*************************** 1. row ***************************
                                   hostname: db03
                                       port: 3306
                                  read_only: 0
                                data_center: 
                                     region: 
                       physical_environment: 
                                master_host: 
                                master_port: 0
                               cluster_name: db03:3306
                              cluster_alias: db03
                        is_last_check_valid: 1
                 last_check_partial_success: 1
                                  is_master: 1
                               is_co_master: 0
                          is_cluster_master: 1
                                  gtid_mode: ON
                             count_replicas: 2
                         count_valid_slaves: 2
             count_valid_replicating_slaves: 2
count_replicas_failing_to_connect_to_master: 0
                          replication_depth: 0
                                slave_hosts: db02:3306,db01:3306
            is_failing_to_connect_to_master: 0
                               is_downtimed: 0
                     downtime_end_timestamp: 
                 downtime_remaining_seconds: 0
                           is_binlog_server: 0
                             is_pseudo_gtid: 0
                       supports_oracle_gtid: 1
                   count_oracle_gtid_slaves: 2
             count_valid_oracle_gtid_slaves: 2
                 count_binlog_server_slaves: 0
           count_valid_binlog_server_slaves: 0
                            is_mariadb_gtid: 0
                  count_mariadb_gtid_slaves: 0
            count_valid_mariadb_gtid_slaves: 0
                     count_logging_replicas: 2
        count_statement_based_loggin_slaves: 0
            count_mixed_based_loggin_slaves: 0
              count_row_based_loggin_slaves: 2
                     count_delayed_replicas: 0
                     count_lagging_replicas: 0
                      min_replica_gtid_mode: ON
                      max_replica_gtid_mode: ON
                    max_replica_gtid_errant: 
                   count_downtimed_replicas: 0
      count_distinct_logging_major_versions: 1
1 row in set (0.00 sec)