视频:《Orchestrator 管理与实战》
基础配置
{
"DiscoverByShowSlaveHosts": ture,
"HostnameResolveMethod": "default",
"MySQLHostnameResolveMethod": "@@hostname",
"DetectInstanceAliasQuery":"select @@hostname"
"DetectClusterAliasQuery": "select ifnull(max(cluster_name), '') as cluster_alias from meta.cluster where anchor=1",
"DetectDataCenterQuery": "select ifnull(max(data_center), '') as DC from meta.cluster where anchor=1",
"AuthenticationMethod": "multi",
"HTTPAuthUser": "baiding",
"HTTPAuthPassword": "baiding@12"
}
创建元数据表
CREATE TABLE `cluster` (
`anchor` tinyint(4) NOT NULL,
`cluster_name` varchar(128) CHARACTER SET ascii NOT NULL DEFAULT '',
`data_center` varchar(128) NOT NULL,
PRIMARY KEY (`anchor`)
) ENGINE=InnoDB;
GRANT SELECT ON meta.* TO 'orchestrator'@'192.168.%';
在mysql的主、从配置 report_host,并重启
实例发现周期
通过新发现的集群时,发现从节点的耗时判断
{
"InstancePollSeconds": 5;
}
数据库日志
主库:
2023-09-11T03:03:32.407442Z 3573 Query select @@global.hostname, ifnull(@@global.report_host, ''), @@global.server_id, @@global.version, @@global.version_comment, @@global.read_only, @@global.binlog_format, @@global.log_bin, @@global.log_slave_updates
2023-09-11T03:03:32.408604Z 3573 Query show master status
2023-09-11T03:03:32.409034Z 3574 Query show global status like 'Uptime'
2023-09-11T03:03:32.411188Z 3574 Query show global status like 'rpl_semi_sync_%'
2023-09-11T03:03:32.412468Z 3576 Query show slave status
2023-09-11T03:03:32.413280Z 3573 Query show global variables like 'rpl_semi_sync_%'
2023-09-11T03:03:32.415394Z 3574 Query select @@global.gtid_mode, @@global.server_uuid, @@global.gtid_executed, @@global.gtid_purged, @@global.master_info_repository = 'TABLE', @@global.binlog_row_image
2023-09-11T03:03:32.416230Z 3576 Query select count(*) > 0 and MAX(User_name) != '' from mysql.slave_master_info
2023-09-11T03:03:32.417379Z 3573 Query show slave hosts
2023-09-11T03:03:32.419464Z 3576 Query select ifnull(max(data_center), '') as DC from meta.cluster where anchor=1
2023-09-11T03:03:32.420980Z 3573 Query select ifnull(max(cluster_name), '') as cluster_alias from meta.cluster where anchor=1
2023-09-11T03:03:32.421955Z 3574 Query select @@hostname
从库:
2023-09-11T03:02:51.081283Z 3551 Query select @@global.hostname, ifnull(@@global.report_host, ''), @@global.server_id, @@global.version, @@global.version_comment, @@global.read_only, @@global.binlog_format, @@global.log_bin, @@global.log_slave_updates
2023-09-11T03:02:51.081658Z 3548 Query show global status like 'Uptime'
2023-09-11T03:02:51.084005Z 3548 Query show global variables like 'rpl_semi_sync_%'
2023-09-11T03:02:51.085430Z 3547 Query show slave status
2023-09-11T03:02:51.085954Z 3551 Query show master status
2023-09-11T03:02:51.087188Z 3551 Query select @@global.gtid_mode, @@global.server_uuid, @@global.gtid_executed, @@global.gtid_purged, @@global.master_info_repository = 'TABLE', @@global.binlog_row_image
2023-09-11T03:02:51.088125Z 3548 Query show global status like 'rpl_semi_sync_%'
2023-09-11T03:02:51.089905Z 3547 Query select count(*) > 0 and MAX(User_name) != '' from mysql.slave_master_info
2023-09-11T03:02:51.090989Z 3551 Query show slave hosts
2023-09-11T03:02:51.092919Z 3548 Query select @@hostname
2023-09-11T03:02:51.093921Z 3547 Query select
substring_index(host, ':', 1) as slave_hostname
from
information_schema.processlist
where
command IN ('Binlog Dump', 'Binlog Dump GTID')
2023-09-11T03:02:51.094713Z 3551 Query select ifnull(max(data_center), '') as DC from meta.cluster where anchor=1
2023-09-11T03:02:51.096395Z 3548 Query select gtid_subtract('84a24ba0-dabf-11ec-b079-080027a2a57f:1', '')
故障检测频率
在故障期间,控制钩子函数执行的频率
{
"FailureDetectionPeriodBlockMinutes":1,
"OnFailureDetectionProcesses": [
"echo `date +'F% T%'` >> /tmp/recovery.log",
"echo 'Detected {failureType} on {failureCluster}. Affected replicas: {countReplicas}' >> /tmp/recovery.log",
],
}
读超时判断
触发延时参数设置:"DetectInstanceAliasQuery": "select @@hostname from (select sleep(10)) a",
{
"ReasonableInstanceCheckSeconds": --会触发 LastCheckValid: false
"MySQLDiscoveryReadTimeoutSeconds": --在实例上执行命令的超时时间,只是提醒 I/O timeout
"MySQLTopologyReadTimeoutSeconds": --发现拓扑命令的超时时间
"MySQLConnectTimeoutSeconds": --指连接所有数据库(backend 和 topology)的超时时间
}
主从延时判断
通过设置 CHANGE MASTER TO MASTER_DELAY = 180; 来影响 Seconds_Behind_Master 延时
{
"ReasonableReplicationLagSeconds": 60,
}