---平台暂定为单节点,如有需要,可扩展为高可用集群


  1. 环境准备
    cat /etc/redhat-release
    CentOS Linux release 7.6.1810 (Core)

    cat >> /etc/security/limits.conf <<EOF
    #增大文件描述符
    root soft nofile 65535
    root hard nofile 65535
    * soft nproc 65535
    * hard nproc 65535
    * soft nofile 65535
    * hard nofile 65535
    EOF
    echo "ulimit -SHn 65535" >> /etc/rc.local
    ulimit -SHn 65535

    curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
    curl -o /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo

    mkdir /soft
    mkdir /application
  2. 安装步骤
    1.安装prometheus server
    #进入软件目录
    cd /application
    yum install git -y
    git clone https://github.com/prometheus/prometheus.git
    ln -s prometheus-2.18.0-rc.0.linux-amd64 prometheus
    cd prometheus
    make build
    ./prometheus --config.file=your_config.yml

    #配置启动文件
    vi /etc/systemd/system/prometheus.service
    [Unit]
    Description=Prometheus Monitoring System
    Documentation=Prometheus Monitoring System

    [Service]
    ExecStart=/application/prometheus/prometheus \
      --config.file=/application/prometheus/prometheus.yml \
      --web.listen-address=:9090 \
      --web.enable-lifecycle \
      --storage.tsdb.retention=30d \
      --web.read-timeout=5m \
      --web.max-connections=512 \
      --web.external-url=::9090 \
      --web.route-prefix=/application/prometheus \
      --web.user-assets=/application/prometheus \
      --web.enable-admin-api

    [Install]
    WantedBy=multi-user.target

    systemctl daemon-reload
    netstat -ltnp|grep 9090

    2.安装mysqld_exporter
    前提:创建用户
    CREATE USER 'exporter'@'localhost' IDENTIFIED BY 'XXXXXXXX' WITH MAX_USER_CONNECTIONS 3;
    GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'localhost';

    下载安装文件
    wget https://github.com/prometheus/mysqld_exporter/releases/download/v0.12.1/mysqld_exporter-0.12.1.linux-amd64.tar.gz
    cd /application
    tar xf mysqld_exporter-0.12.1.linux-amd64.tar.gz
    ln -s mysqld_exporter-0.12.1.linux-amd64 mysqld_exporter

    配置mysql用户密码
    vim .my.cnf
    [client]
    user=xxxx
    password=xxxx

    启动服务
    nohup ./mysqld_exporter --collect.auto_increment.columns --no-collect.auto_increment.columns --config.my-cnf=.my.cnf &

    安装grafana
    sudo nano /etc/yum.repos.d/grafana.repo
    vim /etc/yum.repos.d/grafana.repo
    [grafana]
    name=grafana
    baseurl=https://packages.grafana.com/enterprise/rpm
    repo_gpgcheck=1
    enabled=1
    gpgcheck=1
    gpgkey=https://packages.grafana.com/gpg.key
    sslverify=1
    sslcacert=/etc/pki/tls/certs/ca-bundle.crt

    #安装
    yum install grafana-enterprise -y
    systemctl start grafana-server
    netstat -ltnp|grep 3000

    #安装alertmanager
    wget https://github.com/prometheus/alertmanager/releases/download/v0.20.0/alertmanager-0.20.0.linux-amd64.tar.gz
    cd /application
    tar xf alertmanager-0.20.0.linux-amd64.tar.gz
    ln -s alertmanager-0.20.0.linux-amd64 alertmanager
    #稍后启动
    nohup ./alertmanager --config.file=alertmanager.yml &

3.配置文件

#主配置文件
[root@prometheus prometheus]# cat prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 127.0.0.1:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['10.0.0.15:9090']
  - job_name: 'zabbix-server'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['10.0.0.201:9100']
  - job_name: 'zabbix-server-mysql'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['10.0.0.201:9104']
  - job_name: 'test-mysql'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['10.0.0.15:9104']

#规则配置文件
groups:
- name: MySQLStatsAlert
  rules:
  - alert: MySQL is down
    expr: mysql_up == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{ $labels.instance }} MySQL is down"
      description: "MySQL database is down. This requires immediate action!"
  - alert: open files high
    expr: mysql_global_status_innodb_num_open_files > (mysql_global_variables_open_files_limit) * 0.75
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} open files high"
      description: "Open files is high. Please consider increasing open_files_limit."
  - alert: Read buffer size is bigger than max. allowed packet size
    expr: mysql_global_variables_read_buffer_size > mysql_global_variables_slave_max_allowed_packet
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} Read buffer size is bigger than max. allowed packet size"
      description: "Read buffer size (read_buffer_size) is bigger than max. allowed packet size (max_allowed_packet). This can break your replication."
  - alert: Sort buffer possibly missconfigured
    expr: mysql_global_variables_innodb_sort_buffer_size <256*1024 or mysql_global_variables_read_buffer_size > 4*1024*1024
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} Sort buffer possibly missconfigured"
      description: "Sort buffer size is either too big or too small. A good value for sort_buffer_size is between 256k and 4M."
  - alert: Thread stack size is too small
    expr: mysql_global_variables_thread_stack <196608
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} Thread stack size is too small"
      description: "Thread stack size is too small. This can cause problems when you use Stored Language constructs for example. A typical is 256k for thread_stack_size."
  - alert: Used more than 80% of max connections limited
    expr: mysql_global_status_max_used_connections > mysql_global_variables_max_connections * 0.8
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} Used more than 80% of max connections limited"
      description: "Used more than 80% of max connections limited"
  - alert: InnoDB Force Recovery is enabled
    expr: mysql_global_variables_innodb_force_recovery != 0
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} InnoDB Force Recovery is enabled"
      description: "InnoDB Force Recovery is enabled. This mode should be used for data recovery purposes only. It prohibits writing to the data."
  - alert: InnoDB Log File size is too small
    expr: mysql_global_variables_innodb_log_file_size < 16777216
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} InnoDB Log File size is too small"
      description: "The InnoDB Log File size is possibly too small. Choosing a small InnoDB Log File size can have significant performance impacts."
  - alert: InnoDB Flush Log at Transaction Commit
    expr: mysql_global_variables_innodb_flush_log_at_trx_commit != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} InnoDB Flush Log at Transaction Commit"
      description: "InnoDB Flush Log at Transaction Commit is set to a value != 1. This can lead to a loss of committed transactions in case of a power failure."
  - alert: Table definition cache too small
    expr: mysql_global_status_open_table_definitions > mysql_global_variables_table_definition_cache
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Table definition cache too small"
      description: "Your Table Definition Cache is possibly too small. If it is much too small this can have significant performance impacts!"
  - alert: Table open cache too small
    expr: mysql_global_status_open_tables >mysql_global_variables_table_open_cache * 99/100
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Table open cache too small"
      description: "Your Table Open Cache is possibly too small (old name Table Cache). If it is much too small this can have significant performance impacts!"
  - alert: Thread stack size is possibly too small
    expr: mysql_global_variables_thread_stack < 262144
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Thread stack size is possibly too small"
      description: "Thread stack size is possibly too small. This can cause problems when you use Stored Language constructs for example. A typical is 256k for thread_stack_size."
  - alert: InnoDB Buffer Pool Instances is too small
    expr: mysql_global_variables_innodb_buffer_pool_instances == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} InnoDB Buffer Pool Instances is too small"
      description: "If you are using MySQL 5.5 and higher you should use several InnoDB Buffer Pool Instances for performance reasons. Some rules are: InnoDB Buffer Pool Instance should be at least 1 Gbyte in size. InnoDB Buffer Pool Instances you can set equal to the number of cores of your machine."
  - alert: InnoDB Plugin is enabled
    expr: mysql_global_variables_ignore_builtin_innodb == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} InnoDB Plugin is enabled"
      description: "InnoDB Plugin is enabled"
  - alert: Binary Log is disabled
    expr: mysql_global_variables_log_bin != 1
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} Binary Log is disabled"
      description: "Binary Log is disabled. This prohibits you to do Point in Time Recovery (PiTR)."
  - alert: Binlog Cache size too small
    expr: mysql_global_variables_binlog_cache_size < 1048576
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Binlog Cache size too small"
      description: "Binlog Cache size is possibly too small. A value of 1 Mbyte or higher is OK."
  - alert: Binlog Statement Cache size too small
    expr: mysql_global_variables_binlog_stmt_cache_size <1048576 and mysql_global_variables_binlog_stmt_cache_size > 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Binlog Statement Cache size too small"
      description: "Binlog Statement Cache size is possibly too small. A value of 1 Mbyte or higher is typically OK."
  - alert: Binlog Transaction Cache size too small
    expr: mysql_global_variables_binlog_cache_size <1048576
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Binlog Transaction Cache size too small"
      description: "Binlog Transaction Cache size is possibly too small. A value of 1 Mbyte or higher is typically OK."
  - alert: Sync Binlog is enabled
    expr: mysql_global_variables_sync_binlog == 1
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Sync Binlog is enabled"
      description: "Sync Binlog is enabled. This leads to higher data security but on the cost of write performance."
  - alert: IO thread stopped
    expr: mysql_slave_status_slave_io_running != 1
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{ $labels.instance }} IO thread stopped"
      description: "IO thread has stopped. This is usually because it cannot connect to the Master any more."
  - alert: SQL thread stopped
    expr: mysql_slave_status_slave_sql_running == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{ $labels.instance }} SQL thread stopped"
      description: "SQL thread has stopped. This is usually because it cannot apply a SQL statement received from the master."
  - alert: SQL thread stopped
    expr: mysql_slave_status_slave_sql_running != 1
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Instance {{ $labels.instance }} SQL thread stopped"
      description: "SQL thread has stopped. This is usually because it cannot apply a SQL statement received from the master."
  - alert: Slave lagging behind Master
    expr: rate(mysql_slave_status_seconds_behind_master[1m]) >30
    for: 1m
    labels:
      severity: warning
    annotations:
      summary: "Instance {{ $labels.instance }} Slave lagging behind Master"
      description: "Slave is lagging behind Master. Please check if Slave threads are running and if there are some performance issues!"
  - alert: Slave is NOT read only(Please ignore this warning indicator.)
    expr: mysql_global_variables_read_only != 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} Slave is NOT read only"
- name: example
  rules:

  # Alert for any instance that is unreachable for more than 1 minute.
  - alert: InstanceDown
    expr: up == 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} down"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
- name: mysql
  rules:

  # Alert immediately (for: 0m) when the replication IO thread is not running.
  - alert: 主从挂了
    expr: mysql_slave_status_slave_io_running == 0
    for: 0m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} 主从"
      description: "{{ $labels.instance }} of job {{ $labels.job }} 主从挂了."

#alertmanager报警配置
global:
  resolve_timeout: 1m
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: 'xxxxxxx@qq.com'
  smtp_auth_username: 'xxxxxxx@qq.com'
  smtp_auth_password: 'xxxxxxx'
  smtp_require_tls: false

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
- name: 'web.hook'
  email_configs:
  - to: 'xxxxxxx@126.com'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']


  4. grafana配置监控项
  5. 备用配合方案 zabbix
    1.配置初始环境
    增大文件描述符 (三台都操作)
    cat >> /etc/security/limits.conf <<EOF
    root soft nofile 65535
    root hard nofile 65535
    * soft nproc 65535
    * hard nproc 65535
    * soft nofile 65535
    * hard nofile 65535
    EOF
    echo "ulimit -SHn 65535" >> /etc/rc.local
    ulimit -SHn 65535

    2.配置yum环境
    curl -o /etc/yum.repos.d/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
    curl -o /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
    rpm -Uvh https://repo.zabbix.com/zabbix/4.0/rhel/7/x86_64/zabbix-release-4.0-2.el7.noarch.rpm
    #这边要进行一个换源,要不然会被墙(最近才被墙)
    复制zabbix.repo 到其他服务器

    3.安装zabbix-server
    #只在服务端安装
    yum install zabbix-server-mysql zabbix-web-mysql zabbix-agent httpd zabbix-get -y
    #客户端只安装agent
    yum install zabbix-agent -y

    4.安装mysql数据库
    yum install mariadb-server -y
    systemctl start mariadb
    systemctl enable mariadb

    5.导入表结构
    mysql_secure_installation
    mysql -e "create database zabbix character set utf8 collate utf8_bin;"
    mysql -e "grant all privileges on zabbix.* to zabbix@localhost identified by '123456';"
    zcat /usr/share/doc/zabbix-server-mysql*/create.sql.gz | mysql zabbix

    6.修改配置文件
    vi /etc/zabbix/zabbix_server.conf
    #注意: DBPassword 必须与第5步 grant 语句中 identified by 设置的密码一致
    DBHost=localhost
    DBName=zabbix
    DBUser=zabbix
    DBPassword=1qaz@WSX
    vi /etc/httpd/conf.d/zabbix.conf
    php_value date.timezone Asia/Shanghai

    7.启动
    systemctl start zabbix-server
    systemctl enable zabbix-server
    systemctl start httpd
    systemctl enable httpd

    8.进入zabbix界面
    配置数据库用户密码
    账号 Admin 密码 zabbix

    9.导入模板
    。。。

    10.修改模板
    cp userparameter_percona_mysql.conf /etc/zabbix/zabbix_agentd.d/
    vim ss_get_mysql_stats.php #修改账号密码
    vim get_mysql_stats_wrapper.sh #修改账号密码 主从同步的

    11.解决图形字符乱码问题
    将simkai.ttf 拷贝到/usr/share/fonts/dejavu/ 下
    重新创建软链接
    ln -s /usr/share/fonts/dejavu/simkai.ttf zabbix-web-font

    12.安装测试用数据库(agent)
    略
    1.data目录755授权,zabbix授权
    2>/dev/null
    #process select super
    #SELECT, PROCESS, SUPER replication slave, replication client
  6. zabbix客户端脚本配合
    #!/bin/bash
    #
    # Zabbix agent bootstrap script (created by dhc).
    #
    # Installs zabbix-agent, php and php-mysql with yum, replaces the agent
    # configuration with the zabbix_agentd.conf shipped next to this script, and
    # installs the Percona MySQL monitoring templates from the bundled rpm.
    #
    # Files read from the directory containing this script:
    #   zabbix.repo, zabbix_agentd.conf,
    #   percona-zabbix-templates-1.1.8-1.noarch.rpm,
    #   userparameter_percona_mysql.conf, get_mysql_stats_wrapper.sh

    # Directory this script was started from; all bundled files live here.
    DIR=$(dirname "$0")
    zabbix_conf=/etc/zabbix/zabbix_agentd.conf
    yum_dir=/etc/yum.repos.d
    percona_name=percona-zabbix-templates-1.1.8-1.noarch.rpm
    scripts_dir=/var/lib/zabbix/percona/scripts
    template_file=/var/lib/zabbix/percona/templates/userparameter_percona_mysql.conf
    zabbix_keydir=/etc/zabbix/zabbix_agentd.d/
    # Host that must answer a ping before packages are installed
    # (presumably the internal yum mirror -- confirm).
    yum_ip=10.0.0.50

    # The init-script helper library is expected to provide `action`.
    if [[ -r /etc/init.d/functions ]]; then
      # shellcheck disable=SC1091
      source /etc/init.d/functions
    fi
    if ! declare -F action >/dev/null; then
      # Minimal stand-in used when the helper library is unavailable:
      # print the message, run the given command, return its status.
      action() {
        local msg=$1
        shift
        printf '%s\n' "$msg"
        "$@"
      }
    fi

    #######################################
    # Install the bundled zabbix.repo and rebuild the yum cache.
    # Globals: DIR (read), yum_dir (read)
    #######################################
    yum_repo() {
      #curl -o ${yum_dir}/CentOS-Base.repo http://mirrors.aliyun.com/repo/Centos-7.repo
      #curl -o ${yum_dir}/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
      cp "$DIR/zabbix.repo" "${yum_dir}/"
      yum clean all
      yum makecache
    }

    #######################################
    # Install zabbix-agent, php and php-mysql.
    # Exits the script with status 1 when yum_ip does not answer a ping or
    # when yum fails.
    # Globals: yum_ip (read)
    #######################################
    yum_install() {
      if ! ping -w1 -c1 "$yum_ip" &>/dev/null; then
        action "连接不到$yum_ip.... " /bin/false
        exit 1
      fi
      if ! yum install zabbix-agent php php-mysql -y; then
        action "安装失败,请手动执行 yum install zabbix-agent php php-mysql -y " /bin/false
        exit 1
      fi
    }

    #######################################
    # Replace the agent configuration with the bundled one and set Hostname to
    # this machine's hostname. Exits with status 1 when the packaged
    # configuration file is absent (taken as a sign the agent is not installed).
    # Globals: zabbix_conf (read), DIR (read)
    #######################################
    zabbix_conf() {
      if [[ ! -e "$zabbix_conf" ]]; then
        action "zabbix_agent 安装失败,请检查 " /bin/false
        exit 1
      fi
      # The original "2&>/dev/null" handed a literal "2" to rm as a second file.
      rm -f -- "$zabbix_conf" 2>/dev/null
      cp "$DIR/zabbix_agentd.conf" /etc/zabbix/
      sed -i "/^Hostname/c Hostname=$(hostname)" "$zabbix_conf"
    }

    # Start the zabbix-agent service.
    zabbix_start() {
      systemctl start zabbix-agent
    }

    # Restart the zabbix-agent service.
    zabbix_restart() {
      systemctl restart zabbix-agent
    }

    #######################################
    # Install the Percona monitoring templates rpm and put the bundled
    # userparameter file and wrapper script in place.
    # Exits with status 1 when the rpm is missing from DIR or when the template
    # file is absent after the rpm install.
    # Globals: DIR, percona_name, template_file, zabbix_keydir, scripts_dir (read)
    #######################################
    percona_moni() {
      # Look for the rpm where it is installed from, not in the current directory.
      if [[ ! -e "$DIR/$percona_name" ]]; then
        action "$DIR perconna rpm包不存在 " /bin/false
        exit 1
      fi
      rpm -ivh "$DIR/$percona_name"
      if [[ ! -e "$template_file" ]]; then
        action "$DIR perconna rpm 安装失败 " /bin/false
        exit 1
      fi
      # cp $template_file $zabbix_keydir
      cp "$DIR/userparameter_percona_mysql.conf" "$zabbix_keydir"
      rm -f -- "$scripts_dir/get_mysql_stats_wrapper.sh" \
        && cp "$DIR/get_mysql_stats_wrapper.sh" "$scripts_dir"
      chmod +x "$scripts_dir/get_mysql_stats_wrapper.sh"
    }

    # Next step: remind the operator to edit the scripts with the MySQL user,
    # password and socket.
#######################################
# Print the success banner, pause briefly, then list the files in which the
# operator still has to fill in the database credentials.
# Globals: scripts_dir (read)
#######################################
scripts_conf() {
  echo -e "============================================================="
  action "install is ok " /bin/true
  echo -e "============================================================="
  printf '\n \n \n \n \n \n'
  sleep 3
  printf '%s\n' \
    "*****请在配置文件中修改数据库信息 !!!*****" \
    " ${scripts_dir}/ss_get_mysql_stats.php (30行)" \
    " ${scripts_dir}/get_mysql_stats_wrapper.sh (19行)"
}

#######################################
# Create (or truncate) the stats cache file and hand it to the zabbix user.
#######################################
chown_file() {
  : > /tmp/localhost-mysql_cacti_stats.txt
  # user:group form; the dotted "zabbix.zabbix" form is a legacy spelling.
  chown zabbix:zabbix /tmp/localhost-mysql_cacti_stats.txt
}

#######################################
# Run the Percona stats collector once against localhost, then delete the
# stats cache file (presumably so a copy written by this run is not left
# behind for the agent -- confirm).
# NOTE(review): this function shadows the system `last` command inside the
# script.
#######################################
last() {
  /usr/bin/php -q /var/lib/zabbix/percona/scripts/ss_get_mysql_stats.php --host localhost --items gg
  rm -f -- /tmp/localhost-mysql_cacti_stats.txt
}

# Installation sequence; the order matters (install, configure, start the
# agent, add the templates, then restart so the new keys are loaded).
yum_repo
yum_install
zabbix_conf
zabbix_start
percona_moni
scripts_conf
chown_file
zabbix_restart
last