1.Prometheus部署

prometheus配置环比 prometheus部署_docker

1.2.Prometheus安装(二进制)

tar -zxvf prometheus-2.17.1.linux-amd64.tar.gz
ln -sv /usr/local/src/prometheus-2.17.1.linux-amd64 /usr/local/prometheus

root@prometheus-server:/usr/local# cd /usr/local/prometheus
root@prometheus-server:/usr/local/prometheus# ll
total 142932
drwxr-xr-x 5 3434 3434     4096 Sep  6 11:38 ./
drwxr-xr-x 3 root root     4096 Sep  6 11:35 ../
drwxr-xr-x 2 3434 3434     4096 Mar 27 02:22 console_libraries/
drwxr-xr-x 2 3434 3434     4096 Mar 27 02:22 consoles/
drwxr-xr-x 3 root root     4096 Sep  6 11:38 data/
-rw-r--r-- 1 3434 3434    11357 Mar 27 02:22 LICENSE
-rw-r--r-- 1 3434 3434     3184 Mar 27 02:22 NOTICE
-rwxr-xr-x 1 3434 3434 84338005 Mar 27 00:20 prometheus*
-rw-r--r-- 1 3434 3434      926 Mar 27 02:22 prometheus.yml
-rwxr-xr-x 1 3434 3434 48235996 Mar 27 00:22 promtool*
-rwxr-xr-x 1 3434 3434 13732141 Mar 27 00:22 tsdb*
root@prometheus-server:/usr/local/prometheus#

1.3 创建Prometheus脚本

root@prometheus-server:/usr/local# cat /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target
[Service]
Restart=on-failure
WorkingDirectory=/usr/local/prometheus/
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml
[Install]
WantedBy=multi-user.target
root@prometheus-server:/usr/local#

1.4 启动prometheus服务

systemctl start  prometheus
systemctl enable prometheus


root@prometheus-server:/usr/local# systemctl status  prometheus
● prometheus.service - Prometheus Server
   Loaded: loaded (/etc/systemd/system/prometheus.service; enabled; vendor preset: enabled)
   Active: active (running) since Sun 2020-09-06 11:38:03 CST; 18min ago
     Docs: https://prometheus.io/docs/introduction/overview/
 Main PID: 2409 (prometheus)
    Tasks: 13 (limit: 4915)
   CGroup: /system.slice/prometheus.service
           └─2409 /usr/local/prometheus/prometheus --

Sep 06 11:38:03 prometheus-server prometheus[2409]: level=info ts=2020-09-06T03:38:03.282Z caller=head.go:624 component=tsdb msg="WAL segment loaded" segment=0 maxSegment=0
Sep 06 11:38:03 prometheus-server prometheus[2409]: level=info ts=2020-09-06T03:38:03.282Z caller=head.go:627 component=tsdb msg="WAL replay completed" duration=664.654µs
Sep 06 11:38:03 prometheus-server prometheus[2409]: level=info ts=2020-09-06T03:38:03.286Z caller=main.go:683 fs_type=EXT4_SUPER_MAGIC
Sep 06 11:38:03 prometheus-server prometheus[2409]: level=info ts=2020-09-06T03:38:03.287Z caller=main.go:684 msg="TSDB started"
Sep 06 11:38:03 prometheus-server prometheus[2409]: level=info ts=2020-09-06T03:38:03.287Z caller=main.go:788 msg="Loading configuration file" filename=prometheus.yml
Sep 06 11:38:04 prometheus-server prometheus[2409]: level=info ts=2020-09-06T03:38:04.066Z caller=main.go:816 msg="Completed loading of configuration file" filename=prometheus.yml
Sep 06 11:38:04 prometheus-server prometheus[2409]: level=info ts=2020-09-06T03:38:04.067Z caller=main.go:635 msg="Server is ready to receive web requests."
Sep 06 11:38:07 prometheus-server systemd[1]: /etc/systemd/system/prometheus.service:9: Unknown lvalue 'config.file' in section 'Service'
Sep 06 11:49:49 prometheus-server systemd[1]: /etc/systemd/system/prometheus.service:9: Unknown lvalue 'config.file' in section 'Service'
Sep 06 11:50:06 prometheus-server systemd[1]: /etc/systemd/system/prometheus.service:9: Unknown lvalue 'config.file' in section 'Service'
root@prometheus-server:/usr/local#

1.5 访问prometheus页面

prometheus配置环比 prometheus部署_docker_02

2.node exporter安装

  • 在每个master、node、etcd节点上都要安装node exporter
tar xvf node_exporter-0.18.1.linux-amd64.tar.gz
ln -sv /usr/local/src/node_exporter-0.18.1.linux-amd64 /usr/local/node_exporter

root@node1:/usr/local/src# cd /usr/local/node_exporter
root@node1:/usr/local/node_exporter# ls
LICENSE  node_exporter  NOTICE
root@node1:/usr/local/node_exporter# ll
total 16508
drwxr-xr-x 2 3434 3434     4096 Jun  5  2019 ./
drwxr-xr-x 3 root root     4096 Sep  6 10:40 ../
-rw-r--r-- 1 3434 3434    11357 Jun  5  2019 LICENSE
-rwxr-xr-x 1 3434 3434 16878582 Jun  5  2019 node_exporter*
-rw-r--r-- 1 3434 3434      463 Jun  5  2019 NOTICE
root@node1:/usr/local/node_exporter#

2.1 创建node-exporter服务脚本

root@node2:/usr/local/node_exporter# cat /etc/systemd/system/node-exporter.service
[Unit]
Description=Prometheus Node Exporter
After=network.target
[Service]
ExecStart=/usr/local/node_exporter/node_exporter
[Install]
WantedBy=multi-user.target
root@node2:/usr/local/node_exporter#

2.2 启动node-exporter服务

systemctl daemon-reload
systemctl restart node-exporter
systemctl enable node-exporter

2.3 访问node-exporter页面

prometheus配置环比 prometheus部署_docker_03

2.4 prometheus采集node指标数据

root@prometheus-server:/usr/local/prometheus# cat prometheus.yml
# my global config
global:
  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'k8s-master'
    static_configs:
    - targets: ['172.16.62.201:9100','172.16.62.202:9100','172.16.62.203:9100']
  - job_name: 'k8s-node'
    static_configs:
    - targets: ['172.16.62.207:9100','172.16.62.208:9100','172.16.62.209:9100']
  - job_name: 'k8s-etcd'
    static_configs:
    - targets: ['172.16.62.210:9100','172.16.62.211:9100','172.16.62.212:9100']

2.5 重启服务

systemctl restart prometheus

2.6 访问prometheus

prometheus配置环比 prometheus部署_Server_04

2.7 prometheus验证node节点监控数据

prometheus配置环比 prometheus部署_prometheus配置环比_05

3.配置grafana

  • 链接

https://grafana.com/grafana/download

3.1 安装

sudo apt-get install -y adduser libfontconfig1
wget https://dl.grafana.com/oss/release/grafana_6.7.2_amd64.deb
sudo dpkg -i grafana_6.7.2_amd64.deb
apt --fix-broken install -y

3.2 配置文件grafana

root@prometheus-server:/etc/grafana# grep ^[a-z] grafana.ini 
protocol = http
http_addr =0.0.0.0
http_port = 3000

3.3 启动服务

systemctl start grafana-server
systemctl enable  grafana-server

3.4 访问grafana

prometheus配置环比 prometheus部署_linux_06

3.4.1 添加数据源选择prometheus

prometheus配置环比 prometheus部署_linux_07

3.4.2 保存

prometheus配置环比 prometheus部署_docker_08

3.4.3 import 模板
  • 找到dashboard

prometheus配置环比 prometheus部署_linux_09

3.4.4 导入 模板8919

prometheus配置环比 prometheus部署_prometheus配置环比_10

3.4.5 导入成功

prometheus配置环比 prometheus部署_Server_11

3.4.6 安装插件
#列出插件
 grafana-cli plugins list-remote
#安装插件grafana-piechart-panel 饼图
root@prometheus-server:/etc/grafana# grafana-cli plugins install grafana-piechart-panel
installing grafana-piechart-panel @ 1.6.0
from: https://grafana.com/api/plugins/grafana-piechart-panel/versions/1.6.0/download
into: /var/lib/grafana/plugins

✔ Installed grafana-piechart-panel successfully 

Restart grafana after installing plugins . <service grafana-server restart>
#重启服务
root@prometheus-server:/etc/grafana# service grafana-server restart
root@prometheus-server:/etc/grafana#

4.监控pod资源

prometheus配置环比 prometheus部署_docker_12

4.1 准备cadvisor镜像

cadvisor镜像准备

docker load -i cadvisor_v0.36.0.tar.gz
docker tag gcr.io/google_containers/cadvisor:v0.36.0 harbor.haostack.com/prometheus/gcr.io/google_containers/cadvisor:v0.36.0
docker push  harbor.haostack.com/prometheus/gcr.io/google_containers/cadvisor:v0.36.0

4.2 安装cadvisor

  • 在每个node节点上都要安装cadvisor
root@prometheus-server:/data# docker run \
> --volume=/:/rootfs:ro \
> --volume=/var/run:/var/run:rw \
> --volume=/sys:/sys:ro \
> --volume=/var/lib/docker/:/var/lib/docker:ro \
> --volume=/dev/disk/:/dev/disk:ro \
> --publish=8080:8080 \
> --detach=true \
> --name=cadvisor \
> harbor.haostack.com/prometheus/gcr.io/google_containers/cadvisor:v0.36.0
1c3dba5e036df2c668a4018b5011c58ffbbe7de84b28953682c2559912eab3d0

4.3 访问cadvisor web页面

prometheus配置环比 prometheus部署_prometheus配置环比_13

prometheus配置环比 prometheus部署_Server_14

prometheus配置环比 prometheus部署_linux_15

4.4 利用prometheus采集cadvisor数据

4.4.1 添加job_name
  - job_name: 'k8s-pods-cadvisor'
    static_configs:
    - targets: ['172.16.62.207:8080','172.16.62.208:8080','172.16.62.209:8080']
4.4.2 重启prometheus
systemctl restart prometheus
4.4.3 验证数据

prometheus配置环比 prometheus部署_Server_16

4.5 grafana添加pod模板

4.5.1 8588

prometheus配置环比 prometheus部署_docker_17

4.5.2 导入模板8588

prometheus配置环比 prometheus部署_linux_18

4.5.3 导入模板395

prometheus配置环比 prometheus部署_prometheus配置环比_19

4.5.4 导入模板893

prometheus配置环比 prometheus部署_docker_20

5 Prometheus报警配置

prometheus配置环比 prometheus部署_docker_21

5.1 alertmanager安装

tar -xvf alertmanager-0.20.0.linux-amd64.tar.gz -C /usr/local/src/
ln -sv /usr/local/src/alertmanager-0.20.0.linux-amd64 /usr/local/alertmanager

5.2 配置alertmanager

  • https://prometheus.io/docs/alerting/configuration/ #官方配置文档
root@prometheus-server:/usr/local/alertmanager# cat alertmanager.yml
global:
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '111111111@qq.com'
  smtp_auth_username: '11111111@qq.com'
  smtp_auth_password: '********'
  smtp_hello: '@qq.com'
  smtp_require_tls: false

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
- name: 'web.hook'
  #webhook_configs:
  #- url: 'http://127.0.0.1:5001/'
  email_configs:
    - to: '66666666@qq.com'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

5.3 启动alertmanager服务

5.3.1 二进制方式启动
./alertmanager --config.file=./alertmanager.yml
5.3.2 脚本方式启动
root@prometheus-server:/usr/local/alertmanager# cat /etc/systemd/system/alertmanager.service
[Unit]
Description=Prometheus Alertmanager
Documentation=https://prometheus.io/docs/alerting/latest/alertmanager/
After=network.target
[Service]
Restart=on-failure
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml
[Install]
WantedBy=multi-user.target
root@prometheus-server:/usr/local/alertmanager#

5.4 alertmanager 页面访问

prometheus配置环比 prometheus部署_linux_22

5.5 创建报警规则文件

root@prometheus-server:/usr/local/prometheus# cat rule-uat-k8s.yml 
groups:
  - name: uat_pod.rules
    rules:
       - alert: Pod_all_cpu_usage
         expr: (sum by(name)(rate(container_cpu_usage_seconds_total{image!=""}[5m]))*100) > 10
         for: 5m
         labels:
            severity: critical
            service: pods
         annotations:
            description: 容器 {{ $labels.name }} CPU 资源利用率大于 75% , (current value is {{ $value }})
            summary: Dev CPU 负载告警

       - alert: Pod_all_memory_usage
         expr: sort_desc(avg by(name)(irate(container_memory_usage_bytes{name!=""}[5m]))*100) > 1024*10^3*2
         for: 10m
         labels:
           severity: critical
         annotations:
           description: 容器 {{ $labels.name }} Memory 资源利用率大于 2G , (current value is {{ $value }})
           summary: Dev Memory 负载告警

       - alert: Pod_all_network_receive_usage
         expr: sum by (name) (irate(container_network_receive_bytes_total{container_name="POD"}[1m])) > 1024*1024*50
         for: 10m
         labels:
           severity: critical
         annotations:
           description: 容器 {{ $labels.name }} network_receive 资源利用率大于 50M , (current value is {{ $value }})

root@prometheus-server:/usr/local/prometheus#
5.5.1 prometheus配置文件
# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
        - 172.16.62.213:9093   #指定alertmanager地址

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"
    - "/usr/local/prometheus/rule-uat-k8s.yml"   #指定rules路径
5.5.2 Alerts状态

prometheus配置环比 prometheus部署_prometheus配置环比_23

5.5.3 prometheus web界面验证报警规则

prometheus配置环比 prometheus部署_docker_24

5.5.4 验证报警邮件

prometheus配置环比 prometheus部署_Server_25

6 prometheus监控haproxy

6.1 部署haproxy_exporter

tar -xvf haproxy_exporter-0.10.0.linux-amd64.tar.gz
ln -sv /usr/local/src/haproxy_exporter-0.10.0.linux-amd64 /usr/local/haproxy_exporter


./haproxy_exporter --haproxy.scrape-uri="http://haadmin:123456@172.16.62.204:9999/haproxy-status;csv"

6.2 脚本启动

root@ha1:/usr/local/haproxy_exporter# cat /etc/systemd/system/haproxy-exporter.service
[Unit]
Description=Prometheus haproxy Exporter
After=network.target
[Service]
ExecStart=/usr/local/haproxy_exporter/haproxy_exporter --haproxy.scrape-uri="http://haadmin:123456@172.16.62.204:9999/haproxy-status;csv"
[Install]
WantedBy=multi-user.target

#启动服务
systemctl start haproxy-exporter
systemctl status haproxy-exporter

6.3 验证haproxy_exporter web 数据

prometheus配置环比 prometheus部署_Server_26

6.4 prometheus server端添加haproxy数据采集

  - job_name: 'haproxy-exporter'
    static_configs:
    - targets: ['172.16.62.204:9101']
6.4.1 重启服务
systemctl restart  prometheus

6.5 查看数据

prometheus配置环比 prometheus部署_Server_27

6.6 grafana导入haproxy插件模板

  • 导入模板2428

prometheus配置环比 prometheus部署_prometheus配置环比_28

  • 导入模板367

prometheus配置环比 prometheus部署_docker_29