使用prometheus来监控ingress-nginx
ingress-nginx配置了metrics
通过标签去查看ingress的pod
root@guoguo-M5-Pro:/apps/k8s/prometheus# kubectl get pods -n kube-system -l app=ingress-nginx -owide
NAME READY STATUS RESTARTS AGE IP NODE
nginx-ingress-controller-ds6mq 1/1 Running 7 69d 172.17.20.115 172.17.20.115
nginx-ingress-controller-lxt8s 1/1 Running 8 69d 172.17.20.114 172.17.20.114
#分别部署在114 和115机器上 我们要监控这两个pod 的ingress-nginx
过滤一下 暴露的metrics端口号
root@guoguo-M5-Pro:/apps/k8s/prometheus# kubectl get pods -n kube-system nginx-ingress-controller-ds6mq -o yaml | egrep -A2 "port|metrics"
prometheus.io/port: "10254"
prometheus.io/scrape: "true"
creationTimestamp: "2024-06-16T10:46:41Z"
--
- --enable-metrics=false
- --v=2
env:
--
port: 10254
scheme: HTTP
initialDelaySeconds: 10
--
ports:
- containerPort: 80
hostPort: 80
--
port: 10254
scheme: HTTP
initialDelaySeconds: 10
--
sysctl -w net.ipv4.ip_local_port_range="1024 65535"
sysctl -w kernel.core_uses_pid=0
fi
[root@k8s-master1 ~]# kubectl get svc -n kube-system nginx-ingress-lb
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
nginx-ingress-lb ClusterIP 10.101.79.97 <none> 80/TCP,443/TCP,10254/TCP 69d
看下metrics指标
root@guoguo-M5-Pro:/apps/k8s/prometheus# curl 172.17.20.114:10254/metrics | tail -5f
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 6024 0 6024 0 0 2609k 0 --:--:-- --:--:-- --:--:-- 2941k
# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code.
# TYPE promhttp_metric_handler_requests_total counter
promhttp_metric_handler_requests_total{code="200"} 10
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
root@guoguo-M5-Pro:/apps/k8s/prometheus# curl 172.17.20.115:10254/metrics | tail -5f
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 6029 0 6029 0 0 3401k 0 --:--:-- --:--:-- --:--:-- 5887k
# HELP promhttp_metric_handler_requests_total Total number of scrapes by HTTP status code.
# TYPE promhttp_metric_handler_requests_total counter
promhttp_metric_handler_requests_total{code="200"} 1
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
可以看到数据
创建个ServiceMonitor
ServiceMonitor
是Prometheus Operator提供的一种自定义资源(Custom Resource, CR),用于定义Prometheus监控服务发现的目标。它允许用户指定哪些Kubernetes服务(Service)和Pod的监控数据应该被Prometheus抓取,以及抓取数据的频率、路径等配置。
root@guoguo-M5-Pro:/apps/k8s/prometheus# vim ingress-nginx-prometheus.yaml
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: ingress-nginx-monitor #监控名称,也就是prometheus ui页面上显示的名称
namespace: ingress-nginx
labels:
app: ingress-nginx
spec:
selector: #这个标签要匹配到被监控的Service的标签
matchLabels:
app: ingress-nginx
endpoints:
- port: metrics #目标服务上暴露指标的端口名称。
path: /metrics #目标服务上暴露指标的 HTTP 路径
interval: 30s #监控频率 每隔30s抓取一次
# jobLabel: app #指定应该使用哪个标签来将目标分组为 Prometheus 中的一个作业。在这里,使用 app 标签来标识和分组共享此标签的所有目标为同一作业。
namespaceSelector: #指定 ServiceMonitor 应监控哪些命名空间。
matchNames:
- ingress-nginx #仅选择 ingress-nginx 命名空间
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl get svc -n kube-system nginx-ingress-lb -oyaml
.....
.....
ports:
- name: http
port: 80
protocol: TCP
targetPort: 80
- name: https
port: 443
protocol: TCP
targetPort: 443
- name: metrics #这里就是给这个端口号取个名字,上面ServiceMonitor的spec.endpoints.port引用的就是这个名字
port: 10254
protocol: TCP
targetPort: 10254
selector:
app: ingress-nginx
sessionAffinity: None
type: ClusterIP
status:
loadBalancer: {}
新创建的prometheus 都会遇到一个权限的报错
现在prometheus ui 页面是看不到 监控项的
查看报错
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl -n monitoring logs prometheus-k8s-0 -c prometheus
......
......
level=error ts=2024-08-25T07:07:41.107Z caller=klog.go:96 component=k8s_client_runtime func=ErrorDepth msg="pkg/mod/k8s.io/client-go@v0.20.5/tools/cache/reflector.go:167: Failed to watch *v1.Endpoints: failed to list *v1.Endpoints: endpoints is forbidden: User \"system:serviceaccount:monitoring:prometheus-k8s\" cannot list resource \"endpoints\" in API group \"\" in the namespace \"ingress-nginx\""
# 当看到forbidden 就是权限问题
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl -n monitoring logs prometheus-k8s-1 -c prometheus
......
......
level=error ts=2024-08-25T07:09:33.891Z caller=klog.go:96 component=k8s_client_runtime func=ErrorDepth msg="pkg/mod/k8s.io/client-go@v0.20.5/tools/cache/reflector.go:167: Failed to watch *v1.Endpoints: failed to list *v1.Endpoints: endpoints is forbidden: User \"system:serviceaccount:monitoring:prometheus-k8s\" cannot list resource \"endpoints\" in API group \"\" in the namespace \"ingress-nginx\""
去修改prometheus 的集群角色clusterrole
root@guoguo-M5-Pro:/apps/k8s/prometheus/servicemonitor# kubectl edit clusterrole prometheus-k8s
......
...... #rules 部分改为下面 权限
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
- nodes/proxy
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- configmaps
- nodes/metrics
verbs:
- get
- nonResourceURLs:
- /metrics
verbs:
- get
登录prometheus ui网站 就可以看到了
serviceMonitor/ingress-nginx/ingress-nginx-monitor/0 (2/2 up)
yaml配置文件也修改下
vim kube-prometheus/manifests/prometheus-clusterRole.yaml
改为
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app.kubernetes.io/component: prometheus
app.kubernetes.io/name: prometheus
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 2.26.0
name: prometheus-k8s
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
- nodes/proxy
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- configmaps
- nodes/metrics
verbs:
- get
- nonResourceURLs:
- /metrics
verbs:
- get
这样就完成了