👨🏻🎓博主介绍:大家好!我是李大白,一名容器运维工程师,热爱分享知识🌟
🌈擅长领域:云原生、数据库、自动化运维
🙏🏻如果本文章对小伙伴们有帮助的话,🍭关注+👍🏻点赞+🗣评论+📦收藏!
🤝如果文章描述有误,恳请各位大佬指正,在此感谢!!!
🍂 落叶而知秋,博闻而强识!
📕 精品专栏:Harbor进阶实战(企业级)
一、Metrics简介
metrics-server 是一个集群范围内的资源使用数据聚合工具。它只负责采集并展示数据,并不提供数据存储服务,主要关注的是资源度量 API 的实现,比如 CPU、文件描述符、内存、请求延时等指标。metrics-server 收集的数据供 k8s 集群内部使用,如 kubectl、hpa、scheduler 等。
metrics-server 从 kubelet 中获取资源指标,并通过 Metrics API 在 Kubernetes API 服务器中公开它们,以供 HPA 和 VPA 使用。
如果未安装 metrics 数据采集插件,执行 kubectl top nodes
将会报错:
[root@master ~]# kubectl top nodes
error: Metrics API not available
二、部署 metrics-server 服务
2.1 修改api-server配置
注意:
这个是 k8s 在 1.17 的新特性,如果是 1.16 版本的可以不用添加,1.17 以后要添加。这个参数用于配合 Aggregation(API 聚合层)使用,聚合层允许在不修改 Kubernetes 核心代码的同时扩展 Kubernetes API。
生产环境多master要逐个修改,验证成功后再修改下一个master上的api-server,否则会出现异常。
- 在/etc/kubernetes/manifests 里面改一下 apiserver 的配置
# vim /etc/kubernetes/manifests/kube-apiserver.yaml
apiVersion: v1
kind: Pod
metadata:
  annotations:
    kubeadm.kubernetes.io/kube-apiserver.advertise-address.endpoint: 192.168.2.60:6443
  creationTimestamp: null
  labels:
    component: kube-apiserver
    tier: control-plane
  name: kube-apiserver
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-apiserver
    - --advertise-address=192.168.2.60
    - --enable-aggregator-routing=true
......
- 重新更新 apiserver 配置:
[root@sc-master1 ~]# kubectl apply -f /etc/kubernetes/manifests/kube-apiserver.yaml
[root@sc-master1 ~]# kubectl get pods -n kube-system
NAME READY STATUS RESTARTS AGE
etcd-sc-master1 1/1 Running 0 33m
kube-apiserver 0/1 CrashLoopBackOff 6 10m
kube-apiserver-sc-master1 1/1 Running 0 90s
[root@sc-master1 ~]# kubectl delete pod kube-apiserver -n kube-system
2.2 下载镜像
需要的镜像是:
- k8s.gcr.io/metrics-server-amd64:v0.3.6
- k8s.gcr.io/addon-resizer:1.8.4
国内无法下载k8s.gcr.io的镜像,我们就从阿里云镜像站下载。
$ sudo docker pull registry.aliyuncs.com/google_containers/metrics-server-amd64:v0.3.6
$ sudo docker pull registry.aliyuncs.com/google_containers/addon-resizer:1.8.4
$ sudo docker tag registry.aliyuncs.com/google_containers/metrics-server-amd64:v0.3.6 k8s.gcr.io/metrics-server-amd64:v0.3.6
$ sudo docker tag registry.aliyuncs.com/google_containers/addon-resizer:1.8.4 k8s.gcr.io/addon-resizer:1.8.4
$ sudo docker rmi registry.aliyuncs.com/google_containers/metrics-server-amd64:v0.3.6
$ sudo docker rmi registry.aliyuncs.com/google_containers/addon-resizer:1.8.4
2.3 编写yaml资源清单文件
$ vim metrics.yaml
---
apiVersion: v1
kind: ServiceAccount #创建SA服务账户,然后给该用户进行RBAC授权
metadata:
  name: metrics-server
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: metrics-server-auth-reader
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: metrics-server:system:auth-delegator
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: system:metrics-server
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  - nodes/stats
  - namespaces
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - "extensions"
  resources:
  - deployments
  verbs:
  - get
  - list
  - update
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: system:metrics-server
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: metrics-server-config
  namespace: kube-system
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: EnsureExists
data:
  NannyConfiguration: |-
    apiVersion: nannyconfig/v1alpha1
    kind: NannyConfiguration
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: metrics-server
  namespace: kube-system
  labels:
    k8s-app: metrics-server
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
    version: v0.3.6
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
      version: v0.3.6
  template:
    metadata:
      name: metrics-server
      labels:
        k8s-app: metrics-server
        version: v0.3.6
      annotations:
        scheduler.alpha.kubernetes.io/critical-pod: ''
        seccomp.security.alpha.kubernetes.io/pod: 'docker/default'
    spec:
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      containers:
      - name: metrics-server
        image: k8s.gcr.io/metrics-server-amd64:v0.3.6
        imagePullPolicy: IfNotPresent
        command:
        - /metrics-server
        - --metric-resolution=30s
        - --kubelet-preferred-address-types=InternalIP
        - --kubelet-insecure-tls
        ports:
        - containerPort: 443
          name: https
          protocol: TCP
      - name: metrics-server-nanny
        image: k8s.gcr.io/addon-resizer:1.8.4
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 100m
            memory: 300Mi
          requests:
            cpu: 5m
            memory: 50Mi
        env:
        - name: MY_POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: MY_POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        volumeMounts:
        - name: metrics-server-config-volume
          mountPath: /etc/config
        command:
        - /pod_nanny
        - --config-dir=/etc/config
        - --cpu=300m
        - --extra-cpu=20m
        - --memory=200Mi
        - --extra-memory=10Mi
        - --threshold=5
        - --deployment=metrics-server
        - --container=metrics-server
        - --poll-period=300000
        - --estimator=exponential
        - --minClusterSize=2
      volumes:
      - name: metrics-server-config-volume
        configMap:
          name: metrics-server-config
      tolerations:
      - key: "CriticalAddonsOnly"
        operator: "Exists"
      - key: node-role.kubernetes.io/master
        effect: NoSchedule
---
apiVersion: v1
kind: Service
metadata:
  name: metrics-server
  namespace: kube-system
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
    kubernetes.io/cluster-service: "true"
    kubernetes.io/name: "Metrics-server"
spec:
  selector:
    k8s-app: metrics-server
  ports:
  - port: 443
    protocol: TCP
    targetPort: https
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  name: v1beta1.metrics.k8s.io
  labels:
    kubernetes.io/cluster-service: "true"
    addonmanager.kubernetes.io/mode: Reconcile
spec:
  service:
    name: metrics-server
    namespace: kube-system
  group: metrics.k8s.io
  version: v1beta1
  insecureSkipTLSVerify: true
  groupPriorityMinimum: 100
  versionPriority: 100
2.4 更新资源清单文件
[root@master ~]# kubectl apply -f metrics.yaml
clusterrolebinding.rbac.authorization.k8s.io/metrics-server:system:auth-delegator created
rolebinding.rbac.authorization.k8s.io/metrics-server-auth-reader created
serviceaccount/metrics-server created
clusterrole.rbac.authorization.k8s.io/system:metrics-server created
clusterrolebinding.rbac.authorization.k8s.io/system:metrics-server created
configmap/metrics-server-config created
deployment.apps/metrics-server created
service/metrics-server created
apiservice.apiregistration.k8s.io/v1beta1.metrics.k8s.io created
2.5 服务状态检查
- 查看Pod状态
[root@master ~]# kubectl -n kube-system get pods -owide | grep metrics
metrics-server-6697ccf6fb-pxsz7 2/2 Running 0 5m26s 10.244.167.132 node <none> <none>
- 查看Metrics Service
[root@master ~]# kubectl -n kube-system get service
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kube-dns ClusterIP 10.96.0.10 <none> 53/UDP,53/TCP,9153/TCP 22h
metrics-server ClusterIP 10.106.165.248 <none> 443/TCP 7m15s
三、Metrics使用
Metrics主要是配合监控系统使用,例如Prometheus监控系统。
3.1 查看集群节点资源
[root@master ~]# kubectl top nodes
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
master 120m 6% 1225Mi 66%
node 70m 3% 821Mi 44%
3.2 查看指定命名空间下Pod占用资源
[root@master ~]# kubectl -n kube-system top pods
NAME CPU(cores) MEMORY(bytes)
calico-kube-controllers-755f6449f-882x7 1m 20Mi
calico-node-bw4t2 18m 94Mi
calico-node-lf66z 19m 93Mi
coredns-7f89b7bc75-c7h6x 2m 15Mi
coredns-7f89b7bc75-zmfjn 2m 14Mi
etcd-master 7m 35Mi
kube-apiserver-master 31m 331Mi
kube-controller-manager-master 11m 51Mi
kube-proxy-n65fb 1m 42Mi
kube-proxy-tdxn4 1m 21Mi
kube-scheduler-master 2m 25Mi
metrics-server-6697ccf6fb-pxsz7 1m 16Mi