1、 准备镜像包
在有网络机器下载镜像:
docker pull grafana/grafana:5.3.4
docker save -o grafana.tar grafana/grafana:5.3.4
docker pull busybox
docker save -o busybox.tar busybox
2、 创建PV
mkdir -p /data/testgd
vi /etc/exports
添加
/data/testgd *(rw,sync,no_root_squash)
chmod755 /etc/testgd
systemctl restart nfs
创建文件grafana-volume.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
name: grafana
spec:
capacity:
storage: 1Gi
accessModes:
- ReadWriteOnce
persistentVolumeReclaimPolicy: Recycle
nfs:
server: 192.168.100.94
path: /data/testgd
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: grafana
namespace: kube-ops
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
kubectl apply –f grafana-volume.yaml
3、 使用Job更改目录所属用户
grafana5.1版本后 groupid 更改了引起的问题,需要更改目录所属用户
拷贝busybox.tar,加载busybox
docker loag -i busybox
docker tag busybox 192.168.100.94:80/busybox
docker push 192.168.100.94:80/busybox
创建grafana-chown-job.yaml
apiVersion: batch/v1
kind: Job
metadata:
name: grafana-chown
namespace: kube-ops
spec:
template:
spec:
restartPolicy: Never
containers:
- name: grafana-chown
command: ["chown", "-R", "472:472", "/var/lib/grafana"]
image: 192.168.100.94:80/busybox
imagePullPolicy: IfNotPresent
volumeMounts:
- name: storage
subPath: grafana
mountPath: /var/lib/grafana
volumes:
- name: storage
persistentVolumeClaim:
claimName: grafana
kubectl apply -f grafana-chown-job.yaml
4、 创建grafana
拷贝grafana.tar
docker load –i grafana.tar
docker tag grafana/grafana:5.3.4 192.168.100.94:80/grafana:5.3.4
docker push 192.168.100.94:80/grafana:5.3.4
创建grafana-deploy.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: grafana
namespace: kube-ops
labels:
app: grafana
spec:
revisionHistoryLimit: 10
template:
metadata:
labels:
app: grafana
spec:
containers:
- name: grafana
image: 192.168.100.94:80/grafana:5.3.4
imagePullPolicy: IfNotPresent
ports:
- containerPort: 3000
name: grafana
env:
- name: GF_SECURITY_ADMIN_USER
value: admin
- name: GF_SECURITY_ADMIN_PASSWORD
value: admin321
readinessProbe:
failureThreshold: 10
httpGet:
path: /api/health
port: 3000
scheme: HTTP
initialDelaySeconds: 60
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 30
livenessProbe:
failureThreshold: 3
httpGet:
path: /api/health
port: 3000
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
cpu: 100m
memory: 256Mi
requests:
cpu: 100m
memory: 256Mi
volumeMounts:
- mountPath: /var/lib/grafana
subPath: grafana
name: storage
securityContext:
fsGroup: 472
runAsUser: 472
volumes:
- name: storage
persistentVolumeClaim:
claimName: grafana
kubectl apply –f grafana-deploy.yaml
5、 创建service
创建grafana-svc.yaml
apiVersion: v1
kind: Service
metadata:
name: grafana
namespace: kube-ops
labels:
app: grafana
spec:
type: NodePort
ports:
- port: 3000
selector:
app: grafana
kubectl apply -f grafana-svc.yaml
[root@master grafana]# kubectl get svc -n kube-ops
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
grafana NodePort 10.109.160.7 <none> 3000:31704/TCP 7s
prometheus NodePort 10.100.170.18 <none> 9090:32698/TCP 3d19h
redis ClusterIP 10.100.232.96 <none> 6379/TCP,9121/TCP 9d
访问192.168.100.94:31704
使用配置的用户名/密码 admin/admin321就可以登录了
6、 配置grafana
点击add datasource添加一个数据源
type选择prometheus
name自己起一个
url 使用http://prometheus:9090 prometheus是之前配置prometheus的service名,9090是端口
acces 选择server(服务器访问模式)
点击Save&Test 测试并保存数据源
下面添加dashboard
从一个有网的机器下载dashboard
https://grafana.com/grafana/dashboards/162/revisions
选择左侧+号- Upload .json File
将dashboard导入
出现了好看的图表
图表显示不正常是因为数据指标与prometheus的不一致造成的
点击edit
修改数据指标
需要和prometheus里的指标对应一下
例如node_memory_MemTotal在prometheus中为node_memory_MemTotal_bytes
Node_cpu 在prometheus中为node_cpu_seconds_total
node_filesystem_size在prometheus中为node_filesystem_size_bytes
node_filesystem_free在prometheus中为node_filesystem_free_bytes
node_filesystem_size在prometheus中为node_filesystem_size_bytes
也可以直接使用已经处理好的json文件导入
处理好的JSON文件
{
"__inputs": [
{
"name": "DS_PROMETHEUS",
"label": "Prometheus",
"description": "",
"type": "datasource",
"pluginId": "prometheus",
"pluginName": "Prometheus"
}
],
"__requires": [
{
"type": "panel",
"id": "singlestat",
"name": "Singlestat",
"version": ""
},
{
"type": "panel",
"id": "graph",
"name": "Graph",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "3.1.0"
},
{
"type": "datasource",
"id": "prometheus",
"name": "Prometheus",
"version": "1.0.0"
}
],
"id": null,
"title": "Kubernetes cluster monitoring (via Prometheus)",
"tags": [
"kubernetes"
],
"style": "dark",
"timezone": "browser",
"editable": true,
"hideControls": true,
"sharedCrosshair": true,
"rows": [
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "${DS_PROMETHEUS}",
"editable": true,
"error": false,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 4,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"thresholds": "65, 90",
"title": "Cluster memory usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 6,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) )) / count(node_cpu_seconds_total{mode=\"system\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"refId": "A",
"step": 10
}
],
"thresholds": "65, 90",
"title": "Cluster CPU usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"id": 7,
"interval": null,
"isNew": true,
"links": [],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 4,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"targets": [
{
"expr": "(sum(node_filesystem_size_bytes{device=\"rootfs\"}) - sum(node_filesystem_free_bytes{device=\"rootfs\"}) ) / sum(node_filesystem_size_bytes{device=\"rootfs\"}) * 100",
"interval": "10s",
"intervalFactor": 1,
"metric": "",
"refId": "A",
"step": 10
}
],
"thresholds": "65, 90",
"title": "Cluster Filesystem usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
}
],
"title": "Row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 3,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 3,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (pod_name)( rate(container_cpu_usage_seconds_total{image!=\"\", pod_name!=\"\"}[1m] ) )",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "{{ pod_name }}",
"metric": "container_cpu",
"refId": "A",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Pod CPU usage",
"tooltip": {
"msResolution": true,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
},
{
"collapse": false,
"editable": true,
"height": "250px",
"panels": [
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 2,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (pod_name, image))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "{{ pod_name }}",
"metric": "container_memory_usage:sort_desc",
"refId": "A",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Pod memory usage",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {},
"bars": false,
"datasource": "${DS_PROMETHEUS}",
"decimals": 2,
"editable": true,
"error": false,
"fill": 0,
"grid": {
"threshold1": null,
"threshold1Color": "rgba(216, 200, 27, 0.27)",
"threshold2": null,
"threshold2Color": "rgba(234, 112, 112, 0.22)"
},
"id": 8,
"isNew": true,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sideWidth": 200,
"sort": "current",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "connected",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum by (pod_name) (rate (container_network_receive_bytes_total{name!=\"\"}[1m]) ))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "{{ pod_name }}",
"metric": "network",
"refId": "A",
"step": 10
},
{
"expr": "sort_desc(sum by (pod_name) (rate (container_network_transmit_bytes_total{name!=\"\"}[1m]) ))",
"interval": "10s",
"intervalFactor": 1,
"legendFormat": "{{ pod_name }}",
"metric": "network",
"refId": "B",
"step": 10
}
],
"timeFrom": null,
"timeShift": null,
"title": "Pod Network i/o",
"tooltip": {
"msResolution": false,
"shared": true,
"sort": 0,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"show": true
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"title": "New row"
}
],
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"templating": {
"list": []
},
"annotations": {
"list": []
},
"refresh": "10s",
"schemaVersion": 12,
"version": 46,
"links": [],
"gnetId": 162,
"description": "Monitor a Kubernetes cluster using Prometheus TSDB. Shows overall cluster CPU / Memory / Disk usage as well as individual pod statistics. "
}
直接选择这个文件导入
效果图:
7、 安装grafana插件grafana-kubernetes-app
从https://grafana.com/api/plugins/grafana-kubernetes-app/versions/1.0.1/download
下载插件
将下载好的插件包放在共享目录下,我的在/data/testgd/grafana/plugins,目的是在容器内能读取到。也可以拷贝至容器内部,使用docker或kubect命令。
解压文件
Unzip grafana-kubernetes-app-31da38a.zip
rm -rf grafana-kubernetes-app-31da38a.zip
重启grafana
kubectl delete -f grafana-deploy.yaml
kubectl apply -f grafana-deploy.yaml
进入plugins页面http://192.168.100.94:31704/plugins
发现多了kubernetes插件
点进去enable启用
点connetctxxxx配置kubernetes
Name随便写
URL 是kubernetes apiserver的地址
Access选择Server
Auth选中TLSClient Auth 和 With CA Cert
Datasource选择上面建的grafana-prometheus
访问~/.kube/config,其中属性certificate-authority-data、client-certificate-data、client-key-data就对应这 CA 证书、Client 证书、Client 私钥,不过 config 文件里面的内容是base64编码过后的,所以我们这里填写的时候要做base64解码。
这里使用在线base64解码或命令echo “” | base64 -d
点击save
进入kubernetes插件
点击cluster dashboard
点击PodContainer Dashborad
8、 安装email报警
创建configmap grafana-cm.yaml配置企业邮箱
apiVersion: v1
kind: ConfigMap
metadata:
name: grafana-config
namespace: kube-ops
data:
grafana.ini: |
[server]
root_url = http://192.168.100.94:31704
[smtp]
enabled = true
host = smtp.sina.cn:25
user = quanwenz@sina.cn
password = zqw000
skip_verify = true
from_address = quanwenz@sina.cn
[alerting]
enabled = true
execute_alerts = true
kubectl apply –f grafana-cm.yaml
将configmap挂载到grafana
修改grafana-deploy.yaml
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: grafana
namespace: kube-ops
labels:
app: grafana
spec:
revisionHistoryLimit: 10
template:
metadata:
labels:
app: grafana
spec:
containers:
- name: grafana
image: 192.168.100.94:80/grafana:5.3.4
imagePullPolicy: IfNotPresent
ports:
- containerPort: 3000
name: grafana
env:
- name: GF_SECURITY_ADMIN_USER
value: admin
- name: GF_SECURITY_ADMIN_PASSWORD
value: admin321
readinessProbe:
failureThreshold: 10
httpGet:
path: /api/health
port: 3000
scheme: HTTP
initialDelaySeconds: 60
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 30
livenessProbe:
failureThreshold: 3
httpGet:
path: /api/health
port: 3000
scheme: HTTP
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
cpu: 100m
memory: 256Mi
requests:
cpu: 100m
memory: 256Mi
volumeMounts:
- mountPath: /var/lib/grafana
subPath: grafana
name: storage
- mountPath: /etc/grafana
name: config
securityContext:
fsGroup: 472
runAsUser: 472
volumes:
- name: storage
persistentVolumeClaim:
claimName: grafana
- name: config
configMap:
name: grafana-config
kubectl create –f grafana-cm.yaml
kubectl delete –f grafana-deploy.yaml
kubectl create –f grafana-deploy.yaml
进入grafana
添加alert,发送测试邮件
去接收的邮箱里查看接收的结果