高级调度方式可以分为:

  • 节点选择器: nodeSelector、nodeName
  • 节点亲和性调度: nodeAffinity
  • Pod亲和性调度:podAffinity
  • Pod反亲和性调度:podAntiAffinity

nodeSelector, nodeName

cd; mkdir schedule; cd schedule/

vi pod-demo.yaml
# 内容为
# Pod scheduled via nodeSelector: it stays Pending until some node
# carries ALL of the listed labels (here: disktype=harddisk).
apiVersion: v1
kind: Pod
metadata:
  name: pod-demo
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
  # Hard requirement — no matching node means the Pod is unschedulable
  # (FailedScheduling), as shown in the describe output below.
  nodeSelector:
    disktype: harddisk


kubectl apply -f pod-demo.yaml 
kubectl get pods

kubectl describe pod  pod-demo
# 运行结果:
Warning  FailedScheduling  2m3s (x25 over 3m15s)  default-scheduler  0/3 nodes are available: 3 node(s) didn't match node selector.

# 打上标签
kubectl label node node2 disktype=harddisk

# 正常启动
kubectl get pods

nodeAffinity

requiredDuringSchedulingIgnoredDuringExecution 硬亲和性 必须满足亲和性。
preferredDuringSchedulingIgnoredDuringExecution 软亲和性 能满足最好,不满足也没关系。

硬亲和性:
matchExpressions : 匹配表达式,这个标签可以指定一段,例如pod中定义的key为zone,operator为In(包含那些),values为 foo和bar。就是在node节点中包含foo和bar的标签中调度
matchFields : 匹配字段,用法与 matchExpressions 类似,但匹配的是节点的字段(如 metadata.name)而不是节点的标签

# 选择在 node 有 zone 标签值为 foo 或 bar 值的节点上运行 pod
vi pod-nodeaffinity-demo.yaml 
# 内容为
# Pod using REQUIRED (hard) node affinity: it may only run on a node
# whose label `zone` has the value `foo` or `bar`.
apiVersion: v1
kind: Pod
metadata:
  name: pod-node-affinity-demo
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
  affinity:
    nodeAffinity:
      # Hard constraint at scheduling time; "IgnoredDuringExecution"
      # means an already-running Pod is not evicted if labels change later.
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
        - matchExpressions:
          # `In` matches when the node's `zone` label equals any listed value.
          - key: zone
            operator: In
            values:
            - foo
            - bar

kubectl apply -f pod-nodeaffinity-demo.yaml

kubectl describe pod pod-node-affinity-demo
# 运行结果:
Warning  FailedScheduling  2s (x8 over 20s)  default-scheduler  0/3 nodes are available: 3 node(s) didn't match node selector.

# 给其中一个node打上foo的标签
kubectl label node node1 zone=foo

# 正常启动
kubectl get pods

软亲和性 :

cp pod-nodeaffinity-demo.yaml pod-nodeaffinity-demo-2.yaml 

vi pod-nodeaffinity-demo-2.yaml 
# 内容为
# Pod using PREFERRED (soft) node affinity: the scheduler favors nodes
# labeled zone=foo or zone=bar, but still schedules the Pod elsewhere
# if no such node exists.
apiVersion: v1
kind: Pod
metadata:
  name: pod-node-affinity-demo-2
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
  affinity:
    nodeAffinity:
      preferredDuringSchedulingIgnoredDuringExecution:
      - preference:
          matchExpressions:
          - key: zone
            operator: In
            values:
            - foo
            - bar
        # Relative preference strength (valid range 1-100); higher weight
        # makes matching nodes score better against other preferences.
        weight: 60

kubectl apply -f pod-nodeaffinity-demo-2.yaml

podAffinity

Pod亲和性场景,我们的k8s集群的节点分布在不同的区域或者不同的机房,当服务A和服务B要求部署在同一个区域或者同一机房的时候,我们就需要亲和性调度了。

labelSelector : 选择跟哪组Pod亲和
namespaces : 选择哪个命名空间
topologyKey : 指定节点上的哪个键

kubectl get pods
kubectl delete pod pod-node-affinity-demo pod-node-affinity-demo-2 pod-demo

cd ~/schedule/

vi pod-required-affinity-demo.yaml 
# 内容为:
# Two Pods demonstrating inter-Pod affinity: pod-second must land on the
# same node (topologyKey kubernetes.io/hostname) as a Pod labeled app=myapp,
# i.e. pod-first.
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
---
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: db
    tier: db
spec:
  containers:
  - name: busybox
    image: busybox
    imagePullPolicy: IfNotPresent
    # Keep the container alive so the Pod stays Running for observation.
    command: ["sh","-c","sleep 3600"]
  affinity:
    podAffinity:
      # Hard co-location requirement with Pods matching the selector.
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchExpressions:
          - {key: app, operator: In, values: ["myapp"]}
        # Topology domain = individual node, so "same node" co-location.
        topologyKey: kubernetes.io/hostname


kubectl apply -f pod-required-affinity-demo.yaml 

kubectl get pods -o wide
# 运行结果,两个 pod 在同一 node 节点上
NAME         READY   STATUS    RESTARTS   AGE   IP           NODE
pod-first    1/1     Running   0          11s   10.244.1.6   node1
pod-second   1/1     Running   0          11s   10.244.1.5   node1

podAntiAffinity

Pod反亲和性场景,当应用服务A和数据库服务B要求尽量不要在同一台节点上的时候。

kubectl delete -f pod-required-affinity-demo.yaml 

cp pod-required-affinity-demo.yaml pod-required-anti-affinity-demo.yaml 

vi pod-required-anti-affinity-demo.yaml 
# 内容为
# Two Pods demonstrating inter-Pod ANTI-affinity: pod-second must NOT run
# on a node (topologyKey kubernetes.io/hostname) that already hosts a Pod
# labeled app=myapp, so the two Pods land on different nodes.
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
---
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: backend
    tier: db
spec:
  containers:
  - name: busybox
    image: busybox:latest
    imagePullPolicy: IfNotPresent
    # Keep the container alive so the Pod stays Running for observation.
    command: ["sh","-c","sleep 3600"]
  affinity:
    podAntiAffinity:
      # Hard separation requirement from Pods matching the selector.
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchExpressions:
          - {key: app, operator: In, values: ["myapp"]}
        topologyKey: kubernetes.io/hostname

kubectl apply -f pod-required-anti-affinity-demo.yaml 

kubectl get pods -o wide
# 运行结果,两个 pod 不在同一个 node
NAME         READY   STATUS    RESTARTS   AGE   IP           NODE
pod-first    1/1     Running   0          5s    10.244.2.4   node2
pod-second   1/1     Running   0          5s    10.244.1.7   node1

kubectl delete -f pod-required-anti-affinity-demo.yaml 


# 如果硬反亲和性定义的标签两个节点都有,则第二个 Pod 没法进行调度,如下面的的 zone=foo
# 给两个 node 打上同一个标签 zone=foo
kubectl label nodes node2 zone=foo
kubectl label nodes node1 zone=foo

vi pod-required-anti-affinity-demo.yaml 
# 内容为:
# Anti-affinity with topologyKey `zone`: the topology domain is every node
# sharing the same `zone` label value. Since both nodes were labeled
# zone=foo above, the whole cluster is ONE domain, pod-first occupies it,
# and pod-second has nowhere to go — it stays Pending.
apiVersion: v1
kind: Pod
metadata:
  name: pod-first
  labels:
    app: myapp
    tier: frontend
spec:
  containers:
  - name: myapp
    image: ikubernetes/myapp:v1
---
apiVersion: v1
kind: Pod
metadata:
  name: pod-second
  labels:
    app: backend
    tier: db
spec:
  containers:
  - name: busybox
    image: busybox:latest
    imagePullPolicy: IfNotPresent
    # Keep the container alive so the Pod stays Running for observation.
    command: ["sh","-c","sleep 3600"]
  affinity:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
      - labelSelector:
          matchExpressions:
          - {key: app, operator: In, values: ["myapp"]}
        # Domain is the zone, not the node — stricter than hostname.
        topologyKey: zone


kubectl apply -f pod-required-anti-affinity-demo.yaml 

kubectl get pods -o wide
# 结果如下,pod-second 没法启动
NAME         READY   STATUS    RESTARTS   AGE   IP           NODE
pod-first    1/1     Running   0          12s   10.244.1.8   node1
pod-second   0/1     Pending   0          12s   <none>       <none>

kubectl delete -f pod-required-anti-affinity-demo.yaml

污点容忍调度(Taint和Toleration)

taints and tolerations 允许将某个节点做标记,以使得所有的pod都不会被调度到该节点上。但是如果某个pod明确指定了 toleration(容忍)则可以正常调度到被标记的节点上。

# 可以使用命令行为 Node 节点添加 Taints:
kubectl taint nodes node1 key=value:NoSchedule

operator可以定义为:
Equal:表示key是否等于value,默认
Exists:表示key是否存在,此时无需定义value

taint 的 effect 定义对 Pod 的排斥效果:
NoSchedule:仅影响调度过程,对现存的Pod对象不产生影响;
NoExecute:既影响调度过程,也影响现存的Pod对象;不容忍的Pod对象将被驱逐
PreferNoSchedule: 表示尽量不调度

# 查看节点的 taint
kubectl describe node master
kubectl get pods -n kube-system
kubectl describe pods kube-apiserver-master -n kube-system

# 为 node1 打上污点
kubectl taint node node1 node-type=production:NoSchedule

vi deploy-demo.yaml 
# 内容为:
# Plain Deployment with NO tolerations: its Pods cannot be placed on any
# tainted node, which is what the taint experiments below demonstrate.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp-deploy
  namespace: default
spec:
  replicas: 2
  # selector.matchLabels must match template.metadata.labels exactly.
  selector:
    matchLabels:
      app: myapp
      release: canary
  template:
    metadata:
      labels:
        app: myapp
        release: canary
    spec:
      containers:
      - name: myapp
        image: ikubernetes/myapp:v1
        ports:
        - name: http
          containerPort: 80



kubectl apply -f deploy-demo.yaml 

kubectl get pods -o wide
# 运行结果:
NAME                            READY   STATUS    RESTARTS   AGE   IP           NODE
myapp-deploy-69b47bc96d-cwt79   1/1     Running   0          5s    10.244.2.6   node2
myapp-deploy-69b47bc96d-qqrwq   1/1     Running   0          5s    10.244.2.5   node2


# 为 node2 打上污点
kubectl taint node node2 node-type=dev:NoExecute

# NoExecute 将会驱逐没有容忍该污点的 pod,因两个node节点都有污点,pod没有定义容忍,导致没有节点可以启动pod
kubectl get pods -o wide
# 运行结果:
NAME                            READY   STATUS    RESTARTS   AGE   IP       NODE
myapp-deploy-69b47bc96d-psl8f   0/1     Pending   0          14s   <none>   <none>
myapp-deploy-69b47bc96d-q296k   0/1     Pending   0          14s   <none>   <none>


# 定义Toleration(容忍)
vi deploy-demo.yaml 
# Deployment whose Pods tolerate exactly the taint
# node-type=production:NoSchedule (operator Equal requires key AND value
# to match), so they can be scheduled onto node1.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp-deploy
  namespace: default
spec:
  replicas: 2
  selector:
    matchLabels:
      app: myapp
      release: canary
  template:
    metadata:
      labels:
        app: myapp
        release: canary
    spec:
      containers:
      - name: myapp
        image: ikubernetes/myapp:v2
        ports:
        - name: http
          containerPort: 80
      # Toleration matches node1's taint only; node2's
      # node-type=dev:NoExecute taint is still NOT tolerated.
      tolerations:
      - key: "node-type"
        operator: "Equal"
        value: "production"
        effect: "NoSchedule"


kubectl apply -f deploy-demo.yaml

# pod 容忍 node1 的 taint,可以在 node1 上运行
kubectl get pods -o wide
NAME                            READY   STATUS    RESTARTS   AGE   IP            NODE
myapp-deploy-65cc47f858-tmpnz   1/1     Running   0          10s   10.244.1.10   node1
myapp-deploy-65cc47f858-xnklh   1/1     Running   0          13s   10.244.1.9    node1


# 定义Toleration,是否存在 node-type 这个key 且 effect 值为 NoSchedule 
vi deploy-demo.yaml 
# Deployment whose Pods tolerate ANY value of the node-type taint key, but
# only for the NoSchedule effect — operator Exists ignores the value
# (which must therefore be empty), so only node1 qualifies here.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp-deploy
  namespace: default
spec:
  replicas: 2
  selector:
    matchLabels:
      app: myapp
      release: canary
  template:
    metadata:
      labels:
        app: myapp
        release: canary
    spec:
      containers:
      - name: myapp
        image: ikubernetes/myapp:v2
        ports:
        - name: http
          containerPort: 80
      tolerations:
      - key: "node-type"
        # Exists: match on key presence alone; value must be left empty.
        operator: "Exists"
        value: ""
        effect: "NoSchedule"

kubectl apply -f deploy-demo.yaml

kubectl get pods -o wide
NAME                            READY   STATUS    RESTARTS   AGE   IP            NODE
myapp-deploy-559f559bcc-6jfqq   1/1     Running   0          10s   10.244.1.11   node1
myapp-deploy-559f559bcc-rlwp2   1/1     Running   0          9s    10.244.1.12   node1


##定义Toleration,是否存在 node-type 这个key 且 effect 值为空,则包含所有的值 
vi deploy-demo.yaml 
# Deployment whose Pods tolerate the node-type taint key with ANY value
# and ANY effect (empty effect matches all effects), so both tainted
# nodes are eligible and the replicas spread across node1 and node2.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp-deploy
  namespace: default
spec:
  replicas: 2
  selector:
    matchLabels:
      app: myapp
      release: canary
  template:
    metadata:
      labels:
        app: myapp
        release: canary
    spec:
      containers:
      - name: myapp
        image: ikubernetes/myapp:v2
        ports:
        - name: http
          containerPort: 80
      tolerations:
      - key: "node-type"
        operator: "Exists"
        value: ""
        # Empty effect = tolerate every effect (NoSchedule, NoExecute, ...).
        effect: ""

kubectl apply -f deploy-demo.yaml

# 两个 pod 均衡调度到两个节点
kubectl get pods -o wide
NAME                            READY   STATUS    RESTARTS   AGE   IP            NODE
myapp-deploy-5d9c6985f5-hn4k2   1/1     Running   0          2m    10.244.1.13   node1
myapp-deploy-5d9c6985f5-lkf9q   1/1     Running   0          2m    10.244.2.7    node2