Background: a VMware environment was built on top of two physical servers, and several virtual servers were created on it.

| IP Address    | ServerName            | User |
|---------------|-----------------------|------|
| 192.168.1.220 | master-1              | zdgk |
| 192.168.1.221 | master-2              | zdgk |
| 192.168.1.222 | master-3              | zdgk |
| 192.168.1.223 | worknode-1            | zdgk |
| 192.168.1.224 | worknode-2            | zdgk |
| 192.168.1.225 | sqlserver-1           | zdgk |
| 192.168.1.226 | sqlserver-2           | zdgk |
| 192.168.1.229 | virtual address (VIP) | -    |



Files expected to be used:

Kubernetes.md (main document)    
 calico.yml (used on every master and worker node)    
 docker_registry.md (used on master-1)
 prepare_k8s_images.sh (used on master-1)

Kubernetes.md contains the main configuration walkthrough
docker_registry.md covers the Docker registry setup
prepare_k8s_images.sh covers fetching and installing the k8s images

The files above will be uploaded to my personal resources.

1. Prerequisites:

Private Docker registry
Prepared k8s images

2. Node Preparation

sudo vi  /etc/hosts:
192.168.1.220 gcr.io
192.168.1.220 k8s.gcr.io
192.168.1.220 quay.io
192.168.1.220 cr.zdgk.com.cn
192.168.1.220 cluster-endpoint
Edit the hosts file as above, substituting your own IP addresses
 //192.168.1.220 is the registry server's IP
 //every machine's hosts file maps these names to the server hosting the Docker registry
sudo netplan apply
//re-apply the network configuration
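A quick sanity check (my own addition, not part of the original steps) to confirm the name mappings took effect, assuming the registry host is 192.168.1.220 as in the table above:

getent hosts k8s.gcr.io
//should print: 192.168.1.220  k8s.gcr.io
getent hosts cluster-endpoint
//should print: 192.168.1.220  cluster-endpoint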

3. Change Hostname:

vi /etc/cloud/cloud.cfg -> 
preserve_hostname: true
hostnamectl set-hostname <hostname>
//change the hostname
//find the preserve_hostname setting in the cloud config file
sudo su
reboot
//reboot

4. Disable Swap:

sudo vi /etc/fstab, remove the swap line

sudo reboot
//comment out the swap line in /etc/fstab: # /swap.img     none    swap    sw      0       0
//run free -m to check; if every value in the Swap row is 0, it worked
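To disable swap immediately without waiting for the reboot (my addition; the original relies on the fstab edit plus a reboot):

sudo swapoff -a
//turns off all active swap devices for the current boot
free -m
//the Swap row should now read 0 0 0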

5. Letting iptables see bridged traffic

//a Linux transparent firewall, i.e. a bridge-mode firewall (bridge firewall)
sudo su
echo br_netfilter >> /etc/modules-load.d/modules.conf
modprobe br_netfilter
//add br_netfilter to the list of modules loaded at boot
lsmod | grep br_netfilter
//check that the br_netfilter module is loaded
cat <<EOF > /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
//make iptables process traffic crossing the bridge
//bridged traffic must be visible to iptables; k8s relies on bridge forwarding
sysctl --system
//apply the settings; the output should contain no errors
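To verify the settings took effect (my addition):

sysctl net.bridge.bridge-nf-call-iptables
//should print: net.bridge.bridge-nf-call-iptables = 1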

This completes the base environment configuration.

6. Install Docker:

//perform on every master node (the worker nodes will also need Docker before they can join in step 16)
sudo apt-get update
//refresh the package lists
apt-get install -y \
apt-transport-https ca-certificates curl software-properties-common gnupg2
//install the required packages
//apt's https transport, CA certificates, curl, software-properties, and gnupg2 for package verification
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
// add Docker's official GPG key (the key is needed to verify the authenticity of downloaded packages)
add-apt-repository \
  "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
  $(lsb_release -cs) \
  stable"
//add the stable-channel Docker apt repository
apt-get update
//refresh the package lists again
apt-get install -y \
  containerd.io=1.2.13-1 \
  docker-ce=5:19.03.8~3-0~ubuntu-$(lsb_release -cs) \
  docker-ce-cli=5:19.03.8~3-0~ubuntu-$(lsb_release -cs)
//install pinned versions of containerd.io, docker-ce and docker-ce-cli
cat > /etc/docker/daemon.json <<EOF
{
  "insecure-registries" : ["192.168.1.220", "gcr.io", "k8s.gcr.io", "quay.io"],
  "registry-mirrors": ["https://wgs2temd.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "100m"
  },
  "storage-driver": "overlay2"
}
EOF
//192.168.1.220 is the registry host's own IP
//every machine points insecure-registries at the docker_registry server's address
mkdir -p /etc/systemd/system/docker.service.d
//create a systemd drop-in directory for the docker service
systemctl daemon-reload
//reload the systemd configuration
systemctl restart docker
//restart the Docker service
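A quick check (my addition) that Docker came up with the intended settings:

docker info | grep -i 'cgroup driver'
//should report: Cgroup Driver: systemd
docker version --format '{{.Server.Version}}'
//should report 19.03.8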

7. Deploy Private Docker Registry

//Docker Hub uses hub.docker.com as its public registry; correspondingly, we can use registry to run our own private registry and speed up image pulls.
//perform on one machine only (master-1)
docker pull registry:2
//pull the image
docker run -d -p 80:5000 --restart=always -v /mnt/registry:/var/lib/registry --name registry registry:2
//start it without authentication
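To confirm the registry is reachable (my addition; the catalog stays empty until step 9 pushes images into it):

curl http://192.168.1.220/v2/_catalog
//expected output: {"repositories":[]}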

8. Install Kubernetes

//for Debian / Ubuntu Linux systems
curl https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -
//fetch and add the repository signing key
cat <<EOF > /etc/apt/sources.list.d/kubernetes.list
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF
//add the package source
apt-get update
//refresh the package lists
apt-get install -y kubelet kubeadm kubectl
//installs kubelet, kubeadm and kubectl in one go, auto-confirming the prompts
//kubelet: runs on every cluster node and is responsible for starting pods and containers
//kubeadm: used to bootstrap the cluster
//kubectl: the Kubernetes command-line tool; with it you can deploy and manage applications, inspect resources, and create, delete and update components
apt-mark hold kubelet kubeadm kubectl
//hold kubelet, kubeadm and kubectl so they cannot be auto-upgraded, all set in one command
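The unpinned install above pulls the latest packages, which may be newer than the v1.18.3 images prepared in step 9. A hedged alternative is to pin matching versions (the -00 Debian revision below is my assumption; verify with apt-cache madison):

apt-cache madison kubeadm
#lists the versions available from the repository
apt-get install -y kubelet=1.18.3-00 kubeadm=1.18.3-00 kubectl=1.18.3-00
#pin the versions that match the prepared images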

9. Pull the k8s Images

//perform this on one master only
//pulling the images is like stocking a shared warehouse; a single machine doing it is enough
#!/bin/bash

urls=($1/pause:3.2=registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.2
  $1/kube-controller-manager:v1.18.3=registry.cn-hangzhou.aliyuncs.com/google_containers/kube-controller-manager:v1.18.3  $1/kube-scheduler:v1.18.3=registry.cn-hangzhou.aliyuncs.com/google_containers/kube-scheduler:v1.18.3
  $1/kube-proxy:v1.18.3=registry.cn-hangzhou.aliyuncs.com/google_containers/kube-proxy:v1.18.3
  $1/kube-apiserver:v1.18.3=registry.cn-hangzhou.aliyuncs.com/google_containers/kube-apiserver:v1.18.3
  $1/etcd:3.4.3-0=registry.cn-hangzhou.aliyuncs.com/google_containers/etcd:3.4.3-0
  $1/coredns:1.6.7=registry.cn-hangzhou.aliyuncs.com/google_containers/coredns:1.6.7
  $1/kubernetes-ingress-controller/nginx-ingress-controller:0.31.0=registry.cn-hangzhou.aliyuncs.com/google_containers/nginx-ingress-controller:0.31.0)

function prepare_k8s_images() {
  for url in "${urls[@]}"; do
    IFS='=' read -r key value <<<"$url"
    docker pull ${value} && docker tag ${value} ${key} && docker push ${key} && docker rmi ${key} && docker rmi ${value}
  done
}

if [ $# -ne 1 ]; then
  echo "Usage: $0 <registry>"
  exit 1
fi

prepare_k8s_images
//each layer should report Pushed, not Refused
//e.g.: 4f6d2be816a7: Pushed   ea77d71c6777: Pushed   f61f67cca0be: Pushed
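Saved as prepare_k8s_images.sh, the script takes the private registry address as its single argument. A usage sketch, assuming the registry from step 7:

chmod +x prepare_k8s_images.sh
./prepare_k8s_images.sh 192.168.1.220
#pulls each image from the Aliyun mirror, retags it for 192.168.1.220, pushes it, then deletes the local copies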
This completes the configuration of master-1 (the primary server).

10. Keepalived

Performed on the database/storage servers.
These are independent of the masters and worker nodes; Docker and k8s do not need to be installed here.

Install Keepalived

sudo apt install keepalived
//install keepalived
sudo vi /etc/keepalived/keepalived.conf
//create and edit keepalived.conf; the file does not exist after a fresh install
global_defs {

}

vrrp_instance k8s_control_plane {
    state <MASTER | BACKUP>
    interface ens160 #//the NIC the virtual IP binds to
    virtual_router_id 101
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass k8s
    }

    unicast_src_ip 192.168.1.225 #//the IP of the machine you are configuring
    unicast_peer {
        192.168.1.226 #//the IP of the peer database/storage server
    }

    virtual_ipaddress {
        192.168.1.229 #//192.168.1.229 is the virtual (floating) address
    }
}
//once the file above is configured
ping 192.168.1.229
//the virtual address does not appear in ifconfig output; ping it to check that it is reachable
sudo systemctl restart keepalived
//restart
//the other database/storage server gets the mirror-image configuration, i.e. with the IP addresses swapped
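A sketch of the peer's configuration (my rendering of "swap the IPs"; the BACKUP state and the lower priority follow standard keepalived practice and are my assumptions):

vrrp_instance k8s_control_plane {
    state BACKUP
    interface ens160
    virtual_router_id 101
    priority 90                  #//lower than the MASTER's 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass k8s
    }
    unicast_src_ip 192.168.1.226 #//this machine
    unicast_peer {
        192.168.1.225            #//the other database/storage server
    }
    virtual_ipaddress {
        192.168.1.229
    }
}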

11. HAProxy

Performed on the database/storage servers

Install HAProxy

apt-get install haproxy -y
//install haproxy
sudo vi  /etc/haproxy/haproxy.cfg:

Append the following contents to /etc/haproxy/haproxy.cfg:

//add the following on top of the file's existing contents
frontend k8s
        bind 0.0.0.0:6443
        mode tcp
        option tcplog
        default_backend k8s-masters

backend k8s-masters
        mode tcp
        option tcplog
        option tcp-check
        balance roundrobin
        server k8s-master-01 192.168.1.220:6443 check
        server k8s-master-02 192.168.1.221:6443 check
        server k8s-master-03 192.168.1.222:6443 check

frontend esis-auth
        bind 0.0.0.0:50012
        mode tcp
        option tcplog
        default_backend esis-auth-services

backend esis-auth-services
        mode tcp
        option tcplog
        balance roundrobin
        server k8s-master-01 192.168.1.220:31134 check
        server k8s-master-02 192.168.1.221:31134 check
        server k8s-master-03 192.168.1.222:31134 check
        server k8s-worker-01 192.168.1.223:31134 check
        server k8s-worker-02 192.168.1.224:31134 check

frontend esis-ingress
        bind 0.0.0.0:50080
        mode tcp
        option tcplog
        default_backend esis-ingress

backend esis-ingress
        mode tcp
        option tcplog
        balance roundrobin
        server k8s-master-01 192.168.1.220:30753 check
        server k8s-master-02 192.168.1.221:30753 check
        server k8s-master-03 192.168.1.222:30753 check
        server k8s-worker-01 192.168.1.223:30753 check
        server k8s-worker-02 192.168.1.224:30753 check
//adjust the IP addresses to match your environment; the port numbers do not need to change
systemctl start haproxy
//start
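Before starting, the configuration can be validated (my addition):

haproxy -c -f /etc/haproxy/haproxy.cfg
//prints "Configuration file is valid" on success
systemctl enable haproxy
//optional: also start haproxy on boot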
This completes the deployment of the database/storage servers.

Creating HA k8s Cluster with kubeadm

12. Initialize First Control Plane Node

sudo kubeadm init --image-repository=192.168.1.220 --control-plane-endpoint=cluster-endpoint --kubernetes-version=1.18.3 --pod-network-cidr=10.96.0.0/16 --upload-certs
//initialization is performed only on the primary master; the other nodes run kubeadm join
//--image-repository=192.168.1.220: on every server this address points at the server hosting the k8s images
// after this completes, it prints ready-made Join Control Plane Node / Join Work Node commands; substitute the printed token and hash values into the join commands below
// if it errors out partway through, run: sudo kubeadm reset
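A quick post-init check as root (my addition; step 14 sets up kubectl for a regular user):

export KUBECONFIG=/etc/kubernetes/admin.conf
kubectl get nodes
//master-1 should be listed; it stays NotReady until Calico is deployed in step 13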

13. Deploy Network Addon (Calico)

// every machine needs a copy of the file for this step
// By default, Calico uses 192.168.0.0/16 as the Pod network CIDR, though this can be configured in the calico.yaml file.
// Calico is a BGP-based virtual networking tool: VMs, containers, and bare-metal machines in a data center (collectively called workloads) can interconnect with just an IP address. The calico yml file provides the network layer for k8s.

zdgk@master-1:mkdir -p /tmp
zdgk@master-1:ls /tmp
//create the tmp directory; ls shows it is empty
d:
//scp calico.yml zdgk@192.168.1.221:~/ alternatively copies it straight into the user's home directory
D:\>scp calico.yml zdgk@192.168.1.220:/tmp
zdgk@192.168.1.220's password:
calico.yml
// CMD-window commands: copy the yaml file over from the local D: drive
zdgk@master-1:cd /tmp
zdgk@master-1:/tmp$ ls
zdgk@master-1:/tmp$ sudo vi calico.yml
//verify the file contents
kubectl apply -f /tmp/calico.yml
//kubectl apply -f calico.yml if the file was copied to the home directory instead
//execute
//this apply step only needs to be run on master-1

14. Grant the User kubectl Permissions

//grant a regular user permission to run kubectl; these commands appear in the output of a successful init.
//only needs to be run on master-1
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config

15.Join Control Plane Node

//performed on the masters other than the one hosting the Docker images
//the kubeadm join command
sudo kubeadm join cluster-endpoint:6443 --token ckcmyx.xpfhcv04aaeeebtr \
    --discovery-token-ca-cert-hash sha256:7a7a5d00b0ce6720668b896019769a612c2d8478279c120a93d7c8a7a6de0670 \
    --control-plane --certificate-key 7c9bafa5e64b68b3ec907ce40fef6e0dd77e0417eea68b3bea3be633217a4ca9
//if kubectl get nodes on master-1 does not show Ready, run the three commands from section 14

16. Join Work Node

// performed on the worker nodes
// the kubeadm join command
sudo kubeadm join cluster-endpoint:6443 --token ckcmyx.xpfhcv04aaeeebtr \
--discovery-token-ca-cert-hash sha256:7a7a5d00b0ce6720668b896019769a612c2d8478279c120a93d7c8a7a6de0670
//if kubectl get nodes on master-1 does not show Ready, run the first command from section 14

17. Common Errors

1. Running this command
sudo apt-get autoremove
crashed the master-2 system, which had to be reinstalled

2. master-2 runs the kubeadm join command

Error message:

error execution phase control-plane-prepare/download-certs: error downloading certs: error downloading the secret: Secret "kubeadm-certs" was not found in the "kube-system" Namespace. This Secret might have expired. Please, run `kubeadm init phase upload-certs --upload-certs` on a control plane to generate a new one
To see the stack trace of this error execute with --v=5 or higher

Fix:
On master-1, run kubeadm init phase upload-certs --upload-certs to get a new key
On master-1, run kubeadm token create --print-join-command to get a new token

Take the printed token + key:

--token rq0fj2.exfkf62cikxkhud7\
     --discovery-token-ca-cert-hash sha256:013fdbf5b63deb699df20e70caf259446bb5c856f847987b135c91b583ee163e\
[upload-certs] Using certificate key:
4bc4f68085a2875d97ccdcfc4b91983bc895cb7b0f69a49659158dd32c6dc359
//substitute these for the hash, key and token values in the kubeadm join command
//run it on master-2

3. IP Address Configuration

If Default (static) was selected during OS installation, 50-cloud-init.yaml does not need to be touched;
if DHCP (dynamic) was selected, 50-cloud-init.yaml must be edited on every machine.

sudo vi /etc/netplan/50-cloud-init.yaml

Change it to the following, pinning the designated IP, gateway, and DNS:

# This file is generated from information provided by the datasource.  Changes
# to it will not persist across an instance reboot.  To disable cloud-init's
# network configuration capabilities, write a file
# /etc/cloud/cloud.cfg.d/99-disable-network-config.cfg with the following:
# network: {config: disabled}
network:
    ethernets:
        ens160:
            addresses:
            - 192.168.1.222/24
            gateway4: 192.168.1.1
            nameservers:
                addresses:
                - 192.168.1.1
    version: 2
//apply the new network configuration
sudo netplan apply
4. After a VM is deleted and rebuilt, remote SSH access is refused
[root@hadoop2 ~]# ssh hadoop3
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@    WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED!     @
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
IT IS POSSIBLE THAT SOMEONE IS DOING SOMETHING NASTY!
Someone could be eavesdropping on you right now (man-in-the-middle attack)!
It is also possible that the RSA host key has just been changed.
The fingerprint for the RSA key sent by the remote host is
33:78:e5:9a:e8:c5:47:73:81:a3:9b:4c:b0:ed:af:2d.
Please contact your system administrator.
Add correct host key in /root/.ssh/known_hosts to get rid of this message. //the stored key no longer matches
Offending key in /root/.ssh/known_hosts:2
Password authentication is disabled to avoid man-in-the-middle attacks.
Keyboard-interactive authentication is disabled to avoid man-in-the-middle attacks.
Permission denied (publickey,gssapi-keyex,gssapi-with-mic,password).

Fix:

//run on the local client
ssh-keygen -R <server IP>
//reconnect; access is restored

5. A secondary master joining the initialized master prints:
[preflight] Running pre-flight checks

and after a while reports:

error execution phase preflight: couldn't validate the identity of the API Server: could not find a JWS signature in the cluster-info ConfigMap for token ID "3xglgc"
To see the stack trace of this error execute with --v=5 or higher
//the token ID in the join command cannot be found
//regenerate the token and key values and substitute them into the join command

6. After a secondary master joins the initialized master, its node status is NotReady:

On the initialized node, run kubectl get nodes:

root@master-1:/home/zdgk# kubectl get no
NAME       STATUS     ROLES    AGE   VERSION
master-1   Ready      master   42h   v1.18.6
master-2   NotReady   master   50s   v1.18.6
master-3   Ready      master   24h   v1.18.6
//master-2, which just ran the join command, shows NotReady

Look at the message printed after the join command:

This node has joined the cluster and a new control plane instance was created:

* Certificate signing request was sent to apiserver and approval was received.
* The Kubelet was informed of the new secure connection details.
* Control plane (master) label and taint were applied to the new node.
* The Kubernetes control plane instances scaled up.
* A new etcd member was added to the local/stacked etcd cluster.

To start administering your cluster from this node, you need to run the following as a regular user:
mkdir -p $HOME/.kube
    sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
    sudo chown $(id -u):$(id -g) $HOME/.kube/config
Run 'kubectl get nodes' to see this node join the cluster.
//run the three commands above as a regular user, then on the initialized node run 'kubectl get nodes' to check whether this node joined the cluster successfully
root@master-1:/home/zdgk# kubectl get no
NAME       STATUS   ROLES    AGE     VERSION
master-1   Ready    master   42h     v1.18.6
master-2   Ready    master   3m56s   v1.18.6
master-3   Ready    master   24h     v1.18.6



7. If the token and hash have not expired but were not recorded, they can be recovered with the following commands

1. First get the token

#if the token has expired, run this first
kubeadm token create    #regenerate the token
#list the tokens
kubeadm token list  | awk -F" " '{print $1}' |tail -n 1

2. Get the SHA-256 hash of the CA public key

openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed  's/^ .* //'

kubeadm join cluster-endpoint:6443 --token xxx \
    --discovery-token-ca-cert-hash sha256:xxxx \
    --control-plane --certificate-key xxx

kubeadm join cluster-endpoint:6443 --token xxx \
    --discovery-token-ca-cert-hash sha256:xxx

8. Install Kubernetes
Running curl https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -  
fails with gpg: no valid OpenPGP data found.

Fix:

sudo apt-get update

After refreshing, fetch the key again:

root@worknode-2:~# curl https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   653  100   653    0     0    115      0  0:00:05  0:00:05 --:--:--   157
OK  // key fetched successfully