Tags (space-separated): kubernetes series


1: System Introduction

1.1 Installation Environment

OS:
    CentOS 7.9 x64

Hostnames:
    cat /etc/hosts
  ---
# public network [NAT network]

172.16.10.11    flyfish11   
172.16.10.12    flyfish12  
172.16.10.13    flyfish13  
172.16.10.14    flyfish14  
172.16.10.15    flyfish15  

# cluster network [private network]

10.16.10.11    ceph01  
10.16.10.12    ceph02  
10.16.10.13    ceph03  
10.16.10.14    ceph04  
10.16.10.15    ceph05  

  ---

Disable SELinux and firewalld on every node and flush the iptables rules.
Set up passwordless root SSH login between all nodes (a sketch follows below).

Planned layout for the first five machines:
    1 x 200 GB SSD as the system disk,
    1 x 100 GB SSD as an OSD,
    1 x 100 GB SSD as an OSD
---
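A minimal sketch of the passwordless root login mentioned above (assumptions: the ceph01-05 names resolve on every node and root password login is allowed during setup); run on ceph01:

## generate a key pair once, then push the public key to every node (including ceph01 itself)
# ssh-keygen -t rsa -N '' -f /root/.ssh/id_rsa
# for i in {1..5}; do ssh-copy-id -i /root/.ssh/id_rsa.pub root@ceph0$i; done
## quick check: each hostname should print without a password prompt
# for i in {1..5}; do ssh root@ceph0$i hostname; done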


1.2 System Initialization

1. Unify NIC names (optional)
 Some production machines differ in hardware model and therefore in NIC names; unifying the names makes later management easier.

 Note: renaming NICs on a running system may interrupt the network, so proceed with caution!!

On openEuler 22.03 the udev-based NIC naming that works on CentOS 7.9 no longer takes effect, and the same is true on Rocky Linux 9.2 (the udev binding mechanism has apparently changed), so here the NIC names are unified by editing GRUB instead.
- public  network: eth0
- cluster network: eth1

## In the VM, the public-network NIC is named ens160 by default (the name may differ by OS and version); rename it to eth0

## i. First rename the NIC config file
# mv /etc/sysconfig/network-scripts/ifcfg-ens160 /etc/sysconfig/network-scripts/ifcfg-eth0
# cat > /etc/sysconfig/network-scripts/ifcfg-eth0 <<EOF
TYPE=Ethernet
PROXY_METHOD=none
BROWSER_ONLY=no
BOOTPROTO=static
DEFROUTE=yes
IPV4_FAILURE_FATAL=no
IPV6INIT=no
IPV6_AUTOCONF=no
IPV6_DEFROUTE=no
IPV6_FAILURE_FATAL=no
# modify, default ens160
NAME=eth0
# modify,default ens160
DEVICE=eth0
ONBOOT=yes
IPADDR=192.168.59.241
NETMASK=255.255.255.0
GATEWAY=192.168.59.2
DNS1=223.5.5.5
DNS2=114.114.114.114
EOF
--------------------------------------------------------------------------------------------
## In the VM, the cluster-network NIC is named ens192 by default (the name may differ by OS and version); rename it to eth1
## Log in via the public network when modifying the cluster network!!!
# mv /etc/sysconfig/network-scripts/ifcfg-ens192 /etc/sysconfig/network-scripts/ifcfg-eth1
# cat > /etc/sysconfig/network-scripts/ifcfg-eth1 <<EOF
TYPE=Ethernet
PROXY_METHOD=none
BROWSER_ONLY=no
BOOTPROTO=static
DEFROUTE=yes
IPV4_FAILURE_FATAL=no
IPV6INIT=no
IPV6_AUTOCONF=no
IPV6_DEFROUTE=no
IPV6_FAILURE_FATAL=no
# modify,default ens192
NAME=eth1
# modify,default ens192
DEVICE=eth1
ONBOOT=yes
IPADDR=10.168.59.241
NETMASK=255.255.255.0
EOF

## The NIC configs above were rewritten with `cat`; alternatively, replace the names in place with `sed`
# sed -i 's/ens160/eth0/g' /etc/sysconfig/network-scripts/ifcfg-eth0
# sed -i 's/ens192/eth1/g' /etc/sysconfig/network-scripts/ifcfg-eth1

## ii. Edit GRUB and disable the net.ifnames and biosdevname naming rules.
## Back up your data before doing this in production
## Append `net.ifnames=0 biosdevname=0` to GRUB_CMDLINE_LINUX
# vi /etc/default/grub
GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="resume=/dev/mapper/vg00-swap rd.lvm.lv=vg00/root rd.lvm.lv=vg00/swap net.ifnames=0 biosdevname=0 cgroup_disable=files apparmor=0 crashkernel=512M rhgb quiet"
GRUB_DISABLE_RECOVERY="true"

## iii. Regenerate the GRUB config and reboot for the change to take effect
# grub2-mkconfig -o /boot/grub2/grub.cfg
# reboot
If the system boots via UEFI, run the following instead:
# grub2-mkconfig -o /boot/efi/EFI/centos/grub.cfg
# reboot

The NIC renaming above must be done on ceph01-05; adjust the IP addresses in the config files to match each node.

2. Disable firewalld and SELinux
systemctl stop firewalld
systemctl disable firewalld
setenforce 0
sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
3. Disable swap (optional; recommended in production)
swapoff -a
sed -i 's/.*swap.*/#&/' /etc/fstab
4. Tune kernel parameters and resource limits
## Forward IPv4 and let iptables see bridged traffic (optional)
# cat <<EOF | sudo tee /etc/modules-load.d/ceph.conf
overlay
br_netfilter
EOF

# modprobe overlay
# modprobe br_netfilter
# modprobe ip_conntrack
# lsmod | grep br_netfilter
# verify that the br_netfilter module is loaded

## Tune kernel parameters
cat <<EOF | tee /etc/sysctl.d/ceph.conf
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
#1. Ephemeral port range for outbound connections. Default: 32768 60999
# The start and end of the range should have different parity; setting it to 1024 65530 makes dmesg warn: ip_local_port_range: prefer different parity for start/end values.
net.ipv4.ip_local_port_range = 1024 65535

# If dmesg shows messages like "nf_conntrack: table full, dropping packet", raise the conntrack limit (2621440 here). Do not set it too high, or "nf_conntrack: falling back to vmalloc" will appear.
net.netfilter.nf_conntrack_max = 2621440
net.nf_conntrack_max = 2621440
# Maximum number of memory map areas a process may have; important for applications that use many memory mappings
vm.max_map_count = 1048576                      
#2. If the error count in `netstat -s | grep "buffer errors"` keeps increasing, tune the following parameters
# net.ipv4.tcp_wmem default: 4096        16384   4194304
net.ipv4.tcp_wmem = 4096        16384   4194304
# net.ipv4.tcp_rmem default: 4096  87380  6291456
net.ipv4.tcp_rmem = 4096  87380  6291456
# net.ipv4.tcp_mem default: 381462  508616  762924
net.ipv4.tcp_mem = 381462  508616  762924
# net.core.rmem_default default: 212992
net.core.rmem_default = 8388608
# net.core.rmem_max default: 212992
net.core.rmem_max = 26214400
# net.core.wmem_max default: 212992
net.core.wmem_max = 26214400

# Increase the file handle limits
fs.nr_open = 16777216
fs.file-max = 16777216

#3. If dmesg shows messages like "arp_cache: neighbor table overflow", tune the following
# net.ipv4.neigh.default.gc_thresh1 default 128
net.ipv4.neigh.default.gc_thresh1 = 40960
# net.ipv4.neigh.default.gc_thresh2 default 512
net.ipv4.neigh.default.gc_thresh2 = 81920
# net.ipv4.neigh.default.gc_thresh3 default 1024
net.ipv4.neigh.default.gc_thresh3 = 102400

#4. Packet drops caused by full connection queues: enlarge the SYN (half-open) queue and the accept (full-connection) queue
# Length of the TCP SYN backlog queue, default 1024; a larger queue can hold more connections waiting to be established.
net.ipv4.tcp_max_syn_backlog = 65535
# Upper limit of the accept queue, i.e. how many completed connections can wait to be accepted by the server
net.core.somaxconn = 65535
# Maximum receive queue length of the network device
net.core.netdev_max_backlog = 250000
#5. Older kernels (e.g. 3.10) support the tcp_tw_recycle parameter for fast TIME_WAIT recycling, but when clients also have TCP timestamps enabled (usually the default) it causes packet loss behind NAT, and even without NAT a moderately high load can trigger PAWS check failures and drops, so do not enable it in production.
#### TIME_WAIT
# default 0
# enable the SYN cookie defense mechanism
net.ipv4.tcp_syncookies = 1
# reuse of sockets in TIME-WAIT state; 0 here, i.e. not enabled
net.ipv4.tcp_tw_reuse = 0
# tcp_tw_recycle is not recommended (it corrupts connections); the parameter was removed in kernel 4.12
# net.ipv4.tcp_tw_recycle = 0
# default 60
net.ipv4.tcp_fin_timeout = 30

#6. Enable TCP Fast Open to skip part of the TCP three-way handshake; bit 1 set means TFO is supported as a client, bit 2 set means TFO is supported as a server, so a value of 3 (binary 11) enables TFO fully.
net.ipv4.tcp_fastopen = 3
net.ipv4.tcp_orphan_retries = 3
# default 0: if the accept queue is full at step three of the handshake, the server silently drops the client's ACK; set to 1, the server instead sends an RST to the client, rejecting the handshake and the connection
# Only enable this if you are sure the daemon really cannot keep up with connection requests; it affects clients
net.ipv4.tcp_abort_on_overflow = 1
EOF

# sysctl -p  /etc/sysctl.d/ceph.conf

## Adjust resource limits
cat > /etc/security/limits.d/ceph.conf <<EOF
# End of file
*               hard    nofile         655360
*               soft    nofile         655360
*               soft    core           655360
*               hard    core           655360
*          soft    nproc     unlimited
root       soft    nproc     unlimited
EOF
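The limits.d drop-in only applies to new login sessions. A quick sanity check after logging in again (the values should match the 655360 configured above):

# ulimit -n
# ulimit -c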
5. Configure time synchronization, using ceph01 as the time server
## On ceph01
# yum -y install chrony
# vi /etc/chrony.conf
pool ntp.aliyun.com iburst
...
...


# systemctl start chronyd && systemctl enable chronyd
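The snippet above only covers the server side. A sketch of the client side for ceph02-05 (assumption: ceph01's chrony.conf also carries an `allow 172.16.10.0/24` line so that the other nodes are permitted to sync from it):

## on ceph02-05
# yum -y install chrony
# vi /etc/chrony.conf
server 172.16.10.11 iburst
...

# systemctl enable --now chronyd
# chronyc sources -v     ## ceph01 (172.16.10.11) should show up as a source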

## Select a storage tuning profile

#tuned-adm list |grep storage 

#tuned-adm profile |grep storage

#tuned-adm profile enterprise-storage
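Whichever profile is chosen, the result can be confirmed with:

# tuned-adm active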


1.3 Install the Docker Environment

yum -y install wget jq psmisc vim net-tools nfs-utils telnet yum-utils device-mapper-persistent-data lvm2 git network-scripts tar curl


# Disable swap
sed -ri 's/.*swap.*/#&/' /etc/fstab
swapoff -a && sysctl -w vm.swappiness=0
 
cat /etc/fstab
# /dev/mapper/centos-swap swap                    swap    defaults        0 0
# 

## Disable SELinux
# setenforce 0
# sed -i "s/SELINUX=enforcing/SELINUX=disabled/g" /etc/selinux/config
# Tune kernel parameters

yum -y install bridge-utils
modprobe br_netfilter
vim /etc/sysctl.conf
-----
net.ipv4.ip_forward = 1
net.ipv6.conf.all.disable_ipv6 = 1
net.bridge.bridge-nf-call-arptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
-----
sysctl -p 

cat <<EOF >> /etc/security/limits.conf
* hard nofile 655360
* soft nofile 655360
* hard nproc 655360
* soft nproc 655360
* soft core 655360
* hard core 655360
root hard nofile 655360
root soft nofile 655360

EOF


### System dependency packages
yum install -y conntrack socat ntpdate ntp ipvsadm ipset jq iptables curl sysstat libseccomp wget vim net-tools git

### Enable IPVS forwarding
modprobe br_netfilter 
modprobe ip_conntrack

cat > /etc/sysconfig/modules/ipvs.modules << EOF 

#!/bin/bash 
modprobe -- ip_vs 
modprobe -- ip_vs_rr 
modprobe -- ip_vs_wrr 
modprobe -- ip_vs_sh 
modprobe -- nf_conntrack
EOF 

chmod 755 /etc/sysconfig/modules/ipvs.modules 

bash /etc/sysconfig/modules/ipvs.modules 

lsmod | grep -e ip_vs -e nf_conntrack
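The /etc/sysconfig/modules/*.modules mechanism used above is a legacy convention; on purely systemd-based systems a modules-load.d drop-in (a sketch, same module list as above) is the more portable way to make the modules load at boot:

cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF

## load them now without a reboot
systemctl restart systemd-modules-load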


1) Installation from RPM packages:
Official documentation:
https://docs.docker.com

# Install dependencies
yum install -y yum-utils device-mapper-persistent-data lvm2
# Add the Docker package repository
yum-config-manager \
--add-repo \
http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo

## List all available versions
yum list docker-ce --showduplicates | sort -r

# Install Docker CE
yum install -y docker-ce    ## installs the latest version directly

## Or install a specific required version
#yum install docker-ce-19.03.15-3.el7 docker-ce-cli-19.03.15-3.el7
yum install docker-ce-20.10.24-3.el7 docker-ce-cli-20.10.24-3.el7 docker-ce-rootless-extras-20.10.24-3.el7 
   
## Aliyun registry mirror accelerator
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<-'EOF'
{
"registry-mirrors": ["https://dfmo7maf.mirror.aliyuncs.com"],
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "2048m"
},
"storage-driver": "overlay2"
}
EOF
sudo systemctl daemon-reload
sudo systemctl restart docker
sudo systemctl enable docker

## Add a registry mirror (alternative)
curl -sSL https://get.daocloud.io/daotools/set_mirror.sh | sh -s http://f1361db2.m.daocloud.io

# Start the Docker service and enable it at boot
systemctl start docker
systemctl enable docker
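A quick check that the daemon is running and picked up the daemon.json settings (cgroup driver and registry mirrors):

# docker info | grep -A 2 -E "Cgroup Driver|Registry Mirrors"
# docker version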

1.4 Install cephadm

1. Install cephadm. Starting with the Octopus (O) release, the ceph-deploy tool is no longer supported.
cephadm installation:
https://docs.ceph.com/en/latest/cephadm/install/#install-cephadm

cephadm prerequisites:
- Python3
- Systemd
- Podman or Docker
- Chrony or NTP
- LVM2


## Run on all ceph nodes
# CEPH_RELEASE=17.2.6 # replace this with the active release
# curl --silent --remote-name --location https://download.ceph.com/rpm-${CEPH_RELEASE}/el9/noarch/cephadm
# chmod +x cephadm
# mv cephadm /usr/sbin/

## Running `cephadm install` would install cephadm's dependency packages on the current node, but the versions are old, so it is not recommended
# cephadm install
ERROR: Distro openeuler version 22.03 not supported

## Edit /usr/sbin/cephadm and add openeuler to the DISTRO_NAMES dictionary
# vi /usr/sbin/cephadm
  7654     DISTRO_NAMES = {
  7655         'centos': ('centos', 'el'),
  7656         'rhel': ('centos', 'el'),
  7657         'scientific': ('centos', 'el'),
  7658         'rocky': ('centos', 'el'),
  7659         'openeuler': ('centos', 'el'),   ## add support for openEuler
  7660         'almalinux': ('centos', 'el'),
  7661         'ol': ('centos', 'el'),
  7662         'fedora': ('fedora', 'fc'),
  7663         'mariner': ('mariner', 'cm'),
  7664     }
  7665  
## Copy the cephadm file to the other nodes
# for i in {2..5};do scp -rp /usr/sbin/cephadm ceph0$i:/usr/sbin/;done

On CentOS 8 it can be installed directly from the yum repository:

# yum download cephadm
# rpm -ivh cephadm-17.2.6-0.el8.noarch.rpm
# rpm -ql cephadm-17.2.6-0.el8
/usr/sbin/cephadm
/usr/share/man/man8/cephadm.8.gz
/var/lib/cephadm
/var/lib/cephadm/.ssh
/var/lib/cephadm/.ssh/authorized_keys


2. Check whether each node meets the requirements for running a Ceph cluster. The command must be run on the node being checked; e.g. to verify ceph02, run it on ceph02.


# cephadm check-host --expect-hostname ceph02
docker (/usr/bin/docker) is present
systemctl is present
lvcreate is present
Unit chronyd.service is enabled and running
Hostname "ceph02" matches what is expected.
Host looks OK

## The check can also be run with the following command
# cephadm check-host --expect-hostname `hostname`
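To run the check across all five nodes from ceph01 in one pass (a sketch; relies on the passwordless SSH set up earlier):

# for i in {1..5}; do ssh ceph0$i 'cephadm check-host --expect-hostname $(hostname)'; done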


Prepare the container images

vim +46 /usr/sbin/cephadm

The following images are required:
----
# Default container images -----------------------------------------------------
DEFAULT_IMAGE = 'quay.io/ceph/ceph:v17'
DEFAULT_IMAGE_IS_MASTER = False
DEFAULT_IMAGE_RELEASE = 'quincy'
DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.33.4'
DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.3.1'
DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.23.0'
DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:8.3.5'
DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.1.5'
#DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:latest'
DEFAULT_REGISTRY = 'docker.io'   # normalize unqualified digests to this
# ------------------------------------------------------------------------------
----

docker pull quay.io/ceph/ceph:v17
docker pull quay.io/prometheus/prometheus:v2.33.4
docker pull docker.io/grafana/loki:2.4.0
docker pull docker.io/grafana/promtail:2.4.0
docker pull quay.io/prometheus/node-exporter:v1.3.1
docker pull quay.io/prometheus/alertmanager:v0.23.0
docker pull quay.io/ceph/ceph-grafana:8.3.5
docker pull quay.io/ceph/haproxy:2.3
docker pull quay.io/ceph/keepalived:2.1.5
docker pull maxwo/snmp-notifier:v1.2.1   ## replace the v1.2.1 tag with latest (see the pull command below)

This image is available at:
https://hub.docker.com/r/maxwo/snmp-notifier/tags
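Since only the latest tag is used here, the pull that matches the DEFAULT_SNMP_GATEWAY_IMAGE edit above is:

docker pull docker.io/maxwo/snmp-notifier:latest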


docker load -i alertmanager.tar.gz
docker load -i ceph-grafana.tar.gz
docker load -i ceph.tar.gz
docker load -i haproxy.tar.gz
docker load -i keepalived.tar.gz
docker load -i loki.tar.gz
docker load -i node-exporter.tar.gz
docker load -i prometheus.tar.gz
docker load -i promtail.tar.gz
docker load -i snmp-notifier.tar.gz



1.5 Bootstrap the Ceph Cluster

Initialize a minimal cluster with cephadm bootstrap
> The cephadm bootstrap process creates a small Ceph cluster on a single node, consisting of one Ceph monitor and
  one Ceph mgr, plus the monitoring components such as prometheus and node-exporter.

## Bootstrap with the mon IP, the cluster network, and the initial dashboard username and password specified
# cephadm bootstrap --mon-ip 172.16.10.11 --cluster-network 10.16.10.0/24 --initial-dashboard-user admin --initial-dashboard-password demo2023
-----
Creating directory /etc/ceph for ceph.conf
Verifying podman|docker is present...
Verifying lvm2 is present...
Verifying time synchronization is in place...
Unit chronyd.service is enabled and running
Repeating the final host check...
docker (/usr/bin/docker) is present
systemctl is present
lvcreate is present
Unit chronyd.service is enabled and running
Host looks OK
Cluster fsid: 4b85c264-0f18-11ee-8f0e-000c294f2b6f
Verifying IP 172.16.10.11 port 3300 ...
Verifying IP 172.16.10.11 port 6789 ...
Mon IP `172.16.10.11` is in CIDR network `172.16.10.0/24`
Mon IP `172.16.10.11` is in CIDR network `172.16.10.0/24`
Pulling container image quay.io/ceph/ceph:v17...
Ceph version: ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)
Extracting ceph user uid/gid from container image...
Creating initial keys...
Creating initial monmap...
Creating mon...
Waiting for mon to start...
Waiting for mon...
mon is available
Assimilating anything we can from ceph.conf...
Generating new minimal ceph.conf...
Restarting the monitor...
Setting mon public_network to 172.16.10.0/24
Setting cluster_network to 10.16.10.0/24
Wrote config to /etc/ceph/ceph.conf
Wrote keyring to /etc/ceph/ceph.client.admin.keyring
Creating mgr...
Verifying port 9283 ...
Waiting for mgr to start...
Waiting for mgr...
mgr not available, waiting (1/15)...
mgr not available, waiting (2/15)...
mgr not available, waiting (3/15)...
mgr is available
Enabling cephadm module...
Waiting for the mgr to restart...
Waiting for mgr epoch 4...
mgr epoch 4 is available
Setting orchestrator backend to cephadm...
Generating ssh key...
Wrote public SSH key to /etc/ceph/ceph.pub
Adding key to root@localhost authorized_keys...
Adding host flyfish11...
Deploying mon service with default placement...
Deploying mgr service with default placement...
Deploying crash service with default placement...
Deploying ceph-exporter service with default placement...
Deploying prometheus service with default placement...
Deploying grafana service with default placement...
Deploying node-exporter service with default placement...
Deploying alertmanager service with default placement...
Enabling the dashboard module...
Waiting for the mgr to restart...
Waiting for mgr epoch 8...
mgr epoch 8 is available
Generating a dashboard self-signed certificate...
Creating initial admin user...
Fetching dashboard port number...
Ceph Dashboard is now available at:

             URL: https://flyfish11:8443/
            User: admin
        Password: demo2023

Enabling client.admin keyring and conf on hosts with "admin" label
Saving cluster configuration to /var/lib/ceph/4b85c264-0f18-11ee-8f0e-000c294f2b6f/config directory
Enabling autotune for osd_memory_target
You can access the Ceph CLI as following in case of multi-cluster or non-default config:

        sudo /usr/sbin/cephadm shell --fsid 4b85c264-0f18-11ee-8f0e-000c294f2b6f -c /etc/ceph/ceph.conf -k /etc/ceph/ceph.client.admin.keyring

Or, if you are only running a single cluster on this host:

        sudo /usr/sbin/cephadm shell

Please consider enabling telemetry to help improve Ceph:

        ceph telemetry on

For more information see:

        https://docs.ceph.com/docs/master/mgr/telemetry/

Bootstrap complete.



Open the dashboard in a browser:
   https://172.16.10.11:8443/
      admin/demo2023


cephadm shell

ceph -s 

cluster:
    id:     5e6b64e2-0c1b-11ee-9481-000c29e19e3a
    health: HEALTH_WARN
            OSD count 0 < osd_pool_default_size 3

  services:
    mon: 1 daemons, quorum flyfish61 (age 13m)
    mgr: flyfish61.wczeib(active, since 12m)
    osd: 0 osds: 0 up, 0 in

  data:
    pools:   0 pools, 0 pgs
    objects: 0 objects, 0 B
    usage:   0 B used, 0 B / 0 B avail
    pgs:






ceph orch ps
---
alertmanager.flyfish61   flyfish61  *:9093,9094  running (13m)     2m ago  14m    12.0M        -  0.23.0   ba2b418f427c  0d4b31348264
ceph-exporter.flyfish61  flyfish61               running (14m)     2m ago  14m    5275k        -  17.2.6   9d81ef07be30  8090e8094fc4
crash.flyfish61          flyfish61               running (14m)     2m ago  14m    7535k        -  17.2.6   9d81ef07be30  9385f6a357ac
grafana.flyfish61        flyfish61  *:3000       running (13m)     2m ago  14m    46.0M        -  8.3.5    dad864ee21e9  4e9b40203012
mgr.flyfish61.wczeib     flyfish61  *:9283       running (15m)     2m ago  15m     508M        -  17.2.6   9d81ef07be30  3ee8d8bf39b5
mon.flyfish61            flyfish61               running (15m)     2m ago  15m    40.0M    2048M  17.2.6   9d81ef07be30  f83c6b6573f7
node-exporter.flyfish61  flyfish61  *:9100       running (14m)     2m ago  14m    16.3M        -  1.3.1    1dbe0e931976  37e4a0239347
prometheus.flyfish61     flyfish61  *:9095       running (13m)     2m ago  13m     129M        -  2.33.4   514e6a882f6e  fdd0ba0ab9f0
---




ceph orch ls
---
alertmanager   ?:9093,9094      1/1  2m ago     14m  count:1
ceph-exporter                   1/1  2m ago     14m  *
crash                           1/1  2m ago     14m  *
grafana        ?:3000           1/1  2m ago     14m  count:1
mgr                             1/2  2m ago     14m  count:2
mon                             1/5  2m ago     14m  count:5
node-exporter  ?:9100           1/1  2m ago     14m  *
prometheus     ?:9095           1/1  2m ago     14m  count:1

---
Relocate a service to another node:
  ceph orch apply grafana --placement="flyfish12"  


Grafana UI:

https://172.16.10.12:3000


prometheus:
      http://172.16.10.11:9095/targets


ceph mgr services 
---
{
    "dashboard": "https://172.16.10.11:8443/",
    "prometheus": "http://172.16.10.11:9283/"
}
---


1.6 Add Nodes to the Cluster

Export the Ceph images to the other ceph nodes

# docker images  |grep -v "REPOSITORY" |awk '{print $1":"$2}' |xargs docker save -o ceph-images.tar
# for i in {2..5};do scp ceph-images.tar ceph0$i:/root/;done

## Import the images on ceph02-05
# docker load -i ceph-images.tar


Copy the ceph.pub public key to the other ceph nodes

# ssh-copy-id -f -i /etc/ceph/ceph.pub ceph02
# ssh-copy-id -f -i /etc/ceph/ceph.pub ceph03
# ssh-copy-id -f -i /etc/ceph/ceph.pub ceph04
# ssh-copy-id -f -i /etc/ceph/ceph.pub ceph05

## Or copy with the following loop, run on ceph01
# for i in {2..5};do ssh-copy-id -f -i /etc/ceph/ceph.pub ceph0$i;done


Add the hosts to the storage cluster with cephadm. After the add command runs, the ceph and node-exporter images are pulled on the target node, which takes some time, so it helps to import the images on the nodes beforehand.


# cephadm shell 
# ceph orch host add flyfish12 172.16.10.12 --labels=mon,mgr
# ceph orch host add flyfish13 172.16.10.13 --labels=mon
# ceph orch host add flyfish14 172.16.10.14
# ceph orch host add flyfish15 172.16.10.15



List the hosts that have joined the cluster

# ceph orch host ls
HOST       ADDR          LABELS   STATUS
flyfish61  172.16.10.61  _admin
flyfish62  172.16.10.62  mon mgr
flyfish63  172.16.10.63  mon
flyfish64  172.16.10.64
flyfish65  172.16.10.65
5 hosts in cluster



yum install ceph

If the distro's own cephadm package is installed, remove it:

yum remove -y cephadm-16.2.7-14.oe2203sp1.noarch


ceph -s 

ceph orch ls 


ceph version
ceph -v 


1.7 Label the Nodes

Add labels to the nodes and adjust the number of mons

 Once nodes carry labels, later orchestration can be scoped by label.

 By default the _admin label is applied to the bootstrapped host of the storage cluster, and the
 client.admin keyring is distributed to that host (ceph orch client-keyring {ls|set|rm}).
 After this label is added to other hosts, /etc/ceph on those hosts will also receive the client.admin keyring.


## Add the _admin label to flyfish12 and flyfish13
# ceph orch host label add flyfish12 _admin
# ceph orch host label add flyfish13 _admin

## Add the mon label to flyfish11-flyfish14
# ceph orch host label add flyfish11 mon
# ceph orch host label add flyfish12 mon
# ceph orch host label add flyfish13 mon
# ceph orch host label add flyfish14 mon

## Add the mgr label to flyfish11 and flyfish12
# ceph orch host label add flyfish11 mgr
# ceph orch host label add flyfish12 mgr

### Adjust the number of mons placed by label
# ceph orch apply mon --placement="3 label:mon"
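The mgr label added above can be consumed the same way; a sketch mirroring the mon placement (2 matches the default count:2 reported by `ceph orch ls`):

# ceph orch apply mgr --placement="2 label:mgr"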


## List the hosts and check their labels
# ceph orch host ls
HOST       ADDR          LABELS          STATUS
flyfish11  172.16.10.11  _admin mon mgr
flyfish12  172.16.10.12  mon mgr _admin
flyfish13  172.16.10.13  mon _admin
flyfish14  172.16.10.14  mon
flyfish15  172.16.10.15
5 hosts in cluster

Remove a label
Note: removing the _admin label from a node does not delete the `ceph.client.admin.keyring` file already present on that node

ceph orch host label rm flyfish13 _admin
 




1.8 Add OSDs

Add OSDs to the Ceph cluster

Note: before adding a disk as an OSD, wipe it back to a raw, partition-free state

## https://rook.github.io/docs/rook/v1.10/Getting-Started/ceph-teardown/?h=sgdisk#zapping-devices
DISK="/dev/sdX"

## Zap the disk to a fresh, usable state (zap-all is important, b/c MBR has to be clean)
sgdisk --zap-all $DISK

## Wipe a large portion of the beginning of the disk to remove more LVM metadata that may be present
dd if=/dev/zero of="$DISK" bs=1M count=100 oflag=direct,dsync

## SSDs may be better cleaned with blkdiscard instead of dd
blkdiscard $DISK

## Inform the OS of partition table changes
partprobe $DISK

## Change the default replica count
# ceph config set global osd_pool_default_size 2


## Check which disks on each ceph node are usable; look at the `AVAILABLE` column
# ceph orch device ls
HOST       PATH          TYPE  DEVICE ID                                   SIZE  AVAILABLE  REFRESHED  REJECT REASONS
flyfish11  /dev/nvme0n1  ssd   VMware_Virtual_NVMe_Disk_VMware_NVME_0000   107G  Yes        22m ago
flyfish11  /dev/sdb      hdd                                               107G  Yes        22m ago
flyfish12  /dev/nvme0n1  ssd   VMware_Virtual_NVMe_Disk_VMware_NVME_0000   107G  Yes        19m ago
flyfish12  /dev/sdb      hdd                                               107G  Yes        19m ago
flyfish13  /dev/nvme0n1  ssd   VMware_Virtual_NVMe_Disk_VMware_NVME_0000   107G  Yes        18m ago
flyfish13  /dev/sdb      hdd                                               107G  Yes        18m ago
flyfish14  /dev/nvme0n1  ssd   VMware_Virtual_NVMe_Disk_VMware_NVME_0000   107G  Yes        18m ago
flyfish14  /dev/sdb      hdd                                               107G  Yes        18m ago
flyfish15  /dev/nvme0n1  ssd   VMware_Virtual_NVMe_Disk_VMware_NVME_0000   107G  Yes        18m ago
flyfish15  /dev/sda      hdd                                               107G  Yes        18m ago


## Next, initialize the OSD disks
## Wipe the specified disk back to a raw, partition-free state
# blkdiscard /dev/nvme0n2
# cephadm shell ceph orch device zap ceph01 /dev/sda
## Then do the same for the disks on the other nodes


## Add the OSDs
## ssd
# ceph orch daemon add osd flyfish11:/dev/nvme0n1
# ceph orch daemon add osd flyfish12:/dev/nvme0n1
# ceph orch daemon add osd flyfish13:/dev/nvme0n1
# ceph orch daemon add osd flyfish14:/dev/nvme0n1
# ceph orch daemon add osd flyfish15:/dev/nvme0n1

## hdd
# ceph orch daemon add osd flyfish11:/dev/sdb
# ceph orch daemon add osd flyfish12:/dev/sdb
# ceph orch daemon add osd flyfish13:/dev/sdb
# ceph orch daemon add osd flyfish14:/dev/sdb
# ceph orch daemon add osd flyfish15:/dev/sda
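Once the OSDs are added, it is worth confirming they are up and carry the expected device classes (ssd/hdd), since the CRUSH rules in the next section select by class:

# ceph osd tree
# ceph osd df
# ceph orch ps --daemon_type=osd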
          


1.9 Create Storage Rules and Bind Them to Pools

Create CRUSH rules for each device class and associate them with pools

1. Create the pools

# ceph osd pool create ssdpool 256 256
# ceph osd pool create hddpool 256 256
## List the pools
# ceph osd lspools
1 .mgr
2 ssdpool
3 hddpool

2. Create a rule for each device class

# ceph osd crush rule create-replicated ssd default host ssd
# ceph osd crush rule create-replicated hdd default host hdd
## List the CRUSH rules
# ceph osd crush rule ls
replicated_rule
ssd
hdd

3. Set each pool to use its rule

ceph osd pool set ssdpool crush_rule ssd
ceph osd pool set hddpool crush_rule hdd
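Confirm that each pool picked up its rule:

# ceph osd pool get ssdpool crush_rule
# ceph osd pool get hddpool crush_rule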

4. Delete a pool

## When deleting a pool, the pool name must be given twice; with the name given only once, the command fails as shown
# ceph osd pool rm testpool --yes-i-really-really-mean-it
Error EPERM: WARNING: this will *PERMANENTLY DESTROY* all data stored in pool testpool.  If you are *ABSOLUTELY CERTAIN* that is what you want, pass the pool name *twice*, followed by --yes-i-really-really-mean-it.
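For reference, the deletion only goes through when the pool name is passed twice and the monitors allow pool deletion; a sketch (mon_allow_pool_delete may already be enabled in your cluster):

# ceph config set mon mon_allow_pool_delete true
# ceph osd pool rm testpool testpool --yes-i-really-really-mean-it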


1.10 Deploy the MDS Metadata Service

The MDS daemon serves CephFS (the Ceph file system). MDS runs in active/standby mode, i.e. a CephFS uses only one active MDS daemon. There are several ways to configure the MDS service; two broadly similar ones are covered here, the second of which is sketched at the end of this section.
First, create the CephFS, then deploy the MDS with a placement spec:

## 1. Create the CephFS pools
# ceph osd pool create cephfs_data 128 128
# ceph osd pool create cephfs_metadata 64 64

## 2. Create the file system from the metadata and data pools
# ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 5 and data pool 4

## 3. Deploy the MDS service with ceph orch apply
# ceph orch apply mds cephfs --placement="3 flyfish11 flyfish12 flyfish13"
Scheduled mds.cephfs update...

## Finally, check the status
# ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]

# ceph fs status
# ceph orch ps --daemon_type=mds
# ceph -s

## List the services
# ceph orch ls 
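The second, broadly equivalent approach mentioned at the start of this section lets Ceph create the pools and the file system in one step and schedule the MDS daemons at the same time; a sketch:

# ceph fs volume create cephfs --placement="3 flyfish11 flyfish12 flyfish13"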
