Table of Contents

  • Ceph installation
  • Environment
  • Preparation
  • 1. Install ceph-deploy
  • 2. Configure time synchronization
  • 3. Configure passwordless SSH between nodes
  • 4. Firewall configuration
  • 5. Install the yum priorities plugin
  • Deploy the Ceph cluster
  • 1. Create the Ceph working directory (node-1)
  • 2. Create the cluster
  • 3. Install Ceph
  • 4. Install the monitor
  • 5. Install ceph-mgr
  • 6. Create OSDs
  • 7. Create the metadata server (MDS)
  • 8. Expand the cluster
  • 8.1 Add monitors
  • 8.2 Add managers
  • 8.3 Create an RGW instance
  • 8.4 Enable the dashboard
  • Reinstall
  • Using RBD
  • 1. Block device pool
  • 2. Create a block device image
  • 3. Resize block devices
  • 4. Image snapshots
  • 5. Clones
  • 6. Dependencies: children/flatten
  • 7. Client usage
  • 7.1 Install Ceph on the client
  • 7.2 Copy the configuration file and keyring
  • 7.3 Create a block device image
  • 7.4 Map it as a block device
  • 7.5 Format, mount, and use
  • 8. RBD QoS settings
  • 8.1 RBD QoS parameters
  • Using CephFS
  • 1. Create pools
  • 2. Create the filesystem on the admin node
  • 3. Configure multiple active MDS daemons
  • 4. Client node configuration
  • 5. View mounted clients
  • 5.1 Manually evict a client
  • 6. Restore the filesystem from a pool snapshot
  • 7. Add multiple pools to the filesystem
  • 8. Set the pool used by different directories
  • 9. Quotas
  • Using RGW
  • 1. Create users
  • 2. S3
  • 3. Swift
  • Commands
  • ceph.conf reference


Ceph installation

Environment

OS version: CentOS 7.6

Kernel version: 4.19.61

Ceph version: 13.2.5 Mimic

Networks: management network: 10.200.46.0/24

Cluster network: 10.200.47.0/24

Public (business) network: 10.200.45.0/24

Preparation

Add hostname-to-IP mappings for all nodes on every node (/etc/hosts).

1. Install ceph-deploy

A leading # means the command is run as root; $ means it is run as the ymceph user.

Add the Ceph repository on the ceph-deploy admin node (node-1):

//install the EPEL repository
# yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
//configure the Ceph repository
# cat /etc/yum.repos.d/ceph.repo
[ceph-noarch]
name=Ceph noarch packages
baseurl=https://download.ceph.com/rpm-mimic/el7/noarch    //use the stable release, here mimic
enabled=1
gpgcheck=1
type=rpm-md
gpgkey=https://download.ceph.com/keys/release.asc
# yum update
//install ceph-deploy
# yum install ceph-deploy -y
2. Configure time synchronization
//all nodes
# setenforce 0
# sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config 
# yum install chrony -y
//node-1
# vim /etc/chrony.conf
allow 10.200.46.0/24
# systemctl restart chronyd
# chronyc sources
//other nodes
# vim /etc/chrony.conf
//comment out the four default "server" lines and add:
server 10.200.46.30 iburst
# systemctl restart chronyd
# chronyc sources
210 Number of sources = 1
MS Name/IP address         Stratum Poll Reach LastRx Last sample               
===============================================================================
^* zhc-ceph-node-1               3   6    77     2   +548us[+3835us] +/-   29ms
3. Configure passwordless SSH between nodes
//all nodes
# yum install openssh-server -y
# useradd ymceph
# passwd ymceph    //set the password (cf1234)
# visudo
ymceph ALL=(ALL)  NOPASSWD:ALL
# su - ymceph
$ echo "ymceph ALL=(root) NOPASSWD:ALL" |sudo tee /etc/sudoers.d/ymceph
$ sudo chmod 0440 /etc/sudoers.d/ymceph
//node-1
$ ssh-keygen
$ ssh-copy-id ymceph@10.200.46.31
$ ssh-copy-id ymceph@10.200.46.32
4. Firewall configuration
# firewall-cmd --zone=public --add-service=ceph-mon --permanent
# firewall-cmd --zone=public --add-service=ceph --permanent
# firewall-cmd --reload
5. Install the yum priorities plugin
# yum install yum-plugin-priorities -y
# cat /etc/yum/pluginconf.d/priorities.conf 
[main]
enabled = 1
//optionally set the priority of the Ceph repository to 1
# vim /etc/yum.repos.d/ceph.repo
priority=1

Deploy the Ceph cluster

1. Create the Ceph working directory (node-1)
//run as the ymceph user
$ mkdir ceph-cluster
$ cd ceph-cluster/
2. Create the cluster
$ ceph-deploy new zhc-ceph-node-1  zhc-ceph-node-2 zhc-ceph-node-3 --cluster-network 10.200.47.0/24 --public-network 10.200.45.0/24
$ ls
ceph.conf  ceph-deploy-ceph.log  ceph.mon.keyring
3. Install Ceph
# yum install cryptsetup -y
//method 1 (this may time out and fail to install; if so, use method 2)
$ ceph-deploy install zhc-ceph-node-1 zhc-ceph-node-2 zhc-ceph-node-3
//method 2: download the packages locally first, then copy cephdir to the other nodes (configure the repos first)
# yum install ceph ceph-radosgw --downloadonly --downloaddir=cephdir 
//all nodes
# cd cephdir
# yum localinstall *.rpm -y
# ceph --version
ceph version 13.2.6 (7b695f835b03642f85998b2ae7b6dd093d9fbce4) mimic (stable)

ubuntu:

# apt-get install -y openssh-server
# wget 'http://mirrors.163.com/ceph/keys/release.asc'
# apt-key add release.asc
# vim /etc/apt/sources.list.d/ceph_mimic.list
deb http://mirrors.163.com/ceph/debian-mimic/ xenial main
# apt update
# apt install ceph ceph-osd ceph-mds ceph-mon radosgw -y
//copy the cluster configuration file into the config directory

node-1:

apt-get install -y ceph-deploy --allow-unauthenticated
su - ymceph
ceph-deploy new hostname1 hostname2 hostname3
vim ceph.conf    //see the ceph.conf reference below
ceph-deploy install hostname1 hostname2 hostname3
ceph-deploy mon create-initial
ceph-deploy admin hostname1 hostname2 hostname3

4. Install the monitor
$ cd ceph-cluster/
//deploy the initial monitors and gather the keys
$ ceph-deploy mon create-initial
//copy the configuration file and keys to all nodes
$ ceph-deploy admin zhc-ceph-node-1 zhc-ceph-node-2 zhc-ceph-node-3
5. Install ceph-mgr
//available since Luminous; needed for the dashboard
$ ceph-deploy mgr create zhc-ceph-node-1
$ sudo systemctl status ceph-mgr@zhc-ceph-node-1
6. Create OSDs

db: stores metadata generated internally by BlueStore. It should generally be no smaller than 4% of the block device; for example, a 1 TB device should get a db larger than 40 GB.

wal: stores the .log files generated internally by RocksDB; usually sized the same as the db.
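As a rough worked example of the 4% rule (the 4 TiB disk size here is an assumption, not part of this setup):

//db size for a 4 TiB (4096 GiB) data disk
# echo $(( 4096 * 4 / 100 ))
163
//so the db LV should be at least ~164 GiB, and the wal LV sized the same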

//add each node's sdb disk as an OSD
$ ceph-deploy osd create --data /dev/sdb zhc-ceph-node-1
$ ceph-deploy osd create --data /dev/sdb zhc-ceph-node-2
$ ceph-deploy osd create --data /dev/sdb zhc-ceph-node-3
# ceph-volume lvm zap --destroy /dev/sdb
# ceph-volume lvm create --bluestore --data /dev/sdb --block.db /dev/sda2 --block.wal /dev/sda1    ///dev/sda2 is an SSD partition
//(if db and wal are pointed at the same device at creation time, it fails with "RuntimeError: Command failed with exit code 250: /usr/bin/ceph-osd --cluster ceph --osd-objectstore bluestore --mkfs -i 0 --monmap /var/lib/ceph/osd/ceph-0/activate.monmap --keyfile - --bluestore-block-wal-path /dev/ceph/db --bluestore-block-db-path /dev/ceph/db --osd-data /var/lib/ceph/osd/ceph-0/ --osd-uuid c7c9395d-036b-4311-abc8-d0e5cf9a7fd3 --setuser ceph --setgroup ceph"; running that command manually just hangs with no output)
//all nodes
# pvcreate /dev/sda
# vgcreate ceph /dev/sda
# for i in {1..10};do lvcreate -n db$i -L 100G ceph;done
# for i in {1..10};do lvcreate -n wal$i -L 40G ceph;done
# cat createosd.sh
#!/bin/bash
j=1
while [ $j -le 10 ]
do
    for i in {b..k};do
        ceph-volume lvm create --bluestore --data /dev/sd$i --block.db ceph/db$j --block.wal ceph/wal$j
        let j++
    done
done
//list the current LVM OSDs
# ceph-volume lvm list
//after installation, check the cluster status from any node
$ sudo ceph -s
  cluster:
    id:     e3d46ea9-50fd-4339-95f4-a79610e0bd55
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum zhc-ceph-node-1
    mgr: zhc-ceph-node-1(active)
    osd: 3 osds: 3 up, 3 in

  data:
    pools:   0 pools, 0 pgs
    objects: 0  objects, 0 B
    usage:   3.0 GiB used, 6.5 TiB / 6.5 TiB avail
    pgs:
7. Create the metadata server (MDS)
$ ceph-deploy mds create zhc-ceph-node-1 [2] [3]
$ sudo systemctl status ceph-mds@zhc-ceph-node-1
8. Expand the cluster
8.1 Add monitors
$ ceph-deploy mon add zhc-ceph-node-2
$ ceph-deploy mon add zhc-ceph-node-3
//check the monitors
$ sudo ceph quorum_status --format json-pretty
8.2 Add managers
$ ceph-deploy --overwrite-conf mgr create zhc-ceph-node-2
8.3 Create an RGW instance
//to use the Ceph object gateway, an RGW instance must be deployed
$ ceph-deploy rgw create zhc-ceph-node-1
$ sudo iptables -A IN_public_allow -p tcp -m tcp --dport 7480 -m conntrack --ctstate NEW -j ACCEPT    //open access to the RGW instance port
//the default port is 7480; to change it, add the following to ceph.conf and restart the RGW service:
[client.rgw.zhc-ceph-node-1]    //zhc-ceph-node-1 is the name of the node running the service
rgw_frontends = "civetweb port=80"
8.4 Enable the dashboard
# ceph mgr module enable dashboard
//add to the configuration file
[mon]
mgr initial modules = dashboard
# ceph dashboard create-self-signed-cert
Self-signed certificate created
# cd /home/ymceph/ceph-cluster
# mkdir ssl;cd ssl
# openssl req -new -nodes -x509 -subj "/O=IT/CN=ceph-mgr-dashboard" -days 3650 -keyout dashboard.key -out dashboard.crt -extensions v3_ca
# ls
dashboard.crt  dashboard.key
# ceph mgr module disable dashboard
# ceph mgr module enable dashboard
# ceph config set mgr mgr/dashboard/server_addr 10.200.45.30
# ceph config set mgr mgr/dashboard/server_port 7000
# ceph dashboard set-login-credentials admin admin
# ceph mgr services
{
    "dashboard": "https://10.200.45.30:7000/"     //if the new port does not take effect, try restarting the mgr service
}
//RGW needs additional configuration before it appears in the dashboard
# radosgw-admin user create --uid=rgwwebuser --display-name=rgwwebuser --system
{
    "user_id": "rgwwebuser",
    "display_name": "rgwwebuser",
    "email": "",
    "suspended": 0,
    "max_buckets": 1000,
    "auid": 0,
    "subusers": [],
    "keys": [
        {
            "user": "rgwwebuser",
            "access_key": "1CNFD6SSAYKVQ6OMZRFU",
            "secret_key": "GLHLF6ouj2O2DuYVa7m5MrYdF9TOjuwvM4qOtSgJ"
        }
    ],
    "swift_keys": [],
    "caps": [],
    "op_mask": "read, write, delete",
    "system": "true",
    "default_placement": "",
    "placement_tags": [],
    "bucket_quota": {
        "enabled": false,
        "check_on_raw": false,
        "max_size": -1,
        "max_size_kb": 0,
        "max_objects": -1
    },
    "user_quota": {
        "enabled": false,
        "check_on_raw": false,
        "max_size": -1,
        "max_size_kb": 0,
        "max_objects": -1
    },
    "temp_url_keys": [],
    "type": "rgw",
    "mfa_ids": []
}
# ceph dashboard set-rgw-api-access-key 1CNFD6SSAYKVQ6OMZRFU
Option RGW_API_ACCESS_KEY updated
# ceph dashboard set-rgw-api-secret-key GLHLF6ouj2O2DuYVa7m5MrYdF9TOjuwvM4qOtSgJ
Option RGW_API_SECRET_KEY updated
Reinstall
ceph-deploy purge {ceph-node} [{ceph-node}]
ceph-deploy purgedata {ceph-node} [{ceph-node}]
ceph-deploy forgetkeys
rm ceph.*

After modifying the configuration file, sync it to all nodes:
$ ceph-deploy --overwrite-conf admin zhc-ceph-node-1 zhc-ceph-node-2 zhc-ceph-node-3

Check the status of all Ceph services on the current node: # systemctl status ceph*.service ceph*.target

View an OSD's device information: ceph-volume lvm list {/dev/sdb}

Using RBD

1. Block device pool
# ceph osd pool create rbd 256
# rbd pool init rbd
# rados lspools
.rgw.root
default.rgw.control
default.rgw.meta
default.rgw.log
rbd
# ceph osd pool rename currentname newname   //rename a pool
//pool snapshots
# ceph osd pool mksnap pool1 pool1_snap
created pool pool1 snap pool1_snap
# ceph osd pool rmsnap pool1 pool1_snap
removed pool pool1 snap pool1_snap
# rados -p pool1 lssnap
0 snaps
//delete a pool
# ceph osd pool delete pool1 pool1 --yes-i-really-really-mean-it
//list the objects in a pool
# rados -p rbd ls
rbd_object_map.37d96b8b4567.0000000000000004
rbd_id.image3
rbd_header.8afc6b8b4567
rbd_header.142b6b8b4567
rbd_object_map.37d96b8b4567
rbd_directory
rbd_header.37d96b8b4567
rbd_id.image2
rbd_children
rbd_info
rbd_id.b1
rbd_object_map.142b6b8b4567
rbd_object_map.38cf6b8b4567
rbd_trash
rbd_header.38cf6b8b4567
rbd_id.image1
//set a pool quota
# ceph osd pool set-quota quota-test max_bytes 5368709120
# ceph osd pool get-quota quota-test
2. Create a block device image

Images are thin provisioned: they do not actually consume any physical storage until data is written to them. They do, however, have a maximum capacity, set with the --size option.

# rbd create --size 10240 rbd/image1
# rbd ls rbd     //the trailing "rbd" is the pool name; the default is rbd
image1
//list block devices pending deferred deletion (trash) in the rbd pool
# rbd trash ls rbd
//view image information
# rbd info rbd/image1
rbd image 'image1':
	size 10 GiB in 2560 objects
	order 22 (4 MiB objects)
	id: 142b6b8b4567
	block_name_prefix: rbd_data.142b6b8b4567
	format: 2
	features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
	op_features: 
	flags: 
	create_timestamp: Tue Aug  6 10:32:43 2019
//check which client is using the image
# rbd status rbd/image1
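To see the thin provisioning in practice, rbd du compares the provisioned size with what the image actually consumes; the output below is illustrative for a freshly created image:

# rbd du rbd/image1
NAME   PROVISIONED USED
image1      10 GiB  0 B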
3. Resize block devices
//resize an image
# rbd resize --size 102400 rbd/image1                 //grow
# rbd resize --size 10240 rbd/image1 --allow-shrink   //shrink
//delete an image
# rbd rm rbd/image2
//defer deletion by moving the block device to the trash
# rbd trash mv rbd/image2
# rbd trash ls
37b56b8b4567 image2
# rbd ls
image1
# rbd device list
id pool  image snap device    
0  pool1 a3    -    /dev/rbd0
//delete a deferred (trashed) block device
# rbd trash rm rbd/37b56b8b4567
Removing image: 100% complete...done.
//restore a deferred-deletion block device
# rbd trash restore rbd/37d96b8b4567        //add --image new-name to rename it while restoring
4. Image snapshots
# rbd snap create rbd/image2@image2snap
# rbd snap ls rbd/image2
SNAPID NAME         SIZE TIMESTAMP                
     4 image2snap 10 GiB Tue Aug  6 16:08:57 2019
//rollback
# rbd snap rollback rbd/image2@image2snap
//delete a snapshot
# rbd snap rm rbd/image2@image2snap1     //use purge instead of rm to delete all snapshots
//protect a snapshot
# rbd snap protect rbd/image2@image2snap
//view
# rbd ls rbd -l
NAME                 SIZE PARENT FMT PROT LOCK 
image1            100 GiB          2           
image2             10 GiB          2           
image2@image2snap  10 GiB          2 yes
//unprotect a snapshot
# rbd snap unprotect rbd/image2@image2snap

To delete a snapshot, it must not be in the protected state.

A snapshot that has child images can only be unprotected after its children are flattened.

To stop I/O you can use fsfreeze -f /mnt (-u to unfreeze). For virtual machines, qemu-guest-agent is used to automatically freeze the filesystem when a snapshot is taken.
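A minimal sketch of quiescing a mounted RBD filesystem around a snapshot (the /mnt mountpoint and the snapshot name are assumptions):

# fsfreeze -f /mnt                              //freeze I/O on the mounted filesystem
# rbd snap create rbd/image1@consistent-snap    //take the snapshot while writes are frozen
# fsfreeze -u /mnt                              //unfreeze and resume I/O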

5. Clones

Before cloning, the snapshot must be in the protected state.

# rbd info rbd/image2@image2snap
rbd image 'image2':
	size 10 GiB in 2560 objects
	order 22 (4 MiB objects)
	id: 37d96b8b4567
	block_name_prefix: rbd_data.37d96b8b4567
	format: 2
	features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
	op_features: 
	flags: 
	create_timestamp: Tue Aug  6 10:53:06 2019
	protected: True
# rbd clone rbd/image2@image2snap rbd/image3
# rbd ls
image1
image2
image3
6. Dependencies: children/flatten
//view a snapshot's children
# rbd children rbd/image2@image2snap
rbd/image3
//flatten the image so it no longer depends on its parent
# rbd flatten rbd/image3
Image flatten: 100% complete...done.
# rbd children rbd/image2@image2snap
7. Client usage
7.1 Install Ceph on the client

You can install from the admin node with ceph-deploy, or install the packages directly.

# ceph-deploy --username user install client/ip   //username: a system user on the client with sudo privileges
7.2 Copy the configuration file and keyring
# cd /etc/ceph/
# ceph-deploy --username yumeixian admin 10.200.10.96
7.3 Create a block device image
# rbd create pool2/a1 --size 40960 --image-feature layering -m 10.200.45.30 -k /etc/ceph/ceph.client.admin.keyring
# rbd ls pool2
a1
7.4 Map it as a block device
# rbd map pool2/a1 --name client.admin -m 10.200.45.30 -k /etc/ceph/ceph.client.admin.keyring
/dev/rbd0
# rbd showmapped 
id pool  image snap device    
0  pool2 a1    -    /dev/rbd0
//nbd
# rbd-nbd map quota-test/q1 -k /etc/ceph/ceph.client.admin.keyring -c /etc/ceph/ceph.conf 
# rbd-nbd list-mapped
id     pool       image snap device    
106789 quota-test q1    -    /dev/nbd0
# rbd-nbd unmap /dev/nbd0

nbd:

I/O goes through nbd first and is then handed to librbd; only librbd has the rbd cache.

The default CentOS kernel does not include the nbd module, so a kernel upgrade is required.

Install rbd-nbd:

yum install -y rbd-nbd ;apt install rbd-nbd

Load the nbd module and check its information:

modprobe nbd ;modinfo nbd
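Because only librbd has the rbd cache, any cache tuning for rbd-nbd belongs in the [client] section of the client's ceph.conf; the values below are examples (see the ceph.conf reference at the end), and krbd mappings made with rbd map bypass this cache entirely:

[client]
rbd cache = true
rbd cache size = 335544320                      //librbd cache, used by rbd-nbd and other librbd clients
rbd cache writethrough until flush = false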

7.5 Format, mount, and use
# mkfs.ext4 -m0 /dev/rbd/pool2/a1 
# mkfs.xfs /dev/rbd/pool2/a2 
# mount /dev/rbd/pool2/a1 /ceph
# df -h
Filesystem      Size  Used Avail Use% Mounted on
/dev/rbd0        40G   48M   40G   1% /ceph1
8. RBD QoS settings
8.1 RBD QoS parameters
  • conf_rbd_qos_iops_limit: IOPS limit
  • conf_rbd_qos_read_iops_limit: read IOPS limit
  • conf_rbd_qos_write_iops_limit: write IOPS limit
  • conf_rbd_qos_bps_limit: bandwidth (bytes/s) limit
  • conf_rbd_qos_read_bps_limit: read bandwidth (bytes/s) limit
  • conf_rbd_qos_write_bps_limit: write bandwidth (bytes/s) limit
# rbd create --size 10240 --image-feature layering data-bcache-test/f1
//view the current settings
# rbd image-meta list data-bcache-test/f1
//configure
# rbd image-meta set data-bcache-test/f1 conf_rbd_qos_iops_limit 1000
# rbd image-meta list data-bcache-test/f1
There is 1 metadatum on this image:
Key                     Value 
conf_rbd_qos_iops_limit 1000

Using CephFS

A Ceph filesystem requires at least two RADOS pools, one for data and one for metadata. When configuring these pools, consider:

1. Use a higher replication level for the metadata pool, because any data loss in this pool can render the whole filesystem unusable.

2. Put the metadata pool on low-latency storage (such as SSDs), because this directly affects the latency of client operations.

1. Create pools
# ceph osd pool create cephfs_data 64
# ceph osd pool create cephfs_metadata 64
# ceph osd pool set cephfs_data size 2    //the data pool's replica count can be set lower, otherwise it wastes space
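A hedged sketch of applying the two recommendations above to the metadata pool just created; the replicated_ssd CRUSH rule name is an assumption and must already exist in your cluster:

# ceph osd pool set cephfs_metadata size 3                       //higher replication for metadata
# ceph osd pool set cephfs_metadata crush_rule replicated_ssd    //pin the metadata pool to SSD-backed OSDs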
2. Create the filesystem on the admin node
# ceph fs new fs cephfs_metadata cephfs_data
# ceph fs ls    //view
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
//at this point the MDS status shows as active
# ceph mds stat
fs-1/1/1 up  {0=zhc-netmis-cephfs-test-1=up:active}
# ceph fs dump    //view detailed information
# ceph fs status
3. Configure multiple active MDS daemons
//from the admin node, install the MDS daemon on the other cluster nodes
$  ceph-deploy mds create zhc-netmis-cephfs-test-2  zhc-netmis-cephfs-test-3
# ceph mds stat
fs1-1/1/1 up  {0=zhc-netmis-cephfs-test-2=up:active}, 2 up:standby
# ceph fs status
fs1 - 2 clients
===
+------+--------+--------------------------+---------------+-------+-------+
| Rank | State  |           MDS            |    Activity   |  dns  |  inos |
+------+--------+--------------------------+---------------+-------+-------+
|  0   | active | zhc-netmis-cephfs-test-2 | Reqs:    0 /s |   61  |   23  |
+------+--------+--------------------------+---------------+-------+-------+
+-----------------+----------+-------+-------+
|       Pool      |   type   |  used | avail |
+-----------------+----------+-------+-------+
| cephfs_metadata | metadata | 1955k |  282G |
|   cephfs_data   |   data   | 2149M |  423G |
|       aaa       |   data   |    0  |  282G |
+-----------------+----------+-------+-------+
+--------------------------+
|       Standby MDS        |
+--------------------------+
| zhc-netmis-cephfs-test-1 |
| zhc-netmis-cephfs-test-3 |
+--------------------------+
//enable multiple active MDS daemons
# ceph fs set fs1 max_mds 2
# ceph mds stat
fs1-2/2/2 up  {0=zhc-netmis-cephfs-test-2=up:active,1=zhc-netmis-cephfs-test-3=up:active}, 1 up:standby
# ceph fs status
fs1 - 2 clients
===
+------+--------+--------------------------+---------------+-------+-------+
| Rank | State  |           MDS            |    Activity   |  dns  |  inos |
+------+--------+--------------------------+---------------+-------+-------+
|  0   | active | zhc-netmis-cephfs-test-2 | Reqs:    0 /s |   61  |   23  |
|  1   | active | zhc-netmis-cephfs-test-3 | Reqs:    0 /s |    0  |    0  |
+------+--------+--------------------------+---------------+-------+-------+
+-----------------+----------+-------+-------+
|       Pool      |   type   |  used | avail |
+-----------------+----------+-------+-------+
| cephfs_metadata | metadata | 1955k |  282G |
|   cephfs_data   |   data   | 2149M |  423G |
|       aaa       |   data   |    0  |  282G |
+-----------------+----------+-------+-------+
+--------------------------+
|       Standby MDS        |
+--------------------------+
| zhc-netmis-cephfs-test-1 |
+--------------------------+
4. Client node configuration
//as with RBD, install Ceph on the client first
# mkdir /cephfs
# cat /etc/ceph/ceph.client.admin.keyring 
[client.admin]
    key = AQBuxMJcGfRaABAAz7Fp9n1fokjU7cWiTcHtJw==
//save the key portion into a new file
# cat admin.secret
AQBuxMJcGfRaABAAz7Fp9n1fokjU7cWiTcHtJw==
# mount -t ceph 10.200.10.221,10.200.10.222:6789:/ /cephfs/ -o name=admin,secretfile=admin.secret    //mount the filesystem (kernel mode)
//mount at boot
# vim /etc/fstab
10.200.10.221,10.200.10.222:6789:/dir1 /cephfs ceph name=admin,secretfile=/etc/ceph/admin.secret,noatime,_netdev 0 2

If the mount fails with "mount error 110 = Connection timed out", the client kernel version may be too old. You can mount with the ceph-fuse command (userspace mode) instead, although performance is worse than a kernel-mode mount:

# ceph-fuse -m 10.200.10.221 /datacf/ --keyring /etc/ceph/ceph.client.admin.keyring --name client.admin
//mount at boot:
# cat  /etc/fstab
none  /datacf  fuse.ceph ceph.id=admin,_netdev,defaults  0 0
//subdirectories can also be mounted
# ls /datacf/
a a.txt hh
//use -r to mount a specific subdirectory
# ceph-fuse -m 10.200.10.221  -r /hh /test1/ --keyring /etc/ceph/ceph.client.admin.keyring --name client.admin
//after mounting, /test1 and /datacf/hh stay in sync
# ls /datacf/hh
aa  hdjsh  lll
# ls /test1/
aa  hdjsh  lll
5. View mounted clients
# ceph tell mds.0 client ls
2020-07-10 10:13:37.927 7fe288ff9700  0 client.34647 ms_handle_reset on 10.200.10.222:6804/1834138686
2020-07-10 10:13:37.939 7fe289ffb700  0 client.34653 ms_handle_reset on 10.200.10.222:6804/1834138686
[
    {
        "id": 34527,
        "num_leases": 0,
        "num_caps": 1,
        "state": "open",
        "request_load_avg": 0,
        "uptime": 1199.765451,
        "replay_requests": 0,
        "completed_requests": 1,
        "reconnecting": false,
        "inst": "client.34527 10.200.10.96:0/2718487558",
        "client_metadata": {
            "features": "00000000000001ff",
            "entity_id": "admin",
            "hostname": "zhc-ceph-client-1",
            "kernel_version": "4.19.61",
            "root": "/dir1"
        }
    },
    {
        "id": 34248,
        "num_leases": 0,
        "num_caps": 11,
        "state": "open",
        "request_load_avg": 0,
        "uptime": 1199.767451,
        "replay_requests": 0,
        "completed_requests": 0,
        "reconnecting": false,
        "inst": "client.34248 10.200.10.57:0/722559795",
        "client_metadata": {
            "features": "00000000000009ff",
            "ceph_sha1": "564bdc4ae87418a232fc901524470e1a0f76d641",
            "ceph_version": "ceph version 13.2.10 (564bdc4ae87418a232fc901524470e1a0f76d641) mimic (stable)",
            "entity_id": "admin",
            "hostname": "zhc-netmis-saltsyndic-test-1",
            "mount_point": "/datacf",
            "pid": "610390",
            "root": "/"
        }
    }
]
5.1 Manually evict a client
# ceph tell mds.0 client evict id=34527
# ll /cephfs    //back on the client, there is no longer permission to enter the directory
ls: cannot access /cephfs: Permission denied
//to use it again, unmount and remount
6. Restore the filesystem from a pool snapshot
//snapshot the metadata pool
# ceph osd pool mksnap cephfs_metadata snap1
//save the metadata object names
# for i in `rados -p cephfs_metadata ls`;do echo $i >> metalist;done
//simulate destroying the filesystem
# systemctl stop ceph-mds@zhc-netmis-cephfs-test-1.service 
# ceph mds fail 0
# ceph fs rm fs --yes-i-really-mean-it
//create a new filesystem
# ceph fs new fs1 cephfs_metadata cephfs_data --force    //it warns that the pools contain data from another filesystem; --force overrides this
# systemctl start ceph-mds@zhc-netmis-cephfs-test-1
# ceph mds stat    //check the status
//restore the previous filesystem
# systemctl stop ceph-mds@zhc-netmis-cephfs-test-1.service 
# for i in `cat metalist`;do rados -p cephfs_metadata rollback $i snap1;done
# systemctl restart ceph-mds@zhc-netmis-cephfs-test-1
7. Add multiple pools to the filesystem
//initial state
# ceph fs ls
name: fs1, metadata pool: cephfs_metadata, data pools: [cephfs_data]
//add a data pool named aaa to fs1
# ceph osd pool create aaa 8
# ceph fs add_data_pool fs1 aaa
# ceph fs ls
name: fs1, metadata pool: cephfs_metadata, data pools: [cephfs_data aaa ]
8. Set the pool used by different directories

The layout xattr for regular files is named ceph.file.layout, while for directories it is named ceph.dir.layout.

//view the layout of a file or directory
# getfattr -n ceph.file.layout /datacf/fi.txt
getfattr: Removing leading '/' from absolute path names
# file: datacf/fi.txt
ceph.file.layout="stripe_unit=4194304 stripe_count=1 object_size=4194304 pool=cephfs_data"
# setfattr -n ceph.dir.layout.stripe_count -v 2 a    //a directory has no explicit layout unless one has been customized for it
# setfattr -n ceph.dir.layout -v "pool=aaa" /datacf/b
//remove:
# setfattr -x ceph.dir.layout /datacf/hh
# getfattr -n ceph.dir.layout /datacf/a/
getfattr: Removing leading '/' from absolute path names
# file: datacf/a/
ceph.dir.layout="stripe_unit=4194304 stripe_count=2 object_size=4194304 pool=cephfs_data"
//change a directory's pool
# setfattr -n ceph.dir.layout.pool -v aaa /datacf/hh
# getfattr -n ceph.dir.layout /datacf/hh
getfattr: Removing leading '/' from absolute path names
# file: datacf/hh
ceph.dir.layout="stripe_unit=4194304 stripe_count=2 object_size=4194304 pool=aaa"
9. Quotas

If a quota is set on the pool, the mounted filesystem can only use up to the quota, but df still shows the size of the whole filesystem.

//set a quota on the data pool to limit how much clients can use; to remove the quota, set it to 0
# ceph osd pool set-quota cephfs_data max_bytes $((10*1024*1024*1024))
set-quota max_bytes = 10737418240 for pool cephfs_data

If the limit is set on a subdirectory instead, the mount shows the size of that subdirectory.

In testing it was still possible to write more than 10 GB of data. Some guides suggest adding --client-quota when mounting, but the mount then failed; in releases after Luminous this option is no longer supported and quotas are supposed to be recognized automatically, yet here they were not. The official docs note that Mimic updated the on-disk quota format and that Linux kernel clients >= 4.17 support the new format; testing confirmed the client kernel version was indeed too old. The safe approach is to give the subdirectory its own pool and set the quota on that pool.

//set a 10 GB quota on the dir1 directory
# setfattr -n ceph.quota.max_bytes -v $((10*1024*1024*1024)) /datacf/dir1
# getfattr -n ceph.quota.max_bytes /datacf/dir1/   //view the quota size
# setfattr -n ceph.quota.max_bytes -v 0 /datacf/dir1/     //remove the quota
//mount the subdirectory
# ceph-fuse -m 10.200.10.221 -r /dir1 /datacf/ --keyring /etc/ceph/ceph.client.admin.keyring --name client.admin
2020-07-09 10:55:50.106 7efe5bea8b80 -1 init, newargv = 0x2a39d40 newargc=7
ceph-fuse[126981]: starting ceph client
ceph-fuse[126981]: starting fuse
# df -h
Filesystem      Size  Used Avail Use% Mounted on
ceph-fuse        10G     0   10G   0% /datacf

Using RGW

1. Create users

Create users on a Ceph node. Different people or use cases should get different users; buckets created under different keys are not visible to each other.

# radosgw-admin user create --uid="radosgw" --display-name="First User"   //创建s3用户{    "user_id": "lsyuser",    "display_name": "First User",    "email": "",    "suspended": 0,    "max_buckets": 1000,    "auid": 0,    "subusers": [],    "keys": [        {            "user": "lsyuser",            "access_key": "2L19E94JGPHYIZ0992IY",                "secret_key": "5DAbQOzxZdjGjKp3yAEQ9WDxOXe26Ezo05UxZCO2"        }    ],    "swift_keys": [],    "caps": [],    "op_mask": "read, write, delete",    "default_placement": "",    "placement_tags": [],    "bucket_quota": {        "enabled": false,        "check_on_raw": false,        "max_size": -1,        "max_size_kb": 0,        "max_objects": -1    },    "user_quota": {        "enabled": false,        "check_on_raw": false,        "max_size": -1,        "max_size_kb": 0,        "max_objects": -1    },    "temp_url_keys": [],    "type": "rgw",    "mfa_ids": []}

A subuser must be created in order to use Swift.

# radosgw-admin subuser create --uid=radosgw --subuser=radosgw:swift --access=full
{
    "user_id": "radosgw",
    "display_name": "radosgw",
    "email": "",
    "suspended": 0,
    "max_buckets": 1000,
    "auid": 0,
    "subusers": [
        {
            "id": "radosgw:swift",
            "permissions": "full-control"
        }
    ],
    "keys": [
        {
            "user": "radosgw",
            "access_key": "Y326T07XWSCYAG6LCZ2P",
            "secret_key": "toCHP1BlY5Dta5nWqySEBtQGjtZwOmUJM9DOMSsE"
        }
    ],
    "swift_keys": [
        {
            "user": "radosgw:swift",
            "secret_key": "eCuvJmL2NbkgCAhGREDvYrtfDXRs064IbjIdwCtk"    //the key used by Swift
        }
    ],
    "caps": [],
    "op_mask": "read, write, delete",
    "default_placement": "",
    "placement_tags": [],
    "bucket_quota": {
        "enabled": false,
        "max_size_kb": -1,
        "max_objects": -1
    },
    "user_quota": {
        "enabled": false,
        "max_size_kb": -1,
        "max_objects": -1
    },
    "temp_url_keys": []
}

On the client:

2. S3
# apt install -y s3cmd
# s3cmd --configure
Enter new values or accept defaults in brackets with Enter.
Refer to user manual for detailed description of all options.

Access key and Secret key are your identifiers for Amazon S3. Leave them empty for using the env variables.
Access Key: 2L19E94JGPHYIZ0992IY
Secret Key: 5DAbQOzxZdjGjKp3yAEQ9WDxOXe26Ezo05UxZCO2
Default Region [US]: 

Encryption password is used to protect your files from reading
by unauthorized persons while in transfer to S3
Encryption password: 
Path to GPG program [/usr/bin/gpg]: 

When using secure HTTPS protocol all communication with Amazon S3
servers is protected from 3rd party eavesdropping. This method is
slower than plain HTTP, and can only be proxied with Python 2.7 or newer
Use HTTPS protocol [Yes]: no

On some networks all internet access must go through a HTTP proxy.
Try setting it here if you can't connect to S3 directly
HTTP Proxy server name: 

New settings:
  Access Key: 2L19E94JGPHYIZ0992IY
  Secret Key: 5DAbQOzxZdjGjKp3yAEQ9WDxOXe26Ezo05UxZCO2
  Default Region: US
  Encryption password: 
  Path to GPG program: /usr/bin/gpg
  Use HTTPS protocol: False
  HTTP Proxy server name: 
  HTTP Proxy server port: 0

Test access with supplied credentials? [Y/n] no
Save settings? [y/N] y
# vim .s3cfg   //change the following two entries to the service address (make sure the hostname resolves)
host_base = zhc-ceph-node-1:80
host_bucket = %(bucket)s.s3.zhc-ceph-node-1
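With s3cmd configured, basic bucket and object operations look like this (the bucket and file names here are just examples):

# s3cmd mb s3://first-bucket                       //create a bucket
# s3cmd put /etc/hosts s3://first-bucket/          //upload an object
# s3cmd ls s3://first-bucket                       //list objects in the bucket
# s3cmd get s3://first-bucket/hosts ./hosts.bak    //download an object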
3. Swift
# yum/apt install python-pip -y
# pip install --upgrade python-swiftclient
//list buckets
# swift -A http://zhc-ceph-node-1:80/auth/1.0 -U radosgw:swift -K eCuvJmL2NbkgCAhGREDvYrtfDXRs064IbjIdwCtk list
first-bucket
//create a bucket
# swift -A http://zhc-ceph-node-1:80/auth/1.0 -U radosgw:swift -K eCuvJmL2NbkgCAhGREDvYrtfDXRs064IbjIdwCtk post second-bucket
# s3cmd ls
2020-03-6 19:43 s3://first-bucket
2020-03-6 19:23 s3://second-bucket

Commands

#check the osd rbd_readahead_disable_after_bytes setting
ceph daemon osd.3 config show|grep rbd_readahead_disable_after_bytes
#change a setting online
ceph tell osd.* injectargs --rbd_cache_size 335544320
#PG-related commands
ceph pg stat
ceph pg dump
ceph pg 10.34 query
ceph pg dump_stuck unclean 
#check OSD latency
ceph osd perf

CephX reference: http://www.xuxiaopang.com/2017/08/23/easy-ceph-CephX/
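As a quick illustration of CephX (the client name and caps below are examples, not from this setup), a restricted user can be created with:

# ceph auth get-or-create client.rbduser mon 'allow r' osd 'allow rwx pool=rbd' -o /etc/ceph/ceph.client.rbduser.keyring
# ceph auth get client.rbduser    //show the key and caps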

ceph.conf reference:

[global]
fsid = 869a4f06-176e-4597-b1ce-ffe6ad8a06c8
public_network = 10.200.45.0/24
cluster_network = 10.200.47.0/24
mon_initial_members = zhc-netmis-ceph-bcachetest-1, zhc-netmis-ceph-bcachetest-2, zhc-netmis-ceph-bcachetest-3
mon_host = 10.200.4.130,10.200.4.131,10.200.4.132
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
max open files = 131072
osd pool default size = 3
osd pool default pg num = 256
osd pool default pgp num = 256
mon_max_pg_per_osd = 800
err_to_stderr = true
log_max_recent = 10000
log_to_stderr = false
mon_allow_pool_delete = true
mon_clock_drift_allowed = 2.000000
mon_clock_drift_warn_backoff = 30.000000
ms_dispatch_throttle_bytes = 2097152000
objecter_inflight_op_bytes = 3048576000
objecter_inflight_ops = 819200
osd_client_message_cap = 5000
osd_client_message_size_cap = 2147483648
osd_deep_scrub_stride = 131072
osd_map_cache_size = 1024
osd_max_write_size = 512
osd_objectstore = bluestore
osd_pg_object_context_cache_count = 2048
osd_pool_default_min_size = 2
rocksdb_separate_wal_dir = true
rbd_cache_max_dirty = 2516582400
rbd_cache_max_dirty_age = 30.000000
rbd_cache_target_dirty = 167772160
rbd_op_threads = 32
rbd_default_stripe_unit = 524288
rbd_default_stripe_count = 16
rbd readahead disable after bytes = 0
rbd cache writethrough until flush = false
rbd readahead max bytes = 8194304
debug lockdep = 0/0
debug context = 0/0
debug crush = 0/0
debug buffer = 0/0
debug timer = 0/0
debug filer = 0/0
debug objecter = 0/0
debug rados = 0/0
debug rbd = 0/0
debug journaler = 0/0
debug objectcatcher = 0/0
debug client = 0/0
debug osd = 0/0
debug optracker = 0/0
debug objclass = 0/0
debug filestore = 0/0
debug journal = 0/0
debug ms = 0/0
debug mon = 0/0
debug monc = 0/0
debug tp = 0/0
debug auth = 0/0
debug finisher = 0/0
debug heartbeatmap = 0/0
debug perfcounter = 0/0
debug asok = 0/0
debug throttle = 0/0
debug paxos = 0/0
debug rgw = 0/0
debug_bdev = 0/0
debug_bluefs = 0/0
debug_bluestore = 0/0
debug_civetweb = 0/0
debug_compressor = 0/0
debug_crypto = 0/0
debug_dpdk = 0/0
debug_eventtrace = 0/0
debug_fuse = 0/0
debug_javaclient = 0/0
debug_kinetic = 0/0
debug_kstore = 0/0
debug_leveldb = 0/0
debug_mds = 0/0
debug_mds_balancer = 0/0
debug_mds_locker = 0/0
debug_mds_log = 0/0
debug_mds_log_expire = 0/0
debug_mds_migrator = 0/0
debug_memdb = 0/0
debug_mgr = 0/0
debug_mgrc = 0/0
debug_none = 0/0
debug_rbd_mirror = 0/0
debug_rbd_replay = 0/0
debug_refs = 0/0
debug_reserver = 0/0
debug_rocksdb = 0/0
debug_striper = 0/0
debug_xio = 0/0
ms_bind_before_connect = true
mon_osd_min_down_reporters = 13
osd_recovery_max_active = 1
osd_recovery_max_single_start = 1
osd_recovery_sleep = 0.5
osd_recovery_op_priority = 3
osd_client_op_priority = 63
osd_max_backfills = 1
bluestore rocksdb options = compression=kNoCompression,max_write_buffer_number=32,min_write_buffer_number_to_merge=2,recycle_log_file_num=32,compaction_style=kCompactionStyleLevel,write_buffer_size=67108864,target_file_size_base=67108864,max_background_compactions=31,level0_file_num_compaction_trigger=8,level0_slowdown_writes_trigger=32,level0_stop_writes_trigger=64,max_bytes_for_level_base=536870912,compaction_threads=32,max_bytes_for_level_multiplier=8,flusher_threads=8,compaction_readahead_size=2097152

[osd]
osd mkfs type = xfs
osd op threads = 8
ms crc data = false
bluestore_shard_finishers = true
bluestore_cache_autotune = false
bluestore_cache_kv_ratio = 0.2
bluestore_cache_meta_ratio = 0.8
bluestore_csum_type = none
bluestore extent map shard max size = 200
bluestore extent map shard min size = 50
osd map share max epochs = 100
osd memory target = 4294967296
osd op num shards = 8
osd op num threads per shard = 2
osd min pg log entries = 10
osd max pg log entries = 10
osd pg log dups tracked = 10
osd pg log trim min = 10
osd scrub begin hour = 0
osd scrub end hour = 6
osd scrub chunk min = 1
osd scrub chunk max = 1
osd scrub sleep = 3
osd deep scrub interval = 2419200
osd_mon_heartbeat_interval = 40
throttler_perf_counter = false

[mon]
mgr modules = dashboard
mon allow pool delete = true

[mds]
mds cache size = 250000

[client]    //the [client] section takes effect when configured on the client side
rbd cache = true
rbd cache size = 335544320
rbd_cache_max_dirty = 360994944
rbd_cache_target_dirty = 190663296
rbd_cache_max_dirty_age = 10
rbd_op_threads = 1
rbd_cache_writethrough_until_flush = false
rbd_qos_iops_limit = 3000