- cluster模式是为了解决单机redis存储容量有限的问题。比如我们有一些4核8G的机子,如果使用主从复制集群或者哨兵集群模式,那么理论上最多只能存储8G的数据。如果现在要在缓存中存储20G的数据,就需要使用到cluster模式。cluster模式将数据按照一定的规则分配存储在不同的机器上。
- 使用cluster集群,只需要将redis配置文件中的
cluster-enabled
配置打开(设置为yes)即可。
- 每个cluster集群中至少要3个master节点才能正常运行。
特点
- 多个redis节点网络互联,数据共享
- 所有节点都是一主N从,其中从节点不提供服务,仅作为备份。
- 不支持跨slot同时处理多个key(如MSET/MGET)的操作:只有当所有key都落在同一个slot时(例如使用hash tag,如{user}:1、{user}:2)才能在一条命令中处理。因为redis需要把key均匀的分布在各个节点上,在并发量很高的情况下同时创建多个key会降低集群性能并导致不可预测的行为
- 支持在线增加、删除节点
- 客户端可以连接任何一个主节点进行读写
- 集群测试
在一台机器上启动3个主节点,每个主节点带一个从节点,一共6个节点。
配置文件为:
#3个主节点配置文件
redis7001.conf
redis7002.conf
redis7003.conf
#3个从节点配置文件
redis7004.conf
redis7005.conf
redis7006.conf
#修改相应的配置项 以7001为例
daemonize yes
port 7001
pidfile /var/run/redis_7001.pid
logfile "7001.log"
dbfilename dump7001.rdb
cluster-enabled yes
cluster-config-file nodes-7001.conf
cluster-node-timeout 15000
- 启动6个节点
[root@localhost bin]# redis-server /opt/redisconfig/redis7001.conf
[root@localhost bin]# redis-server /opt/redisconfig/redis7002.conf
[root@localhost bin]# redis-server /opt/redisconfig/redis7003.conf
[root@localhost bin]# redis-server /opt/redisconfig/redis7004.conf
[root@localhost bin]# redis-server /opt/redisconfig/redis7005.conf
[root@localhost bin]# redis-server /opt/redisconfig/redis7006.conf
[root@localhost bin]#
[root@localhost bin]# ps -ef|grep redis
root 2291 1 0 10:58 ? 00:00:00 redis-server 127.0.0.1:7001 [cluster]
root 2296 1 0 10:59 ? 00:00:00 redis-server 127.0.0.1:7002 [cluster]
root 2301 1 0 10:59 ? 00:00:00 redis-server 127.0.0.1:7003 [cluster]
root 2306 1 0 10:59 ? 00:00:00 redis-server 127.0.0.1:7004 [cluster]
root 2311 1 0 10:59 ? 00:00:00 redis-server 127.0.0.1:7005 [cluster]
root 2316 1 0 10:59 ? 00:00:00 redis-server 127.0.0.1:7006 [cluster]
root 2321 2265 0 10:59 pts/1 00:00:00 grep --color=auto redis
- 从redis源文件目录的src目录下(/opt/redis-5.0.8/src/)拷贝redis-trib.rb文件到安装目录下,这里是/usr/local/bin/(注意:从redis 5.0开始,redis-trib.rb的功能已经集成到redis-cli --cluster中,下面创建集群时实际使用的是redis-cli,此步骤仅在旧版本中需要)
cp /opt/redis-5.0.8/src/redis-trib.rb /usr/local/bin/
- 创建集群 redis当前版本为redis_version:5.0.8 使用redis-cli创建
./redis-cli --cluster create 127.0.0.1:7001 127.0.0.1:7002 127.0.0.1:7003 127.0.0.1:7004 127.0.0.1:7005 127.0.0.1:7006 --cluster-replicas 1
集群创建成功!!!
查看nodes-7001.conf 文件
dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 127.0.0.1:7003@17003 master - 0 1586576184000 3 connected 10923-16383
9c6e3319d2aeeb383df6f5d9eb4f6eb5818af85a 127.0.0.1:7004@17004 slave d14b5c6791230896ad037dcac9b803bcdfc8541b 0 1586576182000 4 connected
f0abb788e567719e325a43639463ed5806332217 127.0.0.1:7002@17002 master - 0 1586576184030 2 connected 5461-10922
64c680e880d29257c4526b9e210eae93e2465447 127.0.0.1:7006@17006 slave dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 0 1586576183000 6 connected
d14b5c6791230896ad037dcac9b803bcdfc8541b 127.0.0.1:7001@17001 myself,master - 0 1586576183000 1 connected 0-5460
ae2b18ad37729287000ef0cdbc0e65cb1431c2c3 127.0.0.1:7005@17005 slave f0abb788e567719e325a43639463ed5806332217 0 1586576185038 5 connected
vars currentEpoch 6 lastVoteEpoch 0
- 随便选择一个节点 按照集群方式登录
redis-cli -c -p 7001 #-c选项 按照集群方式登陆
#查看集群信息
127.0.0.1:7001> CLUSTER INFO
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:1
cluster_stats_messages_ping_sent:206
cluster_stats_messages_pong_sent:218
cluster_stats_messages_sent:424
cluster_stats_messages_ping_received:213
cluster_stats_messages_pong_received:206
cluster_stats_messages_meet_received:5
cluster_stats_messages_received:424
#查看集群节点 和node配置文件一样
127.0.0.1:7001> CLUSTER nodes
dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 127.0.0.1:7003@17003 master - 0 1586576547000 3 connected 10923-16383
9c6e3319d2aeeb383df6f5d9eb4f6eb5818af85a 127.0.0.1:7004@17004 slave d14b5c6791230896ad037dcac9b803bcdfc8541b 0 1586576542000 4 connected
f0abb788e567719e325a43639463ed5806332217 127.0.0.1:7002@17002 master - 0 1586576544748 2 connected 5461-10922
64c680e880d29257c4526b9e210eae93e2465447 127.0.0.1:7006@17006 slave dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 0 1586576547769 6 connected
d14b5c6791230896ad037dcac9b803bcdfc8541b 127.0.0.1:7001@17001 myself,master - 0 1586576546000 1 connected 0-5460
ae2b18ad37729287000ef0cdbc0e65cb1431c2c3 127.0.0.1:7005@17005 slave f0abb788e567719e325a43639463ed5806332217 0 1586576545755 5 connected
#设值
127.0.0.1:7001> set k1 v1
-> Redirected to slot [12706] located at 127.0.0.1:7003
OK
127.0.0.1:7003> set k2 v2
-> Redirected to slot [449] located at 127.0.0.1:7001
OK
127.0.0.1:7001> set k3 v3
OK
127.0.0.1:7001>
127.0.0.1:7001> set k4 v4
-> Redirected to slot [8455] located at 127.0.0.1:7002
OK
127.0.0.1:7002> set k5 v5
-> Redirected to slot [12582] located at 127.0.0.1:7003
OK
127.0.0.1:7003> set k6 v6
-> Redirected to slot [325] located at 127.0.0.1:7001
OK
#取值
127.0.0.1:7001> get k1
-> Redirected to slot [12706] located at 127.0.0.1:7003
"v1"
127.0.0.1:7003> get k2
-> Redirected to slot [449] located at 127.0.0.1:7001
"v2"
127.0.0.1:7001> get k3
"v3"
127.0.0.1:7001> get k4
-> Redirected to slot [8455] located at 127.0.0.1:7002
"v4"
127.0.0.1:7002> get k5
-> Redirected to slot [12582] located at 127.0.0.1:7003
"v5"
127.0.0.1:7003> get k6
-> Redirected to slot [325] located at 127.0.0.1:7001
"v6"
#从上面的设置和取值操作可以看出
#cluster集群模式是去中心化的,设值时会分配到不同的节点中,取值时会从不同的节点中获取。客户端会重定向到不同的节点
- 集群增加节点
还是按照上面的方式修改配置文件,并启动服务
在连接到集群的客户端中执行下面命令
#该节点以master的身份加入集群
CLUSTER MEET 127.0.0.1 7007
127.0.0.1:7002> CLUSTER meet 127.0.0.1 7007
OK
127.0.0.1:7002>
127.0.0.1:7002>
127.0.0.1:7002> CLUSTER nodes
9c6e3319d2aeeb383df6f5d9eb4f6eb5818af85a 127.0.0.1:7004@17004 slave d14b5c6791230896ad037dcac9b803bcdfc8541b 0 1586577788000 4 connected
ae2b18ad37729287000ef0cdbc0e65cb1431c2c3 127.0.0.1:7005@17005 slave f0abb788e567719e325a43639463ed5806332217 0 1586577788706 5 connected
f0abb788e567719e325a43639463ed5806332217 127.0.0.1:7002@17002 myself,master - 0 1586577789000 2 connected 5461-10922
621945c00f3e8986295d0aa95f271cf9c5fd7a5f 127.0.0.1:7007@17007 master - 0 1586577789110 0 connected #master身份加入
64c680e880d29257c4526b9e210eae93e2465447 127.0.0.1:7006@17006 slave dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 0 1586577790721 6 connected
dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 127.0.0.1:7003@17003 master - 0 1586577790000 3 connected 10923-16383
d14b5c6791230896ad037dcac9b803bcdfc8541b 127.0.0.1:7001@17001 master - 0 1586577789714 1 connected 0-5460
#再加入一个节点 7008
CLUSTER MEET 127.0.0.1 7008
- 更改集群节点的身份 把7008变为7007的从节点
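必须先登录到7008节点上,并且7008当前必须已经是一个从节点,或者是一个没有分配slot且不含数据的空主节点,才能执行切换命令(参数为7007的节点ID):
127.0.0.1:7008> CLUSTER REPLICATE 621945c00f3e8986295d0aa95f271cf9c5fd7a5f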
- 删除节点
#不能删除自己
127.0.0.1:7008> CLUSTER FORGET 92c1cad6dd2adbf1f83938c3ef0b0b99bde3b22d
(error) ERR I tried hard but I can't forget myself...
#不能删除自己的master
127.0.0.1:7008> CLUSTER FORGET 621945c00f3e8986295d0aa95f271cf9c5fd7a5f
(error) ERR Can't forget my master!
#可以删除其他master
#现在来把master节点7003删除 注意现在7003的从节点是7006
127.0.0.1:7008> CLUSTER FORGET dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15
OK #7003被删除
127.0.0.1:7008> CLUSTER NODES
64c680e880d29257c4526b9e210eae93e2465447 127.0.0.1:7006@17006 slave - 0 1586578971517 3 connected #7006现在没有主节点
9c6e3319d2aeeb383df6f5d9eb4f6eb5818af85a 127.0.0.1:7004@17004 slave d14b5c6791230896ad037dcac9b803bcdfc8541b 0 1586578975545 1 connected
f0abb788e567719e325a43639463ed5806332217 127.0.0.1:7002@17002 master - 0 1586578971000 2 connected 5461-10922
ae2b18ad37729287000ef0cdbc0e65cb1431c2c3 127.0.0.1:7005@17005 slave f0abb788e567719e325a43639463ed5806332217 0 1586578973529 2 connected
621945c00f3e8986295d0aa95f271cf9c5fd7a5f 127.0.0.1:7007@17007 master - 0 1586578973000 7 connected
d14b5c6791230896ad037dcac9b803bcdfc8541b 127.0.0.1:7001@17001 master - 0 1586578974537 1 connected 0-5460
92c1cad6dd2adbf1f83938c3ef0b0b99bde3b22d 127.0.0.1:7008@17008 myself,slave 621945c00f3e8986295d0aa95f271cf9c5fd7a5f 0 1586578972000 0 connected
#也可以删除从节点
- 模拟master节点挂掉
先查看节点信息
127.0.0.1:7001> CLUSTER NODES
dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 127.0.0.1:7003@17003 master - 0 1586676121401 3 connected 10923-16383 #模拟7003主节点挂掉
92c1cad6dd2adbf1f83938c3ef0b0b99bde3b22d 127.0.0.1:7008@17008 slave 621945c00f3e8986295d0aa95f271cf9c5fd7a5f 0 1586676122409 7 connected
9c6e3319d2aeeb383df6f5d9eb4f6eb5818af85a 127.0.0.1:7004@17004 slave d14b5c6791230896ad037dcac9b803bcdfc8541b 0 1586676118000 4 connected
f0abb788e567719e325a43639463ed5806332217 127.0.0.1:7002@17002 master - 0 1586676118000 2 connected 5461-10922
64c680e880d29257c4526b9e210eae93e2465447 127.0.0.1:7006@17006 slave dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 0 1586676118000 6 connected #7006从节点的主节点是7003
d14b5c6791230896ad037dcac9b803bcdfc8541b 127.0.0.1:7001@17001 myself,master - 0 1586676120000 1 connected 0-5460
621945c00f3e8986295d0aa95f271cf9c5fd7a5f 127.0.0.1:7007@17007 master - 0 1586676119388 7 connected
ae2b18ad37729287000ef0cdbc0e65cb1431c2c3 127.0.0.1:7005@17005 slave f0abb788e567719e325a43639463ed5806332217 0 1586676122000 5 connected
#现在模拟7003主节点挂掉
#kill掉7003主节点的进程
#再查看集群信息
127.0.0.1:7001> CLUSTER NODES
dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 127.0.0.1:7003@17003 master,fail - 1586676429015 1586676426699 3 disconnected #7003已经下线
92c1cad6dd2adbf1f83938c3ef0b0b99bde3b22d 127.0.0.1:7008@17008 slave 621945c00f3e8986295d0aa95f271cf9c5fd7a5f 0 1586676455000 7 connected
9c6e3319d2aeeb383df6f5d9eb4f6eb5818af85a 127.0.0.1:7004@17004 slave d14b5c6791230896ad037dcac9b803bcdfc8541b 0 1586676456000 4 connected
f0abb788e567719e325a43639463ed5806332217 127.0.0.1:7002@17002 master - 0 1586676457000 2 connected 5461-10922
64c680e880d29257c4526b9e210eae93e2465447 127.0.0.1:7006@17006 master - 0 1586676458000 8 connected 10923-16383 #7006变成了主节点
d14b5c6791230896ad037dcac9b803bcdfc8541b 127.0.0.1:7001@17001 myself,master - 0 1586676453000 1 connected 0-5460
621945c00f3e8986295d0aa95f271cf9c5fd7a5f 127.0.0.1:7007@17007 master - 0 1586676458994 7 connected
ae2b18ad37729287000ef0cdbc0e65cb1431c2c3 127.0.0.1:7005@17005 slave f0abb788e567719e325a43639463ed5806332217 0 1586676457987 5 connected
#现在又把7003节点启动起来
#再查看集群信息
127.0.0.1:7001> CLUSTER NODES
dad6e7e94e7d7a08ebaa0024bc5c32f18406ef15 127.0.0.1:7003@17003 slave 64c680e880d29257c4526b9e210eae93e2465447 0 1586676656000 8 connected #7003节点启动后 变成了7006的从节点
92c1cad6dd2adbf1f83938c3ef0b0b99bde3b22d 127.0.0.1:7008@17008 slave 621945c00f3e8986295d0aa95f271cf9c5fd7a5f 0 1586676655000 7 connected
9c6e3319d2aeeb383df6f5d9eb4f6eb5818af85a 127.0.0.1:7004@17004 slave d14b5c6791230896ad037dcac9b803bcdfc8541b 0 1586676656688 4 connected
f0abb788e567719e325a43639463ed5806332217 127.0.0.1:7002@17002 master - 0 1586676656000 2 connected 5461-10922
64c680e880d29257c4526b9e210eae93e2465447 127.0.0.1:7006@17006 master - 0 1586676657593 8 connected 10923-16383 #7006变成了7003的主节点
d14b5c6791230896ad037dcac9b803bcdfc8541b 127.0.0.1:7001@17001 myself,master - 0 1586676657000 1 connected 0-5460
621945c00f3e8986295d0aa95f271cf9c5fd7a5f 127.0.0.1:7007@17007 master - 0 1586676654000 7 connected
ae2b18ad37729287000ef0cdbc0e65cb1431c2c3 127.0.0.1:7005@17005 slave f0abb788e567719e325a43639463ed5806332217 0 1586676657693 5 connected
以上可以看出,当主节点挂掉后,它的从节点会变成主节点继续提供服务。当重新启动该节点时,会成为新主节点的从节点。特别注意,这里的主节点一定是分配了存储槽的主节点。没有分配到存储槽的主节点挂掉,它的从节点也不会变成主节点,集群也可以正常使用。
- 当一个主节点和它的从节点一起挂掉时,集群将不可用(一定是已经分配了存储槽的节点)
127.0.0.1:7001> get k2
(error) CLUSTERDOWN The cluster is down
- 重新分配slot
因为7007是新添加的master节点,并没有分配slot。
#重新分配slot
redis-cli --cluster reshard 127.0.0.1:7007
此时再查看集群信息:7007节点已经分配到slot!!!