1. 需求概览
所谓集群管理无外乎两点:是否有机器退出和加入、选举master
对于第一点,所有机器约定在父目录GroupMembers下创建临时目录节点,然后监听父目录节点的子节点变化消息。一旦有机器挂掉,该机器与zookeeper的连接断开,其所创建的代表该节点存活状态的临时节点被删除,所有其他机器都将收到通知:某个兄弟目录被删除,于是,所有人都知道:有兄弟节点挂掉了。新机器加入也是类似的,所有机器收到通知:新兄弟目录加入,又多了个新兄弟节点。
对于第二点,我们稍微改变一下,所有机器创建临时顺序编号目录节点,每次选取编号最小的机器作为master就好了,当然,这只是其中的一种策略而已,选举策略完全可以由管理员自己制定,在分布式环境中,相同的业务应用分布在不同的机器上,有些业务逻辑(例如一些耗时的计算,网络IO处理),往往只需要让整个集群中的某一台机器进行执行,其余机器可以共享整个结果,这样可以大大减少重复劳动,提高性能。
利用Zookeeper的强一致性,能够保证在分布式高并发情况下节点创建的全局唯一性,即:同时有多个客户端请求创建/currentMaster节点,最终一定只有一个客户端请求能够创建成功。利用这个特性,就能很轻易地在分布式环境中进行集群选举了。(其实只要实现数据唯一性就可以做到选举)
1. HDFS没有利用zk做服务器的状态感知,判断死没死是心跳时间超过10分30秒
2. HBase是依赖于zk实现的,可以实时获取节点死亡
2. 实现
2.1 动态上下线
2.1.1 NameNode
package de.apps.动态上下线;
import org.apache.zookeeper.*;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
* Copyright (c) 2019 bigdata ALL Rights Reserved
* Project: learning
* Package: de.apps.动态上下线
* Version: 1.0
*
* @author qingzhi.wu 2020/8/8 17:36
*/
public class NameNode {
private static ZooKeeper zk = null;
private static volatile HashSet<String> nodes = new HashSet<String>();
public static void main(String[] args) throws Exception {
//1.获取连接
zk = new ZooKeeper(ZookeeperConstant.ZK_CONNECTS,ZookeeperConstant.TIMEOUT_MILL,new MyWatch());
//2.判断servers节点是否存在,如果不存在则创建
if (zk.exists(ZookeeperConstant.SERVER_PATH,false) == null){
zk.create(ZookeeperConstant.SERVER_PATH,null, ZooDefs.Ids.OPEN_ACL_UNSAFE,CreateMode.PERSISTENT);
}
//3.注册监听事件和初始化Namenode管理的节点
getNodes(ZookeeperConstant.SERVER_PATH);
TimeUnit.SECONDS.sleep(Integer.MAX_VALUE);
zk.close();
}
static class MyWatch implements Watcher {
@Override
public void process(WatchedEvent event) {
Event.EventType type = event.getType();
String path = event.getPath();
if (type == Event.EventType.NodeChildrenChanged && ZookeeperConstant.SERVER_PATH.equals(path) ){
//节点上线或者下线机器
//判断节点上线还是下线机器
List<String> currentNodes = null;
try {
currentNodes = getNodes(ZookeeperConstant.SERVER_PATH,false);
} catch (KeeperException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
if (currentNodes.size() > nodes.size() ){
//上线
String diffNodes = getDiffNodes(currentNodes, nodes);
if (diffNodes == null){
throw new RuntimeException("没有找到,不同的节点,未知错误");
}
nodes.add(diffNodes);
System.out.println("NameNode: 上线了 -->" + diffNodes+" 节点");
System.out.println("NameNode:当前管理节点-->" + nodes);
}else {
//下线
String diffNodes = getDiffNodes(nodes, currentNodes);
nodes.remove(diffNodes);
System.out.println("NameNode: 下线了 -->" + diffNodes+" 节点");
System.out.println("NameNode:当前管理节点-->" + nodes);
}
}
//持续监听
try {
if (path != null){
zk.getChildren(ZookeeperConstant.SERVER_PATH, true);
}
} catch (KeeperException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
public static void getNodes(String path) throws KeeperException, InterruptedException {
List<String> children = zk.getChildren(ZookeeperConstant.SERVER_PATH, true);
nodes.addAll(children);
}
public static String getDiffNodes(Collection<String> currentNodes, Collection<String> oldNodes){
//这里因为我们的节点没几个 也就不需要使用算法优化了
if (currentNodes.size() > oldNodes.size()){
return getDiff(currentNodes,oldNodes);
}else if (currentNodes.size() == oldNodes.size()){
return null;
}else {
return getDiff(oldNodes,currentNodes);
}
}
public static String getDiff(Collection<String> maxs, Collection<String> mins){
for (String currentNode : maxs) {
if (!mins.contains(currentNode)){
return currentNode;
}
}
return null;
}
public static List<String> getNodes(String path,boolean watch) throws KeeperException, InterruptedException {
List<String> children = zk.getChildren(ZookeeperConstant.SERVER_PATH, watch);
return children;
}
}
2.1.2 DataNode
package de.apps.动态上下线;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
import java.util.concurrent.TimeUnit;
/**
* Copyright (c) 2019 bigdata ALL Rights Reserved
* Project: learning
* Package: de.apps.动态上下线
* Version: 1.0
*
* @author qingzhi.wu 2020/8/8 17:47
*/
public class DataNode {
private static ZooKeeper zk = null;
private static String id = "03";
private static String nodeName = ZookeeperConstant.SERVER_PATH +"/DataNode"+id;
public static void main(String[] args) throws Exception {
//1.获取连接
zk = new ZooKeeper(ZookeeperConstant.ZK_CONNECTS,ZookeeperConstant.TIMEOUT_MILL,null);
//2.上线节点,停掉的话,就是断开连接
zk.create(nodeName,null, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
TimeUnit.SECONDS.sleep(Integer.MAX_VALUE);
zk.close();
}
}
2.1.3 验证
2.2 集群高可用
我们应该知道HDFS中的NameNode是可以高可用的,它借助的就是Zookeeper,那么我们是否可以实现一下这个业务场景?
我们先看一看我们的思路
NameNode启动之后先去创建锁
谁创建成功锁,谁有资格去Active节点创建自己
这个时候没有抢到锁的就应该是standby
2.2.1 NameNode
package de.apps.高可用;
import de.apps.动态上下线.ZookeeperConstant;
import org.apache.zookeeper.*;
import java.util.concurrent.TimeUnit;
/**
* Copyright (c) 2019 bigdata ALL Rights Reserved
* Project: learning
* Package: de.apps.高可用
* Version: 1.0
*
* @author qingzhi.wu 2020/8/8 19:16
*/
public class NameNode {
    /*
     * Active/standby election on top of ZooKeeper.
     *
     * Layout: PARENT holds three children - ACTIVE and STANDBY (persistent directories
     * whose ephemeral children are host names) and LOCK (an ephemeral node whose data
     * is the host name of whoever created it).
     *
     * Flow: when ACTIVE has no children a process tries to create LOCK. The process
     * whose host name ends up in LOCK registers itself under ACTIVE; every other
     * process registers under STANDBY and watches ACTIVE for the next change.
     */
    private static ZooKeeper zk = null;
    private static final String HOSTNAME = "hadoop01";
    private static final String PARENT = "/cluster_ha";
    private static final String ACTIVE = PARENT + "/active";
    private static final String STANDBY = PARENT + "/standby";
    private static final String LOCK = PARENT + "/lock";
    private static final String activeMasterPath = ACTIVE + "/" + HOSTNAME;
    private static final String standByMasterPath = STANDBY + "/" + HOSTNAME;
    // Fixed charset so the lock data compares equal regardless of platform defaults.
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Connects, makes sure the persistent directory nodes exist, then either competes
     * for the lock (no active node yet) or registers as standby, and finally blocks.
     *
     * @param args unused
     * @throws Exception if the connection or a ZooKeeper call fails
     */
    public static void main(String[] args) throws Exception {
        zk = new ZooKeeper(ZookeeperConstant.ZK_CONNECTS, ZookeeperConstant.TIMEOUT_MILL, new MyWatch());
        try {
            // Parent first, then its two directories.
            if (!nodeExists(PARENT)) {
                createNode(PARENT, PARENT, CreateMode.PERSISTENT);
            }
            if (!nodeExists(ACTIVE)) {
                createNode(ACTIVE, ACTIVE, CreateMode.PERSISTENT);
            }
            if (!nodeExists(STANDBY)) {
                createNode(STANDBY, STANDBY, CreateMode.PERSISTENT);
            }

            if (zk.getChildren(ACTIVE, false).size() == 0) {
                // Nobody is active yet: try to take the lock.
                competeForLock();
            } else {
                registerStandby();
                System.out.println(HOSTNAME + " 发现active存在,所以自动成为standby");
            }
            TimeUnit.SECONDS.sleep(Integer.MAX_VALUE);
        } finally {
            zk.close();
        }
    }

    /**
     * Tells whether {@code path} currently exists.
     *
     * @param path node to look up
     * @return {@code true} when ZooKeeper reports a node at {@code path}
     */
    private static boolean nodeExists(String path) throws KeeperException, InterruptedException {
        return zk.exists(path, false) != null;
    }

    /** Default watcher driving fail-over and promotion. */
    static class MyWatch implements Watcher {
        @Override
        public void process(WatchedEvent event) {
            String path = event.getPath();
            Event.EventType type = event.getType();
            try {
                if (ACTIVE.equals(path) && type == Event.EventType.NodeChildrenChanged) {
                    // Re-arm the watch while reading so later changes are still reported.
                    if (zk.getChildren(ACTIVE, true).isEmpty()) {
                        competeForLock();
                    }
                } else if (LOCK.equals(path) && type == Event.EventType.NodeCreated) {
                    // The lock data names its creator; only that host may become active.
                    byte[] data = zk.getData(LOCK, false, null);
                    if (data != null && HOSTNAME.equals(new String(data, UTF_8))) {
                        promoteToActive();
                    }
                }
            } catch (KeeperException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
        }
    }

    /**
     * Tries to create the lock node. The watch is set first because the resulting
     * NodeCreated event is what triggers promotion in {@link MyWatch}. When the
     * creation fails this process falls back to standby.
     */
    private static void competeForLock() throws KeeperException, InterruptedException {
        zk.exists(LOCK, true);
        if (!createZNode(LOCK, HOSTNAME, CreateMode.EPHEMERAL, "lock")) {
            registerStandby();
        }
    }

    /** Makes sure this host is listed under STANDBY and watches ACTIVE for changes. */
    private static void registerStandby() throws KeeperException, InterruptedException {
        if (!nodeExists(standByMasterPath)) {
            createNode(standByMasterPath, HOSTNAME, CreateMode.EPHEMERAL);
        }
        zk.getChildren(ACTIVE, true);
    }

    /**
     * Registers this host under ACTIVE and removes its standby entry when it has one.
     * Does nothing further when the active node could not be created.
     */
    private static void promoteToActive() throws KeeperException, InterruptedException {
        if (!createNode(activeMasterPath, HOSTNAME, CreateMode.EPHEMERAL)) {
            return;
        }
        if (nodeExists(standByMasterPath)) {
            // Was standby before: drop the old registration.
            zk.delete(standByMasterPath, -1);
            System.out.println(HOSTNAME + " 成功切换自己的状态为active");
        } else {
            System.out.println(HOSTNAME + " 竞选成为active状态");
        }
    }

    /**
     * Creates a node and, when {@code message} is {@code "lock"}, prints the
     * lost-the-lock notice on failure.
     *
     * @param path    node to create
     * @param data    node content, stored as UTF-8
     * @param cm      create mode
     * @param message tag selecting the failure notice
     * @return {@code true} when the node was created
     */
    private static boolean createZNode(String path, String data, CreateMode cm, String message) {
        try {
            zk.create(path, data.getBytes(UTF_8), ZooDefs.Ids.OPEN_ACL_UNSAFE, cm);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } catch (KeeperException e) {
            // Reported below; the caller decides how to continue.
        }
        if ("lock".equals(message)) {
            System.out.println("我没有抢到锁,等下一次,我要抢占锁");
        }
        return false;
    }

    /**
     * Creates a node and prints a generic notice when that fails.
     *
     * @param path node to create
     * @param data node content, stored as UTF-8
     * @param mode create mode
     * @return {@code true} when the node was created
     */
    private static boolean createNode(String path, String data, CreateMode mode) {
        try {
            zk.create(path, data.getBytes(UTF_8), ZooDefs.Ids.OPEN_ACL_UNSAFE, mode);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        } catch (KeeperException e) {
            // Reported below; the caller decides how to continue.
        }
        System.out.println("创建节点失败,或者节点已经存在");
        return false;
    }
}
2.2.2 验证