一、hadoop集群的kerberos认证

1、hadoop101节点下的三个主体认证

kinit -kt  /etc/security/keytab/nn.service.keytab  nn/hadoop101

  

2、修改hadoop101节点下的配置文件进行分发

core-site.xml

<!-- Kerberos主体到系统用户的映射机制 -->
<property>
  <name>hadoop.security.auth_to_local.mechanism</name>
  <value>MIT</value>
</property>

<!-- Kerberos主体到系统用户的具体映射规则 -->
<property>
  <name>hadoop.security.auth_to_local</name>
  <value>
    RULE:[2:$1/$2@$0]([ndj]n\/.*@EXAMPLE\.COM)s/.*/hdfs/
    RULE:[2:$1/$2@$0]([rn]m\/.*@EXAMPLE\.COM)s/.*/yarn/
    RULE:[2:$1/$2@$0](jhs\/.*@EXAMPLE\.COM)s/.*/mapred/
    DEFAULT
  </value>
</property>

<!-- 启用Hadoop集群Kerberos安全认证 -->
<property>
  <name>hadoop.security.authentication</name>
  <value>kerberos</value>
</property>

<!-- 启用Hadoop集群授权管理 -->
<property>
  <name>hadoop.security.authorization</name>
  <value>true</value>
</property>

<!-- Hadoop集群间RPC通讯设为仅认证模式 -->
<property>
  <name>hadoop.rpc.protection</name>
  <value>authentication</value>
</property>

  

hdfs-site.xml

<!-- 访问DataNode数据块时需通过Kerberos认证 -->
<property>
  <name>dfs.block.access.token.enable</name>
  <value>true</value>
</property>

<!-- NameNode服务的Kerberos主体,_HOST会自动解析为服务所在的主机名 -->
<property>
  <name>dfs.namenode.kerberos.principal</name>
  <value>nn/_HOST@EXAMPLE.COM</value>
</property>

<!-- NameNode服务的Kerberos密钥文件路径 -->
<property>
  <name>dfs.namenode.keytab.file</name>
  <value>/etc/security/keytab/nn.service.keytab</value>
</property>

<!-- Secondary NameNode服务的Kerberos主体 -->
<property>
  <name>dfs.secondary.namenode.keytab.file</name>
  <value>/etc/security/keytab/sn.service.keytab</value>
</property>

<!-- Secondary NameNode服务的Kerberos密钥文件路径 -->
<property>
  <name>dfs.secondary.namenode.kerberos.principal</name>
  <value>sn/_HOST@EXAMPLE.COM</value>
</property>

<!-- NameNode Web服务的Kerberos主体 -->
<property>
  <name>dfs.namenode.kerberos.internal.spnego.principal</name>
  <value>HTTP/_HOST@EXAMPLE.COM</value>
</property>

<!-- WebHDFS REST服务的Kerberos主体 -->
<property>
  <name>dfs.web.authentication.kerberos.principal</name>
  <value>HTTP/_HOST@EXAMPLE.COM</value>
</property>

<!-- Secondary NameNode Web UI服务的Kerberos主体 -->
<property>
  <name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name>
  <value>HTTP/_HOST@EXAMPLE.COM</value>
</property>

<!-- Hadoop Web UI的Kerberos密钥文件路径 -->
<property>
  <name>dfs.web.authentication.kerberos.keytab</name>
  <value>/etc/security/keytab/spnego.service.keytab</value>
</property>

<!-- DataNode服务的Kerberos主体 -->
<property>
  <name>dfs.datanode.kerberos.principal</name>
  <value>dn/_HOST@EXAMPLE.COM</value>
</property>

<!-- DataNode服务的Kerberos密钥文件路径 -->
<property>
  <name>dfs.datanode.keytab.file</name>
  <value>/etc/security/keytab/dn.service.keytab</value>
</property>

<!-- 配置NameNode Web UI 使用HTTPS协议 -->
<property>
  <name>dfs.http.policy</name>
  <value>HTTPS_ONLY</value>
</property>

<!-- 配置DataNode数据传输保护策略为仅认证模式 -->
<property>
  <name>dfs.data.transfer.protection</name>
  <value>authentication</value>
</property>

  

yarn-site.xml

<!-- Resource Manager 服务的Kerberos主体 -->
<property>
  <name>yarn.resourcemanager.principal</name>
  <value>rm/_HOST@EXAMPLE.COM</value>
</property>

<!-- Resource Manager 服务的Kerberos密钥文件 -->
<property>
  <name>yarn.resourcemanager.keytab</name>
  <value>/etc/security/keytab/rm.service.keytab</value>
</property>

<!-- Node Manager 服务的Kerberos主体 -->
<property>
  <name>yarn.nodemanager.principal</name>
  <value>nm/_HOST@EXAMPLE.COM</value>
</property>

<!-- Node Manager 服务的Kerberos密钥文件 -->
<property>
  <name>yarn.nodemanager.keytab</name>
  <value>/etc/security/keytab/nm.service.keytab</value>
</property>

  

mapred-site.xml

<!-- 历史服务器的Kerberos主体 -->
<property>
  <name>mapreduce.jobhistory.keytab</name>
  <value>/etc/security/keytab/jhs.service.keytab</value>
</property>

<!-- 历史服务器的Kerberos密钥文件 -->
<property>
  <name>mapreduce.jobhistory.principal</name>
  <value>jhs/_HOST@EXAMPLE.COM</value>
</property>

  分发配置文件到集群各个节点

 

3、.配置hdfs使用https安全协议

生成密钥对

keytool -keystore /etc/security/keytab/keystore -alias jetty -genkey -keyalg RSA

解释

Keytool是java数据证书的管理工具,使用户能够管理自己的公/私钥对及相关证书。
-keystore    指定密钥库的名称及位置(产生的各类信息将存在.keystore文件中)
-genkey(或者-genkeypair)      生成密钥对
-alias  为生成的密钥对指定别名,如果没有默认是mykey
-keyalg  指定密钥的算法 RSA/DSA 默认是DSA

  

[root@hadoop101 keytab]# keytool -keystore /etc/security/keytab/keystore -alias jetty -genkey -keyalg RSA
输入密钥库口令:  
再次输入新口令: 
您的名字与姓氏是什么?
  [Unknown]:  hu
您的组织单位名称是什么?
  [Unknown]:  yi
您的组织名称是什么?
  [Unknown]:  yi
您所在的城市或区域名称是什么?
  [Unknown]:  sz
您所在的省/市/自治区名称是什么?
  [Unknown]:  sz
该单位的双字母国家/地区代码是什么?
  [Unknown]:  cn
CN=hu, OU=yi, O=yi, L=sz, ST=sz, C=cn是否正确?
  [否]:  yes

  

修改keystore访问权限

chown -R root:hadoop /etc/security/keytab/keystore

   chmod 660 /etc/security/keytab/keystore

 

将证书分发到各个节点

xsync /etc/security/keytab/keystore

 

4、修改hadoop的http配置文件

mv $HADOOP_HOME/etc/hadoop/ssl-server.xml.example $HADOOP_HOME/etc/hadoop/ssl-server.xml

vim $HADOOP_HOME/etc/hadoop/ssl-server.xml

<!-- SSL密钥库路径 -->
<property>
  <name>ssl.server.keystore.location</name>
  <value>/etc/security/keytab/keystore</value>
</property>

<!-- SSL密钥库密码 -->
<property>
  <name>ssl.server.keystore.password</name>
  <value>123456</value>
</property>

<!-- SSL可信任密钥库路径 -->
<property>
  <name>ssl.server.truststore.location</name>
  <value>/etc/security/keytab/keystore</value>
</property>

<!-- SSL密钥库中密钥的密码 -->
<property>
  <name>ssl.server.keystore.keypassword</name>
  <value>123456</value>
</property>

<!-- SSL可信任密钥库密码 -->
<property>
  <name>ssl.server.truststore.password</name>
  <value>123456</value>
</property>

  分发到各个节点

 

5、配置yarn使用LinuxContainerExecutor

修改所有节点的container-executor所有者和权限,要求其所有者为root,所有组为hadoop(启动NodeManger的yarn用户的所属组),权限为6050。其默认路径为$HADOOP_HOME/bin

 

[root@hadoop102 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/bin/container-executor
[root@hadoop102 ~]# chmod 6050 /opt/module/hadoop-3.1.3/bin/container-executor

[root@hadoop103 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/bin/container-executor
[root@hadoop103 ~]# chmod 6050 /opt/module/hadoop-3.1.3/bin/container-executor

[root@hadoop104 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/bin/container-executor
[root@hadoop104 ~]# chmod 6050 /opt/module/hadoop-3.1.3/bin/container-executor

修改所有节点的container-executor.cfg文件的所有者和权限,要求该文件及其所有的上级目录的所有者均为root,所有组为hadoop(启动NodeManger的yarn用户的所属组),权限为400。其默认路径为$HADOOP_HOME/etc/hadoop

 

[root@hadoop102 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc/hadoop/container-executor.cfg
[root@hadoop102 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc/hadoop
[root@hadoop102 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc
[root@hadoop102 ~]# chown root:hadoop /opt/module/hadoop-3.1.3
[root@hadoop102 ~]# chown root:hadoop /opt/module
[root@hadoop102 ~]# chmod 400 /opt/module/hadoop-3.1.3/etc/hadoop/container-executor.cfg

[root@hadoop103 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc/hadoop/container-executor.cfg
[root@hadoop103 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc/hadoop
[root@hadoop103 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc
[root@hadoop103 ~]# chown root:hadoop /opt/module/hadoop-3.1.3
[root@hadoop103 ~]# chown root:hadoop /opt/module
[root@hadoop103 ~]# chmod 400 /opt/module/hadoop-3.1.3/etc/hadoop/container-executor.cfg

[root@hadoop104 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc/hadoop/container-executor.cfg
[root@hadoop104 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc/hadoop
[root@hadoop104 ~]# chown root:hadoop /opt/module/hadoop-3.1.3/etc
[root@hadoop104 ~]# chown root:hadoop /opt/module/hadoop-3.1.3
[root@hadoop104 ~]# chown root:hadoop /opt/module
[root@hadoop104 ~]# chmod 400 /opt/module/hadoop-3.1.3/etc/hadoop/container-executor.cfg

  

vim $HADOOP_HOME/etc/hadoop/container-executor.cfg

yarn.nodemanager.linux-container-executor.group=hadoop
banned.users=hdfs,yarn,mapred
min.user.id=1000
allowed.system.users=
feature.tc.enabled=false

  

vim $HADOOP_HOME/etc/hadoop/yarn-site.xml

<!-- 配置Node Manager使用LinuxContainerExecutor管理Container -->
<property>
  <name>yarn.nodemanager.container-executor.class</name>
  <value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
</property>

<!-- 配置Node Manager的启动用户的所属组 -->
<property>
  <name>yarn.nodemanager.linux-container-executor.group</name>
  <value>hadoop</value>
</property>

<!-- LinuxContainerExecutor脚本路径 -->
<property>
  <name>yarn.nodemanager.linux-container-executor.path</name>
  <value>/opt/module/hadoop-3.1.3/bin/container-executor</value>
</property>

分发container-executor.cfg和yarn-site.xml文件

 

五、安全模式下启动集群

local

$HADOOP_LOG_DIR

hdfs:hadoop

drwxrwxr-x

local

dfs.namenode.name.dir

hdfs:hadoop

drwx------

local

dfs.datanode.data.dir

hdfs:hadoop

drwx------

local

dfs.namenode.checkpoint.dir

hdfs:hadoop

drwx------

local

yarn.nodemanager.local-dirs

yarn:hadoop

drwxrwxr-x

local

yarn.nodemanager.log-dirs

yarn:hadoop

drwxrwxr-x

 

 

$HADOOP_LOG_DIR(所有节点)该变量位于hadoop-env.sh文件,默认值为 ${HADOOP_HOME}/logs

[root@hadoop102 ~]# chown hdfs:hadoop /opt/module/hadoop-3.1.3/logs/
[root@hadoop102 ~]# chmod 775 /opt/module/hadoop-3.1.3/logs/

[root@hadoop103 ~]# chown hdfs:hadoop /opt/module/hadoop-3.1.3/logs/
[root@hadoop103 ~]# chmod 775 /opt/module/hadoop-3.1.3/logs/

[root@hadoop104 ~]# chown hdfs:hadoop /opt/module/hadoop-3.1.3/logs/
[root@hadoop104 ~]# chmod 775 /opt/module/hadoop-3.1.3/logs/

  

dfs.namenode.name.dir(NameNode节点)该参数位于hdfs-site.xml文件,默认值为file://${hadoop.tmp.dir}/dfs/name

[root@hadoop102 ~]# chown -R hdfs:hadoop /opt/module/hadoop-3.1.3/data/dfs/name/
[root@hadoop102 ~]# chmod 700 /opt/module/hadoop-3.1.3/data/dfs/name/

  

dfs.datanode.data.dir(DataNode节点)该参数为于hdfs-site.xml文件,默认值为file://${hadoop.tmp.dir}/dfs/data

[root@hadoop102 ~]# chown -R hdfs:hadoop /opt/module/hadoop-3.1.3/data/dfs/data/
[root@hadoop102 ~]# chmod 700 /opt/module/hadoop-3.1.3/data/dfs/data/

[root@hadoop103 ~]# chown -R hdfs:hadoop /opt/module/hadoop-3.1.3/data/dfs/data/
[root@hadoop103 ~]# chmod 700 /opt/module/hadoop-3.1.3/data/dfs/data/

[root@hadoop104 ~]# chown -R hdfs:hadoop /opt/module/hadoop-3.1.3/data/dfs/data/
[root@hadoop104 ~]# chmod 700 /opt/module/hadoop-3.1.3/data/dfs/data/

  

dfs.namenode.checkpoint.dir(SecondaryNameNode节点)该参数位于hdfs-site.xml文件,默认值为file://${hadoop.tmp.dir}/dfs/namesecondary

[root@hadoop104 ~]# chown -R hdfs:hadoop /opt/module/hadoop-3.1.3/data/dfs/namesecondary/
[root@hadoop104 ~]# chmod 700 /opt/module/hadoop-3.1.3/data/dfs/namesecondary/

 

yarn.nodemanager.local-dirs(NodeManager节点)该参数位于yarn-site.xml文件,默认值为file://${hadoop.tmp.dir}/nm-local-dir

[root@hadoop102 ~]# chown -R yarn:hadoop /opt/module/hadoop-3.1.3/data/nm-local-dir/
[root@hadoop102 ~]# chmod -R 775 /opt/module/hadoop-3.1.3/data/nm-local-dir/

[root@hadoop103 ~]# chown -R yarn:hadoop /opt/module/hadoop-3.1.3/data/nm-local-dir/
[root@hadoop103 ~]# chmod -R 775 /opt/module/hadoop-3.1.3/data/nm-local-dir/

[root@hadoop104 ~]# chown -R yarn:hadoop /opt/module/hadoop-3.1.3/data/nm-local-dir/
[root@hadoop104 ~]# chmod -R 775 /opt/module/hadoop-3.1.3/data/nm-local-dir/

 

yarn.nodemanager.log-dirs(NodeManager节点)该参数位于yarn-site.xml文件,默认值为$HADOOP_LOG_DIR/userlogs

 

 

[root@hadoop102 ~]# chown yarn:hadoop /opt/module/hadoop-3.1.3/logs/userlogs/
[root@hadoop102 ~]# chmod 775 /opt/module/hadoop-3.1.3/logs/userlogs/

[root@hadoop103 ~]# chown yarn:hadoop /opt/module/hadoop-3.1.3/logs/userlogs/
[root@hadoop103 ~]# chmod 775 /opt/module/hadoop-3.1.3/logs/userlogs/

[root@hadoop104 ~]# chown yarn:hadoop /opt/module/hadoop-3.1.3/logs/userlogs/
[root@hadoop104 ~]# chmod 775 /opt/module/hadoop-3.1.3/logs/userlogs/

 

  

启动hdfs

1.单点

sudo -i -u hdfs hdfs --daemon start namenode
[root@hadoop102 ~]# sudo -i -u hdfs hdfs --daemon start datanode
[root@hadoop103 ~]# sudo -i -u hdfs hdfs --daemon start datanode
[root@hadoop104 ~]# sudo -i -u hdfs hdfs --daemon start datanode
[root@hadoop104 ~]# sudo -i -u hdfs hdfs --daemon start secondarynamenode

2.群起

先有ssh

再者修改配置

vim $HADOOP_HOME/sbin/start-dfs.sh 再顶部添加

HDFS_DATANODE_USER=hdfs
HDFS_NAMENODE_USER=hdfs
HDFS_SECONDARYNAMENODE_USER=hdfs

[root@hadoop102 ~]# start-dfs.sh

查看页面

 

6.修改hdfs特定路径访问权限

hdfs

/

hdfs:hadoop

drwxr-xr-x

hdfs

/tmp

hdfs:hadoop

drwxrwxrwxt

hdfs

/user

hdfs:hadoop

drwxrwxr-x

hdfs

yarn.nodemanager.remote-app-log-dir

yarn:hadoop

drwxrwxrwxt

hdfs

mapreduce.jobhistory.intermediate-done-dir

mapred:hadoop

drwxrwxrwxt

hdfs

mapreduce.jobhistory.done-dir

mapred:hadoop

drwxrwx---

  

 

创建hdfs/hadoop主体,执行以下命令并按照提示输入密码

 

[root@hadoop102 ~]# kadmin.local -q "addprinc hdfs/hadoop"

认证hdfs/hadoop主体,执行以下命令并按照提示输入密码

 

 

[root@hadoop102 ~]# kinit hdfs/hadoop

修改/、/tmp、/user路径

[root@hadoop102 ~]# hadoop fs -chown hdfs:hadoop / /tmp /user
[root@hadoop102 ~]# hadoop fs -chmod 755 /
[root@hadoop102 ~]# hadoop fs -chmod 1777 /tmp
[root@hadoop102 ~]# hadoop fs -chmod 775 /user

参数yarn.nodemanager.remote-app-log-dir位于yarn-site.xml文件,默认值/tmp/logs

 

[root@hadoop102 ~]# hadoop fs -chown yarn:hadoop /tmp/logs
[root@hadoop102 ~]# hadoop fs -chmod 1777 /tmp/logs

 

  

参数mapreduce.jobhistory.intermediate-done-dir位于mapred-site.xml文件,默认值为/tmp/hadoop-yarn/staging/history/done_intermediate,需保证该路径的所有上级目录(除/tmp)的所有者均为mapred,所属组为hadoop,权限为770

[root@hadoop102 ~]# hadoop fs -chown -R mapred:hadoop /tmp/hadoop-yarn/staging/history/done_intermediate
[root@hadoop102 ~]# hadoop fs -chmod -R 1777 /tmp/hadoop-yarn/staging/history/done_intermediate

[root@hadoop102 ~]# hadoop fs -chown mapred:hadoop /tmp/hadoop-yarn/staging/history/
[root@hadoop102 ~]# hadoop fs -chown mapred:hadoop /tmp/hadoop-yarn/staging/
[root@hadoop102 ~]# hadoop fs -chown mapred:hadoop /tmp/hadoop-yarn/

[root@hadoop102 ~]# hadoop fs -chmod 770 /tmp/hadoop-yarn/staging/history/
[root@hadoop102 ~]# hadoop fs -chmod 770 /tmp/hadoop-yarn/staging/
[root@hadoop102 ~]# hadoop fs -chmod 770 /tmp/hadoop-yarn/

参数mapreduce.jobhistory.done-dir位于mapred-site.xml文件,默认值为/tmp/hadoop-yarn/staging/history/done,需保证该路径的所有上级目录(除/tmp)的所有者均为mapred,所属组为hadoop,权限为770

[root@hadoop102 ~]# hadoop fs -chown -R mapred:hadoop /tmp/hadoop-yarn/staging/history/done
[root@hadoop102 ~]# hadoop fs -chmod -R 750 /tmp/hadoop-yarn/staging/history/done

[root@hadoop102 ~]# hadoop fs -chown mapred:hadoop /tmp/hadoop-yarn/staging/history/
[root@hadoop102 ~]# hadoop fs -chown mapred:hadoop /tmp/hadoop-yarn/staging/
[root@hadoop102 ~]# hadoop fs -chown mapred:hadoop /tmp/hadoop-yarn/

[root@hadoop102 ~]# hadoop fs -chmod 770 /tmp/hadoop-yarn/staging/history/
[root@hadoop102 ~]# hadoop fs -chmod 770 /tmp/hadoop-yarn/staging/
[root@hadoop102 ~]# hadoop fs -chmod 770 /tmp/hadoop-yarn/

 

启动yarn

vim $HADOOP_HOME/sbin/start-yarn.sh

在顶部增加如下内容

YARN_RESOURCEMANAGER_USER=yarn
YARN_NODEMANAGER_USER=yarn
[root@hadoop103 ~]# start-yarn.sh

  

启动历史服务器

[root@hadoop102 ~]# sudo -i -u mapred mapred --daemon start historyserver

  

6.安全集群使用说明