I. Hadoop Cluster Setup
1. Install the virtual machines
1) Install VMware Tools (open-vm-tools)
hadoop@ubuntu:sudo apt-get install open-vm-tools-desktop -y
2) Install the vim editor
hadoop@ubuntu:sudo apt install vim
2. Install the JDK
1) Extract the archive
hadoop@ubuntu:~$ sudo tar -zxvf jdk-8u121-linux-x64.tar.gz -C /usr/local
2) Edit the environment variables
hadoop@ubuntu:~$ vim ~/.bashrc
#JAVA
export JAVA_HOME=/usr/local/jdk1.8.0_121
PATH=$PATH:$JAVA_HOME/bin
3) Apply the environment variables
hadoop@ubuntu:~$ source ~/.bashrc
4) Verify that the JDK is installed
hadoop@ubuntu:~$ java -version
java version "1.8.0_121"
Java(TM) SE Runtime Environment (build 1.8.0_121-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.121-b13, mixed mode)
3. Install Hadoop
1) Extract the archive
hadoop@ubuntu:~$ sudo tar -zxvf hadoop-2.7.7.tar.gz -C /usr/local
2) Edit the environment variables
hadoop@ubuntu:~$ vim ~/.bashrc
#Hadoop
export HADOOP_HOME=/usr/local/hadoop-2.7.7
PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
3) Apply the environment variables
hadoop@ubuntu:~$ source ~/.bashrc
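At this point the Hadoop binaries should be on the PATH; a quick sanity check is to run hadoop version, whose first output line should name the release:
hadoop@ubuntu:~$ hadoop version
Hadoop 2.7.7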
4. Clone two worker nodes
The JDK and Hadoop do not need to be installed or configured again on the clones.
5. Configure hostnames, static IP addresses, and the hostname-to-IP mapping
1) Log in as root and set the hostnames of the three virtual machines to master, slave1, and slave2
(1) // Set the hostname of the 1st VM to master
root@ubuntu:~# vi /etc/hostname
master
root@ubuntu:~# reboot
(2) // Set the hostname of the 2nd VM to slave1
root@ubuntu:~# vi /etc/hostname
slave1
root@ubuntu:~# reboot
(3) // Set the hostname of the 3rd VM to slave2
root@ubuntu:~# vi /etc/hostname
slave2
root@ubuntu:~# reboot
2) Log in as root and configure a static IP address (this must be done on all 3 nodes)
(1) // Taking master as an example, edit the netplan configuration file as follows:
root@master:~# vim /etc/netplan/01-network-manager-all.yaml
# Let NetworkManager manage all devices on this system
#network:
#  version: 2
#  renderer: NetworkManager
network:
  ethernets:
    ens33:                              # name of the NIC being configured
      dhcp4: no                         # disable DHCPv4
      dhcp6: no                         # disable DHCPv6
      addresses: [192.168.126.143/24]   # this host's IP address and prefix length
      gateway4: 192.168.126.2           # default gateway
      nameservers:
        addresses: [192.168.126.2, 114.114.114.114, 8.8.8.8]   # DNS servers
  version: 2
Note: the gateway must be the same as the gateway of the host machine's VMnet8 network.
(2) Apply the configuration
root@master:~# netplan apply
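To confirm the static address has taken effect (interface name and addresses as configured above):
root@master:~# ip addr show ens33          # should show inet 192.168.126.143/24
root@master:~# ping -c 3 192.168.126.2     # the VMnet8 gateway should answer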
3) Configure the IP-to-hostname mapping
// Log in as root and configure the IP-to-hostname mapping on each of the three VMs
root@master:~# vi /etc/hosts
127.0.0.1 localhost
192.168.126.143 master
192.168.126.146 slave1
192.168.126.147 slave2
root@slave1:~# vi /etc/hosts
127.0.0.1 localhost
192.168.126.143 master
192.168.126.146 slave1
192.168.126.147 slave2
root@slave2:~# vi /etc/hosts
127.0.0.1 localhost
192.168.126.143 master
192.168.126.146 slave1
192.168.126.147 slave2
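A quick way to verify the mapping is to ping each node by hostname from master (and likewise from the slaves):
hadoop@master:~$ ping -c 2 slave1
hadoop@master:~$ ping -c 2 slave2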
6. Configure passwordless SSH
1) Log in as root and make sure port 22 is open on every Linux host
hadoop@master:~$ su - root
Password:
root@master:~# vim /etc/ssh/sshd_config
Port 22
2) Install openssh-server and generate a key pair on every Linux host
// Log in to master as the hadoop user
root@master:~# su - hadoop
hadoop@master:~$ sudo apt install openssh-server
hadoop@master:~$ ssh-keygen -t rsa
hadoop@master:~$ ls ~/.ssh
id_rsa id_rsa.pub
// Log in to slave1 as the hadoop user
root@slave1:~# su - hadoop
hadoop@slave1:~$ sudo apt install openssh-server
hadoop@slave1:~$ ssh-keygen -t rsa
hadoop@slave1:~$ ls ~/.ssh
id_rsa id_rsa.pub
// Log in to slave2 as the hadoop user
root@slave2:~# su - hadoop
hadoop@slave2:~$ sudo apt install openssh-server
hadoop@slave2:~$ ssh-keygen -t rsa
hadoop@slave2:~$ ls ~/.ssh
id_rsa id_rsa.pub
3) Set up passwordless login between the nodes
// As the hadoop user on master, copy the public key into the authorized_keys file
hadoop@master:~/.ssh$ cp id_rsa.pub authorized_keys
// Copy master's authorized_keys to the slave1 node
hadoop@master:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@slave1:/home/hadoop/.ssh/authorized_keys
authorized_keys
// As the hadoop user on slave1, append slave1's public key to authorized_keys
hadoop@slave1:~/.ssh$ cat id_rsa.pub >> authorized_keys
// Copy slave1's authorized_keys to the slave2 node
hadoop@slave1:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@slave2:/home/hadoop/.ssh/authorized_keys
// As the hadoop user on slave2, append slave2's public key to authorized_keys
hadoop@slave2:~/.ssh$ cat id_rsa.pub >> authorized_keys
// Send the completed authorized_keys from slave2 back to slave1
hadoop@slave2:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@slave1:/home/hadoop/.ssh/authorized_keys
hadoop@slave1's password:
authorized_keys 100% 1196 804.0KB/s 00:00
// Send the completed authorized_keys from slave2 back to master
hadoop@slave2:~/.ssh$ scp /home/hadoop/.ssh/authorized_keys hadoop@master:/home/hadoop/.ssh/authorized_keys
hadoop@master's password:
authorized_keys
4) Verify passwordless login
Use ssh to log in to each node and confirm that no password is requested, for example:
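hadoop@master:~$ ssh slave1     # should log in without a password prompt
hadoop@slave1:~$ exit
hadoop@master:~$ ssh slave2
hadoop@slave2:~$ exit
hadoop@slave1:~$ ssh master     # and likewise from the slaves back to master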
7. Configure the cluster nodes
1) Edit the configuration files on master:
slaves, core-site.xml, hdfs-site.xml, mapred-site.xml, yarn-site.xml
(1) hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/slaves
slave1
slave2
(2) hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://master:9000</value>
    </property>
    <property>
        <!-- create this directory as the hadoop user and give it read/write/execute permission -->
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop-2.7.7/data/</value>
        <description>A base for other temporary directories.</description>
    </property>
</configuration>
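The hadoop.tmp.dir path above must exist and be writable by the hadoop user before the NameNode is formatted; one way to create it, using the same owner convention as the rest of this guide:
hadoop@master:~$ sudo mkdir -p /usr/local/hadoop-2.7.7/data
hadoop@master:~$ sudo chown -R hadoop:hadoop /usr/local/hadoop-2.7.7/data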
(3) hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <!-- host and port of the secondary NameNode -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>master:50090</value>
    </property>
    <property>
        <name>dfs.datanode.directoryscan.throttle.limit.ms.per.sec</name>
        <value>1000</value>
    </property>
</configuration>
(4) hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>master:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>master:19888</value>
    </property>
</configuration>
(5) hadoop@master:~$ vim /usr/local/hadoop-2.7.7/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- hostname of the YARN ResourceManager -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
</configuration>
2) Check that yarn-env.sh, mapred-env.sh, and hadoop-env.sh all set the JAVA_HOME environment variable
# The java implementation to use.
export JAVA_HOME=/usr/local/jdk1.8.0_121
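A quick way to confirm that all three files contain an active (uncommented) JAVA_HOME line pointing at the same JDK:
hadoop@master:~$ cd /usr/local/hadoop-2.7.7/etc/hadoop
hadoop@master:/usr/local/hadoop-2.7.7/etc/hadoop$ grep -n "^export JAVA_HOME" hadoop-env.sh yarn-env.sh mapred-env.sh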
8. Sync the installation and configuration to the other nodes
1) Create a temporary directory on slave1 and set its permissions
hadoop@slave1:~$ cd /usr/local
hadoop@slave1:/usr/local$ sudo mkdir tmp
hadoop@slave1:/usr/local$ sudo chown -R hadoop tmp
hadoop@slave1:/usr/local$ chgrp hadoop tmp
hadoop@slave1:/usr/local$ chmod -R 777 tmp
2) Transfer the files with scp
hadoop@master:/usr/local$ scp -r /usr/local/jdk1.8.0_121/ hadoop@slave1:/usr/local/tmp
hadoop@master:/usr/local$ scp -r /usr/local/hadoop-2.7.7/ hadoop@slave1:/usr/local/tmp
3) Move the files from the temporary directory to /usr/local
hadoop@slave1:/usr/local$ su - root
Password:
root@slave1:~# mv -f /usr/local/tmp/jdk1.8.0_121/ /usr/local/jdk1.8.0_121/
root@slave1:~# mv -f /usr/local/tmp/hadoop-2.7.7/ /usr/local/hadoop-2.7.7/
4) Repeat steps 1)-3) on slave2, as sketched below
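The same sequence for slave2 mirrors the commands above:
hadoop@slave2:/usr/local$ sudo mkdir tmp && sudo chown -R hadoop:hadoop tmp && chmod -R 777 tmp
hadoop@master:/usr/local$ scp -r /usr/local/jdk1.8.0_121/ hadoop@slave2:/usr/local/tmp
hadoop@master:/usr/local$ scp -r /usr/local/hadoop-2.7.7/ hadoop@slave2:/usr/local/tmp
root@slave2:~# mv -f /usr/local/tmp/jdk1.8.0_121/ /usr/local/jdk1.8.0_121/
root@slave2:~# mv -f /usr/local/tmp/hadoop-2.7.7/ /usr/local/hadoop-2.7.7/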
9. Start the cluster
1) The NameNode must be formatted before the cluster is started for the first time; do not format it again on later starts.
2) If you do need to reformat, three steps are required: stop the cluster, delete the contents of the data and logs directories (on every node), then format the NameNode.
hadoop@slave2:/usr/local/hadoop-2.7.7$ stop-all.sh
hadoop@slave2:/usr/local/hadoop-2.7.7/data$ rm -rf *
hadoop@master:/usr/local/hadoop-2.7.7/logs$ rm -rf *
hadoop@master:/usr/local/hadoop-2.7.7/logs$ hdfs namenode -format
21/08/19 19:47:22 INFO util.ExitUtil: Exiting with status 0
3) Start all daemons with start-all.sh
hadoop@master:/usr/local/hadoop-2.7.7/logs$ start-all.sh
4) Check that the daemons on each node are running
// Check the master node
hadoop@master:/usr/local/hadoop-2.7.7/logs$ jps
7286 Jps
6631 NameNode
6874 SecondaryNameNode
7036 ResourceManager
// Check the slave1 node
hadoop@master:/usr/local/hadoop-2.7.7/logs$ ssh slave1
hadoop@slave1:~$ jps
2791 DataNode
3213 Jps
3039 NodeManager
hadoop@slave1:~$ exit
// Check the slave2 node
hadoop@master:/usr/local/hadoop-2.7.7/logs$ ssh slave2
hadoop@slave2:~$ jps
2801 NodeManager
2977 Jps
2553 DataNode
hadoop@slave2:~$ exit
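Beyond jps, the HDFS and YARN command-line tools can confirm that both DataNodes and both NodeManagers have registered with master:
hadoop@master:~$ hdfs dfsadmin -report    # should list 2 live datanodes (slave1 and slave2)
hadoop@master:~$ yarn node -list          # should list 2 running NodeManagers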
10. Test HDFS and YARN
1) From a browser inside the virtual machine, open the HDFS NameNode web UI at http://master:50070 and the YARN ResourceManager web UI at http://master:8088.
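A quick functional test that exercises both HDFS and YARN is the bundled wordcount example; the input file used below is just core-site.xml, any text file will do:
hadoop@master:~$ hdfs dfs -mkdir -p /input
hadoop@master:~$ hdfs dfs -put /usr/local/hadoop-2.7.7/etc/hadoop/core-site.xml /input
hadoop@master:~$ hadoop jar /usr/local/hadoop-2.7.7/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar wordcount /input /output
hadoop@master:~$ hdfs dfs -cat /output/part-r-00000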
II. ZooKeeper Cluster Setup
1. Extract the installation archive
hadoop@master:~$ sudo rm -rf /usr/local/zookeeper/
hadoop@master:~$ sudo mkdir /usr/local/zookeeper/
hadoop@master:~$ sudo chown -R hadoop:hadoop /usr/local/zookeeper/
hadoop@master:~$ sudo tar -zxvf ~/Downloads/apache-zookeeper-3.5.9-bin.tar.gz -C /usr/local/zookeeper/
hadoop@master:~$ cd /usr/local/zookeeper
hadoop@master:/usr/local/zookeeper$ sudo chown -R hadoop:hadoop apache-zookeeper-3.5.9-bin/
hadoop@master:/usr/local/zookeeper$ sudo mv apache-zookeeper-3.5.9-bin/ zookeeper
hadoop@master:/usr/local/zookeeper$ cd zookeeper/
hadoop@master:/usr/local/zookeeper/zookeeper$ ll
total 48
drwxr-xr-x 6 hadoop hadoop 4096 Nov 22 18:33 ./
drwxr-xr-x 3 hadoop hadoop 4096 Nov 22 18:34 ../
drwxr-xr-x 2 hadoop hadoop 4096 Jan 6 2021 bin/
drwxr-xr-x 2 hadoop hadoop 4096 Jan 6 2021 conf/
drwxr-xr-x 5 hadoop hadoop 4096 Jan 6 2021 docs/
drwxr-xr-x 2 hadoop hadoop 4096 Nov 22 18:34 lib/
-rw-r--r-- 1 hadoop hadoop 11358 Oct 5 2020 LICENSE.txt
-rw-r--r-- 1 hadoop hadoop 432 Jan 6 2021 NOTICE.txt
-rw-r--r-- 1 hadoop hadoop 1560 Jan 6 2021 README.md
-rw-r--r-- 1 hadoop hadoop 1347 Jan 6 2021 README_packaging.txt
2. Configure environment variables
hadoop@master:/usr/local$ vim ~/.bashrc
#Zookeeper
export ZK_HOME=/usr/local/zookeeper/zookeeper
export PATH=$PATH:$ZK_HOME/bin
hadoop@master:/usr/local$ source ~/.bashrc
3. Create the data and datalog directories under the ZooKeeper installation directory
hadoop@master:/usr/local/zookeeper/zookeeper$ mkdir data
hadoop@master:/usr/local/zookeeper/zookeeper$ mkdir datalog
4. Configure the zoo.cfg file
hadoop@master:/usr/local/zookeeper/zookeeper$ cd conf
hadoop@master:/usr/local/zookeeper/zookeeper/conf$ cp zoo_sample.cfg zoo.cfg
hadoop@master:/usr/local/zookeeper/zookeeper/conf$ vim zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/local/zookeeper/zookeeper/data
dataLogDir=/usr/local/zookeeper/zookeeper/datalog
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
# the three server entries below must use the actual IP addresses of the cluster nodes (here master, slave1, slave2)
server.1=192.168.126.143:2888:3888
server.2=192.168.126.146:2889:3889
server.3=192.168.126.147:2890:3890
maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
5. Sync the configuration to the other nodes
1) Transfer the files with scp
hadoop@master:/usr/local$ scp -r /usr/local/zookeeper/ hadoop@slave1:/usr/local/tmp
2) Move the files from the temporary directory to /usr/local
hadoop@slave1:~$ sudo mv -f /usr/local/tmp/zookeeper/ /usr/local/zookeeper/
3) Configure the environment variables
root@slave1:~# su - hadoop
hadoop@slave1:~$ vim ~/.bashrc
#Zookeeper
export ZK_HOME=/usr/local/zookeeper/zookeeper
export PATH=$PATH:$ZK_HOME/bin
4) Apply the environment variables
hadoop@slave1:~$ source ~/.bashrc
5) Repeat steps 1)-4) on slave2
6. Create the myid files
hadoop@master:~$ vim /usr/local/zookeeper/zookeeper/data/myid
1
hadoop@slave1:~$ vim /usr/local/zookeeper/zookeeper/data/myid
2
hadoop@slave2:~$ vim /usr/local/zookeeper/zookeeper/data/myid
3
7. Start ZooKeeper
Start ZooKeeper on master
hadoop@master:~$ zkServer.sh start
Start ZooKeeper on slave1
hadoop@slave1:~$ zkServer.sh start
Start ZooKeeper on slave2
hadoop@slave2:~$ zkServer.sh start
8. Test the ZooKeeper connection
hadoop@slave2:/usr/local/zookeeper/zookeeper/bin$ zkCli.sh
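Once all three servers are up, running zkServer.sh status on each node should report Mode: leader on one node and Mode: follower on the other two; zkCli.sh without arguments connects to the local server on port 2181.
hadoop@master:~$ zkServer.sh status
hadoop@slave1:~$ zkServer.sh status
hadoop@slave2:~$ zkServer.sh status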