1. 安装Docker Desktop
官网地址
2. 下载linux内核更新包(安装好Docker重启之后,会自动提示安装它)
官网地址
3. 重启计算机后打开docker desktop
4. win + r 打开 cmd窗口
5. 拉取镜像
docker pull ubuntu:16.04
6. 创建桥接网络(Docker默认使用桥接,此处亦使用桥接)
将网络名称命名为hadoop
docker network create -d bridge hadoop
7. 替换镜像源
挂载本地文件至docker
- 将本地文件挂载至镜像ubuntu:16.04,并启动容器,打开交互式终端
此处将本地目录 E:\COURSE\Spark 挂载至容器目录 /home
将本地文件挂载至镜像ubuntu:16.04,并启动容器,打开交互式终端
docker run -v E:\COURSE\Spark:/home -it ubuntu:16.04
- 在目录 E:\COURSE\Spark 下创建文件sourcelist.txt,文件内容如下:
阿里云镜像源(Ubuntu)。其他系统的镜像源还请自行寻找,注意替换时系统版本须一致。
deb http://mirrors.aliyun.com/ubuntu/ xenial main
deb-src http://mirrors.aliyun.com/ubuntu/ xenial main
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main
deb http://mirrors.aliyun.com/ubuntu/ xenial universe
deb-src http://mirrors.aliyun.com/ubuntu/ xenial universe
deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates universe
deb http://mirrors.aliyun.com/ubuntu/ xenial-security main
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main
deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe
deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security universe
- 在cmd窗口中执行命令
使用挂载到/home下的文件(sourcelist.txt)替换文件/etc/apt/sources.list
mv /home/sourcelist.txt /etc/apt/sources.list
解释
参数
含义
-i
交互式
-t
terminal(终端)
-v
指定挂载(源)目录到(目标)目录
5. 安装openssh、jdk等
运行如下命令:
apt-get update
apt install vim
apt install net-tools
apt install openjdk-8-jdk
apt install scala
apt-get install openssh-server
# 生成密钥
ssh-keygen -t rsa -P ""
# 将公钥追加到authorized_keys 文件中
cat .ssh/id_rsa.pub >> .ssh/authorized_keys
# 启动 SSH 服务
service ssh start
# 免密登录自己
ssh 127.0.0.1
# 自动启动 SSH 服务,将命令service ssh start写入文件~/.bashrc中
vim ~/.bashrc
service ssh start
6. hadoop 下载地址
清华软件镜像站 / 清华镜像站中 hadoop 地址
# 在哪个目录下执行以下命令,文件便下载到哪个目录
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/stable/hadoop-3.3.2.tar.gz
7. 解压hadoop
# 解压文件hadoop-3.3.2.tar.gz至目录/usr/local/
tar -zxvf hadoop-3.3.2.tar.gz -C /usr/local/
# 文件重命名
mv /usr/local/hadoop-3.3.2 /usr/local/hadoop
8. 配置hadoop
- 修改 /etc/profile
vim /etc/profile
在该文件中添加如下内容:
#java
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
#hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_PREFIX=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_LIBEXEC_DIR=$HADOOP_HOME/libexec
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH
export HADOOP_CONF_DIR=$HADOOP_PREFIX/etc/hadoop
export HDFS_DATANODE_USER=root
export HDFS_DATANODE_SECURE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export HDFS_NAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# 使变量生效
source /etc/profile
- 修改hadoop-env.sh
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
在文件中添加如下内容:
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
3.修改core-site.xml
vim /usr/local/hadoop/etc/hadoop/core-site.xml
将原文件的内容替换为如下内容:
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://h01:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop3/hadoop/tmp</value>
</property>
</configuration>
4.修改hdfs-site.xml
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
将原文件的内容替换为如下内容:
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop3/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop3/hadoop/hdfs/data</value>
</property>
</configuration>
5.修改mapred-site.xml
vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
将原文件的内容替换为如下内容:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>
/usr/local/hadoop/etc/hadoop,
/usr/local/hadoop/share/hadoop/common/*,
/usr/local/hadoop/share/hadoop/common/lib/*,
/usr/local/hadoop/share/hadoop/hdfs/*,
/usr/local/hadoop/share/hadoop/hdfs/lib/*,
/usr/local/hadoop/share/hadoop/mapreduce/*,
/usr/local/hadoop/share/hadoop/mapreduce/lib/*,
/usr/local/hadoop/share/hadoop/yarn/*,
/usr/local/hadoop/share/hadoop/yarn/lib/*
</value>
</property>
</configuration>
6.修改yarn-site.xml
vim /usr/local/hadoop/etc/hadoop/yarn-site.xml
将原文件的内容替换为如下内容:
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>h01</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
7. 修改workers文件
vim /usr/local/hadoop/etc/hadoop/workers
填入如下内容(伪分布式)
h01
h02
名称
地位
h01
主节点
h02
从节点
8. 保存镜像(另外打开一个cmd窗口[第二个cmd窗口])
# “当前容器”处填写容器ID,可使用命令 docker ps 查看容器ID
docker commit -m 说明 -a 作者 当前容器 目标镜像
本人此处为
docker commit -m "hadoop" -a "hadoop" 5cd9799528ec hdp
9. 运行master主节点容器
docker run -it --network hadoop -h "h01" --name "h01" -p 9870:9870 -p 8088:8088 hdp /bin/bash
9. 运行slave从节点容器(另外打开一个cmd窗口[第三个cmd窗口])
docker run -it --network hadoop -h "h02" --name "h02" hdp /bin/bash
10. 格式化(第二个cmd窗口下)
/usr/local/hadoop/bin/hadoop namenode -format
11. 启动集群(第二个cmd窗口下)
/usr/local/hadoop/sbin/start-all.sh
12. 查看集群状态(第二个cmd窗口下)
1. 当前进程
jps
2. 查看状态
/usr/local/hadoop/bin/hadoop dfsadmin -report
3. 通过web端
地址栏输入
127.0.0.1:8088
13. 测试wordcount(第二个cmd窗口下)
- 创建输入文件
/usr/local/hadoop/bin/hadoop dfs -mkdir /input
/usr/local/hadoop/bin/hadoop dfs -copyFromLocal /usr/local/hadoop/LICENSE.txt /input
- 运行实例
/usr/local/hadoop/bin/hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.2.jar wordcount /input /output
- 查看结果
/usr/local/hadoop/bin/hadoop dfs -ls /output
/usr/local/hadoop/bin/hadoop dfs -cat /output/part-r-00000
附录
1. 连接容器
docker exec -it 容器ID /bin/bash
2. 删除镜像
docker rmi 镜像名