hadoop自动安装脚本

  大数据老师说hadoop安装非常容易失败,我在安装过程中确实踩了不少坑,仅仅按照林子雨老师的教程可能会出问题。所以我总结了遇到的问题,写了一个自动安装脚本,直接能在ubuntu上装好伪分布式hadoop。

  因为水平有限,如果在你的电脑上不能正常安装也请见谅。sudo userdel -r hadoop命令可以删除新建的hadoop用户。

  本脚本可以顺便装java,如果你安装过java,我默认环境变量的配置是在/etc/profile里的,如果配置在~/.bashrc里,脚本会视为未安装java。所以最好把java的环境变量写在/etc/profile里。(话说如果配置在~/.bashrc里新建的用户也不能直接用。。。)所以最好的情况是一台刚刚装好系统的电脑运行此脚本,这样能避免很多杂七杂八的问题。

  终端输入touch hadoop_autoinstall.sh, 把代码复制到里面,按提示运行就行。注意,这装的是伪分布式的hadoop,所以单机hadoop的功能无法使用,要想运行单机hadoop,要把core-site.xml和hdfs-site.xml里面<configuration></configuration>间的东西删去。

#!/bin/bash
#author:	hahahehe
#time  :	2021.3.14

#在ubuntu里运行,作者在ubuntu20.04里测试成功
#把hadoop的压缩包,“jdk的压缩包”和这个脚本放到同一目录下,最好把这3个全复制到/home/xxx里(xxx为用户名)
#Hadoop官网:https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/ ,作者用的是hadoop-3.2.2
#注意,是jdk的压缩包,不是jre的压缩包,jdk压缩包要到官网去下,比如作者用的jdk15:https://www.oracle.com/java/technologies/javase-jdk15-downloads.html
#选择“Linux x64 Compressed Archive”下载
#如果java安装过了那就不需要jdk了
#查看自己java是否成功安装就在终端里运行命令:java -version
#----运行脚本----
#到脚本所在目录,右键打开终端并输入:source hadoop_autoinstall.sh
#当脚本提示重新登陆后,切换用户为hadoop,然后点开桌面上的hadoop文件夹,右键打开终端,再次输入:source hadoop_autoinstall.sh

#######################################
# Extract a jdk*.tar.gz archive from the current directory into
# /usr/local and rename the extracted directory to /usr/local/java.
# Exits the (sourced) shell if no archive is found.
#######################################
installJava() {
    echo "Checking jdk*.tar.gz file..."
    # Expand the glob directly instead of parsing `ls` output; `ls` output
    # word-splits and the scalar-as-array access of the original only worked
    # by accident. If nothing matches, the literal pattern remains and the
    # -e test below fails.
    local jdk_archives=(jdk*.tar.gz)
    if [ -e "${jdk_archives[0]}" ]; then
        echo "Extracting ${jdk_archives[0]} to /usr/local..."
        sudo tar -xzvf "${jdk_archives[0]}" -C /usr/local
        # The extracted directory is the archive name up to the first '_'
        # (e.g. jdk-15.0.2_linux-x64_bin.tar.gz -> jdk-15.0.2).
        local jdk_dir=${jdk_archives[0]%%_*}
        # NOTE(review): 777 is overly permissive (755 would suffice), kept
        # for consistency with the rest of the script.
        sudo chmod -R 777 "/usr/local/$jdk_dir"
        cd /usr/local || exit
        sudo mv "$jdk_dir" java
        echo "Finish..."
    else
        echo "Cannot find jdk*.tar.gz file, did you put this file in the current dir?"
        # The script is meant to be run with `source`, so `exit` closes the
        # user's terminal; the sleep gives them time to read the message.
        sleep 1m
        exit
    fi
}

#######################################
# Ensure JAVA_HOME, CLASSPATH and PATH are configured in /etc/profile,
# calling installJava first when JAVA_HOME is not configured yet.
# Globals:  writes /etc/profile, sources it afterwards.
# Returns:  0
#######################################
configureJava() {
    java_home=$(grep "export JAVA_HOME=.*" /etc/profile)
    if [ "$java_home" != "" ]; then
        echo "JAVA_HOME exist..."
    else
        echo "JAVA_HOME not exist!"
        echo "Installing java..."
        sudo rm -rf /usr/local/java	# remove leftovers to avoid conflicts
        installJava
        echo "Configuring environment variable: JAVA_HOME..."
        echo "Using default path: /usr/local/java"
        java_home="export JAVA_HOME=/usr/local/java"
        # `sudo echo ... >> file` runs the redirection WITHOUT privileges
        # (the original only worked because it chmod'ed /etc/profile 777
        # first, which is a security hole). `sudo tee -a` appends as root.
        echo "$java_home" | sudo tee -a /etc/profile >/dev/null
        echo "Finish!"
    fi
    classpath=$(grep "export CLASSPATH=.*\$JAVA_HOME.*" /etc/profile)
    if [ "$classpath" != "" ]; then
        echo "CLASSPATH exist..."
    else
        echo "CLASSPATH not exist!"
        # BUGFIX: the original message said "JAVA_HOME" here.
        echo "Configuring environment variable: CLASSPATH..."
        classpath="export CLASSPATH=.:\$JAVA_HOME/lib/dt.jar:\$JAVA_HOME/lib/tools.jar"
        echo "$classpath" | sudo tee -a /etc/profile >/dev/null
        echo "Finish!"
    fi
    path_java=$(grep "export PATH=.*\$JAVA_HOME.*" /etc/profile)
    if [ "$path_java" != "" ]; then
        echo "PATH-JAVA exist..."
    else
        echo "PATH-JAVA not exist!"
        echo "Configuring environment variable: PATH..."
        path_java="export PATH=\$PATH:\$JAVA_HOME/bin"
        echo "$path_java" | sudo tee -a /etc/profile >/dev/null
        echo "Finish!"
    fi

    echo "Reloading profile..."
    source /etc/profile
    echo -e "\033[0;36;1mInstall java successful!\033[0m"
    echo "Checking java version..."
    echo
    java -version
    # BUGFIX: the original returned 1 (failure) on the success path.
    return 0
}

#######################################
# Drive the whole install: ensure java, create the 'hadoop' user on the
# first run (then ask the user to relogin and rerun), and on the second
# run install openssh, set up passwordless SSH, extract and configure a
# pseudo-distributed Hadoop under /usr/local/hadoop.
#######################################
main() {
	cur_path="$PWD"	# remember the starting dir; we cd around below
	sudo echo "Checking java..."	# harmless echo that forces the sudo password prompt early
	configureJava
	echo "done..."

	echo "Checking user..."
	if [ "$USER" != "hadoop" ]; then
		echo "User 'hadoop' not created..."
		echo "Creating user 'hadoop'..."
		sudo useradd -m hadoop -s /bin/bash
		sudo passwd hadoop
		sudo adduser hadoop sudo
		echo "Finish"
		echo "User 'hadoop' created..."
		echo "Moving script and hadoop compressed archive to '/home/hadoop'..."
		cd "$cur_path" || exit
		# hadoop* matches both the hadoop-*.tar.gz archive and this
		# hadoop_autoinstall.sh script — both are needed for the rerun.
		sudo cp -r hadoop* /home/hadoop
		echo "Finish!"
		echo -e '\033[0;33;1mPlease relogin as user "hadoop" and cd to "/home/hadoop" to rerun this script!\033[0m'
		sleep 1m
		exit
	fi
	echo "done..."

	echo "Login as user 'hadoop'..."
	sudo rm -rf /usr/local/hadoop	# remove leftovers to avoid conflicts
	sudo apt-get update
	echo "Installing openssh"
	sudo apt-get -y install openssh-server
	sudo apt-get -y install openssh-client
	echo "Finish"

	echo "Generating SSH key..."
	echo -e 'Default save path: ~/.ssh, \033[0;33;1mpress <Enter> to continue...\033[0m'
	# BUGFIX: ~/.ssh does not exist on a freshly created account, so the
	# original `cd ~/.ssh/` failed. Create it with the perms sshd expects.
	mkdir -p ~/.ssh
	chmod 700 ~/.ssh
	cd ~/.ssh/ || exit
	ssh-keygen -t rsa -P ""
	echo "Finish!"
	echo "Setting password free login..."
	cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
	chmod 600 ~/.ssh/authorized_keys	# sshd refuses group/world-writable key files
	echo "Finish..."

	cd /home/hadoop || exit
	echo "Extracting the hadoop packages..."
	hadoop_tar=$(ls hadoop-*.tar.gz)
	sudo tar -zvxf "$hadoop_tar" -C /usr/local
	# Strip the trailing ".tar.gz" to get the extracted directory name.
	hadoop_dir=${hadoop_tar%.tar.gz}
	cd /usr/local || exit
	sudo mv "$hadoop_dir" hadoop
	echo "Finish!"
	cd /usr/local/hadoop || exit
	sudo chmod -R 777 /usr/local/hadoop

	echo "Configuring hadoop-env.sh..."
	# The tree was chmod'ed 777 just above, so plain redirection works here
	# (the original's `sudo echo >>` redirected unprivileged anyway).
	echo 'export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"' >> ./etc/hadoop/hadoop-env.sh
	echo "export JAVA_HOME=$JAVA_HOME" >> ./etc/hadoop/hadoop-env.sh
	echo "Finish!"
	echo "Configuring core-site.xml..."
	sudo sed -i '/<configuration>/a\<property>\n<name>hadoop.tmp.dir</name>\n<value>file:/usr/local/hadoop/tmp</value>\n<description>Abase for other temporary directories.</description>\n</property>\n<property>\n<name>fs.defaultFS</name>\n<value>hdfs://localhost:9000</value>\n</property>' ./etc/hadoop/core-site.xml
	echo "Finish!"
	echo "Configuring hdfs-site.xml..."
	sudo sed -i '/<configuration>/a\<property>\n<name>dfs.replication</name>\n<value>1</value>\n</property>\n<property>\n<name>dfs.namenode.name.dir</name>\n<value>file:/usr/local/hadoop/tmp/dfs/name</value>\n</property>\n<property>\n<name>dfs.datanode.data.dir</name>\n<value>file:/usr/local/hadoop/tmp/dfs/data</value>\n</property>' ./etc/hadoop/hdfs-site.xml
	echo "Finish!"

	echo "Configuring environment variables for hadoop..."
	echo "export HADOOP_HOME=/usr/local/hadoop" >> ~/.bashrc
	echo "export HADOOP_COMMON_LIB_NATIVE_DIR=\$HADOOP_HOME/lib/native" >> ~/.bashrc
	echo 'export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"' >> ~/.bashrc
	echo "export HADOOP_YARN_HOME=\$HADOOP_HOME" >> ~/.bashrc
	echo "export HADOOP_MAPRED_HOME=\$HADOOP_HOME" >> ~/.bashrc
	echo "export HADOOP_HDFS_HOME=\$HADOOP_HOME" >> ~/.bashrc
	echo "export YARN_HOME=\$HADOOP_HOME" >> ~/.bashrc
	echo "export JAVA_LIBRARY_PATH=\$HADOOP_HOME/lib/native" >> ~/.bashrc
	# BUGFIX: the original was missing the ':' before $PATH, which would
	# have clobbered the rest of the user's PATH.
	echo "export PATH=\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin:\$PATH" >> ~/.bashrc
	echo "Finish!"
	source ~/.bashrc
}
# Entry point: confirm with the user, run the installer, then verify.
read -p "请先打开本脚本阅读作者的提示,不然很有可能无法成功安装!继续安装请输入'yes'" ans
if [ "$ans" != "yes" ]; then
	exit
fi
main
# Map "master" to localhost. `sudo tee -a` appends as root, so there is no
# need to chmod 777 /etc/hosts; skip the append if the entry already exists
# so reruns do not pile up duplicates.
if ! grep -q "127.0.0.1	master" /etc/hosts; then
	echo "127.0.0.1	master" | sudo tee -a /etc/hosts >/dev/null
fi
hInfo=($(hadoop version))	# word-split on purpose: hInfo[0]="Hadoop", hInfo[1]=version number
if [ "${hInfo[0]}" == "Hadoop" ]; then
	echo -e "\033[0;36;1mSuccessfully install Hadoop! version: ${hInfo[1]}...\033[0m"
	cd /usr/local/hadoop || exit
	# `hadoop namenode -format` is deprecated; hdfs is the current tool.
	hdfs namenode -format
	./sbin/start-all.sh
	firefox http://localhost:8088
	#firefox http://localhost:50070
else
	sudo rm -rf /usr/local/hadoop
	echo -e "\033[0;41;1mSorry, some error(s) occurred...\033[0m"
	echo -e "\033[0;41;1mMaybe you should install Hadoop manually...\033[0m"
fi