- Download and install Hadoop
# download the tarball and move it into place
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.4/hadoop-3.1.4.tar.gz
tar -xzvf hadoop-3.1.4.tar.gz
sudo mv hadoop-3.1.4/ /usr/local/
sudo ln -sf /usr/local/hadoop-3.1.4 /usr/local/hadoop
sudo chown -R tqc:tqc /usr/local/hadoop # change tqc to your own username
# so that you can `ssh localhost` without a password
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
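If `ssh localhost` still prompts for a password, the key pair probably does not exist yet or the permissions are wrong. A minimal fix, assuming the default `~/.ssh/id_rsa` location:

```bash
# generate an RSA key pair with an empty passphrase if none exists yet
[ -f ~/.ssh/id_rsa ] || ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
chmod 600 ~/.ssh/authorized_keys  # sshd ignores keys with loose permissions
ssh localhost                     # should now log in without a password
```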
Open the directory in VS Code and paste in the configuration files from the blog post.
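That blog's contents are not reproduced here; for a single-machine pseudo-distributed setup, the two core config files typically look roughly like the sketch below. The `hdfs://localhost:9000` address and the `hadoop.tmp.dir` under `/usr/local/hadoop/tmp` are assumptions chosen to match the directories used later in this post; adjust as needed.

```bash
# sketch of a minimal pseudo-distributed config, written via heredocs
cat > /usr/local/hadoop/etc/hadoop/core-site.xml <<'EOF'
<configuration>
  <property><name>fs.defaultFS</name><value>hdfs://localhost:9000</value></property>
  <property><name>hadoop.tmp.dir</name><value>/usr/local/hadoop/tmp</value></property>
</configuration>
EOF

cat > /usr/local/hadoop/etc/hadoop/hdfs-site.xml <<'EOF'
<configuration>
  <property><name>dfs.replication</name><value>1</value></property>
</configuration>
EOF

# make the hadoop commands available, e.g. in ~/.bashrc
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# also point JAVA_HOME in etc/hadoop/hadoop-env.sh at your JDK,
# or the daemons will refuse to start
```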
Start Hadoop with `start-all.sh`, then use `jps` to check that everything came up:
(py36) ~ ᐅ start-all.sh
WARNING: Attempting to start all Apache Hadoop daemons as tqc in 10 seconds.
WARNING: This is not a recommended production deployment configuration.
WARNING: Use CTRL-C to abort.
Starting namenodes on [localhost]
Starting datanodes
Starting secondary namenodes [tianqi]
Starting resourcemanager
Starting nodemanagers
(py36) ~ ᐅ jps
1242466 Jps
1241523 DataNode
3368 NutstoreGUI
1241944 ResourceManager
1242280 NodeManager
1241726 SecondaryNameNode
Notice that the NameNode did not start.
Most likely the required directories were never created:
mkdir -p /usr/local/hadoop/tmp/dfs/name
mkdir -p /usr/local/hadoop/tmp/dfs/data
Before starting Hadoop for the first time, the NameNode has to be formatted:
hdfs namenode -format
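After formatting, a restart should bring the NameNode up; a quick smoke test:

```bash
stop-all.sh && start-all.sh
jps | grep NameNode              # the NameNode should now appear
hdfs dfs -mkdir -p /user/$USER   # create your HDFS home directory
hdfs dfs -ls /                   # HDFS should answer without errors
```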
- Install Scala
wget https://downloads.lightbend.com/scala/2.13.4/scala-2.13.4.tgz
tar -xzvf scala-2.13.4.tgz
sudo mv scala-2.13.4/ /usr/local
sudo ln -sf /usr/local/scala-2.13.4 /usr/local/scala
sudo chown -R tqc:tqc /usr/local/scala # change tqc to your own username
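Put `scala` on the PATH and verify the install; `SCALA_HOME` is just a naming convention here, and the path follows the symlink created above:

```bash
# e.g. in ~/.bashrc
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$SCALA_HOME/bin

scala -version   # expect: Scala code runner version 2.13.4 ...
```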
- Install Spark
wget https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop2.7.tgz
tar -xzvf spark-3.0.1-bin-hadoop2.7.tgz
sudo mv spark-3.0.1-bin-hadoop2.7 /usr/local
sudo ln -sf /usr/local/spark-3.0.1-bin-hadoop2.7 /usr/local/spark
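Same pattern for Spark; the bundled SparkPi example makes a quick sanity check (the path follows the symlink created above):

```bash
# e.g. in ~/.bashrc
export SPARK_HOME=/usr/local/spark
export PATH=$PATH:$SPARK_HOME/bin

# estimate pi on a local executor; look for "Pi is roughly 3.14..." in the output
run-example SparkPi 10
```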
- Configure PYTHONPATH so pyspark can be imported
export PYTHONPATH=$PYTHONPATH:/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9-src.zip
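A quick check from a fresh shell that the export took effect:

```bash
# pyspark should now resolve from the Spark distribution itself
python -c "import pyspark; print(pyspark.__version__)"   # expect 3.0.1
```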
- Hive
Reference guides (in Chinese):
https://blog.csdn.net/weixx3/article/details/94133847
https://zhuanlan.zhihu.com/p/125547605
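The two links above walk through the details; the skeleton follows the same pattern as the installs above. A rough sketch, assuming Hive 3.1.2 and the embedded Derby metastore that Hive defaults to (swap in MySQL for anything beyond local experiments):

```bash
wget https://archive.apache.org/dist/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
tar -xzvf apache-hive-3.1.2-bin.tar.gz
sudo mv apache-hive-3.1.2-bin /usr/local
sudo ln -sf /usr/local/apache-hive-3.1.2-bin /usr/local/hive
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HIVE_HOME/bin

# initialize the metastore schema in embedded Derby, then launch the CLI
schematool -dbType derby -initSchema
hive
```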