################ 修改目录权限 #################### [root@vwmaster hadoop]# chown -R root:root hadoop260/
################ JDK ####################
[root@vwmaster hadoop260]# vi etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/bigdata/java/jdk180
################ hadoop fs 文件系统 #################### 集群配置时将 IP 改为 主机名 [root@vwmaster hadoop260]# vi etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://vwmaster:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/bigdata/hadoop/hadoop260</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
</configuration>
################ hadoop replicas 备份 ####################
dfs.replication 备份数量改为2
dfs.hosts 添加节点白名单
[root@vwmaster hadoop]# pwd
/opt/bigdata/hadoop
[root@vwmaster hadoop]# mkdir hdfs [root@vwmaster hadoop]# ls hadoop260 hdfs
[root@vwmaster hadoop]# cd hdfs/ [root@vwmaster hdfs]# mkdir namenode datanode [root@vwmaster hdfs]# ls datanode namenode
[root@vwmaster hadoop260]# vi etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<!--
<property>
<name>dfs.hosts</name>
<value>/opt/bigdata/hadoop/hadoop260/etc/hadoop/slaves</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>vwmaster:50090</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/bigdata/hadoop/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/bigdata/hadoop/hdfs/datanode</value>
</property>
-->
</configuration>
################ hadoop mapreduce 计算框架 ####################
mapreduce.jobhistory.address 添加jobhistory访问端口
mapreduce.jobhistory.webapp.address 添加jobhistory web访问端口
[root@vwmaster hadoop260]# cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
[root@vwmaster hadoop260]# vi etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--
<property>
<name>mapreduce.jobhistory.address</name>
<value>vwmaster:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>vwmaster:19888</value>
</property>
-->
</configuration>
################ hadoop yarn 管理调度 #################### yarn.log-aggregation.retain-seconds 添加yarn日志保留时间为7天(单位秒) yarn.nodemanager.aux-services.mapreduce_shuffle.class 添加指定shuffle计算具体类型 yarn.resourcemanager.hostname 添加yarn主机名 [root@vwmaster hadoop260]# vi etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>vwmaster</value>
</property>
<!--
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
-->
</configuration>
################ hadoop slaves 主机名 ####################
注意:hadoop 3.0 以上版本 slaves 更名为 workers
添加所有节点主机名 vwmaster:yarn.resourcemanager 添加所有节点主机名 vwjob:mapreduce.jobhistory 添加所有节点主机名 vwslave01/02/03:datanode [root@vwmaster hadoop260]# vi etc/hadoop/slaves
vwslave01
vwslave02
vwslave03
################ hadoop 环境变量 #################### [root@vwmaster hadoop260]# vi /etc/profile
export JAVA_HOME=/opt/bigdata/java/jdk180
export TOMCAT_HOME=/opt/bigdata/java/tomcat85
export NODE_HOME=/opt/bigdata/elk/node891
export HADOOP_HOME=/opt/bigdata/hadoop/hadoop260
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$NODE_HOME/bin:$JAVA_HOME/bin:$TOMCAT_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
[root@vwmaster hadoop260]# source /etc/profile
################ hadoop 格式化 HDFS #################### [root@vwmaster hadoop260]# cd bin [root@vwmaster bin]# hdfs namenode -format ##见到以下信息说明格式化成功 /************************************************************ SHUTDOWN_MSG: Shutting down NameNode at vwmaster/20.0.0.100 ************************************************************/ ##如果格式化失败:需要去检查之前各项配置
################ hadoop-native库 ####################
[root@vwmaster bin]# cd /opt/install/hadoop
[root@vwmaster hadoop]# tar -xf hadoop-native-64-2.6.0.tar -C /opt/bigdata/hadoop/hadoop260/lib/native/
################ 启动 hadoop #################### [root@vwmaster hadoop]# cd /opt/bigdata/hadoop/hadoop260/bin [root@vwmaster bin]# start-dfs.sh [root@vwmaster bin]# start-yarn.sh [root@vwmaster bin]# stop-all.sh
################ 启动或关闭 hadoop 需要输入密码的解决方法 #################### [root@vwmaster bin]# cd ~ [root@vwmaster ~]# cd .ssh/ [root@vwmaster .ssh]# ls authorized_keys id_rsa id_rsa.pub known_hosts [root@vwmaster .ssh]# cat id_rsa.pub >> authorized_keys [root@vwmaster .ssh]# ssh localhost
################ 启动 jobhistory #################### [root@vwmaster bin]# cd ../sbin/ [root@vwmaster sbin]# jps [root@vwmaster sbin]# ./mr-jobhistory-daemon.sh start historyserver
################ 查看 hadoop 服务状态 #################### [root@vwmaster sbin]# jps 6800 NodeManager 7329 Jps 6387 DataNode 6548 SecondaryNameNode 6264 NameNode 6697 ResourceManager 7259 JobHistoryServer
Web 界面: http://20.0.0.100:50070 (HDFS NameNode) http://20.0.0.100:8088 (YARN ResourceManager) http://20.0.0.100:19888 (JobHistory)
#------------------------- 操作 hdfs -----------------------------------# 查看hdfs目录 如果存在警告 20/08/15 08:24:13 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes 解决方法:在log4j.properties中添加:log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
cd ~/kb08/data
hdfs dfs -ls / hdfs dfs -du [-s] /kb08/data 统计文件夹信息 hdfs dfs -mkdir /kb08 hdfs dfs -mkdir -p /kb08/data 创建多级目录 hdfs dfs -rmdir /kb08/data 删除目录
hdfs dfs -put ~/kb08/hadoop/story.csv /kb08/data 上传文件 hdfs dfs -get /kb08/data/story.csv ~ 下载文件 hdfs dfs -getmerge /kb08/data/* ~/merge.txt 合并下载 hdfs dfs -rm /kb08/data/story.csv hdfs dfs -cat /kb08/data/story.csv
touch append.txt vi append.txt hello guys, I am so honored to see you again let us enjoy this moment
hdfs dfs -appendToFile append.txt /kb08/data/story.csv 追加文件内容 hdfs dfs -chmod 777 /kb08/data/story.csv hdfs dfs -chown henry:root /kb08/hadoop/story.csv hdfs dfs -cp /kb08/hadoop/story.csv /kbcom/data hdfs dfs -mv /kb08/hadoop/story.csv /kbcom/data
hdfs dfs -setrep 3 /kb08/data/story.csv 设置文件备份数量(不能超过节点数量) hdfs dfsadmin -safemode get 查看安全模式 hdfs dfsadmin -safemode enter 进入安全模式 hdfs dfsadmin -safemode leave 离开安全模式
#--------------------- java 操作hadoop ----------------------# #依赖 <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <maven.compiler.source>1.8</maven.compiler.source> <maven.compiler.target>1.8</maven.compiler.target> <hadoop.version>2.6.0-cdh5.14.2</hadoop.version> </properties>
<dependencies> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.11</version> <scope>test</scope> </dependency> <!-- hadoop-common --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <version>${hadoop.version}</version> </dependency> <!-- hadoop-hdfs --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-hdfs</artifactId> <version>${hadoop.version}</version> </dependency> <!-- hadoop-client --> <dependency> <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-client</artifactId> <version>${hadoop.version}</version> </dependency> <!-- log4j --> <dependency> <groupId>log4j</groupId> <artifactId>log4j</artifactId> <version>1.2.17</version> </dependency> </dependencies>
#--------------- 因为 Maven 中央仓库不提供 CDH 版本的 hadoop 依赖,所以需手动添加 cloudera 仓库 --------------------------# <repositories> <repository> <id>cloudera</id> <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url> </repository> </repositories>