环境 rhel6.5
hadoop 安装与测试
[root@server6 ~]# useradd -u 800 hadoop
##id随意,需要注意的是所有节点id必须一致
[root@server6 ~]# id hadoop
uid=800(hadoop) gid=800(hadoop) 组=800(hadoop)
[root@server6 ~]# su - hadoop
[hadoop@server6 ~]$ ls
hadoop-2.7.3.tar.gz jdk-7u79-linux-x64.tar.gz
[hadoop@server6 ~]$ tar -zxf hadoop-2.7.3.tar.gz
[hadoop@server6 ~]$ tar -zxf jdk-7u79-linux-x64.tar.gz
[hadoop@server6 ~]$ ln -s hadoop-2.7.3 hadoop
[hadoop@server6 ~]$ ln -s jdk1.7.0_79/ jdk
[hadoop@server6 ~]$ source ~/.bash_profile
[hadoop@server6 ~]$ echo $JAVA_HOME
/home/hadoop/jdk
[hadoop@server6 ~]$ cd hadoop
[hadoop@server6 hadoop]$ mkdir input
[hadoop@server6 hadoop]$ cp etc/hadoop/*.xml input/
[hadoop@server6 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
[hadoop@server6 hadoop]$ ls output/
part-r-00000 _SUCCESS
[hadoop@server6 hadoop]$ cat output/*

1 dfsadmin
[hadoop@server6 hadoop]$ vim etc/hadoop/hadoop-env.sh

# The java implementation to use.

export JAVA_HOME=/home/hadoop/jdk
##注意java变量路径,否则后面系统起不来。
[hadoop@server6 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount input output
伪分布式操作(需要ssh免密)
[hadoop@server6 hadoop]$ vim etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://172.25.35.6:9000</value>
</property>
</configuration>
[hadoop@server6 hadoop]$ vim etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
[hadoop@server6 hadoop]$ vim etc/hadoop/slaves
172.25.35.6 ##将默认localhost改为master本主机ip地址
ssh 免密
[hadoop@server6 hadoop]$ exit
logout
[root@server6 ~]# passwd hadoop
[root@server6 ~]# su - hadoop
[hadoop@server6 ~]$ ssh-keygen
[hadoop@server6 ~]$ ssh-copy-id 172.25.35.6
[hadoop@server6 ~]$ ssh 172.25.35.6 ##测试登陆,不需要输密码就ok
也可这样:
[hadoop@server6 ~]$ ssh-keygen
得到公钥后:
[hadoop@server6 ~]$ cd .ssh/
[hadoop@server6 .ssh]$ ls
[hadoop@server6 .ssh]$ mv id_rsa.pub authorized_keys
[hadoop@server6 hadoop]$ bin/hdfs namenode -format ##进行格式化
[hadoop@server6 hadoop]$ sbin/start-dfs.sh ##启动hadoop
[hadoop@server6 hadoop]$ jps ##用jps检验各后台进程是否成功启动,看到以下四个进程,就成功了
6376 DataNode
6274 NameNode
6544 SecondaryNameNode
6687 Jps
浏览器输入:172.25.35.6:50070 ##NameNode 的 web 管理页面,默认端口为 50070

伪分布的操作:
[hadoop@server6 hadoop]$ bin/hdfs dfs -mkdir /user
[hadoop@server6 hadoop]$ bin/hdfs dfs -mkdir /user/hadoop
[hadoop@server6 hadoop]$ bin/hdfs dfs -put input test ##上传本地的 input 并改名为 test
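[hadoop@server6 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount test output ##对 HDFS 上的 test 执行 wordcount,结果写入 HDFS 的 output
那怎么查看结果呢?用下面的命令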
[hadoop@server6 hadoop]$ bin/hdfs dfs -cat output/*
...
within 1
without 1
work 1
writing, 8
you 9
[hadoop@server6 hadoop]$ bin/hdfs dfs -get output . ##将output下载到本地
[hadoop@server6 hadoop]$ ls
bin include libexec logs output sbin
etc lib LICENSE.txt NOTICE.txt README.txt share
[hadoop@server6 hadoop]$ bin/hdfs dfs -rm -r output ##删除
17/10/24 21:11:24 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
Deleted output

hadoop 完全分布模式搭建
用nfs网络文件系统,就不用每个节点安装一遍了,需要rpcbind和nfs开启
[hadoop@server6 hadoop]$ sbin/stop-dfs.sh
[hadoop@server6 hadoop]$ logout
[root@server6 ~]# yum install -y rpcbind
[root@server6 ~]# yum install -y nfs-utils
[root@server6 ~]# vim /etc/exports
/home/hadoop *(rw,anonuid=800,anongid=800)
[root@server6 ~]# /etc/init.d/rpcbind start
[root@server6 ~]# /etc/init.d/rpcbind status
[root@server6 ~]# /etc/init.d/nfs restart
启动 NFS 服务: [确定]
关掉 NFS 配额: [确定]
启动 NFS mountd: [确定]
启动 NFS 守护进程: [确定]
[root@server6 ~]# showmount -e
Export list for server6:
/home/hadoop *

[root@server6 ~]# exportfs -v
/home/hadoop <world>(rw,wdelay,root_squash,no_subtree_check,anonuid=800,anongid=800)
[hadoop@server6 hadoop]$ vim etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://masters</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>172.25.35.7:2181,172.25.35.8:2181</value>
</property>
</configuration>