1. Three virtual machines
2. JDK environment variables
JDK version: jdk-8u144-linux-x64.tar
Environment variables are configured in /etc/profile.d/env.sh
#JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_144
export PATH=$PATH:$JAVA_HOME/bin
After editing, run source /etc/profile.d/env.sh
Hadoop environment variables
Hadoop version: hadoop-2.7.2.tar
Environment variables are configured in /etc/profile.d/env.sh
#HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
After editing, run source /etc/profile.d/env.sh
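A quick way to confirm both sets of variables took effect in a new shell:
java -version
hadoop version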

Hadoop configuration file changes
Hadoop cluster startup
hadoop102: start HDFS with sbin/start-dfs.sh
hadoop103: start YARN with sbin/start-yarn.sh
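Once the configuration files below are edited and distributed, a typical first start looks like this (a sketch; the NameNode is formatted only once, and starting the history server is optional):
bin/hdfs namenode -format                            # hadoop102, first start only
sbin/start-dfs.sh                                    # hadoop102
sbin/start-yarn.sh                                   # hadoop103
sbin/mr-jobhistory-daemon.sh start historyserver     # hadoop102
jps                                                  # verify the daemons on each node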

Configure core-site.xml
<!-- Address of the NameNode in HDFS -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop102:9000</value>
</property>
<!-- Storage directory for files Hadoop generates at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-2.7.2/data/tmp</value>
</property>
Configure hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- Host for the Hadoop secondary NameNode -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop104:50090</value>
</property>
Configure yarn-site.xml
<!-- How reducers fetch data -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Address of the YARN ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop103</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Keep aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
Configure mapred-site.xml
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Job history server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop102:10020</value>
</property>
<!-- Job history server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop102:19888</value>
</property>
</property>
Configure hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
Configure yarn-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
Configure mapred-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
Configure slaves
hadoop102
hadoop103
hadoop104
After configuration, distribute Hadoop to hadoop103 and hadoop104.
Script method
In the root directory create a bin folder: mkdir bin
In the bin directory: touch xsync, then vi xsync
Put the following in xsync:
#!/bin/bash
#1 Get the number of arguments; exit immediately if none were given
pcount=$#
if((pcount==0)); then
echo no args;
exit;
fi
#2 Get the file name
p1=$1
fname=`basename $p1`
echo fname=$fname
#3 Get the absolute path of the parent directory
pdir=`cd -P $(dirname $p1); pwd`
echo pdir=$pdir
#4 Get the current user name
user=`whoami`
#5 Loop over the target hosts
for((host=103; host<105; host++)); do
echo ------------------- hadoop$host --------------
rsync -rvl $pdir/$fname $user@hadoop$host:$pdir
done
The virtual machines need rsync installed: yum install rsync -y. Once installed, files can be distributed.
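Before the first run the script also has to be made executable; in the bin directory created above:
chmod +x xsync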
xsync hadoop-2.7.2
Install LZO so that Hadoop can use LZO compression
Upload the jar hadoop-lzo-0.4.20.jar, put it in /opt/module/hadoop-2.7.2/share/hadoop/common, distribute the jar to the other virtual machines (hadoop103, hadoop104), and modify core-site.xml:
<property>
<name>io.compression.codecs</name>
<value>
org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
LZO practice
Upload files to HDFS
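A minimal sketch of producing the .lzo test file used below, assuming the lzop tool is installed on the node and bigtable is a plain-text input file:
lzop bigtable                      # produces bigtable.lzo alongside the original
hadoop fs -mkdir -p /input
hadoop fs -put bigtable.lzo /input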

Run the wordcount program
hadoop jar /opt/module/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /input /output1
Build the LZO index (an index file will appear in HDFS):
hadoop jar /opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar com.hadoop.compression.lzo.DistributedLzoIndexer /input/bigtable.lzo
Then run the wordcount program again using the index file:
hadoop jar /opt/module/hadoop-2.7.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar wordcount /input /output2
Install Zookeeper
When installing Zookeeper, pay close attention to myid. Upload zookeeper-3.4.10.tar to the virtual machine and extract it to the target directory: tar -zxvf zookeeper-3.4.10.tar -C /opt/module. Configuration:
In the zookeeper directory create a data folder: mkdir -p zkData. In zkData create the file myid: vim myid and enter the id 2. In the conf directory configure zoo.cfg:
mv zoo_sample.cfg zoo.cfg
Change the path of the Zookeeper data directory (where the unique id file lives):
dataDir=/opt/module/zookeeper-3.4.10/zkData
Add the cluster configuration:
#######################cluster##########################
server.2=hadoop102:2888:3888
server.3=hadoop103:2888:3888
server.4=hadoop104:2888:3888
Distribute the configured zookeeper to every machine and change the value of zkData/myid on each one.
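Before scripting it, Zookeeper can be started and checked manually on each node with the standard zkServer.sh commands:
bin/zkServer.sh start
bin/zkServer.sh status    # one node should report leader, the others follower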
Write a Zookeeper cluster startup script
Create the script with vim and add the following:
#!/bin/bash
case $1 in
"start"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh start"
done
};;
"stop"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh stop"
done
};;
"status"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh status"
done
};;
esac
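Assuming the script was saved as zk.sh (the file name is not fixed above) and made executable, a typical invocation is:
chmod +x zk.sh
./zk.sh start
./zk.sh status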

Install Flume
1. Upload and extract
2. Modify the configuration file
mv flume-env.sh.template flume-env.sh
vim flume-env.sh
Add:
export JAVA_HOME=/opt/module/jdk1.8.0_144
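A quick check that Flume picked up the JDK, using Flume's standard launcher:
bin/flume-ng version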
Install Kafka
1. Upload and extract
2. Modify the configuration file
In the kafka directory create a logs folder: mkdir logs. In the config directory edit the file: vi server.properties
## The broker id; it must not be repeated across brokers
broker.id=0
## Enable topic deletion
delete.topic.enable=true
## Path where Kafka stores its log (data) files
log.dirs=/opt/module/kafka/logs
# Zookeeper cluster connection string
zookeeper.connect=hadoop102:2181,hadoop103:2181,hadoop104:2181
After editing, distribute Kafka to the other machines and then change the broker.id on each.
Write a Kafka cluster startup script
Create the script with vim and add the following:
#!/bin/bash
case $1 in
"start"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/kafka/bin/kafka-server-start.sh -daemon /opt/module/kafka/config/server.properties"
done
};;
"stop"){
for i in hadoop102 hadoop103 hadoop104
do
ssh $i "/opt/module/kafka/bin/kafka-server-stop.sh"
done
};;
"status"){
for i in hadoop102 hadoop103 hadoop104
do
# Kafka has no status script; check for the Kafka process with jps
ssh $i "jps | grep -i kafka"
done
};;
esac
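Assuming the script was saved as kf.sh and made executable, the cluster can be started and smoke-tested with Kafka's topic tool (the topic name is just an example; this applies to Kafka versions that still take --zookeeper, and Zookeeper must already be running):
./kf.sh start
bin/kafka-topics.sh --zookeeper hadoop102:2181 --create --replication-factor 1 --partitions 1 --topic first
bin/kafka-topics.sh --zookeeper hadoop102:2181 --list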
Install MySQL
- Before installing, remove the MySQL-related packages that ship with Linux
// Check the MySQL packages on the virtual machine
rpm -qa|grep mysql
// Remove them
rpm -e --nodeps mysql-libs-5.1.73-7.el6.x86_64
- Upload the MySQL installer and unzip it
unzip mysql-libs.zip
// unzip needs to be installed first
yum install -y unzip zip
- Install the MySQL server
rpm -ivh MySQL-server-5.6.24-1.el6.x86_64.rpm
If this fails to install, it is usually because of a conflict with the MariaDB packages that ship with CentOS.

Solution
// 1. Check the dependencies
yum list | grep mysql
// 2. Remove them
yum remove mysql-libs
// 3. Run the install again
rpm -ivh MySQL-server-5.6.24-1.el6.x86_64.rpm
// This can still error because the autoconf library is missing
yum -y install autoconf
/usr/bin/mysql_install_db --user=mysql
// Start the MySQL service
service mysql start
- Check MySQL's default login password; if there is none, login works without a password
cat /root/.mysql_secret
- Connect to MySQL
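Note: the mysql command used below comes from the MySQL client package, which is not installed explicitly above. A hedged sketch, assuming the client rpm shipped in the same mysql-libs.zip with a name matching the server rpm:
rpm -ivh MySQL-client-5.6.24-1.el6.x86_64.rpm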
mysql -uroot -p
- Set the password
SET PASSWORD=PASSWORD('root');
MySQL host configuration
use mysql;
select User,Host,Password from user;
update user set host='%' where host='localhost';
// Delete the other host entries
delete from user where Host='hadoop102';
delete from user where Host='127.0.0.1';
delete from user where Host='::1';
// Flush privileges
flush privileges;
MySQL installation is complete!
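To confirm the host change took effect, a quick check is to log in from another node (assumes the mysql client is available there and the password set above):
mysql -h hadoop102 -uroot -proot -e "show databases;"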
Install Sqoop
- Upload the sqoop archive
- Extract it to the target directory
tar -zxf sqoop-1.4.6.bin__hadoop-2.0.4-alpha.tar.gz -C /opt/module/
- Modify the configuration file
In /opt/module/sqoop/conf:
mv sqoop-env-template.sh sqoop-env.sh
vim sqoop-env.sh and add the following:
export HADOOP_COMMON_HOME=/opt/module/hadoop-2.7.2
export HADOOP_MAPRED_HOME=/opt/module/hadoop-2.7.2
export HIVE_HOME=/opt/module/hive
export ZOOKEEPER_HOME=/opt/module/zookeeper-3.4.10
export ZOOCFGDIR=/opt/module/zookeeper-3.4.10/conf
export HBASE_HOME=/opt/module/hbase
- Copy the JDBC driver into sqoop's lib directory
The JDBC driver version used here is mysql-connector-java-5.1.27-bin.jar
cp mysql-connector-java-5.1.27-bin.jar /opt/module/sqoop/lib/
- Verify sqoop
bin/sqoop help
If the list of available commands appears, the installation succeeded.
- Test whether sqoop can connect to MySQL
bin/sqoop list-databases --connect jdbc:mysql://hadoop102:3306/ --username root --password root
If the database names are listed, the connection works.
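For reference, a minimal import sketch; the database testdb, table user_info, and target directory are hypothetical, and HDFS/YARN must be running:
bin/sqoop import --connect jdbc:mysql://hadoop102:3306/testdb --username root --password root --table user_info --target-dir /sqoop/user_info --num-mappers 1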
Install Hive
Upload the hive archive and extract it to the target directory:
tar -zxvf apache-hive-2.3.6-bin.tar.gz -C /opt/module/
mv apache-hive-2.3.6-bin hive
Copy the MySQL driver jar into hive's lib directory:
cp /opt/software/mysql-libs/mysql-connector-java-5.1.27/mysql-connector-java-5.1.27-bin.jar /opt/module/hive/lib/
Create the file hive-site.xml under /opt/module/hive/conf
Note the MySQL connection URL, username, and password specified below
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://hadoop102:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>root</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<property>
<name>datanucleus.schema.autoCreateAll</name>
<value>true</value>
</property>
<property>
<name>hive.execution.engine</name>
<value>tez</value>
</property>
</configuration>
Start hive with: bin/hive
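A quick smoke test once the CLI starts (the table name is just an example). Note that hive-site.xml above already sets hive.execution.engine=tez, so statements that launch jobs (such as insert) will only succeed after the Tez integration below is finished:
show databases;
create table student(id int, name string);
show tables;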
Integrating Hive with Tez
Why Tez? Hive runs on MapReduce by default, which limits processing speed; Tez keeps work in memory, which improves Hive's performance.
Download the package and upload it to HDFS
hadoop fs -mkdir /tez
hadoop fs -put /opt/software/apache-tez-0.9.1-bin.tar.gz /tez
Also extract a copy locally:
tar -zxvf apache-tez-0.9.1-bin.tar.gz -C /opt/module
mv apache-tez-0.9.1-bin/ tez-0.9.1
Integrate Tez
In hive's conf directory create a tez-site.xml file:
vim tez-site.xml and add the following:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>tez.lib.uris</name>
<value>${fs.defaultFS}/tez/apache-tez-0.9.1-bin.tar.gz</value>
</property>
<property>
<name>tez.use.cluster.hadoop-libs</name>
<value>true</value>
</property>
<property>
<name>tez.history.logging.service.class</name>
<value>org.apache.tez.dag.history.logging.ats.ATSHistoryLoggingService</value>
</property>
</configuration>
Add the Tez dependencies in hive's hive-env.sh:
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export HIVE_CONF_DIR=/opt/module/hive/conf
export TEZ_HOME=/opt/module/tez-0.9.1
export TEZ_JARS=""
for jar in `ls $TEZ_HOME |grep jar`; do
export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/$jar
done
for jar in `ls $TEZ_HOME/lib`; do
export TEZ_JARS=$TEZ_JARS:$TEZ_HOME/lib/$jar
done
export HIVE_AUX_JARS_PATH=/opt/module/hadoop-2.7.2/share/hadoop/common/hadoop-lzo-0.4.20.jar$TEZ_JARS
Change the execution engine to tez in hive-site.xml:
<property>
<name>hive.execution.engine</name>
<value>tez</value>
</property>
After switching hive's execution engine to tez, starting hive and trying to insert data can fail with errors; the cause is an OOM or the NodeManager killing the container, and the fix is to disable YARN's virtual-memory check.
Specifically, go to hadoop/etc/hadoop and modify yarn-site.xml,
adding:
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
Then distribute yarn-site.xml to all remaining nodes and restart YARN; this fixes the error.
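A sketch of the distribution and restart, using the xsync script from earlier (YARN was started on hadoop103):
xsync /opt/module/hadoop-2.7.2/etc/hadoop/yarn-site.xml
sbin/stop-yarn.sh     # run on hadoop103
sbin/start-yarn.sh    # run on hadoop103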

















