前提:成功搭建Hadoop集群 实验要求:搭建基本hive运行平台,并初步了解HIVE shell的基本操作命令
MySQL版本:mysql-5.7.16-1.el7.x86_64.rpm-bundle.tar
Hive版本:apache-hive-1.2.2-bin.tar.gz
链接: https://pan.baidu.com/s/1ijY8eH604NOkv3aqVtKosg 提取码: jmmd 复制这段内容后打开百度网盘手机App,操作更方便哦
MySQL和Hive的基础配置
[root@hadoop1 bin]# cd /opt/
[root@hadoop1 opt]# rz
[root@hadoop1 opt]# ls
apache-hive-1.2.2-bin.tar.gz mysql-5.7.16-1.el7.x86_64.rpm-bundle.tar
#将Hive软件包和mysql安装包解压到/hadoop/soft目录中,并将解压的Hive软件包目录改名为hive
[root@hadoop1 opt]# mkdir /hadoop/soft/mysql
[root@hadoop1 opt]# tar -zxf apache-hive-1.2.2-bin.tar.gz -C /hadoop/soft/
[root@hadoop1 opt]# tar -xf mysql-5.7.21-1.el7.x86_64.rpm-bundle.tar -C /hadoop/soft/mysql
[root@hadoop1 opt]# cd /hadoop/soft/
[root@hadoop1 soft]# ls
apache-hive-1.2.2-bin hbase mysql
hadoop jdk1.7.0_79 zookeeper
[root@hadoop1 soft]#
[root@hadoop1 soft]# mv apache-hive-1.2.2-bin hive
[root@hadoop1 soft]# ls
hadoop hive mysql
hbase jdk1.7.0_79 zookeeper
[root@hadoop1 soft]#
MySQL的安装与配置
1)创建组和用户
[root@hadoop1 soft]# groupadd mysql
[root@hadoop1 soft]# useradd -r -g mysql -p root mysql
2)卸载mariadb
[root@hadoop1 soft]# rpm -qa|grep mariadb
mariadb-libs-5.5.60-1.el7_5.x86_64
[root@hadoop1 soft]# rpm -e --nodeps mariadb-libs-5.5.60-1.el7_5.x86_64
[root@hadoop1 soft]# rpm -qa|grep mariadb
[root@hadoop1 soft]#
3)安装MySQL
[root@hadoop1 soft]# cd /hadoop/soft/mysql/
[root@hadoop1 mysql]# rpm -ivh mysql-community-common-5.7.21-1.el7.x86_64.rpm
warning: mysql-community-common-5.7.21-1.el7.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY
Preparing... ################################# [100%]
Updating / installing...
1:mysql-community-common-5.7.21-1.e################################# [100%]
[root@hadoop1 mysql]# rpm -ivh mysql-community-libs-5.7.21-1.el7.x86_64.rpm
warning: mysql-community-libs-5.7.21-1.el7.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY
Preparing... ################################# [100%]
Updating / installing...
1:mysql-community-libs-5.7.21-1.el7################################# [100%]
[root@hadoop1 mysql]# rpm -ivh mysql-community-client-5.7.21-1.el7.x86_64.rpm
warning: mysql-community-client-5.7.21-1.el7.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY
Preparing... ################################# [100%]
Updating / installing...
1:mysql-community-client-5.7.21-1.e################################# [100%]
[root@hadoop1 mysql]# rpm -ivh mysql-community-server-5.7.21-1.el7.x86_64.rpm
warning: mysql-community-server-5.7.21-1.el7.x86_64.rpm: Header V3 DSA/SHA1 Signature, key ID 5072e1f5: NOKEY
Preparing... ################################# [100%]
Updating / installing...
1:mysql-community-server-5.7.21-1.e################################# [100%]
[root@hadoop1 mysql]#
4)mysql初始化
[root@hadoop1 mysql]# cd /usr/local/bin/
[root@hadoop1 bin]# mysqld --initialize --user=mysql
[root@hadoop1 bin]# cd /var/lib/
[root@hadoop1 lib]# rm -rf mysql
[root@hadoop1 lib]# mysqld --initialize-insecure --user=mysql
[root@hadoop1 lib]# systemctl start mysqld //启动mysqld服务
5)设置mysql远程访问
[root@hadoop1 lib]# mysql
mysql> use mysql;
mysql> select host, user from user;
+-----------+---------------+
| host | user |
+-----------+---------------+
| localhost | mysql.session |
| localhost | mysql.sys |
| localhost | root |
+-----------+---------------+
3 rows in set (0.00 sec)
mysql> update user set host = '%' where user = 'root';
Query OK, 1 row affected (0.00 sec)
Rows matched: 1 Changed: 1 Warnings: 0
mysql> select host, user from user;
+-----------+---------------+
| host | user |
+-----------+---------------+
| % | root |
| localhost | mysql.session |
| localhost | mysql.sys |
+-----------+---------------+
3 rows in set (0.00 sec)
mysql> exit // 退出mysql
Bye
Hive的配置
1)设置环境变量
[root@hadoop1 lib]# vim /etc/profile
增加以下内容
export HIVE_HOME=/hadoop/soft/hive
export PATH=$PATH:$HIVE_HOME/bin
#保存退出,并利用source命令更新环境变量
[root@hadoop1 lib]# source /etc/profile
2)设置权限
#在hdfs下建立如下目录,并为其设定权限
[root@hadoop1 lib]# cd
[root@hadoop1 ~]# hdfs dfs -mkdir -p /user/hive/warehouse
[root@hadoop1 ~]# hdfs dfs -mkdir -p /user/hive/tmp
[root@hadoop1 ~]# hdfs dfs -mkdir -p /user/hive/log
[root@hadoop1 ~]# hdfs dfs -chmod g+w /user/hive/warehouse
[root@hadoop1 ~]# hdfs dfs -chmod g+w /user/hive/tmp
[root@hadoop1 ~]# hdfs dfs -chmod g+w /user/hive/log
#切换到hive下的conf目录
[root@hadoop1 ~]# cd /hadoop/soft/hive/conf/
[root@hadoop1 conf]# ls
beeline-log4j.properties.template hive-exec-log4j.properties.template
hive-default.xml.template hive-log4j.properties.template
hive-env.sh.template ivysettings.xml
#利用相关模板文件建立配置文件
[root@hadoop1 conf]# cp hive-log4j.properties.template hive-log4j.properties
[root@hadoop1 conf]# cp hive-env.sh.template hive-env.sh
[root@hadoop1 conf]# cp hive-default.xml.template hive-site.xml
[root@hadoop1 conf]# cp hive-exec-log4j.properties.template hive-exec-log4j.properties
3)修改配置文件
#修改conf目录下hive-site.xm文件的内容;/hadoop/soft/hive是指安装hive的目录路径。
[root@hadoop1 conf]# vim hive-site.xml
46 <name>hive.exec.scratchdir</name>
47 <value>/tmp/hive</value>
🔺将hive.exec.scratchdir的value值修改成以下内容:
<name>hive.exec.scratchdir</name>
<value>/hadoop/soft/hive</value>
334 <name>hive.metastore.warehouse.dir</name>
335 <value>/user/hive/warehouse</value>
🔺将hive.metastore.warehouse.dir的value值修改成以下内容:
<name>hive.metastore.warehouse.dir</name>
<value>/hadoop/soft/hive/warehouse</value>
🔺在50行之前添加以下内容:
45 <property>
46 <name>system:java.io.tmpdir</name>
47 <value>/hadoop/soft/hive</value>
48 </property>
55 <name>hive.exec.local.scratchdir</name>
56 <value>${system:java.io.tmpdir}/${system:user.name}</value>
🔺将hive.exec.local.scratchdir的value值修改成以下内容:
<name>hive.exec.local.scratchdir</name>
<value>/hadoop/soft/hive/scratchdir</value>
60 <name>hive.downloaded.resources.dir</name>
61 <value>${system:java.io.tmpdir}/${hive.session.id}_resources</value>
🔺将hive.downloaded.resources.dir的value值修改成以下内容:
<name>hive.downloaded.resources.dir</name>
<value>/hadoop/soft/hive/resourcesdir//${hive.session.id}_resources</value>
1323 <name>hive.querylog.location</name>
1324 <value>${system:java.io.tmpdir}/${system:user.name}</value>
🔺将hive.querylog.location的value值修改成以下内容:
<name>hive.querylog.location</name>
<value>/hadoop/soft/hive/query_logs</value>
2915 <name>hive.server2.logging.operation.log.location</name>
2916 <value>${system:java.io.tmpdir}/${system:user.name}/operation_logs</value>
🔺将hive.server2.logging.operation.log.location的value值修改成以下内容:
<name>hive.server2.logging.operation.log.location</name>
<value>/hadoop/soft/hive/operation_logs</value>
4)验证Hive
#启动Hive
[root@hadoop1 conf]# cd /hadoop/soft/hive/bin/
[root@hadoop1 bin]# mkdir /hadoop/soft/hive/resourcesdir
[root@hadoop1 bin]# mkdir /hadoop/soft/hive/scratchdir
[root@hadoop1 bin]# hive
Logging initialized using configuration in file:/hadoop/soft/hive/conf/hive-log4j.properties
hive> show databases; //查看数据库
OK
default
Time taken: 2.054 seconds, Fetched: 1 row(s)
hive> create database databasetest; //建立数据库databasetest
OK
Time taken: 1.296 seconds
hive> show databases;
OK
databasetest
default
Time taken: 0.023 seconds, Fetched: 2 row(s)
HIVE的基础操作
1)创建表
#利用create database命令创建测试数据库databasetest。
hive> show databases; // 查看数据库
OK
default
Time taken: 0.011 seconds, Fetched: 1 row(s)
hive> create database databasetest; //建立测试数据库databasetest
OK
Time taken: 0.973 seconds
hive> show databases;
OK
databasetest
default
Time taken: 0.01 seconds, Fetched: 2 row(s)
2)数据导入
#使用 Hive 工具来创建数据表 tabletest,将 tabletest.txt 导入到该表中,其中 tabletest 表的数据结构如下表所示。导入完成后,通过 hive 查询数据表tabletest的信息。
stname(string) stID(int) class(st ring) opt_cour(string)
hive> create table tabletest (stname string,stID int,class string,opt_cour string) row format delimited fields terminated by '\t' lines terminated by '\n' stored as textfile; //建立数据表tabletest
OK
Time taken: 0.66 seconds
hive> select * from tabletest; // 浏览表tabletest信息
OK
Time taken: 1.504 seconds
hive> load data local inpath '/opt/tabletest.txt' into table tabletest;
//将本地路径/opt/tabletest.txt文件内容导入到表tabletest
Loading data to table databasetest.tabletest
Table databasetest.tabletest stats: [numFiles=1, totalSize=89444]
OK
Time taken: 2.947 seconds
hive> select * from tabletest;
说明:Hive表有内部表、外部表、分区表,桶表四种类型的表。
3)查询表中信息
#查询表tabletest表中某高校 Software_1403 班级报名选修 volleyball 的成员所有信息。
hive> select * from tabletest where class='Software_1403' and opt_cour='volleyball';
OK
student409 10120408 Software_1403 volleyball
student411 10120410 Software_1403 volleyball
student413 10120412 Software_1403 volleyball
student419 10120418 Software_1403 volleyball
student421 10120420 Software_1403 volleyball
student422 10120421 Software_1403 volleyball
student424 10120423 Software_1403 volleyball
student432 10120431 Software_1403 volleyball
student438 10120437 Software_1403 volleyball
student447 10120446 Software_1403 volleyball
Time taken: 0.554 seconds, Fetched: 10 row(s)
4)删除表
#删除已经建立的测试表tabletest和测试数据库databasetest
hive> show tables;
OK
tabletest
Time taken: 0.016 seconds, Fetched: 1 row(s)
hive> drop table tabletest ; //删除测试表tabletest
OK
Time taken: 1.936 seconds
hive> show tables ;
OK
Time taken: 0.047 seconds // 测试表tabletest已经删除
hive> drop database databasetest; // 删除测试数据库databasetest
OK
Time taken: 0.373 seconds
hive> show databases;
OK
default
Time taken: 0.013 seconds, Fetched: 1 row(s) // 测试数据库已经删除