hive表打开parquet报错 hive show tables

转载

架构设计师之光 2023-07-13 21:19:57

文章标签 hive表打开parquet报错 hive 大数据 hadoop HDFS 文章分类 Hive 大数据

hive中常见的show方法总结。

show databases 显示数据库
show tables 显示表
desc formatted table_name 查看表的元数据信息
show create table table_name 获取建表语句
show functions 显示Hive中可用的函数列表

hive中DML语句load加载数据

功能：load加载操作是将数据文件复制或移动到与hive表对应位置的纯复制/移动操作，加载过程中不会对数据做任何转换

-- Syntax synopsis for LOAD DATA (reference only, not an executable statement;
-- square brackets mark optional parts):
--   LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO
--   TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
-- With LOCAL, the data is loaded into the Hive table from the local file system.
-- Without LOCAL, the data is loaded into the Hive table from the HDFS file system.
-- Example
-- Step 1: create the tables.

-- student_local: used to demonstrate loading data from the local file system.
CREATE TABLE student_local (
    num  INT,
    name STRING,
    sex  STRING,
    age  INT,
    dept STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- student_HDFS: used to demonstrate loading data from HDFS into a
-- non-partitioned table.
CREATE TABLE student_HDFS (
    num  INT,
    name STRING,
    sex  STRING,
    age  INT,
    dept STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- student_HDFS_p: used to demonstrate loading data from HDFS into a
-- partitioned table (partition column: country).
CREATE TABLE student_HDFS_p (
    num  INT,
    name STRING,
    sex  STRING,
    age  INT,
    dept STRING
)
PARTITIONED BY (country STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Step 2: load the data.

-- Load from the local file system. The file lives on the local file system of
-- the HS2 host (node1); in essence this is a `hadoop fs -put` upload.
-- Sample log line:
--   Loading data to table itheima.student_local from file:/root/hivedata/students.txt
LOAD DATA LOCAL INPATH '/root/hivedata/students.txt'
INTO TABLE student_local;

-- Load from HDFS. The file sits in the HDFS root directory; in essence this is
-- a `hadoop fs -mv` move.
-- Sample log line:
--   Loading data to table itheima.student_hdfs from hdfs://node1:8020/stu/students.txt
-- NOTE(review): the sample log shows the file under /stu, while the statement
-- below reads from the HDFS root -- confirm which path is intended.

-- Upload the file to HDFS first:
--   hadoop fs -put /root/hivedata/students.txt /
LOAD DATA INPATH '/students.txt'
INTO TABLE student_HDFS;

-- Load from HDFS into a partitioned table and name the target partition.
-- The file sits in the HDFS root directory.
-- Upload the file to HDFS first:
--   hadoop fs -put /root/hivedata/students.txt /
LOAD DATA INPATH '/students.txt'
INTO TABLE student_HDFS_p
PARTITION (country = "China");

hive中DML中insert语法

-- What happens if you approach Hive with a MySQL mindset and insert rows
-- with INSERT ... VALUES?

CREATE TABLE t (
    id   INT,
    name STRING
);

INSERT INTO TABLE t
VALUES (1, "a");

-- This runs, but it is extremely slow because the insert is carried out by
-- MapReduce under the hood; in practice, loading data with LOAD is recommended.
-- In Hive, INSERT is mainly combined with a SELECT query so that the query
-- result is inserted into a table.
-- Step 1: create the source table student.
DROP TABLE IF EXISTS student;

CREATE TABLE student (
    num  INT,
    name STRING,
    sex  STRING,
    age  INT,
    dept STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ',';

-- Load the data.
LOAD DATA LOCAL INPATH '/root/hivedata/students.txt'
INTO TABLE student;

SELECT *
FROM student;

-- Step 2: create the target table, which has only two columns.
CREATE TABLE student_from_insert (
    sno   INT,
    sname STRING
);

-- Use INSERT + SELECT to copy rows into the new table.
INSERT INTO TABLE student_from_insert
SELECT
    num,
    name
FROM student;

SELECT * FROM student_from_insert;

动态分区插入

-- This section applies to partitioned tables.
-- Question: how is the value of a partition column determined?

-- 1. If the value is hard-coded by hand when the data is loaded, it is called
--    a static partition.
-- NOTE(review): t_user_double_p is not created in this section; it is assumed
-- to already exist with partition columns guojia and sheng -- confirm.
LOAD DATA LOCAL INPATH '/root/hivedata/usa_dezhou.txt'
INTO TABLE t_user_double_p
PARTITION (guojia = "meiguo", sheng = "dezhou");

-- 2. If the value is determined dynamically through INSERT + SELECT, it is
--    called a dynamic partition.
-- Pattern (reference only, not an executable statement):
--   INSERT INTO TABLE <table> PARTITION (<partition column>) SELECT ...

-- 1. Switch the dynamic-partition mode to non-strict. (Per the original note,
--    the dynamic-partition feature itself is already enabled by default.)
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;

-- 2. The current database already contains the table student.
SELECT *
FROM student;

-- 3. Create the partitioned table, using Sdept as the partition column.
CREATE TABLE student_partition (
    Sno   INT,
    Sname STRING,
    Sex   STRING,
    Sage  INT
)
PARTITIONED BY (Sdept STRING);

-- 4. Run the dynamic-partition insert.
-- num, name, sex and age are written as the table's regular column values;
-- dept supplies the value of the partition column.
INSERT INTO TABLE student_partition PARTITION (Sdept)
SELECT
    num,
    name,
    sex,
    age,
    dept
FROM student;

SELECT * FROM student_partition;

SHOW PARTITIONS student_partition;

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。