1.  ORC + Snappy compression

create table if not exists dwd.dwd_event_detail(
    visit_user_id string,
    session_id string,
    user_id string,
    event string,
    app_version string,
    platform string
)
comment 'event detail table'
partitioned by (dt string)
stored as orc
tblproperties ("orc.compress"="SNAPPY");
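
A usage sketch, assuming a hypothetical staging table ods.ods_event_raw with the same columns plus dt (the date value below is illustrative):

set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd.dwd_event_detail partition (dt)
select visit_user_id, session_id, user_id, event, app_version, platform, dt
from ods.ods_event_raw;

select count(*) from dwd.dwd_event_detail where dt = '2024-01-01';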

2.  Reading and writing JSON files on HDFS, using an external table

create external table if not exists ods.ods_backup_log(doc string)
location '<hdfs storage path>';
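
Each line of the file lands in the single doc column as raw JSON, so fields are extracted at query time. A minimal sketch, assuming hypothetical user_id and event fields in the JSON:

select
    get_json_object(doc, '$.user_id') as user_id,
    get_json_object(doc, '$.event') as event
from ods.ods_backup_log;

-- json_tuple parses several fields in one pass:
select t.user_id, t.event
from ods.ods_backup_log
lateral view json_tuple(doc, 'user_id', 'event') t as user_id, event;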

3. Reading txt file data on HDFS, split on ','

create external table if not exists log.log_search_log(
    user_id string,
    keyword string,
    channel string,
    app_type string,
    hotkey_set string,
    result_num string,
    time_stamp string,
    created_at string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES ("separatorChar" = ",")
location '/data/day/search/';
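
Note that OpenCSVSerde returns every column as string regardless of the declared type, so numeric fields are typically cast at read time, e.g.:

select user_id, keyword, cast(result_num as int) as result_num
from log.log_search_log;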

4. Reading HBase data from Hive

CREATE EXTERNAL TABLE src.src_study_log (
    key string,
    b1_course_id string,
    b1_lesson_id string,
    b1_play_time bigint,
    b1_play_from string,
    b1_play_position string,
    b1_play_source string,
    b1_user_id string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
-- the mapping must be one comma-separated string with no whitespace between entries
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,basic_info:courseId,basic_info:lessonId,basic_info:playTime#b,basic_info:playFrom,basic_info:playPosition,basic_info:playSource,basic_info:userId")
TBLPROPERTIES("hbase.table.name" = "test_model");

-- select * from src.src_study_log limit 10;
-- drop table src.src_study_log;
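
Here :key maps to the HBase row key, and the #b suffix tells the SerDe that playTime is stored in binary rather than string encoding. Once mapped, the table also accepts writes; a hedged sketch, assuming a hypothetical staging table src.src_study_log_stg:

insert into table src.src_study_log
select concat(user_id, '_', lesson_id),  -- row key, must be unique per row
       course_id, lesson_id, play_time, play_from, play_position, play_source, user_id
from src.src_study_log_stg;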

Notes

1.   An external table's data files are managed by HDFS rather than Hive; the storage location can be any HDFS directory you specify. A managed (internal) table is stored by default under /user/hive/warehouse/.

      Dropping an external table deletes only the metadata; the files on HDFS are left intact.

      Dropping a managed table deletes both the metadata and the stored data.
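
      A quick way to check which kind a table is: describe formatted reports Table Type as EXTERNAL_TABLE or MANAGED_TABLE.

      describe formatted ods.ods_backup_log;
      describe formatted dwd.dwd_event_detail;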

2.   TBLPROPERTIES: attaches custom or predefined metadata key/value pairs to the table, such as "orc.compress" in example 1.
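
      Properties can be inspected and changed after creation:

      show tblproperties dwd.dwd_event_detail;
      alter table dwd.dwd_event_detail set tblproperties ('comment' = 'event detail table');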

3.   partitioned by: partitions the table; each distinct partition value is stored in its own subdirectory, so queries that filter on the partition column read only the matching directories.
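
      Day-to-day partition maintenance looks like this (the dt value is illustrative):

      show partitions dwd.dwd_event_detail;
      alter table dwd.dwd_event_detail add if not exists partition (dt = '2024-01-01');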

4.   row format delimited fields terminated by '\t': sets the field delimiter (here '\t') for text-format tables; binary formats such as ORC ignore it.

5.  stored as orc: file storage format. ORC is columnar: data is stored column by column within each block, which gives a high compression ratio, reduces I/O, and speeds up queries.