Hive 自定义UDF函数实现日期格式化和字段AES加密

  • 自定义日期格式化UDF函数
  • 自定义字段AES加密函数
  • 函数的临时注册和永久注册
  • 测试UDF函数使用
  • 项目pom.xml




自定义日期格式化UDF函数

  • 首先自定义日期解析转换的工具类
import org.apache.commons.lang3.StringUtils;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

/**
 * Date parsing and normalisation helpers.
 *
 * <p>Raw date strings are matched against a fixed list of known layouts and re-rendered in a
 * single target layout. Matching is lenient about field ranges (the {@link SimpleDateFormat}
 * default, so out-of-range fields roll over) but must consume the entire input string.
 *
 * @author root
 */
public class DateUtils {
    /**
     * Input layouts that are recognised, tried in this order; the first one that consumes the
     * whole input wins. Order matters: lenient parsing lets the delimiter-free layouts near the
     * end also consume inputs such as "2020-1-1" (a "-1" is read as a negative field), so the
     * delimited layouts must be tried first.
     *
     * <p>The second entry is the year/month/day layout that uses the CJK markers U+5E74, U+6708
     * and U+65E5 as separators; it is written with Unicode escapes so the source does not depend
     * on the compiler's file encoding.
     */
    private static final String[] PARSE_PATTERNS = {"yyyy-MM-dd", "yyyy\u5e74MM\u6708dd\u65e5",
            "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm", "yyyy/MM/dd",
            "yyyy/MM/dd HH:mm:ss", "yyyy/MM/dd HH:mm", "yyyyMMdd", "yyyyMMdd HH:mm:ss"};

    /** Output layout used by {@link #unionformDate(String)}. */
    private static final String DEFAULT_RESULT_PATTERN = "yyyy-MM-dd";

    /**
     * Checks whether {@code string} can be parsed completely with {@code pattern}.
     *
     * @param string  the text to test; may be {@code null}
     * @param pattern a {@link SimpleDateFormat} pattern
     * @return {@code pattern} itself when the whole of {@code string} parses with it,
     *         {@code null} when it does not or when {@code string} is {@code null}
     * @throws NullPointerException     if {@code pattern} is {@code null} and {@code string} is not
     * @throws IllegalArgumentException if {@code pattern} is not a valid pattern
     */
    public static String parseDate(String string, String pattern) {
        if (string == null) {
            return null;
        }
        return tryParse(string, pattern) != null ? pattern : null;
    }

    /**
     * Normalises a date string of any supported layout to {@code yyyy-MM-dd}.
     *
     * @param rawDate the input text; may be {@code null}
     * @return the normalised date, or an empty string when the input is {@code null} or matches
     *         none of the supported layouts
     */
    public static String unionformDate(String rawDate) {
        return unionformDateWithPattern(rawDate, DEFAULT_RESULT_PATTERN);
    }

    /**
     * Normalises a date string of any supported layout to the requested output layout.
     *
     * @param rawDate       the input text; may be {@code null}
     * @param resultPattern the {@link SimpleDateFormat} pattern to render the result with
     * @return the re-formatted date, or an empty string when the input is {@code null}, matches
     *         none of the supported layouts, or {@code resultPattern} is {@code null} or invalid
     */
    public static String unionformDateWithPattern(String rawDate, String resultPattern) {
        if (rawDate == null) {
            return "";
        }
        Date parsed = null;
        for (String candidate : PARSE_PATTERNS) {
            parsed = tryParse(rawDate, candidate);
            if (parsed != null) {
                // First match wins; continuing would let a later, looser layout overwrite it.
                break;
            }
        }
        if (parsed == null) {
            return "";
        }
        try {
            return new SimpleDateFormat(resultPattern).format(parsed);
        } catch (RuntimeException e) {
            // A null or malformed target pattern is reported but does not abort the caller;
            // this keeps the best-effort contract of returning an empty string.
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Parses {@code text} with {@code pattern}, requiring the whole text to be consumed.
     *
     * @return the parsed date, or {@code null} when the text does not fully match
     */
    private static Date tryParse(String text, String pattern) {
        SimpleDateFormat parser = new SimpleDateFormat(pattern);
        java.text.ParsePosition position = new java.text.ParsePosition(0);
        Date date = parser.parse(text, position);
        return (date != null && position.getIndex() == text.length()) ? date : null;
    }
}
  • 自定义日期转换UDF函数
import com.miminglamp.utils.DateUtils;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that normalises date strings of several common layouts into one output layout.
 *
 * <p>Inputs that carry no date at all ({@code null}, the empty string, or the literal text
 * {@code "null"} / {@code "NULL"}) are mapped to the sentinel {@code 9999-99-99}.
 */
public class DateUnionUDF extends UDF {
    /**
     * Sentinel returned for missing dates. It is returned directly rather than fed through the
     * date parser: "9999-99-99" is not a real calendar date, so parsing and re-formatting it
     * cannot reproduce the sentinel.
     */
    private static final String MISSING_DATE = "9999-99-99";

    /** Output layout used by the single-argument overload. */
    private static final String DEFAULT_FORMAT = "yyyy-MM-dd";

    /**
     * Normalises {@code singleDate} to the given output layout.
     *
     * @param singleDate the raw date text
     * @param format     the {@link java.text.SimpleDateFormat} pattern for the result
     * @return the re-formatted date; {@code 9999-99-99} when the input is missing; otherwise
     *         whatever {@code DateUtils.unionformDateWithPattern} returns for the input
     */
    public String evaluate(String singleDate, String format) {
        if (isMissing(singleDate)) {
            return MISSING_DATE;
        }
        return DateUtils.unionformDateWithPattern(singleDate, format);
    }

    /**
     * Normalises {@code singleDate} to the default {@code yyyy-MM-dd} layout.
     *
     * @param singleDate the raw date text
     * @return same as {@link #evaluate(String, String)} with the default layout
     */
    public String evaluate(String singleDate) {
        return evaluate(singleDate, DEFAULT_FORMAT);
    }

    /** Tells whether the value is one of the placeholders that stand for "no date". */
    private static boolean isMissing(String value) {
        return value == null || value.isEmpty() || "null".equals(value) || "NULL".equals(value);
    }

    /** Small manual smoke test. */
    public static void main(String[] args) {
        DateUnionUDF dateUnionUDF = new DateUnionUDF();
        System.out.println(dateUnionUDF.evaluate("2020/12/12"));
        System.out.println(dateUnionUDF.evaluate("2020/12/12", "yyyy-MM-dd HH:mm:ss"));
    }
}

自定义字段AES加密函数

  • 首先创建AES加密的工具类
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.security.InvalidKeyException;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.Base64;
import java.util.Scanner;
import javax.crypto.*;
import javax.crypto.spec.SecretKeySpec;
import sun.misc.BASE64Decoder;
import sun.misc.BASE64Encoder;

/**
 * AES symmetric encryption and decryption of strings, with Base64 text as the cipher form.
 *
 * <p>The 128-bit key is derived by seeding a {@code SHA1PRNG} generator with the rule string,
 * and the cipher runs as {@code AES/ECB/PKCS5Padding}.
 *
 * <p>NOTE(review): seeding SHA1PRNG as a key-derivation function and ECB mode are both weak
 * choices; they are kept unchanged here so that previously produced ciphertext stays readable.
 * Changing either requires re-encrypting existing data.
 */
public class AES {
    /** Transformation spelled out in full so the mode and padding do not depend on provider defaults. */
    private static final String TRANSFORMATION = "AES/ECB/PKCS5Padding";

    /**
     * Encrypts {@code content} with a key derived from {@code encodeRules}.
     *
     * @param encodeRules the rule (password) the key is derived from
     * @param content     the plain text; encoded as UTF-8 before encryption
     * @return the ciphertext as a single-line Base64 string (no line breaks), or {@code null}
     *         when either argument is {@code null} or encryption fails
     */
    public static String AESEncode(String encodeRules,String content){
        if (encodeRules == null || content == null) {
            return null;
        }
        try {
            Cipher cipher = newCipher(Cipher.ENCRYPT_MODE, encodeRules);
            byte[] encrypted = cipher.doFinal(content.getBytes(StandardCharsets.UTF_8));
            // The basic encoder never inserts line separators, so the result is safe to store
            // as one column value.
            return Base64.getEncoder().encodeToString(encrypted);
        } catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException | IllegalBlockSizeException | BadPaddingException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Decrypts Base64 ciphertext produced by {@link #AESEncode(String, String)}.
     *
     * @param encodeRules the rule (password) used for encryption
     * @param content     the Base64 ciphertext; embedded line breaks are tolerated
     * @return the plain text decoded as UTF-8, or {@code null} when either argument is
     *         {@code null}, the text is not valid Base64, or decryption fails
     */
    public static String AESDncode(String encodeRules,String content){
        if (encodeRules == null || content == null) {
            return null;
        }
        try {
            Cipher cipher = newCipher(Cipher.DECRYPT_MODE, encodeRules);
            // The MIME decoder skips line separators, so ciphertext that was wrapped into
            // 76-character lines by an older encoder still decodes.
            byte[] encrypted = Base64.getMimeDecoder().decode(content);
            return new String(cipher.doFinal(encrypted), StandardCharsets.UTF_8);
        } catch (NoSuchAlgorithmException | NoSuchPaddingException | InvalidKeyException | IllegalBlockSizeException | BadPaddingException | IllegalArgumentException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Builds a cipher initialised for {@code mode} with the key derived from {@code encodeRules}.
     */
    private static Cipher newCipher(int mode, String encodeRules)
            throws NoSuchAlgorithmException, NoSuchPaddingException, InvalidKeyException {
        // Seed before first use so the generator output, and therefore the key, is a pure
        // function of the rule string. UTF-8 is fixed so the key does not vary with the
        // platform default charset.
        SecureRandom random = SecureRandom.getInstance("SHA1PRNG");
        random.setSeed(encodeRules.getBytes(StandardCharsets.UTF_8));
        KeyGenerator keygen = KeyGenerator.getInstance("AES");
        keygen.init(128, random);
        SecretKey key = new SecretKeySpec(keygen.generateKey().getEncoded(), "AES");
        Cipher cipher = Cipher.getInstance(TRANSFORMATION);
        cipher.init(mode, key);
        return cipher;
    }

    /** Interactive demo: encrypts one token read from stdin, then decrypts a second one. */
    public static void main(String[] args) {
        Scanner scanner=new Scanner(System.in);
        // The rule is fixed for the demo; only the content is read from the console.
        String encodeRules = "cdp2021";
        System.out.println("使用AES对称加密,请输入加密的规则");
        System.out.println("请输入要加密的内容:");
        String content = scanner.next();
        System.out.println("根据输入的规则"+encodeRules+"加密后的密文是:"+ AESEncode(encodeRules, content));

        System.out.println("使用AES对称解密,请输入加密的规则:(须与加密相同)");
        System.out.println("请输入要解密的内容(密文):");
        content = scanner.next();
        System.out.println("根据输入的规则"+encodeRules+"解密后的明文是:"+ AESDncode(encodeRules, content));
    }

}
  • 创建AES UDF函数
import com.miminglamp.aes.AES;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that AES-encrypts or AES-decrypts a string column with a fixed password.
 */
public class AESUDF extends UDF {
    // NOTE(review): the key material is hard-coded in source; consider supplying it through
    // configuration instead. Also note that AES.main demos with the lower-case rule "cdp2021",
    // which is a different key from this one.
    String password = "CDP2021";

    /**
     * Encrypts or decrypts {@code content} depending on {@code type}.
     *
     * @param type    {@code "encode"} to encrypt, {@code "decode"} to decrypt
     * @param content the value to transform; {@code null} is passed through as {@code null}
     * @return the Base64 ciphertext or the decrypted plain text, as returned by the AES helper
     * @throws IllegalArgumentException if {@code type} is neither {@code "encode"} nor
     *                                  {@code "decode"} (including {@code null}) and
     *                                  {@code content} is not {@code null}
     */
    public String evaluate(String type, String content) throws Exception {
        if (content == null) {
            return null;
        }
        // Constant-first comparison so a NULL mode argument reaches the explicit error below
        // instead of failing with a NullPointerException.
        if ("encode".equals(type)) {
            return AES.AESEncode(password, content);
        }
        if ("decode".equals(type)) {
            return AES.AESDncode(password, content);
        }
        throw new IllegalArgumentException("Parmeter one is needed encode/decode");
    }

    /** Small manual smoke test. */
    public static void main(String[] args) {
        try {
            System.out.println(new AESUDF().evaluate("encode", "test"));
            System.out.println(new AESUDF().evaluate("decode", "2YDc41+Y7OIKBgfqNMwDD9koGoxAy52jGiRdJC8A/+o="));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

函数的临时注册和永久注册

- 永久注册:
上传Jar包到HDFS的指定路径

CREATE FUNCTION 数据库名.函数名 AS '包名.类名' USING JAR 'hdfs:///path/xxxx.jar';

- 删除永久函数:
drop function dev.dateunion;

- 临时注册:
先将本地磁盘上的 Jar 包加入当前会话:
add jar /path/xx.jar;

- 临时注册UDF函数(hive会话生效)
create temporary function 函数名 as '包名.类名';

- 删除临时函数:
drop temporary function 函数名;

测试UDF函数使用

-- Register the date-normalisation UDF as a permanent function
CREATE FUNCTION dev.dateunion AS 'com.miminglamp.udf.DateUnionUDF' USING JAR 'hdfs:///opt/kezhen/hive/udf/HiveUDF-1.0.jar';

-- Register the AES encrypt/decrypt UDF as a permanent function
CREATE FUNCTION dev.aesencodes AS 'com.miminglamp.udf.AESUDF' USING JAR 'hdfs:///opt/kezhen/hive/udf/HiveUDF-1.0.jar';

-- Normalise dates to the default yyyy-MM-dd layout
select id,dev.dateunion(times) times from app_tmp.hive_udf;

-- Normalise dates to an explicitly requested layout
select id,dev.dateunion(times,'yyyy-MM-dd HH:mm:ss') times from app_tmp.hive_udf;

-- Encrypt a column with AES
select id,dev.aesencodes('encode',times) times from app_tmp.hive_udf;

-- Decrypt an AES-encrypted column
select id, dev.aesencodes('decode',times) times from app_tmp.aesencode;

项目pom.xml

创建普通的IDEA的maven-quickstart工程即可,创建工程参考:

最后附上Pom.xml

<properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Single source of truth for the Java level. The compiler source/target below follow
             it, so the build no longer compiles at 1.7 while declaring 1.8. -->
        <jdk.version>1.8</jdk.version>
        <maven.compiler.source>${jdk.version}</maven.compiler.source>
        <maven.compiler.target>${jdk.version}</maven.compiler.target>
        <scala.version>2.11.8</scala.version>
        <scala.compat.version>2.11</scala.compat.version>
        <hadoop.version>3.0.0-cdh6.0.1</hadoop.version>
        <spark.version>2.2.0-cdh6.0.1</spark.version>
        <hive.version>2.1.1-cdh6.0.1</hive.version>
    </properties>

    <!-- Remote repositories consulted when resolving dependencies. -->
    <repositories>
        <!-- Presumably the source of the cdh6.0.1 Hadoop/Spark/Hive builds referenced by the
             version properties; verify. -->
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
        <!-- NOTE(review): the two repositories below use plain http URLs. Recent Maven releases
             are reported to block external HTTP repositories by default; confirm, and switch to
             https if the hosts support it. -->
        <repository>
            <id>aliyun</id>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
        </repository>
        <repository>
            <id>jboss</id>
            <url>http://repository.jboss.com/nexus/content/groups/public</url>
        </repository>
    </repositories>
    <!-- Project dependencies.
         NOTE(review): the Java sources shown with this POM import only commons-lang3
         (org.apache.commons.lang3) and the Hive UDF base class (org.apache.hadoop.hive.ql.exec.UDF).
         The remaining entries may be unused by the UDF jar; verify before trimming. -->
    <dependencies>
        <!-- Unit testing; test scope only. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>3.0.1</version>
            <!--            <scope>provided</scope>-->
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>${spark.version}</version>
            <!--            <scope>provided</scope>-->
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>${spark.version}</version>
            <!--            <scope>provided</scope>-->
        </dependency>


        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.8.sec06</version>
        </dependency>


        <!-- The only non-test dependency declared with provided scope. -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>${spark.version}</version>
            <scope>provided</scope>
        </dependency>


        <!-- Imported by the DateUtils helper (org.apache.commons.lang3). -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.3.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpcore</artifactId>
            <version>4.4.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.1</version>
        </dependency>
        <!-- Logging: SLF4J API bound to log4j 1.2 through slf4j-log4j12. -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.12</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.12</version>
        </dependency>

        <!--        <dependency>-->
        <!--            <groupId>org.apache.spark</groupId>-->
        <!--            <artifactId>spark-hive-thriftserver_2.11</artifactId>-->
        <!--            <version>2.2.0</version>-->
        <!--        </dependency>-->
        <!-- MySQL JDBC driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.35</version>
        </dependency>
        <!-- Hadoop client libraries, all pinned to hadoop.version. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>

        <!-- Hive dependency; supplies org.apache.hadoop.hive.ql.exec.UDF, the base class the
             UDFs extend.
             NOTE(review): no scope is declared, so the default (compile) applies. The Hive
             runtime presumably provides this artifact already; consider provided scope. -->
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
        </dependency>
    </dependencies>