This article loads local Hadoop logs into a Hive data warehouse, filters out the useful log entries, and transfers them to a MySQL database.

Environment: Hive 0.12.0 + Hadoop 1.2.1

1. Log format

2014-04-17 22:53:30,621 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_4856124673669777083 to 127.0.0.1:50010 
2014-04-17 22:53:30,621 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_3952951948255939678 to 127.0.0.1:50010 
2014-04-17 22:53:30,629 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_-7659295115271387204 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_1247712269990837159 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_3041853621376201540 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_5165085626508844330 to 127.0.0.1:50010 
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_5058515618805863349 to 127.0.0.1:50010



The log format is roughly as shown above. Each line is split on spaces into multiple columns; the trailing message text does not split cleanly, so for now it is stored as three columns. Taking the first sample line: rdate is 2014-04-17, time is 22:53:30,621 (the comma splits it into the array elements 22:53:30 and 621), type is INFO, relateclass is org.apache.hadoop.hdfs.StateChange:, and information1 through information3 take the next three tokens; any tokens beyond those are ignored.

 

Table definition:

-- Create the Hive table used to store the log records
create table if not exists loginfo11
( rdate STRING, time ARRAY<STRING>, type STRING, relateclass STRING,
  information1 STRING, information2 STRING, information3 STRING )
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':';
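
The time column is declared ARRAY<string> because the comma between the seconds and the milliseconds acts as the collection-item separator, so time[0] yields the hh:mm:ss part; the query code below relies on this. A quick sanity check once the table is loaded (a sketch):

-- Sketch: verify how the delimiters split the log lines
select rdate, time[0], type, relateclass, information1 from loginfo11 limit 10;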

 

2. Code

 

The getConnect.java class handles opening and closing the connections to Hive and MySQL.

 



 


package com.my.hivetest;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class getConnect {

    private static Connection conn = null;
    private static Connection conntomysql = null;

    private getConnect() {
    }

    // Open (and cache) the Hive JDBC connection
    public static Connection getHiveConn() throws SQLException {
        if (conn == null) {
            try {
                Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
                System.exit(1);
            }
            conn = DriverManager.getConnection(
                    "jdbc:hive://localhost:50031/default", "", "");
            System.out.println(1111); // debug marker
        }
        return conn;
    }

    // Open (and cache) the MySQL JDBC connection
    public static Connection getMysqlConn() throws SQLException {
        if (conntomysql == null) {
            try {
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
                System.exit(1);
            }
            conntomysql = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/hadoop?createDatabaseIfNotExist=true&useUnicode=true&characterEncoding=GBK",
                    "root", "123456");
            System.out.println(1111); // debug marker
        }
        return conntomysql;
    }

    public static void closeHive() throws SQLException {
        if (conn != null)
            conn.close();
    }

    public static void closemysql() throws SQLException {
        if (conntomysql != null)
            conntomysql.close();
    }
}

 

 

The HiveUtil.java class contains methods to create the Hive table, load data, and query by condition, as well as the method that transfers the results to MySQL.

 



 

package com.my.hivetest;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class HiveUtil {
    // Create the Hive table
    public static void createTable(String hiveql) throws SQLException {
        Connection con = getConnect.getHiveConn();
        Statement stmt = con.createStatement();
        ResultSet res = stmt.executeQuery(hiveql);
    }

    // Query the Hive table
    public static ResultSet queryHive(String hiveql) throws SQLException {
        Connection con = getConnect.getHiveConn();
        Statement stmt = con.createStatement();
        ResultSet res = stmt.executeQuery(hiveql);
        return res;
    }

    // Load data into the Hive table
    public static void loadDate(String hiveql) throws SQLException {
        Connection con = getConnect.getHiveConn();
        Statement stmt = con.createStatement();
        ResultSet res = stmt.executeQuery(hiveql);
    }

    // Transfer the Hive query result into MySQL
    public static void hiveTomysql(ResultSet Hiveres) throws SQLException {
        Connection con = getConnect.getMysqlConn();
        Statement stmt = con.createStatement();
        while (Hiveres.next()) {
            String rdate = Hiveres.getString(1);
            String time = Hiveres.getString(2);
            String type = Hiveres.getString(3);
            String relateclass = Hiveres.getString(4);
            // Combine the three message columns into a single field
            String information = Hiveres.getString(5) + Hiveres.getString(6) + Hiveres.getString(7);
            System.out.println(rdate + "    " + time + "    " + type + "    "
                    + relateclass + " " + information + " ");
            int i = stmt.executeUpdate("insert into hadooplog values(0,'" + rdate
                    + "','" + time + "','" + type + "','" + relateclass + "','"
                    + information + "')");
        }
    }
}

 

The exeHiveQL.java class is the driver class that implements the main function.

 



 



package com.my.hivetest;

import java.sql.ResultSet;
import java.sql.SQLException;

public class exeHiveQL {
    public static void main(String[] args) throws SQLException {

        if (args.length < 2) {
            System.out.println("Please enter the query conditions: log level and date");
            System.exit(1);
        }

        String type = args[0];
        String date = args[1];

        // Create the table in Hive
        HiveUtil.createTable("create table if not exists loginfo11 "
                + "( rdate String,time ARRAY<string>,type STRING,"
                + "relateclass STRING,information1 STRING,information2 STRING,"
                + "information3 STRING) "
                + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' "
                + "COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'");
        // Load the Hadoop logs
        HiveUtil.loadDate("load data local inpath '/root/hadoop-1.2.1/logs/*.log.*' overwrite into table loginfo11");
        // Query the useful information
        String hiveql = "select rdate,time[0],type,relateclass,information1,information2,information3 "
                + "from loginfo11 where type='" + type + "' and rdate='" + date + "' ";
        System.out.println(hiveql + "----test"); // test code: print the query
        ResultSet res1 = HiveUtil.queryHive(hiveql);
        // Transfer the query result to MySQL
        HiveUtil.hiveTomysql(res1);
        // Close the Hive connection
        getConnect.closeHive();
        // Close the MySQL connection
        getConnect.closemysql();
    }
}


     

Before running, you need to start the Hive server; the port number 50031 here must match the one used in getConnect.java.

     

     



# bin/hive --service hiveserver -p 50031



Then run it in Eclipse, setting the program arguments, for example:

ERROR 2014-04-14
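
With these arguments, the HiveQL that exeHiveQL builds and sends is the following (shown with the values substituted in, for illustration only):

select rdate,time[0],type,relateclass,information1,information2,information3 from loginfo11 where type='ERROR' and rdate='2014-04-14'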



Before running, you also need to add the required jars to the build path:

     

hive-jdbc-0.12.0.jar
hive-metastore-0.12.0.jar
mysql-connector-java-5.1.10.jar (the MySQL JDBC driver, available from http://dev.mysql.com/downloads/connector/j/)
plus all the jars under hive/lib (to keep things simple, I just imported them all).

One more thing: you need to create the database hadoop in MySQL beforehand, and create the table hadooplog inside it; the table's layout just has to match the one used in the code.
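
Since the table definition is not given above, here is a minimal sketch that matches the INSERT in hiveTomysql; the column names and types are assumptions, not a schema from the original post:

-- Minimal sketch of the MySQL side; names and types are assumptions
-- inferred from: insert into hadooplog values(0,'...','...','...','...','...')
create database if not exists hadoop;
use hadoop;
create table hadooplog (
    id int not null auto_increment primary key,  -- inserting 0 lets auto_increment assign the id
    rdate varchar(48),
    time varchar(48),
    type varchar(16),
    relateclass varchar(255),
    information varchar(255)
);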

     

Source code: https://github.com/y521263/Hadoop_in_Action
