1. Griffin Build Preparation

### --- Unpack the software

[root@hadoop02 ~]# ls /opt/yanqi/software/griffin-griffin-0.5.0.zip
/opt/yanqi/software/griffin-griffin-0.5.0.zip

[root@hadoop02 ~]# cd /opt/yanqi/software/
[root@hadoop02 software]# unzip griffin-griffin-0.5.0.zip
[root@hadoop02 software]# mv griffin-griffin-0.5.0 ../servers/griffin-0.5.0
[root@hadoop02 software]# cd ../servers/griffin-0.5.0/
### --- Create the quartz database in MySQL and initialize it
~~~ # Create the database in MySQL

mysql> create database quartz;
~~~ # Location of the initialization script
[root@hadoop02 ~]# ls /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/Init_quartz_mysql_innodb.sql
/opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/Init_quartz_mysql_innodb.sql

~~~ # Copy the database script file to the node running the MySQL server
[root@hadoop02 ~]# scp /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/Init_quartz_mysql_innodb.sql \
hadoop05:/opt/yanqi/software/
[root@hadoop05 ~]# vim /opt/yanqi/software/Init_quartz_mysql_innodb.sql

~~~ # Edit the script; the only change needed is to add the following line at the top:
use quartz;
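~~~ # Equivalently (a sketch, assuming the script path used above), prepend the line with sed instead of editing by hand:
[root@hadoop05 ~]# sed -i '1i use quartz;' /opt/yanqi/software/Init_quartz_mysql_innodb.sql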
~~~ # Run from the command line to create the tables
[root@hadoop05 ~]# mysql -uhive -p12345678 < /opt/yanqi/software/Init_quartz_mysql_innodb.sql

~~~ # Verify that the tables were initialized successfully
mysql> use quartz;
Database changed
mysql> show tables;
+--------------------------+
| Tables_in_quartz         |
+--------------------------+
| QRTZ_BLOB_TRIGGERS       |
| QRTZ_CALENDARS           |
| QRTZ_CRON_TRIGGERS       |
| QRTZ_FIRED_TRIGGERS      |
| QRTZ_JOB_DETAILS         |
| QRTZ_LOCKS               |
| QRTZ_PAUSED_TRIGGER_GRPS |
| QRTZ_SCHEDULER_STATE     |
| QRTZ_SIMPLE_TRIGGERS     |
| QRTZ_SIMPROP_TRIGGERS    |
| QRTZ_TRIGGERS            |
+--------------------------+
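~~~ # Equivalently, count the tables (a quick sanity check; the script creates the 11 tables listed above):
mysql> select count(*) from information_schema.tables where table_schema = 'quartz';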
### --- Hadoop and Hive
~~~ Upload the hive-site.xml file from the node where Griffin is installed to the corresponding HDFS directory;
~~~ # Create the /spark/spark_conf directory in HDFS and upload Hive's configuration file hive-site.xml to it

[root@hadoop02 ~]# hdfs dfs -mkdir -p /spark/spark_conf
[root@hadoop02 ~]# hdfs dfs -put $HIVE_HOME/conf/hive-site.xml /spark/spark_conf
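~~~ # A quick check that the file landed in HDFS:
[root@hadoop02 ~]# hdfs dfs -ls /spark/spark_conf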
### --- Make sure the following environment variables are set (/etc/profile)

[root@hadoop02 ~]# vim /etc/profile
~~~ # Check that all of these environment variables are set (a spot-check follows the exports below)
export JAVA_HOME=/opt/yanqi/servers/jdk1.8.0_231
export SPARK_HOME=/opt/yanqi/servers/spark-2.2.1/
export LIVY_HOME=/opt/yanqi/servers/livy-0.5.0
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
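~~~ # After editing, reload the profile and spot-check a few values:
[root@hadoop02 ~]# source /etc/profile
[root@hadoop02 ~]# echo $SPARK_HOME $LIVY_HOME $HADOOP_CONF_DIR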

2. Modifying the Griffin Configuration Files

### --- Configure service/pom.xml

~~~ # Edit the pom.xml file
[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/service/pom.xml
~~~ # In service/pom.xml (around lines 113-117), enable the MySQL JDBC dependency by removing the comment markers around it:
<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>${mysql.java.version}</version>
</dependency>
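~~~ # A quick check (a sketch) that the ${mysql.java.version} property is actually defined in one of the poms:
[root@hadoop02 ~]# grep -n "mysql.java.version" /opt/yanqi/servers/griffin-0.5.0/pom.xml /opt/yanqi/servers/griffin-0.5.0/service/pom.xml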
### --- Edit service/src/main/resources/application.properties

~~~ The default port is 8080; change it to 9876 to avoid a conflict with the Spark port
~~~ Hive's metastore service must be running
~~~ If Griffin and MySQL are not installed on the same node, make sure the user is allowed to log in to MySQL remotely (see the connectivity check after the file below)
~~~ # Edit the application.properties configuration file

[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/application.properties
server.port = 9876

spring.application.name=griffin_service
spring.datasource.url=jdbc:mysql://hadoop05:3306/quartz?autoReconnect=true&useSSL=false
spring.datasource.username=hive
spring.datasource.password=12345678
spring.jpa.generate-ddl=true
spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.jpa.show-sql=true

# Hive metastore
hive.metastore.uris=thrift://hadoop01:9083,thrift://hadoop05:9083
hive.metastore.dbname=hivemetadata
hive.hmshandler.retry.attempts=15
hive.hmshandler.retry.interval=2000ms

# Hive cache time
cache.evict.hive.fixedRate.in.milliseconds=900000

# Kafka schema registry
kafka.schema.registry.url=http://localhost:8081

# Update job instance state at regular intervals
jobInstance.fixedDelay.in.milliseconds=60000

# Expired time of a job instance: 7 days, i.e. 604800000 milliseconds. The time unit only supports milliseconds
jobInstance.expired.milliseconds=604800000

# schedule predicate job every 5 minutes and repeat 12 times at most
# interval time unit s: second, m: minute, h: hour, d: day; only these four units are supported
predicate.job.interval=5m
predicate.job.repeat.count=12

# external properties directory location
external.config.location=

# external BATCH or STREAMING env
external.env.location=

# login strategy ("default" or "ldap")
login.strategy=default

# ldap
ldap.url=ldap://hostname:port
ldap.email=@example.com
ldap.searchBase=DC=org,DC=example
ldap.searchPattern=(sAMAccountName={0})

# hdfs default name
fs.defaultFS=

# elasticsearch
elasticsearch.host=hadoop02
elasticsearch.port=9200
elasticsearch.scheme=http
# elasticsearch.user = user
# elasticsearch.password = password

# livy
livy.uri=http://localhost:8998/batches
livy.need.queue=false
livy.task.max.concurrent.count=20
livy.task.submit.interval.second=3
livy.task.appId.retry.count=3

# yarn url
yarn.uri=http://hadoop01:8088

# griffin event listener
internal.event.listeners=GriffinJobEventHook
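~~~ # Optional connectivity check (a sketch, using the credentials configured above) that the hive user can reach MySQL on hadoop05 from the Griffin node:
[root@hadoop02 ~]# mysql -h hadoop05 -uhive -p12345678 -e "show databases like 'quartz';"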
### --- Edit service/src/main/resources/quartz.properties

[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/quartz.properties
~~~ # Change line 26 to the following:
org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.StdJDBCDelegate
### --- Edit service/src/main/resources/sparkProperties.json
~~~ Change line 11 to: "spark.yarn.dist.files": "hdfs:///spark/spark_conf/hive-site.xml"

[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/sparkProperties.json
~~~ sparkProperties.json for a test environment:
{
  "file": "hdfs:///griffin/griffin-measure.jar",
  "className": "org.apache.griffin.measure.Application",
  "name": "griffin",
  "queue": "default",
  "numExecutors": 2,
  "executorCores": 1,
  "driverMemory": "1g",
  "executorMemory": "1g",
  "conf": {
    "spark.yarn.dist.files": "hdfs:///spark/spark_conf/hive-site.xml"
  },
  "files": []
}
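~~~ # Optionally validate the edited JSON (a sketch; assumes Python is available on the node):
[root@hadoop02 ~]# python -m json.tool /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/sparkProperties.json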
~~~ # In production, adjust sparkProperties.json to match the actual cluster resources [production environment]

[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/sparkProperties.json
~~~ sparkProperties.json for a production environment:
{
  "file": "hdfs:///griffin/griffin-measure.jar",
  "className": "org.apache.griffin.measure.Application",
  "name": "griffin",
  "queue": "default",
  "numExecutors": 8,
  "executorCores": 2,
  "driverMemory": "4g",
  "executorMemory": "5g",
  "conf": {
    "spark.yarn.dist.files": "hdfs:///spark/spark_conf/hive-site.xml"
  },
  "files": []
}
### --- Edit service/src/main/resources/env/env_batch.json

~~~ # Create the persist directory in HDFS
[root@hadoop02 ~]# hdfs dfs -mkdir -p hdfs:///griffin/persist
[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/service/src/main/resources/env/env_batch.json
~~~ # Only line 24 (the Elasticsearch "api" entry) needs changing
{
  "spark": {
    "log.level": "WARN"
  },
  "sinks": [
    {
      "type": "CONSOLE",
      "config": {
        "max.log.lines": 10
      }
    },
    {
      "type": "HDFS",
      "config": {
        "path": "hdfs:///griffin/persist",
        "max.persist.lines": 10000,
        "max.lines.per.file": 10000
      }
    },
    {
      "type": "ELASTICSEARCH",
      "config": {
        "method": "post",
        "api": "http://hadoop02:9200/griffin/accuracy",
        "connection.timeout": "1m",
        "retry": 10
      }
    }
  ],
  "griffin.checkpoint": []
}
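~~~ # A quick check that the Elasticsearch sink endpoint is reachable (assumes ES is already running on hadoop02):
[root@hadoop02 ~]# curl -s http://hadoop02:9200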

3. Building Griffin

### --- Build Griffin
~~~ The build downloads roughly 500 MB of jars, so point Maven's mirror at Aliyun first (a minimal mirror entry is sketched below)
~~~ If any of the configuration files above are changed later, Griffin must be rebuilt
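~~~ # A minimal mirror entry for the <mirrors> section of ~/.m2/settings.xml (a sketch; the id and name are arbitrary):
<mirror>
    <id>aliyun</id>
    <mirrorOf>central</mirrorOf>
    <name>Aliyun Maven</name>
    <url>https://maven.aliyun.com/repository/public</url>
</mirror>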

[root@hadoop02 ~]# cd /opt/yanqi/servers/griffin-0.5.0/
[root@hadoop02 griffin-0.5.0]# mvn -Dmaven.test.skip=true clean install
### --- Resolving a build error
~~~ The file referenced in the error below does not exist before the build runs

[root@hadoop02 griffin-0.5.0]# mvn -Dmaven.test.skip=true clean install
~~~ # Build error output
[ERROR] Failed to execute goal com.github.eirslett:frontend-maven-plugin:1.6:npm (npm build) on project ui: Failed to run task: 'npm run build' failed. org.apache.commons.exec.ExecuteException: Process exited with an error: 1 (Exit value: 1) -> [Help 1]
[ERROR] ERROR in /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts (4137,26): Cannot find name 'SVGElementTagNameMap'.
[ERROR] ERROR in /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts (4137,89): Cannot find name 'SVGElementTagNameMap'.
### --- Solution

[root@hadoop02 ~]# vim /opt/yanqi/servers/griffin-0.5.0/ui/angular/node_modules/@types/jquery/JQuery.d.ts
~~~ # Delete the contents of line 4137 (and the matching declaration at line 8705):
4137 find<K extends keyof SVGElementTagNameMap>(selector_element: K | JQuery<K>): JQuery<SVGElementTagNameMap[K]>;
8705 parents<K extends keyof SVGElementTagNameMap>(selector: K | JQuery<K>): JQuery<SVGElementTagNameMap[K]>;
### --- Build again

[root@hadoop02 ~]# cd /opt/yanqi/servers/griffin-0.5.0
[root@hadoop02 griffin-0.5.0]# mvn -Dmaven.test.skip=true clean install
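~~~ # After a successful build, both jars should exist (a quick check):
[root@hadoop02 griffin-0.5.0]# ls service/target/service-0.5.0.jar measure/target/measure-0.5.0.jar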

4. Copying the Jars Needed to Start the Griffin Service

### --- Copy the jars
~~~ After the build completes, service-0.5.0.jar and measure-0.5.0.jar appear under the target directories
~~~ of the service and measure modules respectively; copy both jars into the installation directory.
~~~ # Copy service-0.5.0.jar to /opt/yanqi/servers/griffin-0.5.0/

[root@hadoop02 ~]# cd /opt/yanqi/servers/griffin-0.5.0/service/target
[root@hadoop02 target]# cp service-0.5.0.jar /opt/yanqi/servers/griffin-0.5.0/
### --- Copy measure-0.5.0.jar to /opt/yanqi/servers/griffin-0.5.0/ and rename it

[root@hadoop02 ~]# cd /opt/yanqi/servers/griffin-0.5.0/measure/target
[root@hadoop02 target]# cp measure-0.5.0.jar /opt/yanqi/servers/griffin-0.5.0/griffin-measure.jar
### --- Upload griffin-measure.jar to hdfs:///griffin
~~~ When Spark executes tasks on the YARN cluster, it loads griffin-measure.jar from the /griffin directory in HDFS;
~~~ this avoids "class org.apache.griffin.measure.Application not found" errors.

[root@hadoop02 ~]# cd /opt/yanqi/servers/griffin-0.5.0
[root@hadoop02 griffin-0.5.0]# hdfs dfs -mkdir /griffin
[root@hadoop02 griffin-0.5.0]# hdfs dfs -put griffin-measure.jar /griffin
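~~~ # Confirm the jar is in place:
[root@hadoop02 griffin-0.5.0]# hdfs dfs -ls /griffin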

5. Starting the Griffin Service

### --- Start the Griffin admin backend:

[root@hadoop02 ~]# cd /opt/yanqi/servers/griffin-0.5.0
[root@hadoop02 griffin-0.5.0]# nohup java -jar service-0.5.0.jar > service.out 2>&1 &
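~~~ # Confirm the service came up (a quick check; allow a few seconds for startup, then expect HTTP 200 from the UI port):
[root@hadoop02 griffin-0.5.0]# tail service.out
[root@hadoop02 griffin-0.5.0]# curl -s -o /dev/null -w "%{http_code}\n" http://hadoop02:9876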
### --- Access the Griffin service via Chrome:
~~~ Apache Griffin UI: http://hadoop02:9876, username/password: admin / admin
### --- The UI after logging in:

(screenshots: Griffin UI after login)
