通过kettle开发工具spoon打开的编辑页面之后,默认是在当前用户下文件夹内创建一个.kettle的文件夹并加入kettle.properties配置文件

注:修改kettle.properties配置文件之后需要重启kettle程序才会重新生效

注意:kettle.properties为全局参数配置文件,启动Spoon时会自动读取本文件中的全部内容至内存中,所以如果在kettle运行过程中手动修改了这个配置文件,则需重启Spoon才会生效。

首先需要在指定文件夹中建好所需要的日志表,接下来就在kettle自动生成中的配置文件kettle.properties中配置每个日志的数据库及表名,这里数据库我选择用的共享的(共享的数据库连接存放在shared.xml文件中),kettle就能读的到

# trans日志(转换)
KETTLE_TRANS_LOG_DB=10.15.30.168
KETTLE_TRANS_LOG_TABLE=r_log_trans

# 步骤日志表 (步骤)
KETTLE_STEP_LOG_DB=10.15.30.168
KETTLE_STEP_LOG_TABLE=r_log_trans_step
#  trans性能日志(运行)
KETTLE_TRANS_PERFORMANCE_LOG_DB=10.15.30.168
KETTLE_TRANS_PERFORMANCE_LOG_TABLE=r_log_trans_step
# 通道日志表(日志通道)
KETTLE_CHANNEL_LOG_DB=10.15.30.168
KETTLE_CHANNEL_LOG_TABLE=r_log_channel
# job和trans的Metrics(度量)日志表
KETTLE_METRICS_LOG_DB=10.15.30.168
KETTLE_METRICS_LOG_TABLE=r_log_trans_metrics

# job日志
KETTLE_JOB_LOG_DB=10.15.30.168
KETTLE_JOB_LOG_TABLE=r_log_job
#  作业项日志表
KETTLE_JOBENTRY_LOG_DB=10.15.30.168
KETTLE_JOBENTRY_LOG_TABLE=r_log_job_step

以下是kettle的建表SQL及部分表注释

-- 转换日志表
-- KETTLE_TRANS_LOG_TABLE

CREATE TABLE r_log_trans
(
  ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, TRANSNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE DATETIME
, ENDDATE DATETIME
, LOGDATE DATETIME
, DEPDATE DATETIME
, REPLAYDATE DATETIME
, LOG_FIELD LONGTEXT
);
CREATE INDEX IDX_r_log_trans_1 ON r_log_trans(ID_BATCH);
CREATE INDEX IDX_r_log_trans_2 ON r_log_trans(ERRORS, STATUS, TRANSNAME);

-- 步骤日志表
-- KETTLE_STEP_LOG_TABLE

CREATE TABLE r_log_trans_step
(
  ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY INT
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
);

-- 运行日志表
-- KETTLE_TRANS_PERFORMANCE_LOG_TABLE

CREATE TABLE r_log_trans_running
(
  ID_BATCH INT
, SEQ_NR INT
, LOGDATE DATETIME
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, STEP_COPY INT
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, INPUT_BUFFER_ROWS BIGINT
, OUTPUT_BUFFER_ROWS BIGINT
);
-- 通道日志表(job、trans共用的)
-- KETTLE_CHANNEL_LOG_TABLE

CREATE TABLE r_log_channel
(
  ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, LOGGING_OBJECT_TYPE VARCHAR(255)
, OBJECT_NAME VARCHAR(255)
, OBJECT_COPY VARCHAR(255)
, REPOSITORY_DIRECTORY VARCHAR(255)
, FILENAME VARCHAR(255)
, OBJECT_ID VARCHAR(255)
, OBJECT_REVISION VARCHAR(255)
, PARENT_CHANNEL_ID VARCHAR(255)
, ROOT_CHANNEL_ID VARCHAR(255)
);

-- Metrics log table
-- KETTLE_METRICS_LOG_TABLE

CREATE TABLE r_log_trans_metrics
(
  ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, METRICS_DATE DATETIME
, METRICS_CODE VARCHAR(255)
, METRICS_DESCRIPTION VARCHAR(255)
, METRICS_SUBJECT VARCHAR(255)
, METRICS_TYPE VARCHAR(255)
, METRICS_VALUE BIGINT
);


-- 作业日志表
-- KETTLE_JOB_LOG_TABLE

CREATE TABLE r_log_job
(
  ID_JOB INT
, CHANNEL_ID VARCHAR(255)
, JOBNAME VARCHAR(255)
, STATUS VARCHAR(15)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, STARTDATE DATETIME
, ENDDATE DATETIME
, LOGDATE DATETIME
, DEPDATE DATETIME
, REPLAYDATE DATETIME
, LOG_FIELD LONGTEXT
);
CREATE INDEX IDX_r_log_job_1 ON r_log_job(ID_JOB);
CREATE INDEX IDX_r_log_job_2 ON r_log_job(ERRORS, STATUS, JOBNAME);

-- 作业项日志表
--KETTLE_JOBENTRY_LOG_TABLE

CREATE TABLE r_log_job_step
(
  ID_BATCH INT
, CHANNEL_ID VARCHAR(255)
, LOG_DATE DATETIME
, TRANSNAME VARCHAR(255)
, STEPNAME VARCHAR(255)
, LINES_READ BIGINT
, LINES_WRITTEN BIGINT
, LINES_UPDATED BIGINT
, LINES_INPUT BIGINT
, LINES_OUTPUT BIGINT
, LINES_REJECTED BIGINT
, ERRORS BIGINT
, RESULT BOOLEAN
, NR_RESULT_ROWS BIGINT
, NR_RESULT_FILES BIGINT
);
CREATE INDEX IDX_r_log_job_step_1 ON r_log_job_step(ID_BATCH);



-- 作业日志表:https://www.pudn.com/news/62a2d83c194b3b0e4344799b.html
comment on column T_KETTLE_JOB_LOG.id_job  is '批次ID(即作业ID),自递增,主键';
comment on column T_KETTLE_JOB_LOG.channel_id  is '日志通道ID(GUID),跟Logging channel log table有关联';
comment on column T_KETTLE_JOB_LOG.jobname  is '作业名称';
comment on column T_KETTLE_JOB_LOG.status  is '执行状态(start、end、stop、running)';
comment on column T_KETTLE_JOB_LOG.lines_read  is '最后一个转换,读取的行数';
comment on column T_KETTLE_JOB_LOG.lines_written  is '最后一个转换,写入的行数';
comment on column T_KETTLE_JOB_LOG.lines_updated  is '最后一个转换,更新的行数';
comment on column T_KETTLE_JOB_LOG.lines_input  is '最后一个转换,从存储或网络(如文件、数据库等)读取的行数';
comment on column T_KETTLE_JOB_LOG.lines_output  is '最后一个转换,输出到存储或网络(如文件、数据库等)的行数';
comment on column T_KETTLE_JOB_LOG.lines_rejected  is '最后一个转换,因错误处理导致拒绝的行数';
comment on column T_KETTLE_JOB_LOG.errors  is '发生的错误数';
comment on column T_KETTLE_JOB_LOG.startdate  is '开始执行时间(kettle的bug,始终是1900-01-01 7:00:00)';
comment on column T_KETTLE_JOB_LOG.enddate  is '结束执行时间';
comment on column T_KETTLE_JOB_LOG.logdate  is '最后记录日志的时间';
comment on column T_KETTLE_JOB_LOG.depdate  is '依赖日期,作业设置中的依赖规则计算的最大日期。(不懂)';
comment on column T_KETTLE_JOB_LOG.replaydate  is '重播日期,跟STARTDATE是同义词(不懂)';
comment on column T_KETTLE_JOB_LOG.log_field  is '详细日志内容';
comment on column T_KETTLE_JOB_LOG.executing_server  is '哪个服务器在执行当前作业(主机名)';
comment on column T_KETTLE_JOB_LOG.executing_user  is '采用存储库方式,则是登录存储库的用户;否则为当前系统的登录用户';
comment on column T_KETTLE_JOB_LOG.start_job_entry  is '当前作业,从哪个转换开始执行(为空代表是从Start开始)';
comment on column T_KETTLE_JOB_LOG.client  is '客户端(SPOON、PAN、KITCHEN、CARTE)';
-- 作业日志通道表
comment on column T_KETTLE_JOB_CHANNEL_LOG.id_batch  is '批次ID(即作业ID),与t_kettle_job_log的id_batch字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.channel_id  is '日志通道ID(GUID),跟t_kettle_step_log的channel_id字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.log_date  is '最后记录日志的时间';
comment on column T_KETTLE_JOB_CHANNEL_LOG.logging_object_type  is '被记录对象的类型(如JOB、JOBENTRY、DATABASE、STEP、TRANS)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_name  is '被记录对象的名称(跟LOGGING_OBJECT_TYPE是一对)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_copy  is '被记录步骤对象的复制(不懂)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.repository_directory  is '资源库(或存储)的目录,貌似是JOB、TRANS才有';
comment on column T_KETTLE_JOB_CHANNEL_LOG.filename  is 'JOB、JOBENTRY、TRANS的路径(只有作业、转换才有)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_id  is '当前对象ID';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_revision  is '当前对象版本';
comment on column T_KETTLE_JOB_CHANNEL_LOG.parent_channel_id  is '所属转换日志的日志通道ID,跟t_kettle_transformation_log的channel_id字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.root_channel_id  is '所属作业日志的日志通道ID,跟t_kettle_job_log的channel_id字段有关联';
-- 转换日志表
comment on table T_KETTLE_TRANSFORMATION_LOG  is 'KETTLE转换日志'; 
comment on column T_KETTLE_TRANSFORMATION_LOG.id_batch  is '批次ID(即转换ID),自递增';
comment on column T_KETTLE_TRANSFORMATION_LOG.channel_id  is '日志通道ID(GUID),跟Logging channel log table有关联';
comment on column T_KETTLE_TRANSFORMATION_LOG.transname  is '转换名称';
comment on column T_KETTLE_TRANSFORMATION_LOG.status  is '执行状态(start、end、stop、running)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_read  is '特定步骤,读取的行数(没数据,没用的)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_written  is '特定步骤,写入的行数(没数据,没用的)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_updated  is '特定步骤,执行的更新语句的数量';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_input  is '特定步骤,从存储或网络(如文件、数据库等)读取的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_output  is '特定步骤,输出到存储或网络(如文件、数据库等)的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_rejected  is '特定步骤,因错误处理导致拒绝的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.errors  is '发生的错误数';
comment on column T_KETTLE_TRANSFORMATION_LOG.startdate  is '开始执行时间(kettle的bug,有些是1900-01-01 7:00:00)';
comment on column T_KETTLE_TRANSFORMATION_LOG.enddate  is '结束执行时间';
comment on column T_KETTLE_TRANSFORMATION_LOG.logdate  is '最后记录日志的时间';
comment on column T_KETTLE_TRANSFORMATION_LOG.depdate  is '依赖日期,作业设置中的依赖规则计算的最大日期。(不懂)';
comment on column T_KETTLE_TRANSFORMATION_LOG.replaydate  is '重播日期,跟STARTDATE是同义词(不懂)';
comment on column T_KETTLE_TRANSFORMATION_LOG.log_field  is '详细日志内容';
comment on column T_KETTLE_TRANSFORMATION_LOG.executing_server  is '哪个服务器在执行当前作业(主机名)';
comment on column T_KETTLE_TRANSFORMATION_LOG.executing_user  is '采用存储库方式,则是登录存储库的用户;否则为当前系统的登录用户';
comment on column T_KETTLE_TRANSFORMATION_LOG.client  is '客户端(SPOON、PAN、KITCHEN、CARTE)';

-- 转换步骤日志表
comment on table T_KETTLE_STEP_LOG  is 'KETTLE转换-步骤日志';
comment on column T_KETTLE_STEP_LOG.id_batch  is '批次ID(转换ID),自递增,主键';
comment on column T_KETTLE_STEP_LOG.channel_id  is '日志通道ID(GUID),跟t_kettle_job_channel_log的channel_id字段有关联';
comment on column T_KETTLE_STEP_LOG.transname  is '转换名称';
comment on column T_KETTLE_STEP_LOG.stepname  is '步骤名称';
comment on column T_KETTLE_STEP_LOG.step_copy  is '当前步骤复制的数量';
comment on column T_KETTLE_STEP_LOG.lines_read  is '从上一个步骤读取的数量';
comment on column T_KETTLE_STEP_LOG.lines_written  is '输出到跟随的步骤(因为支持并发多个步骤)。假设输入1w条数据,且跟随两个步骤,那么LINES_WRITTEN是2w(1w * 2)';
comment on column T_KETTLE_STEP_LOG.lines_updated  is '当前步骤,执行的更新语句的数量';
comment on column T_KETTLE_STEP_LOG.lines_input  is '当前步骤,从来源(文件、数据库、网络等),读取的行数';
comment on column T_KETTLE_STEP_LOG.lines_output  is '当前步骤,输出到输出端(文件、数据库、网络等)的行数';
comment on column T_KETTLE_STEP_LOG.lines_rejected  is '当前步骤,因错误处理导致拒绝的行数';
comment on column T_KETTLE_STEP_LOG.errors  is '当前步骤,发生的错误数';
comment on column T_KETTLE_STEP_LOG.log_field  is '当前步骤,产生的详细日志内容';
comment on column T_KETTLE_STEP_LOG.log_date  is '当前步骤,最后记录日志的时间';