filebeat8自定义索引 filebeat 配置

转载

mob6454cc747bda 2024-07-23 09:22:54

文章标签 filebeat8自定义索引 elk linux json elastic 文章分类 数据仓库大数据

prospector、harvesters 概念及执行流程

# 其主要由三部分构成: 
# https://www.elastic.co/guide/en/beats/filebeat/current/how-filebeat-works.html

prospector "探测器"
# 其管理所有 Harvesters 并找到所有需读取的数据源，启动时会启动一或若干 prospectors 探测器进程去检测指定的目录或文件
# 若input type为log则Prospector将去配置的路径下查找所有能匹配到的文件然后为每个文件创建一个Harvester
# 每个Prospector都运行在自己的 Goroutine 里

harvesters "收集器"
# 负责针对单个文件内容的收集，每个Harvester (收割机) 会对一个文件逐行读取并把内容发到output段
# 若在收集文件时将其删除、重命名，此时仍会继续读取该文件，造成磁盘上的空间保留，直到"收割机"关闭为止
# 默认情况下 Filebeat 保持文件打开直到 close_inactive 参数的时间到达
# 启用此选项后若在指定时间未收到新事件，将会关闭文件句柄 (当读取最后一条日志行时定义周期的计数器开始计数)

Register "SinceDB"
# 其保持每个被采集文件的状态并频繁刷新状态到磁盘上的注册文件，用于存储"Harvesters"读取的最后偏移量并确保所有事件被发送
# 若Elasticsearch或Logstash等输出不可达，Filebeat将持续记录已发送的最后一行，并在输出重新可用后尽快继续读取文件
# Filebeat重启时读取注册文件内的数据来重建状态，从而让每个收割者"Harvesters"从最后的位置开始读取
# Filebeat保证事件将至少一次传递到输出且不丢数据，因为注册表文件中存储了每个事件的传递状态

# -------------------------------------------------------------- Index Template

# Filebeat 输出事件时以 JSON 格式传递，如Nginx等应用的日志文本会作为 JSON 的 message 字段存在
# 默认Elasticsearch需要的Index template在安装Filebeat时已提供，以RPM方式安装时其路径为 /etc/filebeat/filebeat.template.json

# 可使用如下命令手动装载该模板到Elasticsearch
curl -XPUT -d @/etc/filebeat/filebeat.template.json 'http://localhost:9200/_template/filebeat?pretty'

# -------------------------------------------------------------- Directory
# https://www.elastic.co/guide/en/beats/filebeat/current/directory-layout.html

# [root@localhost filebeat-7.6.2-linux-x86_64]# ll
# total 76692
# -rw-r--r--  1 root root   500235 Mar 26 13:23 fields.yml              # Index Template
# -rwxr-xr-x  1 root root 77562560 Mar 26 13:25 filebeat                # 启动文件
# -rw-r--r--  1 root root    89359 Mar 26 13:23 filebeat.reference.yml  # 配置示例，含所有未弃用选项
# -rw-------  1 root root     8333 Mar 26 13:23 filebeat.yml            # 配置文件
# drwxr-xr-x  3 root root       15 Mar 26 13:23 kibana                  # Kibana UI 相关配置
# -rw-r--r--  1 root root    13675 Mar 26 12:44 LICENSE.txt
# drwxr-xr-x 39 root root     4096 Mar 26 13:23 module                  # 模块的: pipeline、template、config ...
# drwxr-xr-x  2 root root     4096 Mar 26 13:23 modules.d               # 模块需修改的默认导出变量
# -rw-r--r--  1 root root   328580 Mar 26 12:44 NOTICE.txt
# -rw-r--r--  1 root root      802 Mar 26 13:26 README.md

部署filebeat

# 命令参考: https://www.elastic.co/guide/en/beats/filebeat/current/command-line-options.html
# 容器镜像: https://www.elastic.co/guide/en/beats/filebeat/current/running-on-docker.html

[root@localhost bin]# curl -L -O https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-7.6.2-linux-x86_64.tar.gz
[root@localhost bin]# tar xzvf filebeat-7.6.2-linux-x86_64.tar.gz
[root@localhost bin]# cd filebeat-7.6.2-linux-x86_64
[root@localhost filebeat-7.6.2-linux-x86_64]# ./filebeat --help
Usage:
  filebeat [flags]
  filebeat [command]

Available Commands:
  enroll      Enroll in Kibana for Central Management
  export      Export current config or index template               # 将配置、索引模板、ILM策略或仪表板导到标准输出
  generate    Generate Filebeat modules, filesets and fields.yml
  help        Help about any command
  keystore    Manage secrets keystore                               # 管理秘钥库
  modules     Manage configured modules                             # 管理模块，用于快速处理常见日志 (启/禁用modules.d中定义的特定模块)
  run         Run filebeat                                          # 若不指定参数启动 Filebeat 则默认使用此参数
  setup       Setup index template, dashboards and ML jobs          # 向 Elasticsearch 中创建索引模板、kibana UI ...
  test        Test config                                           # 测试配置
  version     Show current version info 

Flags:
  -E, --E setting=value      Configuration overwrite                # 以命令行形式覆盖配置文件的配置
  -M, --M setting=value      Module configuration overwrite         # 以命令行形式覆盖模块的默认配置
  -N, --N                    Disable actual publishing for testing
  -c, --c string             Configuration file, relative to path.config (default "filebeat.yml")
      --cpuprofile string    Write cpu profile to file
  -d, --d string             Enable certain debug selectors
  -e, --e                    Log to stderr and disable syslog/file output # 将日志输出到标准错误 (stderr)，并禁用 syslog/文件输出
  -h, --help                 help for filebeat
      --httpprof string      Start pprof http server
      --memprofile string    Write memory profile to this file
      --modules string       List of enabled modules (comma separated)
      --once                 Run filebeat only once until all harvesters reach EOF
      --path.config string   Configuration path
      --path.data string     Data path                                    # 
      --path.home string     Home path                                    # 
      --path.logs string     Logs path                                    # 
      --plugin pluginList    Load additional plugins
      --strict.perms         Strict permission checking on config files (default true)
  -v, --v                    Log at INFO level

Use "filebeat [command] --help" for more information about a command.

# -------------------------------------------------------------- 覆盖配置文件 -E ...

# 在 filebeat.yml 中关于日志记录级别相关设置示例
logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  permissions: 0644

# 若需临时修改日志级别并将其输出到标准错误而不是文件，可在运行时使用 -E 标志以命令行形式覆盖配置文件的配置
-E "logging.to_files=false" -E "logging.to_stderr=true" -E "logging.level=error"

# -------------------------------------------------------------- 覆盖模块设置 -M ...

# 在 modules.d/nginx.yml 中 (已启用的模块配置)
- module: nginx
  access:
    var.paths: ["/var/log/nginx/access.log*"]

# 若要临时修改此设置，可在运行Filebeat时使用 -M 标志，注意变量名称必须包含模块和文件集名称:
-M "nginx.access.var.paths=[/path/to/log/nginx/access.log*]"

# ------------------------------------------------------------------------------ 格式化输出配置字段样例

[root@localhost filebeat-7.6.2-linux-x86_64]# ./filebeat export config
# filebeat:
#   config:
#     modules:                                                      # 默认在 modules.d 中加载启用的模块配置
#       path: /root/filebeat-7.6.2-linux-x86_64/modules.d/*.yml     # 指定模块存放路径 (该路径配置将影响 filebeat modules 命令)
#       reload.enabled: true                                        # 自动重载
#       reload.period: 10s                                          # 检查间隔
#   modules:                                                        # 启用特定模块
#   - module: nginx
#   - module: mysql
#   - module: system
#   registry:                                                       # 注册文件相关配置字段
#     path: registry                                                # 默认路径: ${path.data}/registry
#     flush: 1s                                                     # 为0s时成功发布每一批事件后将注册表写入磁盘，缺省为0s
#   inputs:                                                         # 采集相关
#   - type: log                                                     # 采集类型: log、stdin、redis、udp、docker、tcp、syslog、netflow ...
#     enabled: false                                                # 是否启用该采集段
#     paths:                                                        # 采集路径
#      - /var/log/*.log
# name: "app1"                                                      # 实例名称 ( 默认使用主机名 )
# max_procs: 8                                                      # 可使用的最大核心数，默认为系统中可用的逻辑CPU数量
# output:                                                           # 输出相关
#   elasticsearch:                                                  # 输出类型
#     hosts:
#     - localhost:9200
# path:                                                             # Filebeat运行时依赖的相关组件路径设置
#   config: /root/filebeat-7.6.2-linux-x86_64                       # 配置文件目录
#   data: /root/filebeat-7.6.2-linux-x86_64/data                    # 存放注册文件
#   home: /root/filebeat-7.6.2-linux-x86_64                         # 主目录
#   logs: /root/filebeat-7.6.2-linux-x86_64/logs                    # 日志路径
# processors:                                                       # 消息按顺序执行处理器，实现对将要输出的数据进行全局级别的处理
# - add_host_metadata: null                                         #
# - add_cloud_metadata: null                                        #
# - add_docker_metadata: null                                       # 
# - add_kubernetes_metadata: null                                   # 
# setup:                                                            # setup.template部分用于在Elasticsearch中设置索引模板
#   kibana: null                                                    # 若启用模板加载（默认）则Filebeat成功连接到Elasticsearch后将自动加载索引模板
#   template:                                                       # https://www.elastic.co/guide/en/beats/filebeat/current/configuration-template.html
#     enabled: true
#     name: xxx                             # 模板名默认filebeat，其版本始终添加到给定名称后，因此最终为: filebeat-%{[agent.version]}
#     pattern: xxx                          # 模板中使用的索引匹配模式，默认 filebeat-*，其版本始终含在模式中，因此最终为: filebeat-%{[agent.version]}-*
#     overwrite: false                      # 是否覆盖已存在的模板，默认 false
#     fields: "fields.yml"                  # 描述字段的YAML的路径，默认 fields.yml，若设置相对路径，则将其视为相对于配置的路径
#     json.enabled: true                    # 加载基于JSON的模板文件 ( 将忽略 setup.template.fields 设置，此外还需 json.path和json.name字段)
#     settings:                             # 
#       index.number_of_shards: 1           # 分片数
#       index.number_of_replicas: 1         # 副本数
#   kibana.host: "http://localhost:5601"    # 该部分用于Kibana仪表板，Filebeat通过其API加载到Kibana中，因此这里需要Kibana端点的信息
#   kibana.username: ""                     # 连接到Kibana的基本身份验证账号，若没有为此指定值，则使用elasticsearch的值
#   kibana.password: ""                     # 连接到Kibana的基本身份验证密码，若没有为此指定值，则使用elasticsearch的值
#   kibana.space.id: ""                     # 使用的Kibana空间ID，若未指定则加载到默认空间中
#   kibana.path: /kibana

# ------------------------------------------------------------------------------

# https://www.elastic.co/guide/en/beats/filebeat/current/_live_reloading.html
# 单独存放采集配置时可用如下方式进行配置:
# filebeat.config.inputs:
#   enabled: true
#   path: configs/*.yml
#   reload.enabled: true
#   reload.period: 10s

filebeat.yml 配置说明

name: "192.168.xx.xx"                   # 实例名称 ( 默认使用主机名 )

queue.mem:                              # 内部队列: https://www.elastic.co/guide/en/beats/filebeat/current/configuring-internal-queue.html#configuring-internal-queue
  events: 4096                          # 将内存队列设为最多缓冲4096个事件
  flush.min_events: 512                 # 默认2048，发布所需最小事件数，若设为0则直接输出事件，无需额外等待
  flush.timeout: 5s                     # 默认1s，需等待的最大时间，若设为0则事件立即可供使用
                                        # 当有512个事件可用或最早的可用事件已在队列等待5秒时，将直接输出

# ------------------------------        https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-log.html

filebeat.inputs:
  - type: log                           # 采集类型: log、stdin、redis、udp、docker、tcp、syslog、netflow ...
    enabled: true                       # 每个prospectors的开关，默认true
    paths:                              #
      - /tmp/Test/access.log
      - /var/log/*/*.log              
    encoding: plain                     # 被监控文件的编码类型，plain、utf-8 都可以处理中文日志
    tags: ["json"]                      # 使用标记实现在Kibana中选择特定事件或在Logstash中应用标签过滤
    fields:                             # 向输出的每条日志添加额外字段，如 level:debug，方便后续的处理
      Level: "debug"                    # 可用 values，arrays，dictionaries 或任何嵌套数据
      review: 1                         # 默认在输出信息的fields键下，如 fields.Level
    fields_under_root: true             # 将fields新增的字段设为顶级的JSON字段，而不是将其附加信息放在fields字段下
    # Processors: ...                   # https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html
    keep_null: false                    # 默认关闭，用于保持空数据的输出
    document_type: oslog                # 可以在logstash中使用 [type] 对其值进行判断，设定输出时的document的type字段可用来给日志分类
    scan_frequency: 5s                  # 扫描频率，默认10秒，过快会占用CPU
    include_lines: ['^ERR','^WARN']     # 匹配要读取的行，后接正则组成的列表，默认无，若启用则仅输出匹配行并省略空行
    exclude_lines: ["^DBG"]             # 在输入中排除符合正则表达式列表的行
    exclude_files: [".gz$"]             # 由正则组成的需排除的文件名组成的列表，默认无
    max_bytes: 10485760                 # 一行是一个事件，此处用于限制在一次日志事件中最多上传的字节数，默认10M
    close_inactive: 12h                 # 当文件删除或不更新之后一段时间则关闭句柄，它不基于文件的修改时间，若关闭的文件再次更改则将启动新的收割机
    ignore_older: 24h                   # 若启用将忽略在指定时间跨度之前修改的所有文件 ( 需大于 close_inactive 的值 )
    force_close_files: true             # Filebeat在未到达 close_older 之前保持handle，若在这个时间窗内删除文件则会有问题
                                        # 所以可将其设为true，只要检测到文件名变化就关掉handle
    close_removed: true                 # 若文件不存在则关闭处理。若后面又出现则会在 scan_frequency 之后继续从已知position处开始收集，默认 true
    tail_files: true                    # 从尾部开始监控并把新增的每行作为一个事件依次发送，而不是从开始处
    recursive_glob.enabled: true        # 默认启用，每个路径最右边的**将扩展为固定数量: /foo/** 会扩展为: /foo、/foo/*、/foo/*/*（将单个**扩展为8个级别的深度）
    pipeline: xxxx                      # 为此输入生成的事件设置的 Ingest pipeline ID。也可在ES输出中配置，但此处使用会导致配置更简单
                                        # 若在输入和输出中都配置了 pipeline，则使用输入中的选项
  - type: kafka                         # https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-kafka.html
    hosts:
      - kafka-broker-1:9092
      - kafka-broker-2:9092
    topics: ["my-topic"]
    group_id: "${VAR:default_value}"    # Filebeat的配置文件支持环境变量的导入，当使用 -E 从命令行覆盖配置时也可以指定环境变量: -E name=${NAME}
    username: "${VAR:?error_text}"      # 环境变量支持错误机制 (变量未设置时报错并输出 error_text)，类似于SHELL的语法
    password: "<your connection string>"

  - type: tcp                           # https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-tcp.html
    max_message_size: 10MiB
    host: "localhost:9000"

processors:                             # 处理器用于在输出前对事件内容进行处理
- drop_fields:
    fields: ["beat", "input_type", "source", "offset"]
- drop_event:
    when:
      regexp:                           # 支持的正则: https://www.elastic.co/guide/en/beats/filebeat/current/regexp-support.html
        message: "^DBG:"
- drop_event:
    when:
      contains:
        source: "test" 

output.console:
  codec.format:                         # codec.format 用于创建自定义格式消息的可配置格式字符串
    string: '%{[@timestamp]} %{[message]}'  # 使用format编解码器将事件时间戳和消息字段打印到控制台的示例

output.logstash:                        # https://www.elastic.co/guide/en/beats/filebeat/current/logstash-output.html
  hosts: ["localhost:5044", "localhost:5045"]
  loadbalance: true                     # 当配置多个 logstash 时可采用负载均衡的方式进行输出
  worker: 2                             # number of hosts * workers，该例配置中参加负载平衡的Worker总数为4
  index: filebeat                       # 

output.elasticsearch:                   # https://www.elastic.co/guide/en/beats/filebeat/current/elasticsearch-output.html#index-option-es
  hosts: ["https://myEShost:9200"]
  username: "filebeat_writer"
  password: "YOUR_PASSWORD"
  index: "filebeat-%{[agent.version]}-%{+yyyy.MM.dd}"   # 该参数的默认值
  protocol: https
  ssl.certificate: "/etc/pki/client/cert.pem"
  ssl.key: "/etc/pki/client/cert.key"
  pipelines:                            # 根据事件内容在输出时调用特定的 Ingest pipeline
    - pipeline: "warning_pipeline"
      when.contains:
        message: "WARN"
    - pipeline: "error_pipeline"        # https://www.elastic.co/guide/en/beats/filebeat/current/configuring-ingest-node.html
      when.contains:
        message: "ERR"
    - pipeline: "%{[fields.log_type]}"
      mappings:
        critical: "sev1_pipeline"
        normal: "sev2_pipeline"
      default: "sev3_pipeline"

logging.level: info                     # https://www.elastic.co/guide/en/beats/filebeat/current/configuration-logging.html
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7
  permissions: 0644

# -----------------------------------------------------------------------------

# 自动发现容器日志
# https://www.elastic.co/guide/en/beats/filebeat/current/configuration-autodiscover.html

# 支持的正则
# https://www.elastic.co/guide/en/beats/filebeat/current/regexp-support.html

模块

# 模块提供了处理常见日志格式的快速方法，包含默认配置、Ingest节点的管道定义、Kibana UI ...
# 可以在modules.d目录（推荐）或Filebeat配置文件中配置模块，modules.d/* 包含所有可用模块的默认配置 ...

# 启用或禁用modules.d下的特定模块配置:
filebeat modules [enable|disable] system nginx mysql

多行匹配

multiline:       # 适用于日志中每条日志占据多行的情况，如各种语言的报错信息调用栈。此配置包含如下子配置
    pattern:     # 多行日志开始的那一行匹配的pattern
    negate:      # 是否对pattern的匹配结果取反，默认false（不取反），设为true则取反
    match:       # 匹配pattern后与前面（before）还是后面（after）的内容合并为一条日志
    max_lines:   # 合并的最多行数，含匹配pattern的那一行
    timeout:     # 到了timeout之后，即使没有匹配一个新的pattern（发生新的事件）也把已经匹配的日志事件发送出去

# ---------------------------- Example
filebeat.inputs:
  - type: log
    paths:
      - /path/test.log
    input_type: log 
    multiline:
      pattern: '^\['
      negate: true
      match: after

command

https://www.elastic.co/guide/en/beats/filebeat/current/command-line-options.html#keystore-command

# 启用禁用特定模块（将创建对应的 pipeline、template、kibana UI 等等...）
# 管理已配置的模块，可使用此命令来启用和禁用modules.d目录中定义的特定模块配置，使用此命令所做的更改将保留并用于以后的Filebeat运行
filebeat modules [enable|disable] system nginx mysql

# 查看启用或禁用的模块列表
./filebeat modules list

# 启用特定模块中Ingest节点使用的pipeline信息（需连接Elasticsearch所需的信息）
filebeat setup --pipelines --modules system

# 检查配置文件
./filebeat test config -e

# 输出配置信息
./filebeat export config

# 设置初始环境
./filebeat setup -e

# 启动
nohup ./filebeat -c filebeat.yml >/dev/null  2>&1  &

Logstash 端接收 filebeat 数据

# Logstash-input-beats
input {
  beats {
    port => 5044
  }
}

pipeline

# https://www.elastic.co/guide/en/beats/filebeat/current/load-ingest-pipelines.html

# 若启用Elasticsearch输出则在首次运行Filebeat时会自动设置用于解析日志行的 Ingest pipeline
# 若要将事件发到Logstash，则需手动加载 Ingest pipeline
# 为此需在指定 --pipelines 选项的情况下运行setup命令，如果使用modules命令在modules.d目录中启用模块，则还要指定--modules标志

# 例如以下命令加载system、nginx、mysql模块中启用的所有文件集使用的Ingest pipeline: 
./filebeat setup --pipelines --modules system,nginx,mysql

修改 Filebeat 中对应类型的模块下 ingest - pipeline 的时区

cat  /usr/share/filebeat/module/nginx/error/ingest/pipeline.json
{
    # ......
    "date": {
      "field": "nginx.error.time",          # 来源字段
      "target_field": "@timestamp",         # 目标字段
      "formats": ["YYYY/MM/dd H:m:s"],      # 解析格式
      "timezone": "Asia/Shanghai"           # 修改时区
   }
   # ......
}

附

# 使用Elasticsearch的APM监测Filebeat的性能:
# https://www.elastic.co/guide/en/beats/filebeat/current/configuration-instrumentation.html
# instrumentation:
#   enabled: true
#   environment: production
#   hosts:
#     - "http://localhost:8200"
#   api_key: L5ER6FEvjkmlfalBealQ3f3fLqf03fazfOV

# -----------------------------------------------------

# 关于Filebeat与Elasticsearch/Logstash的安全通信设置
# https://www.elastic.co/guide/en/beats/filebeat/current/securing-filebeat.html

本文章为转载内容，我们尊重原作者对文章享有的著作权。如有内容错误或侵权问题，欢迎原作者联系我们进行内容更正或删除文章。