慢日志配置:

elasticsearch.yml:

cluster.name: elasticsearch

node.data: ${NODE_DATA:true}
node.master: ${NODE_MASTER:true}
node.name: ${HOSTNAME}

network.host: 0.0.0.0

bootstrap.mlockall: ${BOOTSTRAP_MLOCKALL:false}

cloud:
  kubernetes:
    service: ${SERVICE}
    namespace: ${KUBERNETES_NAMESPACE}

discovery:
  type: kubernetes
  zen:
    minimum_master_nodes: ${MINIMUM_MASTER_NODES:2}

index.analysis.analyzer.default.type: starfish
index.similarity.default.type: ybSimilarity

index.number_of_shards: 6
index.number_of_replicas: 1
index.store.type: mmapfs
index.refresh_interval: 3600s
index.mapper.dynamic: false

path.conf: /etc/elasticsearch

# Search slowlog thresholds (query phase).
index.search.slowlog.threshold.query.warn: 10s
index.search.slowlog.threshold.query.info: 5s
index.search.slowlog.threshold.query.debug: 2s
index.search.slowlog.threshold.query.trace: 500ms

# Search slowlog thresholds (fetch phase).
index.search.slowlog.threshold.fetch.warn: 1s
index.search.slowlog.threshold.fetch.info: 800ms
# FIX: original read "debug:500ms" (no space after the colon), which YAML
# parses as part of the key instead of a key/value pair.
index.search.slowlog.threshold.fetch.debug: 500ms
index.search.slowlog.threshold.fetch.trace: 200ms

# Indexing slowlog thresholds.
index.search.slowlog.threshold.index.warn: 10s
index.search.slowlog.threshold.index.info: 5s
index.search.slowlog.threshold.index.debug: 2s
index.search.slowlog.threshold.index.trace: 500ms
index.search.slowlog.level: trace
index.search.slowlog.source: 1000

monitor.jvm.gc.ParNew.info: 700ms
monitor.jvm.gc.ConcurrentMarkSweep.info: 5s

# FIX: the original declared "index:" (and below, "threadpool:") twice at the
# top level. Duplicate mapping keys are invalid YAML and most loaders keep only
# the last occurrence, silently dropping the first analyzer/filter and the
# index threadpool settings. Both pairs are merged into single mappings here.
index:
  analysis:
    analyzer:
      starfish_small_query_syno:
        type: custom
        tokenizer: starfish_small_query
        filter: [my_synonym_small]
      starfish_syno:
        type: custom
        tokenizer: starfish_query
        filter: [my_synonym]
    filter:
      my_synonym_small:
        type: synonym
        ignore_case: true
        synonyms_path: analysis/synonym.txt
      my_synonym:
        type: synonym
        ignore_case: true
        synonyms_path: analysis/synonym.txt

threadpool:
  index:
    type: fixed
    size: 100
    queue_size: 2000
  search:
    type: fixed
    size: 500
    queue_size: 1000

# see https://github.com/elastic/elasticsearch-definitive-guide/pull/679
processors: ${PROCESSORS:}

# avoid split-brain w/ a minimum consensus of two masters plus a data node
gateway.expected_master_nodes: ${EXPECTED_MASTER_NODES:2}
gateway.expected_data_nodes: ${EXPECTED_DATA_NODES:1}
gateway.recover_after_time: ${RECOVER_AFTER_TIME:5m}
gateway.recover_after_master_nodes: ${RECOVER_AFTER_MASTER_NODES:2}
gateway.recover_after_data_nodes: ${RECOVER_AFTER_DATA_NODES:1}

logging.yml (2.4 版本):

# you can override this using by setting a system property, for example -Des.logger.level=DEBUG
es.logger.level: INFO
rootLogger: ${es.logger.level}, console, file
logger:
  # log action execution errors for easier debugging
  action: DEBUG

  # deprecation logging, turn to DEBUG to see them
  deprecation: INFO, deprecation_log_file

  # reduce the logging for aws, too much is logged under the default INFO
  com.amazonaws: WARN
  # aws will try to do some sketchy JMX stuff, but its not needed.
  com.amazonaws.jmx.SdkMBeanRegistrySupport: ERROR
  com.amazonaws.metrics.AwsSdkMetrics: ERROR

  org.apache.http: INFO

  # gateway
  #gateway: DEBUG
  #index.gateway: DEBUG

  # peer shard recovery
  #indices.recovery: DEBUG

  # discovery
  #discovery: TRACE

  # route the slowlogs to their dedicated appenders (tailed by fluentd)
  index.search.slowlog: TRACE, index_search_slow_log_file
  index.indexing.slowlog: TRACE, index_indexing_slow_log_file

# NOTE(review): the stock ES 2.4 logging.yml ships these as `false`; `true`
# additionally duplicates every slowlog/deprecation entry into the main
# cluster log. Confirm that duplication is intended.
additivity:
  index.search.slowlog: true
  index.indexing.slowlog: true
  deprecation: true

appender:
  console:
    type: console
    layout:
      type: consolePattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %.10000m%n"

  # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files.
  # For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html
  #file:
  #  type: extrasRollingFile
  #  file: ${path.logs}/${cluster.name}.log
  #  rollingPolicy: timeBased
  #  rollingPolicy.FileNamePattern: ${path.logs}/${cluster.name}.log.%d{yyyy-MM-dd}.gz
  #  layout:
  #    type: pattern
  #    conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  deprecation_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_deprecation.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  index_search_slow_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_index_search_slowlog.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  index_indexing_slow_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_index_indexing_slowlog.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

 

收集配置在es data节点上:

 

# StatefulSet for the ES data nodes; a fluentd sidecar tails the slowlog files
# through the shared emptyDir volume mounted at /usr/share/elasticsearch/logs.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app: elasticsearch
    chart: elasticsearch-0.4.9
    component: data
    heritage: Tiller
    release: es
  name: es-elasticsearch-data
  namespace: dev-es
spec:
  podManagementPolicy: OrderedReady
  replicas: 2
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: elasticsearch
      component: data
      release: es
  serviceName: es-elasticsearch-data
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: elasticsearch
        component: data
        release: es
    spec:
      affinity:
        podAntiAffinity:
          # soft anti-affinity: prefer spreading data pods across hosts
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app: elasticsearch
                    component: data
                    release: es
                topologyKey: kubernetes.io/hostname
              weight: 1
      containers:
        - env:
            - name: SERVICE
              value: es-elasticsearch-master
            - name: KUBERNETES_MASTER
              value: kubernetes.default.svc.cluster.local
            - name: KUBERNETES_NAMESPACE
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.namespace
            - name: NODE_MASTER
              value: "false"
            - name: PROCESSORS
              valueFrom:
                resourceFieldRef:
                  divisor: "0"
                  resource: limits.cpu
            - name: ES_JAVA_OPTS
              value: -Djava.net.preferIPv4Stack=true -Xms1536m -Xmx1536m
            - name: MINIMUM_MASTER_NODES
              value: "2"
          image: 192.168.1.225:5000/elasticsearch:2.4-youben
          imagePullPolicy: Always
          lifecycle:
            preStop:
              exec:
                command:
                  - /bin/bash
                  - /pre-stop-hook.sh
          name: elasticsearch
          ports:
            - containerPort: 9300
              name: transport
              protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /_cluster/health?local=true
              port: 9200
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            limits:
              cpu: "1"
            requests:
              cpu: 25m
              memory: 1536Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - mountPath: /usr/share/elasticsearch/data
              name: data
            - mountPath: /usr/share/elasticsearch/config/logging.yml
              name: config
              subPath: logging.yml
            - mountPath: /pre-stop-hook.sh
              name: config
              subPath: pre-stop-hook.sh
            - name: shared-data
              mountPath: /usr/share/elasticsearch/logs
        # sidecar: ships the slowlog files from the shared logs volume
        - name: fluentd-es
          image: 192.168.1.225:5000/fluentd-elasticsearch:v2.0.4-youben
          imagePullPolicy: Always
          command: ["/bin/sh"]
          args: ["-c", "/run.sh $FLUENTD_ARGS"]
          env:
            - name: FLUENTD_ARGS
              value: --no-supervisor -q
          volumeMounts:
            - name: config-volume
              mountPath: /etc/fluent/config.d
            - name: shared-data
              mountPath: /usr/share/elasticsearch/logs
      dnsPolicy: ClusterFirst
      initContainers:
        # raise vm.max_map_count for mmapfs (requires privileged)
        - command:
            - sysctl
            - -w
            - vm.max_map_count=262144
          image: busybox
          imagePullPolicy: Always
          name: sysctl
          resources: {}
          securityContext:
            privileged: true
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
        # fix volume ownership before the ES process starts
        - command:
            - /bin/bash
            - -c
            - chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data && chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/logs
          image: 192.168.1.225:5000/elasticsearch:2.4-youben
          imagePullPolicy: Always
          name: chown
          resources: {}
          securityContext:
            runAsUser: 0
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - mountPath: /usr/share/elasticsearch/data
              name: data
      nodeSelector:
        deploy: app
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: es-elasticsearch
      serviceAccountName: es-elasticsearch
      terminationGracePeriodSeconds: 3600
      volumes:
        - configMap:
            defaultMode: 420
            name: es-elasticsearch
          name: config
        - name: config-volume
          configMap:
            name: fluentd-es-config-es-slow
        - name: shared-data
          emptyDir: {}
  updateStrategy:
    type: OnDelete
  volumeClaimTemplates:
    - metadata:
        creationTimestamp: null
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 30Gi
        storageClassName: glusterfs-storage-dv

 

fluent配置:

# fluentd sidecar configuration: tails the two ES slowlog files (paths must
# match ${cluster.name}=elasticsearch in logging.yml) and ships parsed records
# to the central logging ES cluster.
kind: ConfigMap
apiVersion: v1
metadata:
  name: fluentd-es-config-es-slow
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
data:
  system.conf: |-
    <system>
      root_dir /tmp/fluentd-buffers/
    </system>

  input.conf: |-
    <source>
      @type tail
      path /usr/share/elasticsearch/logs/elasticsearch_index_search_slowlog.log
      tag elasticsearch.search_slowlog_query
      pos_file /usr/share/elasticsearch/logs/elasticsearch-search-slow.pos
      format /^\[(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]\[(?<severity>[a-zA-Z]+\s*)\]\[(?<source>\S+)\] \[(?<node>\S+)\] \[(?<index>.+)\]\[(?<shard>\d+)\] took\[(?<took>.+)\], took_millis\[(?<took_millis>\d+)\], types\[(?<types>.*)\], stats\[(?<stats>.*)\], search_type\[(?<search_type>.*)\], total_shards\[(?<total_shards>\d+)\], source\[(?<source_body>.*)\], extra_source\[(?<extra_source>.*)\], /
    </source>

    <source>
      @type tail
      path /usr/share/elasticsearch/logs/elasticsearch_index_indexing_slowlog.log
      tag elasticsearch.indexing_slowlog_query
      pos_file /usr/share/elasticsearch/logs/elasticsearch-indexing-slow.pos
      format /^\[(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]\[(?<severity>[a-zA-Z]+\s*)\]\[(?<source>\S+)\] \[(?<node>\S+)\] \[(?<index>.+)\]\[(?<shard>\d+)\] took\[(?<took>.+)\], took_millis\[(?<took_millis>\d+)\], type\[(?<type>.+)\], id\[(?<indexing_id>.*)\], routing\[(?<routing>.*)\], source\[(?<source_body>.*)\]/
    </source>

  output.conf: |-
    <match elasticsearch.*>
      @id elasticsearch.es
      @type elasticsearch
      @log_level info
      include_tag_key true
      host elasticsearch-logging.kube-system.svc.cluster.local
      port 9200
      logstash_format true
      logstash_prefix docker.es.slow
      logstash_dateformat %Y-%m-%d
      type_name docker_es_slow
      <buffer>
        @type file
        path /var/log/fluentd-buffers/kubernetes.system.buffer
        flush_mode interval
        retry_type exponential_backoff
        flush_thread_count 2
        flush_interval 5s
        retry_forever
        retry_max_interval 30
        chunk_limit_size 2M
        queue_limit_length 8
        overflow_action block
      </buffer>
    </match>