慢日志配置:

elasticsearch.yml:

cluster.name: elasticsearch

node.data: ${NODE_DATA:true}
node.master: ${NODE_MASTER:true}
node.name: ${HOSTNAME}

network.host: 0.0.0.0

bootstrap.mlockall: ${BOOTSTRAP_MLOCKALL:false}

cloud:
  kubernetes:
    service: ${SERVICE}
    namespace: ${KUBERNETES_NAMESPACE}

discovery:
  type: kubernetes
  zen:
    minimum_master_nodes: ${MINIMUM_MASTER_NODES:2}

index.analysis.analyzer.default.type: starfish
index.similarity.default.type: ybSimilarity

index.number_of_shards: 6
index.number_of_replicas: 1
index.store.type: mmapfs
index.refresh_interval: 3600s
index.mapper.dynamic: false

path.conf: /etc/elasticsearch

# Search slowlog thresholds (query phase).
index.search.slowlog.threshold.query.warn: 10s
index.search.slowlog.threshold.query.info: 5s
index.search.slowlog.threshold.query.debug: 2s
index.search.slowlog.threshold.query.trace: 500ms

# Search slowlog thresholds (fetch phase).
index.search.slowlog.threshold.fetch.warn: 1s
index.search.slowlog.threshold.fetch.info: 800ms
# FIX: original read "debug:500ms" (no space after the colon), which YAML
# parses as part of the key instead of a key/value pair.
index.search.slowlog.threshold.fetch.debug: 500ms
index.search.slowlog.threshold.fetch.trace: 200ms

# Indexing slowlog thresholds.
index.search.slowlog.threshold.index.warn: 10s
index.search.slowlog.threshold.index.info: 5s
index.search.slowlog.threshold.index.debug: 2s
index.search.slowlog.threshold.index.trace: 500ms
index.search.slowlog.level: trace
index.search.slowlog.source: 1000

monitor.jvm.gc.ParNew.info: 700ms
monitor.jvm.gc.ConcurrentMarkSweep.info: 5s

# FIX: the original declared "index:" (and below, "threadpool:") twice at the
# top level. Duplicate mapping keys are invalid YAML and most loaders keep only
# the last occurrence, silently dropping the first analyzer/filter and the
# index threadpool settings. Both pairs are merged into single mappings here.
index:
  analysis:
    analyzer:
      starfish_small_query_syno:
        type: custom
        tokenizer: starfish_small_query
        filter: [my_synonym_small]
      starfish_syno:
        type: custom
        tokenizer: starfish_query
        filter: [my_synonym]
    filter:
      my_synonym_small:
        type: synonym
        ignore_case: true
        synonyms_path: analysis/synonym.txt
      my_synonym:
        type: synonym
        ignore_case: true
        synonyms_path: analysis/synonym.txt

threadpool:
  index:
    type: fixed
    size: 100
    queue_size: 2000
  search:
    type: fixed
    size: 500
    queue_size: 1000

# see https://github.com/elastic/elasticsearch-definitive-guide/pull/679
processors: ${PROCESSORS:}

# avoid split-brain w/ a minimum consensus of two masters plus a data node
gateway.expected_master_nodes: ${EXPECTED_MASTER_NODES:2}
gateway.expected_data_nodes: ${EXPECTED_DATA_NODES:1}
gateway.recover_after_time: ${RECOVER_AFTER_TIME:5m}
gateway.recover_after_master_nodes: ${RECOVER_AFTER_MASTER_NODES:2}
gateway.recover_after_data_nodes: ${RECOVER_AFTER_DATA_NODES:1}

logging.yml (2.4 版本):

# you can override this using by setting a system property, for example -Des.logger.level=DEBUG
es.logger.level: INFO
rootLogger: ${es.logger.level}, console, file
logger:
  # log action execution errors for easier debugging
  action: DEBUG

  # deprecation logging, turn to DEBUG to see them
  deprecation: INFO, deprecation_log_file

  # reduce the logging for aws, too much is logged under the default INFO
  com.amazonaws: WARN
  # aws will try to do some sketchy JMX stuff, but its not needed.
  com.amazonaws.jmx.SdkMBeanRegistrySupport: ERROR
  com.amazonaws.metrics.AwsSdkMetrics: ERROR

  org.apache.http: INFO

  # gateway
  #gateway: DEBUG
  #index.gateway: DEBUG

  # peer shard recovery
  #indices.recovery: DEBUG

  # discovery
  #discovery: TRACE

  # route the slowlogs to their dedicated appenders (tailed by fluentd)
  index.search.slowlog: TRACE, index_search_slow_log_file
  index.indexing.slowlog: TRACE, index_indexing_slow_log_file

# NOTE(review): the stock ES 2.4 logging.yml ships these as `false`; `true`
# additionally duplicates every slowlog/deprecation entry into the main
# cluster log. Confirm that duplication is intended.
additivity:
  index.search.slowlog: true
  index.indexing.slowlog: true
  deprecation: true

appender:
  console:
    type: console
    layout:
      type: consolePattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %.10000m%n"

  # Use the following log4j-extras RollingFileAppender to enable gzip compression of log files.
  # For more information see https://logging.apache.org/log4j/extras/apidocs/org/apache/log4j/rolling/RollingFileAppender.html
  #file:
  #  type: extrasRollingFile
  #  file: ${path.logs}/${cluster.name}.log
  #  rollingPolicy: timeBased
  #  rollingPolicy.FileNamePattern: ${path.logs}/${cluster.name}.log.%d{yyyy-MM-dd}.gz
  #  layout:
  #    type: pattern
  #    conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  deprecation_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_deprecation.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  index_search_slow_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_index_search_slowlog.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

  index_indexing_slow_log_file:
    type: dailyRollingFile
    file: ${path.logs}/${cluster.name}_index_indexing_slowlog.log
    datePattern: "'.'yyyy-MM-dd"
    layout:
      type: pattern
      conversionPattern: "[%d{ISO8601}][%-5p][%-25c] %m%n"

 

收集配置在es data节点上:

 

# StatefulSet for the ES data nodes; a fluentd sidecar tails the slowlog files
# through the shared emptyDir volume mounted at /usr/share/elasticsearch/logs.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  labels:
    app: elasticsearch
    chart: elasticsearch-0.4.9
    component: data
    heritage: Tiller
    release: es
  name: es-elasticsearch-data
  namespace: dev-es
spec:
  podManagementPolicy: OrderedReady
  replicas: 2
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      app: elasticsearch
      component: data
      release: es
  serviceName: es-elasticsearch-data
  template:
    metadata:
      creationTimestamp: null
      labels:
        app: elasticsearch
        component: data
        release: es
    spec:
      affinity:
        podAntiAffinity:
          # soft anti-affinity: prefer spreading data pods across hosts
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app: elasticsearch
                    component: data
                    release: es
                topologyKey: kubernetes.io/hostname
              weight: 1
      containers:
        - env:
            - name: SERVICE
              value: es-elasticsearch-master
            - name: KUBERNETES_MASTER
              value: kubernetes.default.svc.cluster.local
            - name: KUBERNETES_NAMESPACE
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: metadata.namespace
            - name: NODE_MASTER
              value: "false"
            - name: PROCESSORS
              valueFrom:
                resourceFieldRef:
                  divisor: "0"
                  resource: limits.cpu
            - name: ES_JAVA_OPTS
              value: -Djava.net.preferIPv4Stack=true -Xms1536m -Xmx1536m
            - name: MINIMUM_MASTER_NODES
              value: "2"
          image: 192.168.1.225:5000/elasticsearch:2.4-youben
          imagePullPolicy: Always
          lifecycle:
            preStop:
              exec:
                command:
                  - /bin/bash
                  - /pre-stop-hook.sh
          name: elasticsearch
          ports:
            - containerPort: 9300
              name: transport
              protocol: TCP
          readinessProbe:
            failureThreshold: 3
            httpGet:
              path: /_cluster/health?local=true
              port: 9200
              scheme: HTTP
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
          resources:
            limits:
              cpu: "1"
            requests:
              cpu: 25m
              memory: 1536Mi
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - mountPath: /usr/share/elasticsearch/data
              name: data
            - mountPath: /usr/share/elasticsearch/config/logging.yml
              name: config
              subPath: logging.yml
            - mountPath: /pre-stop-hook.sh
              name: config
              subPath: pre-stop-hook.sh
            - name: shared-data
              mountPath: /usr/share/elasticsearch/logs
        # sidecar: ships the slowlog files from the shared logs volume
        - name: fluentd-es
          image: 192.168.1.225:5000/fluentd-elasticsearch:v2.0.4-youben
          imagePullPolicy: Always
          command: ["/bin/sh"]
          args: ["-c", "/run.sh $FLUENTD_ARGS"]
          env:
            - name: FLUENTD_ARGS
              value: --no-supervisor -q
          volumeMounts:
            - name: config-volume
              mountPath: /etc/fluent/config.d
            - name: shared-data
              mountPath: /usr/share/elasticsearch/logs
      dnsPolicy: ClusterFirst
      initContainers:
        # raise vm.max_map_count for mmapfs (requires privileged)
        - command:
            - sysctl
            - -w
            - vm.max_map_count=262144
          image: busybox
          imagePullPolicy: Always
          name: sysctl
          resources: {}
          securityContext:
            privileged: true
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
        # fix volume ownership before the ES process starts
        - command:
            - /bin/bash
            - -c
            - chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/data && chown -R elasticsearch:elasticsearch /usr/share/elasticsearch/logs
          image: 192.168.1.225:5000/elasticsearch:2.4-youben
          imagePullPolicy: Always
          name: chown
          resources: {}
          securityContext:
            runAsUser: 0
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
            - mountPath: /usr/share/elasticsearch/data
              name: data
      nodeSelector:
        deploy: app
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: es-elasticsearch
      serviceAccountName: es-elasticsearch
      terminationGracePeriodSeconds: 3600
      volumes:
        - configMap:
            defaultMode: 420
            name: es-elasticsearch
          name: config
        - name: config-volume
          configMap:
            name: fluentd-es-config-es-slow
        - name: shared-data
          emptyDir: {}
  updateStrategy:
    type: OnDelete
  volumeClaimTemplates:
    - metadata:
        creationTimestamp: null
        name: data
      spec:
        accessModes:
          - ReadWriteOnce
        resources:
          requests:
            storage: 30Gi
        storageClassName: glusterfs-storage-dv

 

fluent配置:

# fluentd sidecar configuration: tails the two ES slowlog files (paths must
# match ${cluster.name}=elasticsearch in logging.yml) and ships parsed records
# to the central logging ES cluster.
kind: ConfigMap
apiVersion: v1
metadata:
  name: fluentd-es-config-es-slow
  labels:
    addonmanager.kubernetes.io/mode: Reconcile
data:
  system.conf: |-
    <system>
      root_dir /tmp/fluentd-buffers/
    </system>

  input.conf: |-
    <source>
      @type tail
      path /usr/share/elasticsearch/logs/elasticsearch_index_search_slowlog.log
      tag elasticsearch.search_slowlog_query
      pos_file /usr/share/elasticsearch/logs/elasticsearch-search-slow.pos
      format /^\[(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]\[(?<severity>[a-zA-Z]+\s*)\]\[(?<source>\S+)\] \[(?<node>\S+)\] \[(?<index>.+)\]\[(?<shard>\d+)\] took\[(?<took>.+)\], took_millis\[(?<took_millis>\d+)\], types\[(?<types>.*)\], stats\[(?<stats>.*)\], search_type\[(?<search_type>.*)\], total_shards\[(?<total_shards>\d+)\], source\[(?<source_body>.*)\], extra_source\[(?<extra_source>.*)\], /
    </source>

    <source>
      @type tail
      path /usr/share/elasticsearch/logs/elasticsearch_index_indexing_slowlog.log
      tag elasticsearch.indexing_slowlog_query
      pos_file /usr/share/elasticsearch/logs/elasticsearch-indexing-slow.pos
      format /^\[(?<time>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3})\]\[(?<severity>[a-zA-Z]+\s*)\]\[(?<source>\S+)\] \[(?<node>\S+)\] \[(?<index>.+)\]\[(?<shard>\d+)\] took\[(?<took>.+)\], took_millis\[(?<took_millis>\d+)\], type\[(?<type>.+)\], id\[(?<indexing_id>.*)\], routing\[(?<routing>.*)\], source\[(?<source_body>.*)\]/
    </source>

  output.conf: |-
    <match elasticsearch.*>
      @id elasticsearch.es
      @type elasticsearch
      @log_level info
      include_tag_key true
      host elasticsearch-logging.kube-system.svc.cluster.local
      port 9200
      logstash_format true
      logstash_prefix docker.es.slow
      logstash_dateformat %Y-%m-%d
      type_name docker_es_slow
      <buffer>
        @type file
        path /var/log/fluentd-buffers/kubernetes.system.buffer
        flush_mode interval
        retry_type exponential_backoff
        flush_thread_count 2
        flush_interval 5s
        retry_forever
        retry_max_interval 30
        chunk_limit_size 2M
        queue_limit_length 8
        overflow_action block
      </buffer>
    </match>