文章目录


0 结果

原始数据写入:
Python写入日志到Elasticsearch(logging/loguru,可个性化定制写入信息)————附带详细代码和示例_elasticsearch

修改写入后的格式:

Python写入日志到Elasticsearch(logging/loguru,可个性化定制写入信息)————附带详细代码和示例_es_02

1 准备

这里使用的是Python自带的​​logging​​​模块或​​loguru​​​模块(封装了​​logging​​模块)进行es的日志写入。

使用如下的方法进行包安装(建议使用es的版本为8以下,以防出现找不到包的错误):

pip3 install "elasticsearch==7.9.1" -i  https://pypi.tuna.tsinghua.edu.cn/simple
pip3 install loguru -i https://pypi.tuna.tsinghua.edu.cn/simple

这里使用修改后的CMRESHandler文件来进行es写入,修改了​​获取ip错误​​后的代码如下:

#!/usr/bin/env python3

# 链接和初始化elasticsearch

import logging
import datetime
import socket
from threading import Timer, Lock
from enum import Enum
from elasticsearch import helpers as eshelpers
from elasticsearch import Elasticsearch, RequestsHttpConnection


# from CMRESSerializer import CMRESSerializer
# from getLocal_ip import get_local_ip
# import settings

from elasticsearch.serializer import JSONSerializer



class CMRESSerializer(JSONSerializer):
    """JSON serializer that degrades gracefully for non-JSON types."""

    def default(self, data):
        # Let the stock elasticsearch serializer try first; anything it
        # rejects (datetimes, custom objects, ...) is coerced to its
        # string representation instead of raising TypeError.
        try:
            return super().default(data)
        except TypeError:
            return str(data)


def get_local_ip():
    """Return the local outbound IP address, or '' if it cannot be determined.

    "Connects" a UDP socket to a public address (no packet is actually sent
    for a UDP connect) and reads back the source address the OS would use.
    This avoids ``socket.gethostbyname(socket.gethostname())``, which raises
    ``socket.gaierror`` on hosts whose hostname does not resolve.

    :return: the IP address as a string, or '' on failure
    """
    s = None
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]
    except OSError as e:
        print(e)
        return ''
    finally:
        # Bug fix: the original never closed the socket, leaking one file
        # descriptor per call.
        if s is not None:
            s.close()



class CMRESHandler(logging.Handler):
    """Elasticsearch log handler.

    Buffers emitted log records in memory and bulk-indexes them into
    Elasticsearch, flushing either when the buffer reaches ``buffer_size``
    or when the periodic flush timer fires.
    """

    class AuthType(Enum):
        """Authentication types supported.

        The handler supports
        - No authentication
        - Basic authentication
        """
        NO_AUTH = 0
        BASIC_AUTH = 1
        DEVOPS_AUTH = 2

    class IndexNameFrequency(Enum):
        """Index rotation frequencies supported.

        The handler supports
        - Daily indices
        - Weekly indices
        - Monthly indices
        - Yearly indices
        """
        DAILY = 0
        WEEKLY = 1
        MONTHLY = 2
        YEARLY = 3

    # Defaults for the class
    __DEFAULT_ELASTICSEARCH_HOST = [{'host': 'localhost', 'port': 9200}]
    __DEFAULT_AUTH_USER = 'admin'
    __DEFAULT_AUTH_PASSWD = 'admin'

    __DEFAULT_USE_SSL = False
    __DEFAULT_VERIFY_SSL = True
    __DEFAULT_AUTH_TYPE = AuthType.NO_AUTH
    __DEFAULT_INDEX_FREQUENCY = IndexNameFrequency.DAILY
    __DEFAULT_BUFFER_SIZE = 1000
    __DEFAULT_FLUSH_FREQ_INSEC = 1
    __DEFAULT_ADDITIONAL_FIELDS = {}
    __DEFAULT_ES_INDEX_NAME = 'python_logger'
    __DEFAULT_ES_DOC_TYPE = '_doc'
    __DEFAULT_RAISE_ON_EXCEPTION = False
    __DEFAULT_TIMESTAMP_FIELD_NAME = "timestamp"
    __DEFAULT_ISO_TIMESTAMP_FIELD_NAME = "iso_timestamp"

    # LogRecord attributes that are never copied into the ES document.
    __LOGGING_FILTER_FIELDS = ['msecs',
                               'relativeCreated',
                               'levelno',
                               'created']

    @staticmethod
    def _get_daily_index_name(es_index_name):
        """Return the elasticsearch index name for daily rotation.

        NOTE: date suffixing is intentionally disabled in this modified
        version; the prefix is returned unchanged so all records share
        one index.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m.%d'))
        return es_index_name

    @staticmethod
    def _get_weekly_index_name(es_index_name):
        """Return the elasticsearch index name for weekly rotation.

        NOTE: date suffixing is intentionally disabled; see _get_daily_index_name.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # current_date = datetime.datetime.now()
        # start_of_the_week = current_date - datetime.timedelta(days=current_date.weekday())
        # return "{0!s}-{1!s}".format(es_index_name, start_of_the_week.strftime('%Y.%m.%d'))
        return es_index_name

    @staticmethod
    def _get_monthly_index_name(es_index_name):
        """Return the elasticsearch index name for monthly rotation.

        NOTE: date suffixing is intentionally disabled; see _get_daily_index_name.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m'))
        return es_index_name

    @staticmethod
    def _get_yearly_index_name(es_index_name):
        """Return the elasticsearch index name for yearly rotation.

        NOTE: date suffixing is intentionally disabled; see _get_daily_index_name.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y'))
        return es_index_name

    _INDEX_FREQUENCY_FUNCION_DICT = {
        IndexNameFrequency.DAILY: _get_daily_index_name,
        IndexNameFrequency.WEEKLY: _get_weekly_index_name,
        IndexNameFrequency.MONTHLY: _get_monthly_index_name,
        IndexNameFrequency.YEARLY: _get_yearly_index_name
    }

    def __init__(self,
                 hosts=__DEFAULT_ELASTICSEARCH_HOST,
                 auth_details=(__DEFAULT_AUTH_USER, __DEFAULT_AUTH_PASSWD),
                 auth_type=__DEFAULT_AUTH_TYPE,
                 use_ssl=__DEFAULT_USE_SSL,
                 verify_ssl=__DEFAULT_VERIFY_SSL,
                 buffer_size=__DEFAULT_BUFFER_SIZE,
                 flush_frequency_in_sec=__DEFAULT_FLUSH_FREQ_INSEC,
                 es_index_name=__DEFAULT_ES_INDEX_NAME,
                 index_name_frequency=__DEFAULT_INDEX_FREQUENCY,
                 es_doc_type=__DEFAULT_ES_DOC_TYPE,
                 es_additional_fields=__DEFAULT_ADDITIONAL_FIELDS,
                 raise_on_indexing_exceptions=__DEFAULT_RAISE_ON_EXCEPTION,
                 default_iso_timestamp_field_name=__DEFAULT_ISO_TIMESTAMP_FIELD_NAME,
                 default_timestamp_field_name=__DEFAULT_TIMESTAMP_FIELD_NAME):
        """ Handler constructor

        :param hosts: The list of hosts that elasticsearch clients will connect. The list can be provided
            in the format ```[{'host':'host1','port':9200}, {'host':'host2','port':9200}]``` to
            make sure the client supports failover of one of the insertion nodes
        :param auth_details: When ```CMRESHandler.AuthType.BASIC_AUTH``` is used this argument must contain
            a tuple of string with the user and password that will be used to authenticate against
            the Elasticsearch servers, for example ```('User','Password')```
        :param auth_type: The authentication type to be used in the connection ```CMRESHandler.AuthType```
            Currently, NO_AUTH, BASIC_AUTH, DEVOPS_AUTH are supported
        :param use_ssl: A boolean that defines if the communications should use SSL encrypted communication
        :param verify_ssl: A boolean that defines if the SSL certificates are validated or not
        :param buffer_size: An int, Once this size is reached on the internal buffer results are flushed into ES
        :param flush_frequency_in_sec: A float representing how often and when the buffer will be flushed, even
            if the buffer_size has not been reached yet
        :param es_index_name: A string with the prefix of the elasticsearch index that will be created. Note a
            date with YYYY.MM.dd, ```python_logger``` used by default
        :param index_name_frequency: Defines what the date used in the postfix of the name would be. available values
            are selected from the IndexNameFrequency class (IndexNameFrequency.DAILY,
            IndexNameFrequency.WEEKLY, IndexNameFrequency.MONTHLY, IndexNameFrequency.YEARLY). By default
            it uses daily indices.
        :param es_doc_type: A string with the name of the document type that will be used ```python_log``` used
            by default
        :param es_additional_fields: A dictionary with all the additional fields that you would like to add
            to the logs, such the application, environment, etc.
        :param raise_on_indexing_exceptions: A boolean, True only for debugging purposes to raise exceptions
            caused when indexing
        :return: A ready to be used CMRESHandler.
        """
        logging.Handler.__init__(self)

        self.hosts = hosts
        self.auth_details = auth_details
        self.auth_type = auth_type
        self.use_ssl = use_ssl
        self.verify_certs = verify_ssl

        self.buffer_size = buffer_size
        self.flush_frequency_in_sec = flush_frequency_in_sec
        self.es_index_name = es_index_name
        self.index_name_frequency = index_name_frequency
        self.es_doc_type = es_doc_type
        # Copy so the caller's dict (and the shared class default) is never mutated.
        self.es_additional_fields = es_additional_fields.copy()

        # The upstream library added host/host_ip here via
        # socket.gethostbyname(socket.gethostname()), which fails with
        # "socket.gaierror: [Errno 8] nodename nor servname provided, or not known"
        # on hosts whose hostname does not resolve; it stays disabled.
        # self.es_additional_fields.update({'host': socket.gethostname(),
        #                                   'host_ip': socket.gethostbyname(socket.gethostname())})

        self.raise_on_indexing_exceptions = raise_on_indexing_exceptions
        self.default_iso_timestamp_field_name = default_iso_timestamp_field_name
        self.default_timestamp_field_name = default_timestamp_field_name

        self._client = None
        self._buffer = []
        self._buffer_lock = Lock()
        self._timer = None
        self._index_name_func = CMRESHandler._INDEX_FREQUENCY_FUNCION_DICT[self.index_name_frequency]
        self.serializer = CMRESSerializer()

    def __schedule_flush(self):
        """Arm a one-shot daemon timer that flushes the buffer, if none is armed."""
        if self._timer is None:
            self._timer = Timer(self.flush_frequency_in_sec, self.flush)
            # Daemon so a pending flush never blocks interpreter shutdown.
            # (attribute assignment replaces the deprecated setDaemon()).
            self._timer.daemon = True
            self._timer.start()

    def __get_es_client(self):
        """Return the Elasticsearch client for the configured auth type.

        The client is created lazily on first use and cached in
        ``self._client`` for both supported auth types.

        :raises ValueError: if the configured auth type is not supported
        """
        if self.auth_type == CMRESHandler.AuthType.NO_AUTH:
            if self._client is None:
                self._client = Elasticsearch(hosts=self.hosts,
                                             use_ssl=self.use_ssl,
                                             verify_certs=self.verify_certs,
                                             connection_class=RequestsHttpConnection,
                                             serializer=self.serializer)
            return self._client

        if self.auth_type == CMRESHandler.AuthType.BASIC_AUTH:
            if self._client is None:
                # Bug fix: the original returned a fresh client here without
                # assigning self._client, so every call opened a new
                # connection pool. Cache it like the NO_AUTH branch does.
                self._client = Elasticsearch(hosts=self.hosts,
                                             http_auth=self.auth_details,
                                             use_ssl=self.use_ssl,
                                             verify_certs=self.verify_certs,
                                             connection_class=RequestsHttpConnection,
                                             serializer=self.serializer)
            return self._client

        raise ValueError("Authentication method not supported")

    def test_es_source(self):
        """ Returns True if the handler can ping the Elasticsearch servers

        :return: A boolean, True if the connection against elasticsearch host was successful
        """
        return self.__get_es_client().ping()

    @staticmethod
    def __get_es_datetime_str(timestamp):
        """ Returns elasticsearch utc formatted time for an epoch timestamp

        The UTC time is shifted by +8 hours (CST) before formatting.
        NOTE: the fractional part appended is the raw microsecond value
        without zero-padding, not milliseconds — kept for compatibility
        with existing indexed data.

        :param timestamp: epoch, including milliseconds
        :return: A string valid for elasticsearch time record
        """
        current_date = datetime.datetime.utcfromtimestamp(timestamp)
        return "{0!s}.{1}".format(
            datetime.datetime.strftime(current_date + datetime.timedelta(hours=8), '%Y-%m-%dT%H:%M:%S'),
            int(current_date.microsecond))

    def flush(self):
        """ Flushes the buffer into ES

        Cancels any pending timer, swaps the buffer out under the lock and
        bulk-indexes its contents. Indexing errors are swallowed unless
        ``raise_on_indexing_exceptions`` is set.

        :return: None
        """
        if self._timer is not None and self._timer.is_alive():
            self._timer.cancel()
        self._timer = None

        if self._buffer:
            try:
                with self._buffer_lock:
                    logs_buffer = self._buffer
                    self._buffer = []
                actions = (
                    {
                        '_index': self._index_name_func.__func__(self.es_index_name),
                        '_type': self.es_doc_type,
                        '_source': log_record
                    }
                    for log_record in logs_buffer
                )
                eshelpers.bulk(
                    client=self.__get_es_client(),
                    actions=actions,
                    stats_only=True
                )
            except Exception as exception:
                if self.raise_on_indexing_exceptions:
                    raise exception

    def close(self):
        """ Flushes the buffer and release any outstanding resource

        :return: None
        """
        if self._timer is not None:
            self.flush()
        self._timer = None

    def emit(self, record):
        """ Emit overrides the abstract logging.Handler logRecord emit method

        Format and records the log

        :param record: A class of type ```logging.LogRecord```
        :return: None
        """
        self.format(record)

        rec = self.es_additional_fields.copy()

        # Copy every LogRecord attribute except the filtered ones; None
        # values become "" so the document has no nulls.
        for key, value in record.__dict__.items():
            if key not in CMRESHandler.__LOGGING_FILTER_FIELDS:
                rec[key] = "" if value is None else value
        rec[self.default_timestamp_field_name] = self.__get_es_datetime_str(record.created)

        with self._buffer_lock:
            self._buffer.append(rec)

        if len(self._buffer) >= self.buffer_size:
            self.flush()
        else:
            self.__schedule_flush()

2 调用

建议使用调用方法3(loguru),不仅可以修改写入到es中的message的格式,还有非常漂亮的终端信息显示;使用方法1、2(logging)的话,message的格式暂时未找到有效的修改方法,终端上的显示也不是那么好看。

2.1 调用方法1(使用logging)

import logging
from handlers import CMRESHandler

LOG_LEVEL = 'DEBUG'  # log level
LOG_FORMAT = '%(levelname)s - %(asctime)s - process: %(process)d - %(filename)s - %(name)s - %(lineno)d - %(module)s - %(message)s'  # per-record output format
ELASTIC_SEARCH_HOST = 'localhost'  # Elasticsearch Host
ELASTIC_SEARCH_PORT = 9200  # Elasticsearch Port
ELASTIC_SEARCH_INDEX = 'test_log3'  # Elasticsearch Index Name
APP_ENVIRONMENT = 'dev'  # runtime environment, e.g. dev vs production

ELASTICSEARCH_USER = 'admin'
ELASTICSEARCH_PASSWORD = 'admin'

es_handler = CMRESHandler(
    hosts=[{'host': ELASTIC_SEARCH_HOST, 'port': ELASTIC_SEARCH_PORT}],
    # user name and password
    auth_details=(ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD),
    # authentication scheme to use
    auth_type=CMRESHandler.AuthType.BASIC_AUTH,
    # target index name
    es_index_name=ELASTIC_SEARCH_INDEX,
    # extra field tagging the environment
    es_additional_fields={'environment': APP_ENVIRONMENT},
)

# The formatter settings below are commented out because they have no
# effect on what is written to Elasticsearch.
# es_handler.setLevel(level=LOG_LEVEL)
# formatter = logging.Formatter(LOG_FORMAT)
# es_handler.setFormatter(formatter)
logger = logging.getLogger('test')
logger.setLevel(LOG_LEVEL)
logger.addHandler(es_handler)
logger.debug('test write es2')

if __name__ == '__main__':
    # logger = get_logger()
    # logger.debug('this is a message 12')
    pass

2.2 调用方法2(使用logging,并使用配置)

# ES environment parameters
import datetime
import logging.config
import handlers

ELASTICSEARCH_HOST = 'localhost'
ELASTICSEARCH_PORT = '9200'
LOG_INDEX = 'test_log6'
ELASTICSEARCH_USER = 'admin'
ELASTICSEARCH_PASSWORD = 'admin'

if __name__ == '__main__':
    config = {
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'simple': {
                # NOTE: this format string has no effect on the ES output
                'format': '%(levelname)s - %(asctime)s - process: %(process)d - %(filename)s - %(name)s - %(lineno)d - %(module)s - %(message)s'
            }
        },
        'handlers': {
            'console': {
                'level': 'ERROR',
                'class': 'logging.StreamHandler',
                'formatter': 'simple'
            },
            'elasticsearch': {
                'level': 'ERROR',
                'class': 'handlers.CMRESHandler',
                'hosts': [{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT}],
                'es_index_name': LOG_INDEX,
                'es_additional_fields': {'logTime': datetime.datetime.now()},
                'auth_type': handlers.CMRESHandler.AuthType.BASIC_AUTH,
                'auth_details': (ELASTICSEARCH_USER, ELASTICSEARCH_PASSWORD),
                # 'flush_frequency_in_sec': 10,
                'use_ssl': False,
                'formatter': 'simple'
            }
        },
        'loggers': {
            'log': {
                'handlers': ['console', 'elasticsearch'],
                'level': 'ERROR',
                'propagate': True,
                'disable_existing_loggers': False,
                'formatter': 'simple'
            }
        },
    }
    logging.config.dictConfig(config)
    logger = logging.getLogger('log')  # use the 'log' logger configured above
    logger.error('test9')

终端输出效果如下:

Python写入日志到Elasticsearch(logging/loguru,可个性化定制写入信息)————附带详细代码和示例_es_03

2.3 调用方法3(使用loguru)

from loguru import logger
from handlers import CMRESHandler
from typing import AnyStr  # annotation types


def writeLog2Elasticsearch(message: AnyStr, log_index: AnyStr, log_level='info', host_name='localhost', port=9200, user_name='admin', password='admin', environment_level='test', log_format='{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}'):
    """Write a single log message to Elasticsearch through loguru.

    Builds a CMRESHandler for the given cluster, registers it on the
    loguru logger with ``log_format``, then emits ``message`` at
    ``log_level``. Unknown levels are silently ignored.
    """
    handler = CMRESHandler(hosts=[{'host': host_name, 'port': port}],
                           auth_details=(user_name, password),
                           # authentication scheme to use
                           auth_type=CMRESHandler.AuthType.BASIC_AUTH,
                           # target ES index
                           es_index_name=log_index,
                           # one index per month
                           # index_name_frequency=CMRESHandler.IndexNameFrequency.MONTHLY,
                           # extra fields written with every record
                           es_additional_fields={'environment': environment_level}
                           )

    # register the handler together with the message format
    logger.add(handler, format=log_format)

    # Dispatch table replacing the original if/elif chain; .get() returning
    # None reproduces the original trailing `else: pass` for unknown levels.
    emitters = {
        'info': logger.info,
        'warning': logger.warning,
        'error': logger.error,
        'debug': logger.debug,
    }
    emit = emitters.get(log_level)
    if emit is not None:
        emit(message)


if __name__ == '__main__':
    writeLog2Elasticsearch(log_index='test_es3', message='test213', log_level='error')

终端输出效果如下:
Python写入日志到Elasticsearch(logging/loguru,可个性化定制写入信息)————附带详细代码和示例_日志_04

es中的detail中的信息(按照格式输出):
Python写入日志到Elasticsearch(logging/loguru,可个性化定制写入信息)————附带详细代码和示例_日志_05

3 添加或删除写入的es字段信息

3.1 添加字段

在​​CMRESHandler​​​中使用​​es_additional_fields​​字段即可。

handler = CMRESHandler(hosts=[{'host': host_name, 'port': port}],
auth_details=(user_name, password),
# 可以配置对应的认证权限
auth_type=CMRESHandler.AuthType.BASIC_AUTH,
# es中的索引值
es_index_name=log_index,
# 一个月分一个 Index
# index_name_frequency=CMRESHandler.IndexNameFrequency.MONTHLY,
# 添加额外的字段
es_additional_fields={'environment': environment_level}
)

3.2 删除字段

如果使用​​from cmreslogging.handlers import CMRESHandler​​​中引入​​CMRESHandle​​​,那么只能做到增加字段,没有找到删除字段的接口(另外还可能会遇到​​socket.gaierror: [Errno 8] nodename nor servname provided, or not known​​的错误)。因此需要自定义发射的字段,需要把库文件提取出来,自己改写其中的内容。

主要修改的内容为​​emit​​​函数和​​__init__​​函数:

​emit​​​函数(发射的主要内容为​​rec​​字典变量中的内容):

def emit(self, record):
    """ Emit overrides the abstract logging.Handler logRecord emit method

    Format and records the log

    :param record: A class of type ```logging.LogRecord```
    :return: None
    """
    self.format(record)
    # Custom fields written for every record.
    # NOTE(review): this mutates the shared es_additional_fields dict on
    # every emit, so values from one record persist into the next copy.
    self.es_additional_fields.update({
        'host': get_local_ip(),
        # 'service': 'spark-server',  # settings.service_name
        'logLevel': record.levelname,
        'details': record.msg,
        'stackTrace': record.exc_info,
        'thread': record.__dict__['threadName'],
        'logger': record.pathname,
        'logData': record.exc_text
    })
    rec = self.es_additional_fields.copy()

    # The wholesale copy of LogRecord attributes is intentionally disabled;
    # only the custom fields above (plus the timestamp) are indexed.
    # for key, value in record.__dict__.items():
    #     if key not in CMRESHandler.__LOGGING_FILTER_FIELDS:
    #         rec[key] = "" if value is None else value

    rec[self.default_timestamp_field_name] = self.__get_es_datetime_str(record.created)

    # Buffer the document under the lock; flush immediately when full,
    # otherwise schedule the timed flush.
    with self._buffer_lock:
        self._buffer.append(rec)

    if len(self._buffer) >= self.buffer_size:
        self.flush()
    else:
        self.__schedule_flush()

​__init__​​函数:

def __init__(self,
             hosts=__DEFAULT_ELASTICSEARCH_HOST,
             auth_details=(__DEFAULT_AUTH_USER, __DEFAULT_AUTH_PASSWD),
             auth_type=__DEFAULT_AUTH_TYPE,
             use_ssl=__DEFAULT_USE_SSL,
             verify_ssl=__DEFAULT_VERIFY_SSL,
             buffer_size=__DEFAULT_BUFFER_SIZE,
             flush_frequency_in_sec=__DEFAULT_FLUSH_FREQ_INSEC,
             es_index_name=__DEFAULT_ES_INDEX_NAME,
             index_name_frequency=__DEFAULT_INDEX_FREQUENCY,
             es_doc_type=__DEFAULT_ES_DOC_TYPE,
             es_additional_fields=__DEFAULT_ADDITIONAL_FIELDS,
             raise_on_indexing_exceptions=__DEFAULT_RAISE_ON_EXCEPTION,
             default_iso_timestamp_field_name=__DEFAULT_ISO_TIMESTAMP_FIELD_NAME,
             default_timestamp_field_name=__DEFAULT_TIMESTAMP_FIELD_NAME):
    """Handler constructor (excerpt).

    Same signature as the library version; the body is elided except for
    the change highlighted below.
    """
    # ...
    # Comment out the host fields added by the original library: the
    # gethostbyname(gethostname()) lookup raises
    # "socket.gaierror: [Errno 8] nodename nor servname provided, or not known"
    # on hosts whose hostname does not resolve.
    # self.es_additional_fields.update({'host': socket.gethostname(),
    #                                   'host_ip': socket.gethostbyname(socket.gethostname())})
    # ...

完整的​​handles.py​​代码内容如下:

#!/usr/bin/env python3

# 链接和初始化elasticsearch

import logging
import datetime
import socket
from threading import Timer, Lock
from enum import Enum
from elasticsearch import helpers as eshelpers
from elasticsearch import Elasticsearch, RequestsHttpConnection


# from CMRESSerializer import CMRESSerializer
# from getLocal_ip import get_local_ip
# import settings

from elasticsearch.serializer import JSONSerializer


class CMRESSerializer(JSONSerializer):
    """JSON serializer with a string fallback for unsupported types."""

    def default(self, data):
        # Anything the base elasticsearch serializer cannot encode is
        # converted to str() rather than aborting the bulk request.
        try:
            return super().default(data)
        except TypeError:
            return str(data)


def get_local_ip():
    """Return the local outbound IP address, or '' if it cannot be determined.

    "Connects" a UDP socket to a public address (no packet is actually sent
    for a UDP connect) and reads back the source address the OS would use.
    This avoids ``socket.gethostbyname(socket.gethostname())``, which raises
    ``socket.gaierror`` on hosts whose hostname does not resolve.

    :return: the IP address as a string, or '' on failure
    """
    s = None
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('8.8.8.8', 80))
        return s.getsockname()[0]
    except OSError as e:
        print(e)
        return ''
    finally:
        # Bug fix: the original never closed the socket, leaking one file
        # descriptor per call.
        if s is not None:
            s.close()



class CMRESHandler(logging.Handler):
    """Elasticsearch log handler.

    Buffers emitted log records in memory and bulk-indexes them into
    Elasticsearch, flushing either when the buffer reaches ``buffer_size``
    or when the periodic flush timer fires. This modified version writes a
    custom field set (see ``emit``) instead of the raw LogRecord attributes.
    """

    class AuthType(Enum):
        """Authentication types supported.

        The handler supports
        - No authentication
        - Basic authentication
        """
        NO_AUTH = 0
        BASIC_AUTH = 1
        DEVOPS_AUTH = 2

    class IndexNameFrequency(Enum):
        """Index rotation frequencies supported.

        The handler supports
        - Daily indices
        - Weekly indices
        - Monthly indices
        - Yearly indices
        """
        DAILY = 0
        WEEKLY = 1
        MONTHLY = 2
        YEARLY = 3

    # Defaults for the class.
    # NOTE(security): a real cluster address and credentials are hard-coded
    # below; move them into configuration or environment variables before
    # sharing or committing this file.
    __DEFAULT_ELASTICSEARCH_HOST = [{'host': '10.97.138.194', 'port': 9200}]
    __DEFAULT_AUTH_USER = 'elastic'
    __DEFAULT_AUTH_PASSWD = 'ES@ynzy2020'

    __DEFAULT_USE_SSL = False
    __DEFAULT_VERIFY_SSL = True
    __DEFAULT_AUTH_TYPE = AuthType.NO_AUTH
    __DEFAULT_INDEX_FREQUENCY = IndexNameFrequency.DAILY
    __DEFAULT_BUFFER_SIZE = 1000
    __DEFAULT_FLUSH_FREQ_INSEC = 1
    __DEFAULT_ADDITIONAL_FIELDS = {}
    __DEFAULT_ES_INDEX_NAME = 'python_logger'
    __DEFAULT_ES_DOC_TYPE = '_doc'
    __DEFAULT_RAISE_ON_EXCEPTION = False
    __DEFAULT_TIMESTAMP_FIELD_NAME = "timestamp"
    __DEFAULT_ISO_TIMESTAMP_FIELD_NAME = "iso_timestamp"

    # LogRecord attributes that would be filtered out of the ES document
    # (unused by the custom emit below, kept for compatibility).
    __LOGGING_FILTER_FIELDS = ['msecs',
                               'relativeCreated',
                               'levelno',
                               'created']

    @staticmethod
    def _get_daily_index_name(es_index_name):
        """Return the elasticsearch index name for daily rotation.

        NOTE: date suffixing is intentionally disabled in this modified
        version; the prefix is returned unchanged so all records share
        one index.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m.%d'))
        return es_index_name

    @staticmethod
    def _get_weekly_index_name(es_index_name):
        """Return the elasticsearch index name for weekly rotation.

        NOTE: date suffixing is intentionally disabled; see _get_daily_index_name.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # current_date = datetime.datetime.now()
        # start_of_the_week = current_date - datetime.timedelta(days=current_date.weekday())
        # return "{0!s}-{1!s}".format(es_index_name, start_of_the_week.strftime('%Y.%m.%d'))
        return es_index_name

    @staticmethod
    def _get_monthly_index_name(es_index_name):
        """Return the elasticsearch index name for monthly rotation.

        NOTE: date suffixing is intentionally disabled; see _get_daily_index_name.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y.%m'))
        return es_index_name

    @staticmethod
    def _get_yearly_index_name(es_index_name):
        """Return the elasticsearch index name for yearly rotation.

        NOTE: date suffixing is intentionally disabled; see _get_daily_index_name.

        :param es_index_name: the prefix to be used in the index
        :return: a string containing the elasticsearch index name used
        """
        # return "{0!s}-{1!s}".format(es_index_name, datetime.datetime.now().strftime('%Y'))
        return es_index_name

    _INDEX_FREQUENCY_FUNCION_DICT = {
        IndexNameFrequency.DAILY: _get_daily_index_name,
        IndexNameFrequency.WEEKLY: _get_weekly_index_name,
        IndexNameFrequency.MONTHLY: _get_monthly_index_name,
        IndexNameFrequency.YEARLY: _get_yearly_index_name
    }

    def __init__(self,
                 hosts=__DEFAULT_ELASTICSEARCH_HOST,
                 auth_details=(__DEFAULT_AUTH_USER, __DEFAULT_AUTH_PASSWD),
                 auth_type=__DEFAULT_AUTH_TYPE,
                 use_ssl=__DEFAULT_USE_SSL,
                 verify_ssl=__DEFAULT_VERIFY_SSL,
                 buffer_size=__DEFAULT_BUFFER_SIZE,
                 flush_frequency_in_sec=__DEFAULT_FLUSH_FREQ_INSEC,
                 es_index_name=__DEFAULT_ES_INDEX_NAME,
                 index_name_frequency=__DEFAULT_INDEX_FREQUENCY,
                 es_doc_type=__DEFAULT_ES_DOC_TYPE,
                 es_additional_fields=__DEFAULT_ADDITIONAL_FIELDS,
                 raise_on_indexing_exceptions=__DEFAULT_RAISE_ON_EXCEPTION,
                 default_iso_timestamp_field_name=__DEFAULT_ISO_TIMESTAMP_FIELD_NAME,
                 default_timestamp_field_name=__DEFAULT_TIMESTAMP_FIELD_NAME):
        """ Handler constructor

        :param hosts: The list of hosts that elasticsearch clients will connect. The list can be provided
            in the format ```[{'host':'host1','port':9200}, {'host':'host2','port':9200}]``` to
            make sure the client supports failover of one of the insertion nodes
        :param auth_details: When ```CMRESHandler.AuthType.BASIC_AUTH``` is used this argument must contain
            a tuple of string with the user and password that will be used to authenticate against
            the Elasticsearch servers, for example ```('User','Password')```
        :param auth_type: The authentication type to be used in the connection ```CMRESHandler.AuthType```
            Currently, NO_AUTH, BASIC_AUTH, DEVOPS_AUTH are supported
        :param use_ssl: A boolean that defines if the communications should use SSL encrypted communication
        :param verify_ssl: A boolean that defines if the SSL certificates are validated or not
        :param buffer_size: An int, Once this size is reached on the internal buffer results are flushed into ES
        :param flush_frequency_in_sec: A float representing how often and when the buffer will be flushed, even
            if the buffer_size has not been reached yet
        :param es_index_name: A string with the prefix of the elasticsearch index that will be created. Note a
            date with YYYY.MM.dd, ```python_logger``` used by default
        :param index_name_frequency: Defines what the date used in the postfix of the name would be. available values
            are selected from the IndexNameFrequency class (IndexNameFrequency.DAILY,
            IndexNameFrequency.WEEKLY, IndexNameFrequency.MONTHLY, IndexNameFrequency.YEARLY). By default
            it uses daily indices.
        :param es_doc_type: A string with the name of the document type that will be used ```python_log``` used
            by default
        :param es_additional_fields: A dictionary with all the additional fields that you would like to add
            to the logs, such the application, environment, etc.
        :param raise_on_indexing_exceptions: A boolean, True only for debugging purposes to raise exceptions
            caused when indexing
        :return: A ready to be used CMRESHandler.
        """
        logging.Handler.__init__(self)

        self.hosts = hosts
        self.auth_details = auth_details
        self.auth_type = auth_type
        self.use_ssl = use_ssl
        self.verify_certs = verify_ssl

        self.buffer_size = buffer_size
        self.flush_frequency_in_sec = flush_frequency_in_sec
        self.es_index_name = es_index_name
        self.index_name_frequency = index_name_frequency
        self.es_doc_type = es_doc_type
        # Copy so the caller's dict (and the shared class default) is never mutated.
        self.es_additional_fields = es_additional_fields.copy()
        # Tag every record with the host name and IP. get_local_ip() is used
        # instead of the lookup below, which the upstream library performed.
        self.es_additional_fields.update({'host': socket.gethostname(),
                                          'host_ip': get_local_ip()})

        # Original upstream code, disabled because it raised
        # "socket.gaierror: [Errno 8] nodename nor servname provided, or not known"
        # on hosts whose hostname does not resolve:
        # self.es_additional_fields.update({'host': socket.gethostname(),
        #                                   'host_ip': socket.gethostbyname(socket.gethostname())})

        self.raise_on_indexing_exceptions = raise_on_indexing_exceptions
        self.default_iso_timestamp_field_name = default_iso_timestamp_field_name
        self.default_timestamp_field_name = default_timestamp_field_name

        self._client = None
        self._buffer = []
        self._buffer_lock = Lock()
        self._timer = None
        self._index_name_func = CMRESHandler._INDEX_FREQUENCY_FUNCION_DICT[self.index_name_frequency]
        self.serializer = CMRESSerializer()

    def __schedule_flush(self):
        """Arm a one-shot daemon timer that flushes the buffer, if none is armed."""
        if self._timer is None:
            self._timer = Timer(self.flush_frequency_in_sec, self.flush)
            # Daemon so a pending flush never blocks interpreter shutdown.
            # (attribute assignment replaces the deprecated setDaemon()).
            self._timer.daemon = True
            self._timer.start()

    def __get_es_client(self):
        """Return the Elasticsearch client for the configured auth type.

        The client is created lazily on first use and cached in
        ``self._client`` for both supported auth types.

        :raises ValueError: if the configured auth type is not supported
        """
        if self.auth_type == CMRESHandler.AuthType.NO_AUTH:
            if self._client is None:
                self._client = Elasticsearch(hosts=self.hosts,
                                             use_ssl=self.use_ssl,
                                             verify_certs=self.verify_certs,
                                             connection_class=RequestsHttpConnection,
                                             serializer=self.serializer)
            return self._client

        if self.auth_type == CMRESHandler.AuthType.BASIC_AUTH:
            if self._client is None:
                # Bug fix: the original returned a fresh client here without
                # assigning self._client, so every call opened a new
                # connection pool. Cache it like the NO_AUTH branch does.
                self._client = Elasticsearch(hosts=self.hosts,
                                             http_auth=self.auth_details,
                                             use_ssl=self.use_ssl,
                                             verify_certs=self.verify_certs,
                                             connection_class=RequestsHttpConnection,
                                             serializer=self.serializer)
            return self._client

        raise ValueError("Authentication method not supported")

    def test_es_source(self):
        """ Returns True if the handler can ping the Elasticsearch servers

        :return: A boolean, True if the connection against elasticsearch host was successful
        """
        return self.__get_es_client().ping()

    @staticmethod
    def __get_es_datetime_str(timestamp):
        """ Returns elasticsearch utc formatted time for an epoch timestamp

        The UTC time is shifted by +8 hours (CST) before formatting.
        NOTE: the fractional part appended is the raw microsecond value
        without zero-padding, not milliseconds — kept for compatibility
        with existing indexed data.

        :param timestamp: epoch, including milliseconds
        :return: A string valid for elasticsearch time record
        """
        current_date = datetime.datetime.utcfromtimestamp(timestamp)
        return "{0!s}.{1}".format(
            datetime.datetime.strftime(current_date + datetime.timedelta(hours=8), '%Y-%m-%dT%H:%M:%S'),
            int(current_date.microsecond))

    def flush(self):
        """ Flushes the buffer into ES

        Cancels any pending timer, swaps the buffer out under the lock and
        bulk-indexes its contents. Indexing errors are swallowed unless
        ``raise_on_indexing_exceptions`` is set.

        :return: None
        """
        if self._timer is not None and self._timer.is_alive():
            self._timer.cancel()
        self._timer = None

        if self._buffer:
            try:
                with self._buffer_lock:
                    logs_buffer = self._buffer
                    self._buffer = []
                actions = (
                    {
                        '_index': self._index_name_func.__func__(self.es_index_name),
                        '_type': self.es_doc_type,
                        '_source': log_record
                    }
                    for log_record in logs_buffer
                )
                eshelpers.bulk(
                    client=self.__get_es_client(),
                    actions=actions,
                    stats_only=True
                )
            except Exception as exception:
                if self.raise_on_indexing_exceptions:
                    raise exception

    def close(self):
        """ Flushes the buffer and release any outstanding resource

        :return: None
        """
        if self._timer is not None:
            self.flush()
        self._timer = None

    def emit(self, record):
        """ Emit overrides the abstract logging.Handler logRecord emit method

        Format and records the log

        :param record: A class of type ```logging.LogRecord```
        :return: None
        """
        self.format(record)
        # Custom fields written for every record.
        # NOTE: this mutates the shared es_additional_fields dict on every
        # emit (original behavior, preserved).
        self.es_additional_fields.update({
            'host': get_local_ip(),
            # 'service': 'spark-server',  # settings.service_name
            'logLevel': record.levelname,
            'details': record.msg,
            'stackTrace': record.exc_info,
            'thread': record.__dict__['threadName'],
            'logger': record.pathname,
            'logData': record.exc_text
        })
        rec = self.es_additional_fields.copy()

        # The wholesale copy of LogRecord attributes is intentionally
        # disabled; only the custom fields above (plus timestamp) are indexed.
        # for key, value in record.__dict__.items():
        #     if key not in CMRESHandler.__LOGGING_FILTER_FIELDS:
        #         rec[key] = "" if value is None else value

        rec[self.default_timestamp_field_name] = self.__get_es_datetime_str(record.created)

        with self._buffer_lock:
            self._buffer.append(rec)

        if len(self._buffer) >= self.buffer_size:
            self.flush()
        else:
            self.__schedule_flush()