scroll分页
scroll分页可以实现es中大量数据的分页，但是不能实现大跨度的跳转分页。
例如从第1页直接跳转到第100页，需要连续滚动99次才能到达目标页，这样就容易导致超时；最好是一页一页地向下翻页，这样就可以一直往下分页。
查询es的时候最好指定要查找的索引名称或者索引名称的通配符模式（例如 app-dzswj-business-*），这样可以大大提高es的查询效率。
def hostLogsdata(self, appname, startTime, endTime, hostname, pageIndex, pageSize, sortColumn, sortType):
    """Return one page of failed-request log entries for a host, using the scroll API.

    The query matches documents whose ``appname`` satisfies the query string
    ``"appname:" + appname``, whose ``hostname`` term equals ``hostname``,
    whose ``success`` field matches ``"false"`` and whose ``@timestamp`` lies
    between the converted start and end times.

    Page ``pageIndex`` is reached by issuing the initial search and then
    scrolling ``pageIndex - 1`` times, so the cost grows linearly with the
    page number.

    :param appname: value appended to the ``appname:`` query string.
    :param startTime: range start, passed through ``self.formartStartTime``.
    :param endTime: range end, passed through ``self.formartEndTime``.
    :param hostname: exact value for the ``hostname`` term filter.
    :param pageIndex: 1-based page number (converted with ``int``).
    :param pageSize: number of hits per page (converted with ``int``).
    :param sortColumn: field to sort on; ``""`` sorts by ``@timestamp``
        descending and ``"logtime"`` is mapped to ``@timestamp``.
    :param sortType: sort order used when ``sortColumn`` is not empty.
    :return: ``{'code': 'SUCCESS', 'message': '', 'data': {"total": ..., "list": [...]}}``.
    """
    pageIndex = int(pageIndex)
    pageSize = int(pageSize)
    userChoiceTime_start = self.formartStartTime(startTime)
    userChoiceTime_end = self.formartEndTime(endTime)

    if sortColumn == "":
        sort_dict = {"@timestamp": {"order": "desc"}}
    else:
        # Sorting by "logtime" is performed on the "@timestamp" field.
        if sortColumn == "logtime":
            sortColumn = "@timestamp"
        sort_dict = {sortColumn: {"order": sortType}}

    # NOTE(review): appname is concatenated into a query_string query; if it
    # comes from user input it can inject query syntax -- confirm with callers.
    # NOTE(review): the body says "size": 0 while the search call passes
    # size=pageSize; presumably the request parameter wins -- confirm.
    body = {
        "size": 0,
        "query": {
            "filtered": {
                "query": {
                    "query_string": {
                        "analyze_wildcard": True,
                        "query": "appname:" + appname
                    }
                },
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"hostname": hostname}},
                            {"match": {"success": "false"}},
                            {
                                "range": {
                                    "@timestamp": {
                                        "gte": tools.strtime_to_timestamp(userChoiceTime_start),
                                        "lte": tools.strtime_to_timestamp(userChoiceTime_end)
                                    }
                                }
                            }
                        ]
                    }
                }
            }
        },
        "sort": sort_dict
    }

    start = datetime.datetime.now()
    # The initial search has no scroll id yet; it returns page 1 and opens
    # the scroll context.
    res = self.es.search(body=body, scroll='1m', size=pageSize)
    scroll_size = res['hits']['total']
    scroll_id = res['_scroll_id']
    for _ in range(pageIndex - 1):
        res = self.es.scroll(scroll_id=scroll_id, scroll='1m')
        # Elasticsearch may hand back a new scroll id on every response;
        # only the most recently received one may be used.
        scroll_id = res.get('_scroll_id', scroll_id)
        if not res["hits"]["hits"]:
            # The scroll is exhausted, so every later page is empty as well.
            break

    reslist = []
    for hit in res["hits"]["hits"]:
        source = hit["_source"]
        reslist.append({
            "logtime": self.formatDate(source["logtime"]),
            "interface": source.get("interface", "无数据"),
            "appname": source["appname"],
            "waste_time": str(source.get("waste_time", "无数据")) + "ms",
            "jylsh": source.get("jylsh", "无数据"),
        })

    data = {'code': 'SUCCESS', 'message': '', 'data': {"total": scroll_size, "list": reslist}}
    end = datetime.datetime.now()
    print('Running time: %s Seconds' % (end - start))
    return data
分页函数
打印函数执行的耗时时间
# Excerpt: fetch page `pageIndex` through the scroll API and print how long
# the whole lookup took.
start = datetime.datetime.now()
# The initial search has no scroll id yet; it returns page 1 and opens the
# scroll context.
res = self.es.search(body=body, scroll='1m', size=pageSize)
scroll_size = res['hits']['total']
scroll_id = res['_scroll_id']
while pageIndex - 1 > 0:
    res = self.es.scroll(scroll_id=scroll_id, scroll='1m')
    # Elasticsearch may return a new scroll id on every response; only the
    # most recently received one may be used.
    scroll_id = res.get('_scroll_id', scroll_id)
    pageIndex -= 1
    if not res["hits"]["hits"]:
        # The scroll is exhausted, so every later page is empty as well.
        break
reslist = []
for hit in res["hits"]["hits"]:
    source = hit["_source"]
    reslist.append({
        "logtime": self.formatDate(source["logtime"]),
        "interface": source.get("interface", "无数据"),
        "appname": source["appname"],
        "waste_time": str(source.get("waste_time", "无数据")) + "ms",
        "jylsh": source.get("jylsh", "无数据"),
    })
data = {'code': 'SUCCESS', 'message': '', 'data': {"total": scroll_size, "list": reslist}}
end = datetime.datetime.now()
print('Running time: %s Seconds' % (end - start))
View Code
python正则匹配字符串替换
def geteslogs(self, startTime, endTime, querycondition, pageIndex, pageSize):
    """Return one page of log hits plus a date-histogram for the same query.

    Searches the ``app-dzswj-business-*`` indices with a ``query_string``
    query restricted to the requested ``@timestamp`` range, pages through the
    result with the scroll API, wraps the searched term inside each hit's
    stringified ``_source`` in a highlighting ``<span>``, and runs a second
    search that aggregates the hits into ``date_histogram`` buckets.

    :param startTime: range start, passed through ``self.formartStartTime``.
    :param endTime: range end, passed through ``self.formartEndTime``.
    :param querycondition: query-string expression; ``""`` is treated as ``"*"``.
    :param pageIndex: 1-based page number (converted with ``int``).
    :param pageSize: number of hits per page (converted with ``int``).
    :return: ``{'code': 'SUCCESS', 'message': '', 'data': {"total": ..., "list": ..., "bars": ...}}``.
    """
    userChoiceTime_start = self.formartStartTime(startTime)
    userChoiceTime_end = self.formartEndTime(endTime)
    res_time = self.calc_time(startTime, endTime)
    interval = tools.set_interval(res_time[0], res_time[1])
    pageIndex = int(pageIndex)
    pageSize = int(pageSize)
    if querycondition == "":
        querycondition = "*"

    # Convert the range bounds once; both request bodies use the same values.
    range_start = tools.strtime_to_timestamp(userChoiceTime_start)
    range_end = tools.strtime_to_timestamp(userChoiceTime_end)
    index_pattern = "app-dzswj-business-*"

    def build_body():
        # A fresh dict per request, so the two bodies share no nested objects.
        return {
            "sort": [{"@timestamp": {"order": "desc", "unmapped_type": "boolean"}}],
            "query": {
                "filtered": {
                    "query": {
                        "query_string": {"analyze_wildcard": True, "query": querycondition}
                    },
                    "filter": {
                        "bool": {
                            "must": [{
                                "range": {
                                    "@timestamp": {
                                        "gte": range_start,
                                        "lte": range_end,
                                        "format": "epoch_millis",
                                    }
                                }
                            }],
                            "must_not": [],
                        }
                    },
                }
            },
            "fields": ["*", "_source"],
        }

    tbody = build_body()
    cbody = build_body()
    cbody["aggs"] = {
        "2": {
            "date_histogram": {
                "field": "@timestamp",
                "interval": interval,
                "time_zone": "Asia/Shanghai",
                "min_doc_count": 0,
                "extended_bounds": {"min": range_start, "max": range_end},
            }
        }
    }

    # The initial search has no scroll id yet; it returns page 1 and opens
    # the scroll context.
    res = self.es.search(index=index_pattern, body=tbody, scroll='1m', size=pageSize)
    scroll_id = res['_scroll_id']
    for _ in range(pageIndex - 1):
        res = self.es.scroll(scroll_id=scroll_id, scroll='1m')
        # Elasticsearch may return a new scroll id on every response; only
        # the most recently received one may be used.
        scroll_id = res.get('_scroll_id', scroll_id)
        if not res["hits"]["hits"]:
            # The scroll is exhausted, so every later page is empty as well.
            break
    reslist = res["hits"]["hits"]
    total = res["hits"]["total"]

    # NOTE(review): the stringified _source and the search term are placed
    # into HTML markup without escaping; if the result is rendered as HTML
    # this allows markup injection -- confirm how the caller renders it.
    open_tag = "<span style='background-color:yellow'>"
    close_tag = "</span>"
    if ":" in querycondition:
        # "field:value" query: highlight the part after the first colon.
        term = querycondition.split(":")[1]
        pattern = None
    else:
        term = None
        pattern = re.compile(re.escape(querycondition), re.IGNORECASE)

    for hit in reslist:
        hit["logtime"] = hit["_source"]["logtime"]
        text = str(hit["_source"])
        if pattern is not None:
            # A function replacement keeps each match's own casing and
            # avoids backslash-escape processing of the matched text.
            text = pattern.sub(lambda m: open_tag + m.group(0) + close_tag, text)
        elif term:
            # Skip an empty term: str.replace("", ...) would insert the
            # markup between every character.
            text = text.replace(term, open_tag + term + close_tag)
        hit["_source"] = text

    # Histogram query for the bar chart.
    res2 = self.es.search(index=index_pattern, body=cbody)
    bars = res2["aggregations"]["2"]["buckets"]
    for bar in bars:
        bar["localtime"] = self.timestamp13_to_date(bar["key"])
    data = {'code': 'SUCCESS', 'message': '', 'data': {"total": total, "list": reslist, "bars": bars}}
    return data
View Code
python一次性查询多个索引
class esLogAPI(object):
    """Holds the Elasticsearch client used for log queries."""

    def __init__(self, url):
        """Create the client for ``url`` and store it as ``self.es``."""
        # timeout=120 is handed straight to the Elasticsearch constructor.
        client = Elasticsearch(url, timeout=120)
        self.es = client
# Example: search several indices in a single request by passing a list of
# index names as `index`.
res = self.es.search(index=["app-dzswj-business-2019.09.20","app-dzswj-business-2019.09.12"],body=tbody, scroll='1m', size=pageSize)
查看官方api
def search(self, index=None, doc_type=None, body=None, params=None):
"""
Execute a search query and get back search hits that match the query.
`<http://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html>`_
:arg index: A comma-separated list of index names to search; use `_all`
or empty string to perform the operation on all indices
View Code
python查询es的时候动态匹配索引名称
def getindexes(self, startTime, endTime):
    """Build the daily index-name patterns that cover a time range.

    Both arguments are strings in ``%Y-%m-%dT%H:%M:%S.%fZ`` format. Eight
    hours are added to each before the calendar day is taken, and one
    pattern of the form ``*-YYYY.MM.DD`` (e.g. matching
    ``app-dzswj-business-2019.09.05``) is produced for every day from the
    shifted start day through the shifted end day, inclusive.

    :param startTime: range start, e.g. ``"2019-09-05T10:00:00.000Z"``.
    :param endTime: range end in the same format.
    :return: list of index patterns in ascending date order; empty when the
        start lies after the end.
    :raises ValueError: if either string does not match the expected format.
    """
    time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
    shift = datetime.timedelta(hours=8)
    begin_date = datetime.datetime.strptime(startTime, time_format) + shift
    end_date = datetime.datetime.strptime(endTime, time_format) + shift
    if begin_date > end_date:
        return []

    # Step over calendar days rather than full datetimes: comparing full
    # datetimes dropped the last day whenever the end time-of-day was
    # earlier than the start time-of-day.
    one_day = datetime.timedelta(days=1)
    current_day = begin_date.date()
    last_day = end_date.date()
    index_list = []
    while current_day <= last_day:
        index_list.append("*-%s" % current_day.strftime("%Y.%m.%d"))
        current_day += one_day
    return index_list
def geteslogs(self, startTime, endTime, querycondition, pageIndex, pageSize):
    """Return one page of log hits plus a date-histogram for the same query.

    Searches only the indices returned by ``self.getindexes(startTime,
    endTime)`` with a ``query_string`` query restricted to the requested
    ``@timestamp`` range, pages through the result with the scroll API,
    wraps the searched term inside each hit's stringified ``_source`` in a
    highlighting ``<span>``, and runs a second search that aggregates the
    hits into ``date_histogram`` buckets.

    :param startTime: range start, passed through ``self.formartStartTime``.
    :param endTime: range end, passed through ``self.formartEndTime``.
    :param querycondition: query-string expression; ``""`` is treated as ``"*"``.
    :param pageIndex: 1-based page number (converted with ``int``).
    :param pageSize: number of hits per page (converted with ``int``).
    :return: ``{'code': 'SUCCESS', 'message': '', 'data': {"total": ..., "list": ..., "bars": ...}}``.
    """
    userChoiceTime_start = self.formartStartTime(startTime)
    userChoiceTime_end = self.formartEndTime(endTime)
    res_time = self.calc_time(startTime, endTime)
    interval = tools.set_interval(res_time[0], res_time[1])
    pageIndex = int(pageIndex)
    pageSize = int(pageSize)
    if querycondition == "":
        querycondition = "*"

    # Convert the range bounds once; both request bodies use the same values.
    range_start = tools.strtime_to_timestamp(userChoiceTime_start)
    range_end = tools.strtime_to_timestamp(userChoiceTime_end)

    def build_body():
        # A fresh dict per request, so the two bodies share no nested objects.
        return {
            "sort": [{"@timestamp": {"order": "desc", "unmapped_type": "boolean"}}],
            "query": {
                "filtered": {
                    "query": {
                        "query_string": {"analyze_wildcard": True, "query": querycondition}
                    },
                    "filter": {
                        "bool": {
                            "must": [{
                                "range": {
                                    "@timestamp": {
                                        "gte": range_start,
                                        "lte": range_end,
                                        "format": "epoch_millis",
                                    }
                                }
                            }],
                            "must_not": [],
                        }
                    },
                }
            },
            "fields": ["*", "_source"],
        }

    tbody = build_body()
    cbody = build_body()
    cbody["aggs"] = {
        "2": {
            "date_histogram": {
                "field": "@timestamp",
                "interval": interval,
                "time_zone": "Asia/Shanghai",
                "min_doc_count": 0,
                "extended_bounds": {"min": range_start, "max": range_end},
            }
        }
    }

    # Restrict the search to the index patterns derived from the time range.
    index_list_pattern = self.getindexes(startTime, endTime)

    # The initial search has no scroll id yet; it returns page 1 and opens
    # the scroll context.
    res = self.es.search(index=index_list_pattern, body=tbody, scroll='1m', size=pageSize)
    scroll_id = res['_scroll_id']
    for _ in range(pageIndex - 1):
        res = self.es.scroll(scroll_id=scroll_id, scroll='1m')
        # Elasticsearch may return a new scroll id on every response; only
        # the most recently received one may be used.
        scroll_id = res.get('_scroll_id', scroll_id)
        if not res["hits"]["hits"]:
            # The scroll is exhausted, so every later page is empty as well.
            break
    reslist = res["hits"]["hits"]
    total = res["hits"]["total"]

    # NOTE(review): the stringified _source and the search term are placed
    # into HTML markup without escaping; if the result is rendered as HTML
    # this allows markup injection -- confirm how the caller renders it.
    open_tag = "<span style='background-color:yellow'>"
    close_tag = "</span>"
    if ":" in querycondition:
        # "field:value" query: highlight the part after the first colon.
        term = querycondition.split(":")[1]
        pattern = None
    else:
        term = None
        pattern = re.compile(re.escape(querycondition), re.IGNORECASE)

    for hit in reslist:
        hit["logtime"] = hit["_source"]["logtime"]
        text = str(hit["_source"])
        if pattern is not None:
            # A function replacement keeps each match's own casing and
            # avoids backslash-escape processing of the matched text.
            text = pattern.sub(lambda m: open_tag + m.group(0) + close_tag, text)
        elif term:
            # Skip an empty term: str.replace("", ...) would insert the
            # markup between every character.
            text = text.replace(term, open_tag + term + close_tag)
        hit["_source"] = text

    # Histogram query for the bar chart.
    res2 = self.es.search(index=index_list_pattern, body=cbody)
    bars = res2["aggregations"]["2"]["buckets"]
    for bar in bars:
        bar["localtime"] = self.timestamp13_to_date(bar["key"])
    data = {'code': 'SUCCESS', 'message': '', 'data': {"total": total, "list": reslist, "bars": bars}}
    return data
匹配索引后缀
实现分组查询匹配
# Excerpt: walk a six-level nested aggregation result. Each level's buckets
# hold the next sub-aggregation under the keys "3" through "7"; the loop
# variables are named after the aggregation they iterate.
for i2 in res["aggregations"]["2"]["buckets"]:
    for i3 in i2["3"]["buckets"]:
        for i4 in i3["4"]["buckets"]:
            for i5 in i4["5"]["buckets"]:
                for i6 in i5["6"]["buckets"]:
                    for i7 in i6["7"]["buckets"]:
                        # Drop the last six characters of the bucket's
                        # formatted key.
                        timestr = i7["key_as_string"][:-6]
View Code
es深度分页性能问题
主要是给es集群中的协调节点带来了巨大的性能开销