按多个维度统计 nginx 访问日志的访问量。`__fields` 列出每条日志按空白字符分隔后,各索引位置对应的字段名称,
例如索引 0 对应第一个字段,即客户端 IP(client_ip)。要统计其它字段,只需在 `__fields` 中添加相应的 (索引, 字段名) 条目即可。
## 直接上代码
# -*- coding:utf-8 -*-
import sys
from collections import Counter

try:
    import json
except ImportError:
    import simplejson as json


class AnalysisNginxLog(object):
    '''
    Parse an nginx proxy access log and count occurrences per field.

    Every non-blank log line is split on whitespace and the tokens listed in
    ``__fields`` are picked out by index, so the indexes below are only valid
    for this proxy log format:

        log_format proxy '$remote_addr - $host [$time_local] "$request" $status'
                         ' $body_bytes_sent "$http_referer" '
                         '"$http_user_agent" "$http_x_forwarded_for/$upstream_addr" "$request_time/$upstream_response_time/$upstream_status/$userId" "$xm_logid"';

    The parsed records are kept in ``self.data`` as a list of dicts keyed by
    field name; a field that is missing from a short line is stored as None.
    '''

    # (token index after whitespace split, field name)
    __fields = [
        (0, 'client_ip'),
        (2, 'domain'),
        (3, 'access_time'),
        # (4, 'time_zone'),
        (5, 'http_method'),
        (6, 'uri'),
        (7, 'http_version'),
        (8, 'request_status'),
        (9, 'request_body_size')
    ]

    # Fields that get the extra clean-up performed by _filter2.
    __filter_args = [
        'uri',
        'access_time'
    ]

    def __init__(self, logfile):
        '''Remember the log path and parse the file immediately.'''
        self.logfile = logfile
        self.data = []
        self._load_data()

    def _open(self):
        '''Yield every non-blank line of the log file, stripped.'''
        with open(self.logfile) as fp:
            for raw in fp:
                line = raw.strip()
                # Skip blank lines instead of stopping at the first one, so
                # an empty line in the middle of the log does not truncate it.
                if line:
                    yield line

    def get_data(self):
        '''Re-read the log file and return the list of parsed records.'''
        self._load_data()
        return self.data

    def _filter1(self, data):
        '''Strip surrounding whitespace, '#' and double quotes from a token.'''
        return data.strip().strip('#').strip('"')

    def _filter2(self, data):
        '''
        Clean up the fields named in ``__filter_args`` in place.

        A value containing '?' is cut at the first '?' (drops the query
        string of a uri); otherwise a value containing '[' has its
        leading/trailing '[' removed (the opening bracket of $time_local).
        Values that are None (field missing from the line) are left alone.

        Returns the same dict. Raises Exception if ``data`` is not a dict.
        '''
        if not isinstance(data, dict):
            raise Exception('Invalid Parameters, must be type of dict!')
        for key in self.__filter_args:
            value = data.get(key)
            if value is None:
                # Missing field on a short line: nothing to clean.
                continue
            if '?' in value:
                data[key] = value.split('?')[0]
            elif '[' in value:
                data[key] = value.strip('[')
        return data

    def _load_data(self):
        '''
        Parse the whole log file into ``self.data``.

        The list is rebuilt from scratch on every call, so calling this more
        than once (e.g. through get_data) never duplicates records.
        '''
        records = []
        for line in self._open():
            tokens = line.split()
            record = {}
            for index, field in self.__fields:
                try:
                    record[field] = self._filter1(tokens[index])
                except IndexError as e:
                    print('Error: {}, at {}'.format(
                        e, line
                    ))
                    record[field] = None
            records.append(self._filter2(record))
        self.data = records

    def counter_by_field(self, field, top):
        '''
        Count the records per distinct value of ``field``.

        Prints the ``top`` most frequent values as JSON and returns them as a
        list of (value, count) tuples ordered by descending count.
        Raises Exception if ``field`` is not one of the configured fields.
        '''
        fields = [name for _, name in self.__fields]
        if field not in fields:
            raise Exception('Invalid Parameters!')
        counts = Counter(log[field] for log in self.data)
        result = counts.most_common(top)
        print(json.dumps(result, indent=2))
        return result


def _help():
    '''Print the command line usage and exit.'''
    print('Usage: %s [log_file] [colums] [topN] ' % sys.argv[0])
    sys.exit()


def counter_log(logfile, colums, top=10):
    '''
    Print the top-N statistics of ``logfile`` for each requested column.

    ``colums`` is a comma separated list of field names; whitespace around
    the names and empty entries are ignored. ``top`` may be an int or a
    numeric string (as received from the command line).
    '''
    nginx = AnalysisNginxLog(logfile)
    top = int(top)
    for col in (name.strip() for name in colums.split(',')):
        if not col:
            continue
        print('{c} [{col}] Top {top} {c}'.format(c='-' * 30, col=col, top=top))
        nginx.counter_by_field(col, top)


if __name__ == '__main__':
    # Expect: script log_file colums [topN]
    if not 3 <= len(sys.argv) <= 4:
        _help()
    counter_log(*sys.argv[1:])
## 运行结果
# > python analysis_nginx_proxy_log.py Usage: analysis_nginx_proxy_log.py [log_file] [colums] [topN]
> python analysis_nginx_proxy_log.py pss_00-06.log uri,client_ip 3 ------------------------------ [uri] Top 3 ------------------------------ [ [ "/api/pss/GetBatchSnDetails", 2298 ], [ "/api/xmss/CheckSubHome", 1456 ], [ "/api/xmss/GetPhyStock", 1299 ] ] ------------------------------ [client_ip] Top 3 ------------------------------ [ [ "10.108.38.18", 2150 ], [ "10.114.135.253", 1403 ], [ "10.104.3.11", 1253 ] ]