mongodb_server.py
#! /bin/env python
#-*- coding:utf8 -*-
"""Collect MongoDB metrics for upload to falcon.

The module exposes ``mongodbMonitor``, which connects to one MongoDB
process and flattens ``serverStatus``, ``replSetGetStatus`` and the
``config`` database into a flat ``{metric_name: value}`` dict.

The code is written to run unchanged on Python 2 and Python 3.
"""
import sys
import os

from bson.timestamp import Timestamp
import pymongo
from pymongo import MongoClient

# serverStatus lock-mode letters -> names used in the uploaded metric keys.
_LOCK_TYPE_NAMES = {
    "R": "Slock",
    "W": "Xlock",
    "r": "ISlock",
    "w": "IXlock",
}

# serverStatus["rocksdb"] keys that are exported; the metric name is
# "rocksdb_" + key with "-" replaced by "_".
_ROCKSDB_KEYS = (
    "num-immutable-mem-table",
    "mem-table-flush-pending",
    "compaction-pending",
    "background-errors",
    "num-entries-active-mem-table",
    "num-entries-imm-mem-tables",
    "num-snapshots",
    "oldest-snapshot-time",
    "num-live-versions",
    "total-live-recovery-units",
)


def _version_tuple(version):
    """Return ``(major, minor)`` as ints parsed from a version string.

    Non-numeric suffixes (for example ``"4.2.1-rc0"``) are ignored and
    missing components count as 0, so the result can be compared
    numerically instead of lexically.
    """
    parts = []
    for piece in str(version).split(".")[:2]:
        digits = ""
        for char in piece:
            if not char.isdigit():
                break
            digits += char
        parts.append(int(digits) if digits else 0)
    while len(parts) < 2:
        parts.append(0)
    return tuple(parts)


def _optime_seconds(optime):
    """Return the seconds part of a replica-set member's optime.

    ``optime`` is either a document holding the timestamp under ``"ts"``
    or the timestamp object itself; both shapes are accepted.
    """
    stamp = optime["ts"] if isinstance(optime, dict) else optime
    return stamp.time


def _prefixed(prefix, section):
    """Return a copy of ``section`` with ``prefix`` prepended to every key."""
    return dict((prefix + key, section[key]) for key in section)


class mongodbMonitor(object):
    """Gather monitoring metrics from a single MongoDB process."""

    def mongodb_connect(self, host=None, port=None, user=None, password=None):
        """Open a connection and verify that the server answers.

        Returns a ``(err, conn)`` pair: ``(0, client)`` on success, or
        ``(exception, None)`` when the server cannot be reached or the
        credentials are rejected, so callers can test ``err != 0``.
        """
        conn = None
        try:
            # Server selection times out after 1 second.
            conn = MongoClient(host, port, serverSelectionTimeoutMS=1000)
            if user and password:
                conn["admin"].authenticate(user, password)
            # Force a round trip so an unreachable server is detected here.
            conn.server_info()
        except pymongo.errors.PyMongoError as exc:
            if conn is not None:
                conn.close()
            return exc, None
        print(conn)
        return 0, conn

    def get_mongo_role(self, conn):
        """Classify the process behind ``conn``.

        Returns 1 for a data node (standalone, replset primary or
        secondary), 2 for a mongos and 3 for a config server.
        """
        conn.server_info()
        if conn.is_mongos:
            return 2
        # Not a mongos but owns config.chunks: treat it as a config server.
        if "chunks" in conn.get_database("config").collection_names():
            return 3
        return 1

    def get_mongo_monitor_data(self, conn):
        """Return every metric applicable to the node behind ``conn``.

        ``mongo_local_alive`` is always reported as 1.  Data nodes add
        serverStatus metrics (plus replica-set metrics for primaries and
        secondaries); mongos nodes add sharding metrics.
        """
        mongo_monitor_dict = {"mongo_local_alive": 1}
        mongo_role = self.get_mongo_role(conn)
        if mongo_role == 1:
            mongodb_role, serverStatus_dict = self.serverStatus(conn)
            mongo_monitor_dict.update(serverStatus_dict)
            if mongodb_role in ("master", "secondary"):
                mongo_monitor_dict.update(self.repl_status(conn))
            else:
                print("this is standalone node")
        elif mongo_role == 2:
            mongo_monitor_dict.update(self.shard_status(conn))
        return mongo_monitor_dict

    def serverStatus(self, connection):
        """Flatten the ``serverStatus`` command output into metrics.

        Returns ``(mongodb_role, metrics)`` where ``mongodb_role`` is
        ``"master"``, ``"secondary"``, ``"standalone"`` or ``""`` (a
        replica-set member that is neither primary nor secondary).
        """
        status = connection.admin.command("serverStatus")
        metrics = {}
        mongo_version = status["version"]

        metrics["uptime"] = int(status["uptime"])
        metrics.update(_prefixed("asserts_", status["asserts"]))

        # "extra_info" section: page_faults (falcon counter type).
        if "extra_info" in status:
            metrics["page_faults"] = status["extra_info"]["page_faults"]

        # "connections" section.
        connections = status["connections"]
        current_conn = connections["current"]
        available_conn = connections["available"]
        total_conn = current_conn + available_conn
        metrics["connections_current"] = current_conn
        metrics["connections_available"] = available_conn
        # Multiply by a float first: integer division would truncate the
        # ratio to 0 on Python 2.
        metrics["connections_used_percent"] = (
            int(current_conn * 100.0 / total_conn) if total_conn else 0
        )
        # Total connections created since the server started (counter).
        metrics["connections_totalCreated"] = connections["totalCreated"]

        # "globalLock" current queue.
        current_queue = status["globalLock"]["currentQueue"]
        for queue_key in ("total", "readers", "writers"):
            metrics["globalLock_currentQueue_" + queue_key] = current_queue[queue_key]

        # "locks" section, layout changed in version 3.0.
        if "locks" in status and _version_tuple(mongo_version) >= (3, 0):
            # Scopes are e.g. Global, Database, Collection, oplog.
            for lock_scope, scope_stats in status["locks"].items():
                for lock_metric, by_type in scope_stats.items():
                    for lock_type, value in by_type.items():
                        # Unknown lock modes keep their raw letter instead
                        # of reusing the previous iteration's name.
                        lock_name = _LOCK_TYPE_NAMES.get(lock_type, lock_type)
                        key = "locks_%s_%s_%s" % (lock_scope, lock_metric, lock_name)
                        metrics[key] = value

        # Counter-type sections that are exported key by key.
        for section in ("network", "opcounters", "opcountersRepl"):
            if section in status:
                metrics.update(_prefixed(section + "_", status[section]))

        # "mem" section: every field except "bits" and "supported" is
        # multiplied by 1024*1024.
        if "mem" in status:
            for mem_metric, value in status["mem"].items():
                if mem_metric not in ("bits", "supported"):
                    value = value * 1024 * 1024
                metrics["mem_" + mem_metric] = value

        # "dur" section.
        if "dur" in status:
            dur = status["dur"]
            metrics["dur_journaledBytes"] = dur["journaledMB"] * 1024 * 1024
            metrics["dur_writeToDataFilesBytes"] = dur["writeToDataFilesMB"] * 1024 * 1024
            metrics["dur_commitsInWriteLock"] = dur["commitsInWriteLock"]

        # "repl" section decides the role reported to the caller.
        mongodb_role = ""
        if "repl" in status and "secondary" in status["repl"]:
            if status["repl"]["ismaster"]:
                mongodb_role = "master"
            if status["repl"]["secondary"]:
                mongodb_role = "secondary"
        else:
            # Not running in replica-set mode.
            mongodb_role = "standalone"

        # "backgroundFlushing" section, only for MMAPv1.
        if "backgroundFlushing" in status:
            for flush_metric, value in status["backgroundFlushing"].items():
                if flush_metric != "last_finished":  # discarded on purpose
                    metrics["backgroundFlushing_" + flush_metric] = value

        # Cursor statistics from the "metrics" section.
        if "metrics" in status and "cursor" in status["metrics"]:
            cursor_status = status["metrics"]["cursor"]
            metrics["cursor_timedOut"] = cursor_status["timedOut"]
            for open_key in ("noTimeout", "pinned", "total"):
                metrics["cursor_open_" + open_key] = cursor_status["open"][open_key]

        if "wiredTiger" in status:
            metrics.update(self._wiredtiger_metrics(status["wiredTiger"]))

        if "rocksdb" in status:
            rocksdb = status["rocksdb"]
            for key in _ROCKSDB_KEYS:
                metrics["rocksdb_" + key.replace("-", "_")] = rocksdb[key]

        if "PerconaFT" in status:
            metrics.update(self._perconaft_metrics(status["PerconaFT"]))

        return (mongodb_role, metrics)

    def _wiredtiger_metrics(self, wt_status):
        """Return the exported metrics of the "wiredTiger" section."""
        metrics = {}
        wt_cache = wt_status["cache"]
        metrics["wt_cache_used_total_bytes"] = wt_cache["bytes currently in the cache"]
        metrics["wt_cache_dirty_bytes"] = wt_cache["tracked dirty bytes in the cache"]
        metrics["wt_cache_readinto_bytes"] = wt_cache["bytes read into cache"]
        metrics["wt_cache_writtenfrom_bytes"] = wt_cache["bytes written from cache"]

        tickets = wt_status["concurrentTransactions"]
        metrics["wt_concurrentTransactions_write"] = tickets["write"]["available"]
        metrics["wt_concurrentTransactions_read"] = tickets["read"]["available"]

        block_manager = wt_status["block-manager"]
        metrics["wt_bm_bytes_read"] = block_manager["bytes read"]
        metrics["wt_bm_bytes_written"] = block_manager["bytes written"]
        metrics["wt_bm_blocks_read"] = block_manager["blocks read"]
        metrics["wt_bm_blocks_written"] = block_manager["blocks written"]
        return metrics

    def _perconaft_metrics(self, ft_status):
        """Return the exported metrics of the "PerconaFT" section."""
        metrics = {}
        for key in ("count", "time", "bytes"):
            metrics["PerconaFT_log_" + key] = ft_status["log"][key]
        for key in ("count", "time"):
            metrics["PerconaFT_fsync_" + key] = ft_status["fsync"][key]

        cachetable_size = ft_status["cachetable"]["size"]
        for key in ("current", "writing", "limit"):
            metrics["PerconaFT_cachetable_size_" + key] = cachetable_size[key]

        checkpoint = ft_status["checkpoint"]
        metrics["PerconaFT_checkpoint_count"] = checkpoint["count"]
        metrics["PerconaFT_checkpoint_time"] = checkpoint["time"]
        for node_kind in ("nonleaf", "leaf"):
            written = checkpoint["write"][node_kind]
            prefix = "PerconaFT_checkpoint_write_" + node_kind
            metrics[prefix + "_count"] = written["count"]
            metrics[prefix + "_time"] = written["time"]
            metrics[prefix + "_bytes_compressed"] = written["bytes"]["compressed"]
            metrics[prefix + "_bytes_uncompressed"] = written["bytes"]["uncompressed"]

        for item, timings in ft_status["serializeTime"].items():
            prefix = "PerconaFT_serializeTime_" + item
            for timing_key in timings:
                metrics[prefix + "_" + timing_key] = timings[timing_key]

        for item, ratio in ft_status["compressionRatio"].items():
            metrics["PerconaFT_compressionRatio_" + item] = ratio
        return metrics

    def repl_status(self, connection):
        """Return replica-set metrics for the member behind ``connection``.

        Reports the member state, health, optime, election time and
        config version of this member, the replication lag in seconds
        when this member is a SECONDARY, and the estimated oplog window
        in hours.
        """
        replStatus = connection.admin.command("replSetGetStatus")
        print(replStatus)
        repl_status_dict = {}
        # myState: 1 for PRIMARY, 2 for SECONDARY.
        my_state = replStatus["myState"]
        repl_status_dict["repl_myState"] = my_state
        repl_status_members = replStatus["members"]
        master_optime = 0  # oplog time of the PRIMARY
        myself_optime = 0  # oplog time of this member
        print("开始打印repl_status_members")
        print(repl_status_members)
        print("结束打印repl_status_members")
        for repl_member in repl_status_members:
            if "self" in repl_member and repl_member["self"]:
                myself_optime = _optime_seconds(repl_member["optime"])
                repl_status_dict["repl_health"] = repl_member["health"]
                print(myself_optime)
                repl_status_dict["repl_optime"] = myself_optime
                # The member document uses the bare field names; the
                # "repl_" prefix only belongs to the uploaded metric.
                if "electionTime" in repl_member:
                    repl_status_dict["repl_electionTime"] = repl_member["electionTime"].time
                if "configVersion" in repl_member:
                    repl_status_dict["repl_configVersion"] = repl_member["configVersion"]
            if my_state == 2 and repl_member["state"] == 1:
                # We are a SECONDARY and this member is the PRIMARY.
                master_optime = _optime_seconds(repl_member["optime"])
        if my_state == 2:
            # NOTE(review): when no PRIMARY is listed master_optime stays 0
            # and the lag becomes a large negative number.
            repl_status_dict["repl_lag"] = master_optime - myself_optime

        # Oplog window in hours.
        oplog_collection = connection["local"]["oplog.rs"]
        oplog_tFirst = next(
            oplog_collection.find({}, {"ts": 1}).sort('$natural', pymongo.ASCENDING).limit(1)
        )
        oplog_tLast = next(
            oplog_collection.find({}, {"ts": 1}).sort('$natural', pymongo.DESCENDING).limit(1)
        )
        oplogrs_collstats = connection["local"].command("collstats", "oplog.rs")
        used_bytes = oplogrs_collstats["count"] * oplogrs_collstats["avgObjSize"]
        if "maxSize" in oplogrs_collstats:
            capacity_bytes = oplogrs_collstats["maxSize"]
        else:
            capacity_bytes = oplogrs_collstats["storageSize"]
        # Scale the covered time span by capacity/used to extrapolate to a
        # full oplog.  Float division: integer division would truncate the
        # factor (down to 0 when capacity < used).
        window_multiple = float(capacity_bytes) / used_bytes if used_bytes else 1
        covered_hours = (oplog_tLast["ts"].time - oplog_tFirst["ts"].time) / 3600.0
        repl_status_dict["repl_oplog_window"] = round(covered_hours * window_multiple, 2)
        return repl_status_dict

    # only for mongos node
    def shard_status(self, conn):
        """Return sharding metrics read from the ``config`` database.

        Covers the balancer state and active window, the chunk size, the
        balancer lock state and the number of shards and mongos entries.
        """
        config_db = conn["config"]
        settings_col = config_db["settings"]
        balancer_doc = settings_col.find_one({'_id': 'balancer'})
        shards_dict = {}

        # The balancer counts as enabled unless the settings document
        # explicitly says "stopped"; the field may be absent.
        if balancer_doc is not None and balancer_doc.get("stopped"):
            shards_dict["shards_BalancerState"] = 0
        else:
            shards_dict["shards_BalancerState"] = 1

        # shards_activeWindow: 0 = no window configured, 1 = configured.
        # shards_activeWindow_start / _stop: "23:30" is reported as "23.30".
        active_window = None
        if balancer_doc is not None:
            active_window = balancer_doc.get("activeWindow")
        if active_window is None:
            shards_dict["shards_activeWindow"] = 0
        else:
            shards_dict["shards_activeWindow"] = 1
            if "start" in active_window:
                shards_dict["shards_activeWindow_start"] = active_window["start"].replace(":", ".")
            if "stop" in active_window:
                shards_dict["shards_activeWindow_stop"] = active_window["stop"].replace(":", ".")

        # shards_chunkSize metric
        chunksize_doc = settings_col.find_one({"_id": "chunksize"})
        if chunksize_doc is not None:
            shards_dict["shards_chunkSize"] = chunksize_doc["value"]

        # shards_isBalancerRunning metric
        balancer_lock_doc = config_db["locks"].find_one({'_id': 'balancer'})
        if balancer_lock_doc is None:
            print("config.locks collection empty or missing. be sure you are connected to a mongos")
            shards_dict["shards_isBalancerRunning"] = 0
        elif balancer_lock_doc["state"] > 0:
            shards_dict["shards_isBalancerRunning"] = 1
        else:
            shards_dict["shards_isBalancerRunning"] = 0

        # shards_size and shards_mongosSize metrics
        shards_dict["shards_size"] = config_db["shards"].count()
        shards_dict["shards_mongosSize"] = config_db["mongos"].count()
        return shards_dict
mongodb_monitor.py
#! /bin/env python
#-*- coding:utf8 -*-
"""Push metrics of every configured local MongoDB instance to falcon.

Reads the endpoint name from ``../../cfg.json`` and the instance list
from ``../conf/mongomon.conf``, collects metrics through
``mongodbMonitor`` and posts them to the local falcon push endpoint.
"""
import sys
import os
import time
import datetime
import socket
import yaml
import requests
import json

from mongodb_server import mongodbMonitor

falcon_client = "http://127.0.0.1:1988/v1/push"
ts = int(time.time())

# Seconds to wait for the push endpoint before giving up, so a stuck
# agent cannot hang the collector forever.
PUSH_TIMEOUT = 10

# all falcon counter type metrics list
mongodb_counter_metric = [
    "uptime", "asserts_msg", "asserts_regular", "asserts_rollovers",
    "asserts_user", "asserts_warning", "page_faults",
    "connections_totalCreated",
    "locks_Global_acquireCount_ISlock", "locks_Global_acquireCount_IXlock",
    "locks_Global_acquireCount_Slock", "locks_Global_acquireCount_Xlock",
    "locks_Global_acquireWaitCount_ISlock",
    "locks_Global_acquireWaitCount_IXlock",
    "locks_Global_timeAcquiringMicros_ISlock",
    "locks_Global_timeAcquiringMicros_IXlock",
    "locks_Database_acquireCount_ISlock",
    "locks_Database_acquireCount_IXlock",
    "locks_Database_acquireCount_Slock", "locks_Database_acquireCount_Xlock",
    "locks_Collection_acquireCount_ISlock",
    "locks_Collection_acquireCount_IXlock",
    "locks_Collection_acquireCount_Xlock",
    "opcounters_command", "opcounters_insert", "opcounters_delete",
    "opcounters_update", "opcounters_query", "opcounters_getmore",
    "opcountersRepl_command", "opcountersRepl_insert",
    "opcountersRepl_delete", "opcountersRepl_update",
    "opcountersRepl_query", "opcountersRepl_getmore",
    "network_bytesIn", "network_bytesOut", "network_numRequests",
    "backgroundFlushing_flushes", "backgroundFlushing_last_ms",
    "cursor_timedOut",
    "wt_cache_readinto_bytes", "wt_cache_writtenfrom_bytes",
    "wt_bm_bytes_read", "wt_bm_bytes_written",
    "wt_bm_blocks_read", "wt_bm_blocks_written",
]


def _falcon_item(endpoint, metric, tags, value, counter_type):
    """Build one entry of the JSON list accepted by the push endpoint."""
    return {
        "endpoint": endpoint,
        "metric": metric,
        "tags": tags,
        "timestamp": ts,
        "value": value,
        "step": 60,
        "counterType": counter_type,
    }


def _push(items):
    """POST ``items`` to the falcon push endpoint and return the response."""
    return requests.post(falcon_client, data=json.dumps(items), timeout=PUSH_TIMEOUT)


def main():
    """Collect and upload the metrics of every configured instance."""
    with open('../../cfg.json') as cfg_file:
        mongodb_hostname = json.load(cfg_file)['hostname']

    # safe_load: the config only needs plain YAML types, and yaml.load
    # without an explicit Loader is unsafe and rejected by recent PyYAML.
    with open("../conf/mongomon.conf") as conf_file:
        mongodb_items = yaml.safe_load(conf_file)["items"]

    counter_metrics = frozenset(mongodb_counter_metric)

    for mongodb_ins in mongodb_items:
        mongodb_monitor = mongodbMonitor()
        mongodb_tag = "mongo=" + str(mongodb_ins["port"])
        try:
            # user/password are optional: instances without auth may omit them.
            err, conn = mongodb_monitor.mongodb_connect(
                host="127.0.0.1",
                port=mongodb_ins["port"],
                user=mongodb_ins.get("user"),
                password=mongodb_ins.get("password"),
            )
        except Exception as exc:
            # A connection attempt that raises is treated like a returned
            # error so the remaining instances are still processed.
            err, conn = exc, None

        if err != 0:
            # The instance is dead: upload "mongo_local_alive=0", then
            # move on to the next instance.
            _push([_falcon_item(mongodb_hostname, "mongo_local_alive",
                                mongodb_tag, 0, "GAUGE")])
            continue

        mongodb_dict = mongodb_monitor.get_mongo_monitor_data(conn)
        mongodb_update_list = []
        for mongodb_metric, value in mongodb_dict.items():
            counter_type = "COUNTER" if mongodb_metric in counter_metrics else "GAUGE"
            mongodb_update_list.append(
                _falcon_item(mongodb_hostname, mongodb_metric, mongodb_tag,
                             value, counter_type)
            )

        print("开始上报")
        print(json.dumps(mongodb_update_list))
        r = _push(mongodb_update_list)
        print(r)


if __name__ == "__main__":
    main()