这个脚本通过运行tasklist取出目标进程的pid进行监控,并把结果存到python自带的sqlite3数据库中。本来打算同时监控service和pid,但对于我的任务来说,pid不存在了,service肯定已经stop,所以我只监控了pid。当然,我也给出了监控service status的示例代码(见下面被注释掉的部分),使用它需要安装pywin32库。
1.sCheck.py 监控主程序
代码
#!/usr/bin/env python
#coding=utf-8
#|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#| id | service | pid | time | status | runtime |
#|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
# Author:dyang
#该脚本实际设计用来监控windows某服务和其进程状态的。
#该脚本设计了两个表,detail_table和result_table,默认5分钟取一次,把结果先写到detail_table,然后把最近两次的结果比较,如果pid发生变化或者服务stop了,
#就会记录到result_table(所以看结果只看result_table就行)。
#import win32serviceutil
#import win32service
import sqlite3
import os
import time
#svcType, svcState, svcControls, err, svcErr, svcCP, svcWH = win32serviceutil.QueryServiceStatus('Dhcp')
#if svcState==win32service.SERVICE_STOPPED:
# print 'stopped'
#elif svcState==win32service.SERVICE_RUNNING:
# print 'running'
#elif svcState==win32service.SERVICE_START_PENDING:
# print 'starting'
#elif svcState==win32service.SERVICE_STOP_PENDING:
# print 'stopping'
def creatDBandTable(db,dtn,rtn):
    """Create the detail table and the result table in the SQLite file *db*.

    Both tables share the same layout:
    id | service | pid | time | status | runtime

    Args:
        db: Path of the SQLite database file (created by sqlite3 if missing).
        dtn: Name of the detail table.
        rtn: Name of the result table.

    The call is idempotent: a table that already exists is left untouched.
    """
    columns = ('(id integer primary key,service varchar(30),pid integer,'
               'time TEXT(30),status integer(10),runtime integer)')
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        for table in (dtn, rtn):
            # Table names cannot be bound as SQL parameters, so they are
            # embedded as double-quoted identifiers with '"' escaped.
            cu.execute('create table if not exists "%s"%s'
                       % (table.replace('"', '""'), columns))
        cx.commit()
    finally:
        # Close the connection even when a statement fails.
        cx.close()
def getInfo(processname):
    """Look up *processname* in the output of the ``tasklist`` command.

    ``tasklist`` is run exactly once so that the membership test and the PID
    lookup see the same snapshot of the process list.

    Args:
        processname: Image name to look for, e.g. ``'EsgLogServer.exe'``.

    Returns:
        A tuple ``(pid, time1, status)`` where ``time1`` is ``time.ctime()``
        taken after the snapshot, and ``status`` is 1 (running) with the
        parsed PID, or 0 (stopped / not found / unparsable) with ``pid`` 0.
    """
    pipe = os.popen('tasklist')
    try:
        output = pipe.read()
    finally:
        pipe.close()
    time1 = time.ctime()
    pid = 0
    status = 0  # status = 0 means stopped
    if processname not in output:
        print('Sorry,the process is not in process list,may be not running......')
        return (pid, time1, status)
    try:
        pid = _pid_from_tasklist(output, processname)
        status = 1  # status = 1 means running
    except (ValueError, IndexError) as e:
        # The name occurs somewhere in the output, but no row could be
        # parsed into a PID; report it and treat the process as stopped.
        print(e)
        print('Sorry ,it ecounter a exception...')
    return (pid, time1, status)


def _pid_from_tasklist(output, processname):
    """Return the PID from the first tasklist row whose image name is *processname*."""
    for line in output.splitlines():
        if not line.startswith(processname):
            continue
        rest = line[len(processname):]
        fields = rest.split()
        # Require whitespace right after the name so that a longer image
        # name sharing the same prefix is not mistaken for a match.
        if rest[:1].isspace() and fields:
            return int(fields[0])
    raise ValueError('no tasklist row found for %r' % (processname,))
def insertDetail(db,detail_table,svc,pid,time1,status,rt1):
    """Insert one polling result into the detail table.

    Args:
        db: Path of the SQLite database file.
        detail_table: Name of the detail table.
        svc: Service name stored in the ``service`` column.
        pid: Process id stored in the ``pid`` column.
        time1: Timestamp text stored in the ``time`` column.
        status: 1 for running, 0 for stopped.
        rt1: Value stored in the ``runtime`` column.

    Returns:
        The id of the newly inserted row.
    """
    # Values are bound as parameters so quotes in svc/time1 cannot break the
    # statement; the table name can only be embedded as a quoted identifier.
    sql = ('insert into "%s"(id,service,pid,time,status,runtime) '
           'values(NULL,?,?,?,?,?)' % detail_table.replace('"', '""'))
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        cu.execute(sql, (svc, pid, time1, status, rt1))
        cx.commit()
        return cu.lastrowid
    finally:
        cx.close()
def insertResult(db,result_table,svc,pid,time1,status,rt2):
    """Insert one state-change record into the result table.

    Called when the monitored process changes (stops, restarts, or gets a
    new PID).

    Args:
        db: Path of the SQLite database file.
        result_table: Name of the result table.
        svc: Service name stored in the ``service`` column.
        pid: Process id stored in the ``pid`` column.
        time1: Timestamp text stored in the ``time`` column.
        status: 1 for running, 0 for stopped.
        rt2: Value stored in the ``runtime`` column.

    Returns:
        The id of the newly inserted row.
    """
    # Columns are named explicitly (matching insertDetail) and values are
    # bound as parameters; only the table name is embedded, as a quoted
    # identifier.
    sql = ('insert into "%s"(id,service,pid,time,status,runtime) '
           'values(NULL,?,?,?,?,?)' % result_table.replace('"', '""'))
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        cu.execute(sql, (svc, pid, time1, status, rt2))
        cx.commit()
        return cu.lastrowid
    finally:
        cx.close()
def getpid(db,rowid,table):
    """Fetch the ``pid`` and ``runtime`` columns of the row with id *rowid*.

    Args:
        db: Path of the SQLite database file.
        rowid: Value of the ``id`` column to look up.
        table: Name of the table to read from.

    Returns:
        A tuple ``(pid, runtime)``.

    Raises:
        LookupError: If *table* has no row with that id.
    """
    # The id is bound as a parameter; the table name can only be embedded,
    # so it is written as a quoted identifier.
    sql = 'select pid,runtime from "%s" where id=?' % table.replace('"', '""')
    cx = sqlite3.connect(db)
    try:
        row = cx.cursor().execute(sql, (rowid,)).fetchone()
    finally:
        cx.close()
    if row is None:
        raise LookupError('no row with id=%r in table %r' % (rowid, table))
    value, run_t = row
    return (value, run_t)
def getStatus(db,rowid,table):
    """Fetch the ``status`` and ``runtime`` columns of the row with id *rowid*.

    A status of 0 means stopped; 1 means running.

    Args:
        db: Path of the SQLite database file.
        rowid: Value of the ``id`` column to look up.
        table: Name of the table to read from.

    Returns:
        A tuple ``(status, runtime)``.

    Raises:
        LookupError: If *table* has no row with that id.
    """
    # The id is bound as a parameter; the table name can only be embedded,
    # so it is written as a quoted identifier.
    sql = 'select status,runtime from "%s" where id=?' % table.replace('"', '""')
    cx = sqlite3.connect(db)
    try:
        row = cx.cursor().execute(sql, (rowid,)).fetchone()
    finally:
        cx.close()
    if row is None:
        raise LookupError('no row with id=%r in table %r' % (rowid, table))
    value, runtime1 = row
    return (value, runtime1)
def updateResultDB(db,table,rowid,rt3):
    """Overwrite the ``runtime`` column of one row in the result table.

    Used when a check finds the same state as the previous one: for example
    the service went down (its process disappears and status becomes 0) and
    the next check still sees status 0. Inserting another row would add
    nothing, so only the runtime of the existing row is updated.

    Args:
        db: Path of the SQLite database file.
        table: Name of the table to update.
        rowid: Value of the ``id`` column identifying the row.
        rt3: New value for the ``runtime`` column.
    """
    # Values are bound as parameters; the table name can only be embedded,
    # so it is written as a quoted identifier.
    sql = 'update "%s" set runtime=? where id=?' % table.replace('"', '""')
    cx = sqlite3.connect(db)
    try:
        cx.cursor().execute(sql, (rt3, rowid))
        cx.commit()
    finally:
        cx.close()
def main(dbn,svc,pn):
    """Poll process *pn* every five minutes and record its state in SQLite.

    Every poll is appended to the detail table ``<svc>_detail``. The result
    table ``<svc>_result`` only gets a new row when something changes (the
    process stops, comes back, or its PID changes); while nothing changes,
    the ``runtime`` of the latest result row is increased by 5 per poll.

    If the process is not running at start-up, a hint is printed and the
    function returns. Otherwise it loops forever.

    Args:
        dbn: Path of the SQLite database file; the tables are created when
            the file does not exist yet.
        svc: Service name, used as the table-name prefix and stored in each row.
        pn: Image name of the process to look for in ``tasklist``.
    """
    poll_seconds = 5*60
    detail_table_n = svc+'_detail'  # actual name of the detail table
    result_table_n = svc+'_result'  # actual name of the result table
    if not os.path.exists(dbn):
        creatDBandTable(dbn,detail_table_n,result_table_n)
    # Only the process is watched: when the service stops, its process
    # disappears as well.
    pid,time1,status = getInfo(pn)
    if not status:
        print('Pls. check whether the server is runing...')
        print('Over!')
        return
    rowid_old = insertDetail(dbn,detail_table_n,svc,pid,time1,status,0)
    rowid_r = insertResult(dbn,result_table_n,svc,pid,time1,status,0)
    print('Now it will sleep for 5 minutes...')
    time.sleep(poll_seconds)
    while True:
        # PID recorded by the previous poll, read before the new row is added.
        pid_old,_ = getpid(dbn,rowid_old,detail_table_n)
        pid,time1,status = getInfo(pn)
        # Detail rows always carry runtime 300; runtime is tracked in the
        # result table only.
        rowid_old = insertDetail(dbn,detail_table_n,svc,pid,time1,status,300)
        # State and runtime of the latest result row.
        last_status,last_runtime = getStatus(dbn,rowid_r,result_table_n)
        if status==0:
            if last_status:
                # The process has just stopped: record the change.
                print('---------------insert-----------------')
                rowid_r = insertResult(dbn,result_table_n,svc,pid,time1,status,0)
            else:
                # Still stopped: only extend the runtime of the stop record.
                print('status_1 is 0,just update DB...')
                updateResultDB(dbn,result_table_n,rowid_r,last_runtime+5)
        elif not last_status:
            # The process came back after a stop: record the change.
            rowid_r = insertResult(dbn,result_table_n,svc,pid,time1,status,0)
        elif pid_old != pid:
            # Still running but under a different PID: record the change.
            rowid_r = insertResult(dbn,result_table_n,svc,pid,time1,status,0)
        else:
            # Unchanged: only extend the runtime of the current record.
            updateResultDB(dbn,result_table_n,rowid_r,last_runtime+5)
        # Every branch waits one full interval before the next poll, so the
        # runtime bookkeeping never counts time that was not waited.
        time.sleep(poll_seconds)
if __name__ == '__main__':
    # Monitoring target, in order: where the database file is stored, the
    # name of the monitored service, and the image name of its process.
    db_name, service_name, pid_name = (
        "c:\\EsgLS.db",
        'WebsenseEsgLogServer',
        'EsgLogServer.exe',
    )
    main(db_name, service_name, pid_name)
2.读取结果的脚本。结果默认保存在c:\result.txt
代码
#!/usr/bin/env python
#coding=utf-8
import sqlite3
import os
import time
# Dump every row of the result table to c:\result.txt, one row per line with
# each column followed by ' | '.
db = "c:\\EsgLS.db"
service_name = 'WebsenseEsgLogServer'
detail_table = service_name+'_detail'
result_table = service_name+'_result'
cx = sqlite3.connect(db)
try:
    cu = cx.cursor()
    # To dump the detail table instead, select from detail_table here.
    cu.execute('select * from "%s"' % result_table.replace('"', '""'))
    result = cu.fetchall()
finally:
    # Close the connection even when the query fails.
    cx.close()
# Mode 'w' truncates any previous report, so no separate delete is needed.
# Text mode maps '\n' to the platform line ending.
with open(r'c:\result.txt','w') as fp:
    for items in result:
        fp.write(''.join(str(item)+' | ' for item in items))
        fp.write('\n')
以上已经修复了时间记录不准确的问题
不足:
1.不支持多进程监控
2.健壮性不够,未捕获exception并处理
3.不适用于同名多进程的监控
这些将来不断完善