这个脚本通过运行tasklist命令取出目标进程的pid并进行监控,结果保存到Python自带的sqlite3数据库中。本来打算同时监控service和pid,但对于我的任务来说,只要pid不存在,service就一定已经stop,所以我只监控了pid。当然,我也给出了监控service status的示例代码(脚本中被注释掉的部分),运行它需要安装pywin32库。

1.sCheck.py  监控主程序

代码

#!/usr/bin/env python
#coding=utf-8
#|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#| id  |  service  |  pid  |  time  |  status  |  runtime  |
#|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#                                               Author:dyang
#该脚本实际设计用来监控windows某服务和其进程状态的。
#该脚本设计了两个表,detail_table和result_table,默认5分钟取一次,把结果先写到detail_table,然后把最近两次的结果比较,如果pid发生变化或者服务stop了,
#就会记录到result_table(所以看结果只看result_table就行)。

#import win32serviceutil
#import win32service
import sqlite3
import os
import time

#svcType, svcState, svcControls, err, svcErr, svcCP, svcWH =  win32serviceutil.QueryServiceStatus('Dhcp') 
#if svcState==win32service.SERVICE_STOPPED:
#    print 'stopped'
#elif svcState==win32service.SERVICE_RUNNING:
#    print 'running'
#elif svcState==win32service.SERVICE_START_PENDING:
#    print 'starting'
#elif svcState==win32service.SERVICE_STOP_PENDING:
#    print 'stopping'

def creatDBandTable(db,dtn,rtn):
    """Create the detail table and the result table in the SQLite file *db*.

    Both tables share one schema:
    ``id | service | pid | time | status | runtime``.

    Args:
        db: Path of the SQLite database file (created if it does not exist).
        dtn: Name of the detail table.
        rtn: Name of the result table.

    Raises:
        sqlite3.OperationalError: If either table already exists or a name
            is not a valid SQL identifier.
    """
    # Table names cannot be bound as SQL parameters, so they are interpolated.
    schema = ('create table %s(id integer primary key,service varchar(30),'
              'pid integer,time TEXT(30),status integer(10),runtime integer)')
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        for table_name in (dtn, rtn):
            cu.execute(schema % table_name)
        cx.commit()
    finally:
        # Release the file handle even when a create statement fails.
        cx.close()
    
def getInfo(processname):
    """Find the PID of *processname* in the output of the ``tasklist`` command.

    ``tasklist`` is run exactly once so that the presence check and the PID
    come from the same snapshot. A line matches when it starts with
    *processname* followed by whitespace and a numeric PID column, so image
    names that contain spaces are handled. When several processes share the
    name, the first one listed wins.

    Args:
        processname: Image name as printed by ``tasklist`` (case-sensitive).

    Returns:
        A tuple ``(pid, time1, status)``: ``status`` is 1 when the process
        was found and 0 otherwise (``pid`` is then 0); ``time1`` is the
        ``time.ctime()`` string taken when the check finished.
    """
    pid = 0
    status = 0   # status = 0 means stopped
    try:
        pipe = os.popen('tasklist')
        try:
            listing = pipe.read()
        finally:
            pipe.close()
    except Exception as e:
        # Deliberately broad: the monitor loop must keep running, so a failed
        # listing is reported and treated as "not running".
        print(e)
        print('Sorry ,it ecounter a exception...')
        return (pid, time.ctime(), status)

    for line in listing.splitlines():
        if not processname or not line.startswith(processname):
            continue
        columns = line[len(processname):]
        # Require a whitespace boundary so 'foo.exe' does not match 'foo.exe2'.
        if not columns[:1].isspace():
            continue
        fields = columns.split()
        if fields and fields[0].isdigit():
            pid = int(fields[0])
            status = 1   # status = 1 means running
            break

    if not status:
        print('Sorry,the process is not in process list,may be not running......')
    return (pid, time.ctime(), status)
def insertDetail(db,detail_table,svc,pid,time1,status,rt1):
    """Insert one check result into the detail table.

    Args:
        db: Path of the SQLite database file.
        detail_table: Name of the detail table.
        svc: Service name stored in the ``service`` column.
        pid: Process id (integer).
        time1: Timestamp string stored in the ``time`` column.
        status: 1 for running, 0 for stopped.
        rt1: Value for the ``runtime`` column (integer).

    Returns:
        The id of the row that was just inserted.
    """
    # Only the table name is interpolated (identifiers cannot be bound);
    # the values are bound so quotes in svc/time1 cannot break the statement.
    sql = "insert into %s(id,service,pid,time,status,runtime) values(NULL,?,?,?,?,?)" % detail_table
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        cu.execute(sql, (svc, int(pid), time1, int(status), int(rt1)))
        cx.commit()
        return cu.lastrowid
    finally:
        cx.close()

def insertResult(db,result_table,svc,pid,time1,status,rt2):
    """Insert one row into the result table.

    Called when the monitored service/process changes state.

    Args:
        db: Path of the SQLite database file.
        result_table: Name of the result table.
        svc: Service name stored in the ``service`` column.
        pid: Process id (integer).
        time1: Timestamp string stored in the ``time`` column.
        status: 1 for running, 0 for stopped.
        rt2: Value for the ``runtime`` column (integer).

    Returns:
        The id of the row that was just inserted.
    """
    # Only the table name is interpolated (identifiers cannot be bound);
    # the values are bound so quotes in svc/time1 cannot break the statement.
    sql = "insert into %s values(NULL,?,?,?,?,?)" % result_table
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        cu.execute(sql, (svc, int(pid), time1, int(status), int(rt2)))
        cx.commit()
        return cu.lastrowid
    finally:
        cx.close()

def getpid(db,rowid,table):
    """Read the ``pid`` and ``runtime`` columns of the row with id *rowid*.

    Args:
        db: Path of the SQLite database file.
        rowid: Value of the ``id`` column to look up.
        table: Name of the table to query.

    Returns:
        A tuple ``(pid, runtime)``.

    Raises:
        TypeError: If no row with that id exists (``fetchone()`` yields None).
    """
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        # The id is bound as a parameter; only the table name is interpolated.
        cu.execute("select pid,runtime from '%s' where id=?" % table, (int(rowid),))
        value, run_t = cu.fetchone()
    finally:
        # Close the connection even when the row is missing or the query fails.
        cx.close()
    return (value, run_t)
def getStatus(db,rowid,table):
    """Read the ``status`` and ``runtime`` columns of the row with id *rowid*.

    A status of 0 means stopped and 1 means running.

    Args:
        db: Path of the SQLite database file.
        rowid: Value of the ``id`` column to look up.
        table: Name of the table to query.

    Returns:
        A tuple ``(status, runtime)``.

    Raises:
        TypeError: If no row with that id exists (``fetchone()`` yields None).
    """
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        # The id is bound as a parameter; only the table name is interpolated.
        cu.execute("select status,runtime from '%s' where id=?" % table, (int(rowid),))
        value, runtime1 = cu.fetchone()
    finally:
        # Close the connection even when the row is missing or the query fails.
        cx.close()
    return (value, runtime1)

def updateResultDB(db,table,rowid,rt3):
    """Set the ``runtime`` column of one existing row in the result table.

    When the monitored service goes down its process disappears too, and the
    status is recorded as 0. If the next check still sees status 0 there is no
    need to insert another row; the runtime of the existing row is simply
    updated instead.

    Args:
        db: Path of the SQLite database file.
        table: Name of the result table.
        rowid: Value of the ``id`` column identifying the row to update.
        rt3: New value for the ``runtime`` column (integer).
    """
    cx = sqlite3.connect(db)
    try:
        cu = cx.cursor()
        # Values are bound as parameters; only the table name is interpolated.
        cu.execute("update '%s' set runtime=? where id=?" % table, (int(rt3), int(rowid)))
        cx.commit()
    finally:
        cx.close()

def main(dbn,svc,pn,interval_minutes=5):
    """Monitor process *pn* forever, logging every check and every change.

    Every check is appended to the ``<svc>_detail`` table. The
    ``<svc>_result`` table gets a new row only when the state changes (the
    process stopped, came back, or its PID changed); otherwise the runtime of
    the latest result row is advanced by one interval. Only the process is
    checked, on the assumption that a stopped service has no process.

    If the process is not running at start-up, a hint is printed and the
    function returns without monitoring.

    Args:
        dbn: Path of the SQLite database file; the tables are created when
            the file does not exist yet.
        svc: Service name; used as the table-name prefix and stored per row.
        pn: Image name of the process to look for.
        interval_minutes: Whole minutes between two checks (default 5).
            Result-table runtimes are counted in minutes, while detail rows
            record the interval in seconds.
    """
    detail_table_n = svc+'_detail'              # real name of the detail table
    result_table_n = svc+'_result'              # real name of the result table
    if not os.path.exists(dbn):
        creatDBandTable(dbn,detail_table_n,result_table_n)

    pid,time1,status = getInfo(pn)
    if not status:
        print('Pls. check whether the server is runing...')
        print('Over!')
        return

    interval_seconds = interval_minutes*60
    rowid_old = insertDetail(dbn,detail_table_n,svc,pid,time1,status,0)
    rowid_r = insertResult(dbn,result_table_n,svc,pid,time1,status,0)
    print('Now it will sleep for %d minutes...'%interval_minutes)
    time.sleep(interval_seconds)

    while True:
        # PID seen by the previous check, read before the new detail row is added.
        pid_old,_ = getpid(dbn,rowid_old,detail_table_n)
        pid,time1,status = getInfo(pn)
        # Every detail row records the fixed check interval, not elapsed runtime.
        rowid_old = insertDetail(dbn,detail_table_n,svc,pid,time1,status,interval_seconds)
        # State and runtime recorded by the latest result row.
        last_status,last_runtime = getStatus(dbn,rowid_r,result_table_n)

        if status == 0:
            if last_status:
                # The process has just stopped: open a new result row.
                print('---------------insert-----------------')
                rowid_r = insertResult(dbn,result_table_n,svc,pid,time1,status,0)
            else:
                # Still stopped: only extend the runtime of the current row.
                print('status_1 is 0,just update DB...')
                updateResultDB(dbn,result_table_n,rowid_r,last_runtime+interval_minutes)
        elif not last_status or pid_old != pid:
            # The process came back, or was restarted under a new PID.
            rowid_r = insertResult(dbn,result_table_n,svc,pid,time1,status,0)
        else:
            # Same process still running: only extend the runtime.
            updateResultDB(dbn,result_table_n,rowid_r,last_runtime+interval_minutes)

        # Sleep exactly once per iteration, whichever branch ran, so that a
        # recovery is not followed by an immediate extra check that would
        # credit runtime which never elapsed.
        time.sleep(interval_seconds)
        
        
        

if __name__ =='__main__':
    # Where the SQLite database file is stored.
    db_name = "c:\\EsgLS.db"
    # Name of the monitored service (also the prefix of both table names).
    service_name = 'WebsenseEsgLogServer'
    # Image name of the process that belongs to that service.
    pid_name = 'EsgLogServer.exe'
    main(dbn=db_name, svc=service_name, pn=pid_name)

 

 

 2.读取结果的脚本。结果默认保存在c:\result.txt

代码

#!/usr/bin/env python
#coding=utf-8
import sqlite3
import os
import time

# Dump every row of the result table to c:\result.txt, one row per line,
# with each column followed by '  |  '.

# Start from a clean report. Only a failed delete (typically: the file does
# not exist yet) is ignored; anything else is a real error.
try:
    os.remove(r'c:\result.txt')
except OSError:
    pass
db = "c:\\EsgLS.db"
service_name = 'WebsenseEsgLogServer'

detail_table = service_name+'_detail'
result_table = service_name+'_result'
cx = sqlite3.connect(db)
try:
    cu = cx.cursor()
    # To dump the detail table instead, select from detail_table here.
    cu.execute("select * from '%s'"%result_table)
    result = cu.fetchall()
finally:
    # Release the database file even when the query fails.
    cx.close()

# Binary append mode keeps os.linesep from being translated a second time.
with open(r'c:\result.txt','ab') as fp:
    for items in result:
        fp.write(''.join(str(item)+'  |  ' for item in items))
        fp.write('%s'%os.linesep)

 

 以上已经修复了时间记录不准确的问题


不足:

1.不支持多进程监控

2.健壮性不够,未捕获exception并处理

3.不适用于同名多进程的监控

这些将来不断完善