roslaunch 中有一个小功能,类似于 Android init 进程中的 service 重启功能:如果某个进程在创建时设置了 respawn 属性,那么该进程退出(dead)后会被重新启动,从而起到进程监控的作用。相关源码位于 pmon.py。
下面分析其主要功能:
#ros_comm\tools\roslaunch\src\roslaunch\pmon.py
class Process(object):
    """
    Basic process representation for L{ProcessMonitor}. Must be subclassed
    to provide actual start()/stop() implementations.

    Constructor *must* be called from the Python Main thread in order
    for signal handlers to register properly.
    """

    def __init__(self, package, name, args, env,
                 respawn=False, respawn_delay=0.0, required=False):
        """
        @param package: package the process belongs to (stored as-is)
        @param name: name of the process; L{ProcessMonitor} uses it to
            reject duplicate registrations and in its log messages
        @param args: arguments of the process (stored as-is)
        @param env: environment of the process (stored as-is)
        @param respawn: if True, the process should be restarted after
            it has died
        @type  respawn: bool
        @param respawn_delay: delay applied before a respawn
            (presumably seconds -- TODO confirm)
        @type  respawn_delay: float
        @param required: if True, the death of this process makes the
            monitor initiate shutdown
        @type  required: bool
        """
        # Identity of the process. The monitor reads p.name, so the
        # constructor must not drop these arguments.
        self.package = package
        self.name = name
        self.args = args
        self.env = env
        # ① process attributes; respawn says whether the process has to
        # be restarted after it dies
        self.respawn = respawn
        self.respawn_delay = respawn_delay
        self.required = required
Process() 类是 ProcessMonitor 所监控的进程需要继承的基类,可以通过 respawn 属性设置进程退出(dead)后是否需要重启。
通过调用 start_process_monitor() 函数可以启动一个 ProcessMonitor 线程(ProcessMonitor 继承自 Thread):
# ① more than one ProcessMonitor may exist; every monitor that gets
# started is kept in this module-level list
_pmons = []
_pmon_counter = 0


def start_process_monitor():
    """
    Create a new L{ProcessMonitor}, record it in C{_pmons} and start
    its thread.

    @return: the started monitor, or None if shutdown has already been
        initiated (C{_shutting_down} is set)
    @rtype: L{ProcessMonitor}
    """
    global _pmon_counter
    if _shutting_down:
        # cannot start a new ProcessMonitor once shutdown was initiated
        return None
    _pmon_counter += 1
    name = "ProcessMonitor-%s" % _pmon_counter
    logger.info("start_process_monitor: creating ProcessMonitor")
    # ② create the ProcessMonitor object
    process_monitor = ProcessMonitor(name)
    # prevent race condition with pmon_shutdown() being triggered
    # as we are starting a ProcessMonitor (i.e. user hits ctrl-C
    # during startup). The lock is taken with 'with' so that it is only
    # released if it was actually acquired.
    with _shutdown_lock:
        # ③ add the monitor to _pmons and start its thread
        _pmons.append(process_monitor)
        process_monitor.start()
        logger.info("start_process_monitor: ProcessMonitor started")
    return process_monitor
# Thread that watches registered processes, restarts the ones due for a
# respawn and shuts itself down when a required process dies.
# NOTE(review): this is an abridged excerpt whose indentation was lost;
# several statements are elided, so it is not runnable as shown.
class ProcessMonitor(Thread):
def __init__(self, name="ProcessMonitor"):
Thread.__init__(self, name=name)
# NOTE(review): self.plock, self.is_shutdown, self.done and
# self.reacquire_signals are used by the methods of this class but are
# not initialized in this excerpt.
# ① processes currently being monitored
self.procs = []
# #885: ensure core procs
self.core_procs = []
def register(self, p):
"""
Register process with L{ProcessMonitor}
@param p: Process
@type p: L{Process}
@raise RLException: if process with same name is already registered
"""
logger.info("ProcessMonitor.register[%s]"%p.name)
e = None
with self.plock:
if self.has_process(p.name):
e = RLException("cannot add process with duplicate name '%s'"%p.name)
elif self.is_shutdown:
e = RLException("cannot add process [%s] after process monitor has been shut down"%p.name)
else: #② register the process with the ProcessMonitor, i.e. append it to procs
self.procs.append(p)
# NOTE(review): 'e' is built but never raised in this excerpt; the
# docstring promises an RLException, so the raise is presumably elided
# here -- confirm against the full file.
#③ thread function of the ProcessMonitor thread
def run(self):
"""
thread routine of the process monitor. NOTE: you must still
call mainthread_spin or mainthread_spin_once() from the main
thread in order to pick up main thread work from the process
monitor.
"""
# NOTE(review): two 'try' statements are opened below but only one
# handler ('finally') is visible; the other handler is elided in this
# excerpt. What is visible: _post_run() runs in a 'finally' after _run().
try:
#don't let exceptions bomb thread, interferes with exit
try:
self._run()
finally:
self._post_run()
#④ main body of the ProcessMonitor thread function
def _run(self):
plock = self.plock
dead = []
respawn = []
# main loop; self.is_shutdown is the monitor's off switch
while not self.is_shutdown:
# poll the monitored processes
# NOTE(review): 'procs' is never assigned in this excerpt -- presumably
# a snapshot of self.procs; confirm against the full file.
for p in procs:
# NOTE(review): the handler of this 'try' is elided in this excerpt.
try:
if not p.is_alive():
logger.debug("Process[%s] has died, respawn=%s, required=%s, exit_code=%s",
p.name,
"True(%f)" % p.respawn_delay if p.respawn else p.respawn,
p.required, p.exit_code)
exit_code_str = p.get_exit_description()
#⑤ this process is required; a required process has died, so the
# monitor shuts itself down by setting self.is_shutdown to True
if p.required:
printerrlog('='*80+"REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n"%(p.name, exit_code_str)+'='*80)
self.is_shutdown = True
# NOTE(review): nothing in this excerpt appends to 'dead' or 'respawn';
# the code that fills them is presumably elided.
# processes whose restart is not due yet are collected here and carried
# over to the next iteration
_respawn=[]
for r in respawn:
try:
if self.is_shutdown:
break
# NOTE(review): should_respawn() is compared with 0.0, so it presumably
# returns the remaining delay before the restart is due -- confirm.
if r.should_respawn() <= 0.0:
printlog("[%s] restarting process" % r.name)
# stop process, don't accumulate errors
#⑥ restart the processes that need respawning; this is the actual
# process-monitoring step
r.stop([])
r.start()
else:
# not ready yet, keep it around
_respawn.append(r)
except:
traceback.print_exc()
logger.error("Restart failed %s",traceback.format_exc())
respawn = _respawn
time.sleep(0.1) #yield thread
#moved this to finally block of _post_run
#self._post_run() #kill all processes
从上面的代码可以看出,self.is_shutdown 是 pmon 的关闭开关:当 is_shutdown 为 True 时,while 循环退出,随后 run() 在 finally 中执行 _post_run(),杀掉所有被监控的进程。杀进程是有顺序的:先并行杀掉非核心进程,最后再按 core_procs 的逆序逐个杀掉核心进程(core_procs)。
def _post_run(self):
    """
    Kill every process still registered with this monitor and release
    the monitor's data structures. Invoked by run() once the monitoring
    loop has ended.

    Non-core processes are killed in parallel by worker threads; core
    processes are killed afterwards, one at a time, in reverse order.
    Errors collected while killing are reported with printerrlog().
    """
    logger.info("ProcessMonitor._post_run %s"%self)
    # this is already true entering, but go ahead and make sure
    self.is_shutdown = True

    # killall processes on run exit
    q = Queue()
    # nothing has been enqueued yet at this point
    q.join()

    with self.plock:
        # make copy of core_procs for threadsafe usage
        core_procs = self.core_procs[:]
        logger.info("ProcessMonitor._post_run %s: remaining procs are %s"%(self, self.procs))

        # enqueue all non-core procs in reverse order for parallel kill
        # #526/885: ignore core procs
        for p in reversed(self.procs):
            if p not in core_procs:
                q.put(p)

    # use 10 workers
    killers = []
    for i in range(10):
        t = _ProcessKiller(q, i)
        killers.append(t)
        t.start()

    # wait for workers to finish
    q.join()

    # accumulate all the shutdown errors
    shutdown_errors = []
    for t in killers:
        shutdown_errors.extend(t.errors)
    del killers[:]

    # #526/885: kill core procs last
    # we don't want to parallelize this as the master has to be last
    for p in reversed(core_procs):
        _kill_process(p, shutdown_errors)

    # delete everything except dead_list
    logger.info("ProcessMonitor exit: cleaning up data structures and signals")
    with self.plock:
        del core_procs[:]
        del self.procs[:]
        del self.core_procs[:]

    reacquire_signals = self.reacquire_signals
    if reacquire_signals:
        reacquire_signals.clear()
    logger.info("ProcessMonitor exit: pmon has shutdown")
    self.done = True

    if shutdown_errors:
        printerrlog("Shutdown errors:\n"+'\n'.join([" * %s"%e for e in shutdown_errors]))
通过对 pmon.py 的代码分析可以看出:pmon.py 需要在进程的主线程中 import(Process 的构造函数必须在主线程中调用,信号处理函数才能正确注册);每调用一次 start_process_monitor() 就会创建一个 pmon,然后通过 register() 把需要监控的进程(Process 对象)注册到该 pmon 中;一个进程中可以有多个 pmon,它们都保存在模块级的全局列表 _pmons 中。