roslaunch 中有一个类似于 Android init 进程里 service 重启机制的小功能:如果某个进程在创建时设置了 respawn 属性,那么该进程退出(dead)后会被重新拉起,从而起到进程监控的作用。相关源码位于 pmon.py。

下面分析其主要功能:

#ros_comm\tools\roslaunch\src\roslaunch\pmon.py
class Process(object):
    """
    Basic process representation for L{ProcessMonitor}. Must be subclassed
    to provide actual start()/stop() implementations.

    Constructor *must* be called from the Python Main thread in order
    for signal handlers to register properly.
    """

    def __init__(self, package, name, args, env,
            respawn=False, respawn_delay=0.0, required=False):
        """
        @param package: package the process belongs to (stored as-is)
        @param name: name of the process; L{ProcessMonitor} uses it to
            detect duplicates and in its log messages
        @param args: arguments of the process (stored as-is)
        @param env: environment of the process (stored as-is)
        @param respawn: whether the process should be restarted after
            it dies
        @type  respawn: bool
        @param respawn_delay: delay associated with a respawn
            (presumably seconds -- TODO confirm)
        @type  respawn_delay: float
        @param required: if True, L{ProcessMonitor} initiates shutdown
            when this process dies
        @type  required: bool
        """
        # Keep the identifying data on the instance: the monitor reads
        # p.name when registering and when reporting a dead process.
        self.package = package
        self.name = name
        self.args = args
        self.env = env
        # (1) process attributes; respawn says whether the process has to
        # be restarted after it dies
        self.respawn = respawn
        self.respawn_delay = respawn_delay
        self.required = required
        # Read by the monitor when it logs a dead process; subclasses are
        # expected to overwrite it once an exit code is known.
        self.exit_code = None

Process 类是所有被 ProcessMonitor 监控的进程都需要继承的基类,可以通过 respawn 属性设置进程 dead 后是否需要重启。

调用 start_process_monitor() 函数可以创建并启动一个 ProcessMonitor(它是一个线程,而不是独立的进程):

# (1) More than one process monitor (pmon) may exist: every monitor created
# by start_process_monitor() is appended to this module-level list.
_pmons = []
# Number of monitors created so far; start_process_monitor() uses it to build
# the "ProcessMonitor-<n>" thread name.
_pmon_counter = 0
def start_process_monitor():
    """
    Create a new L{ProcessMonitor}, record it in the module-level
    C{_pmons} list and start its thread.

    @return: the started monitor, or None if shutdown has already been
        initiated (C{_shutting_down} is set)
    @rtype: L{ProcessMonitor}
    """
    global _pmon_counter
    if _shutting_down:
        # no new monitors once shutdown has been initiated
        return None
    _pmon_counter += 1
    name = "ProcessMonitor-%s"%_pmon_counter
    logger.info("start_process_monitor: creating ProcessMonitor")
    # (2) create the ProcessMonitor object
    process_monitor = ProcessMonitor(name)
    # prevent race condition with pmon_shutdown() being triggered
    # as we are starting a ProcessMonitor (i.e. user hits ctrl-C
    # during startup). The lock is taken with `with` so that it is
    # only released if it was actually acquired.
    with _shutdown_lock:
        # (3) add the monitor to _pmons, then start its thread
        _pmons.append(process_monitor)
        process_monitor.start()
        logger.info("start_process_monitor: ProcessMonitor started")

    return process_monitor
class ProcessMonitor(Thread):
    def __init__(self, name="ProcessMonitor"):
        """
        @param name: name given to the monitor thread
        @type  name: str
        """
        Thread.__init__(self, name=name)
        # function-scope import so the class does not depend on an extra
        # module-level import
        from threading import RLock
        # (1) processes being monitored
        self.procs = []
        # guards procs/core_procs; re-entrant so that a method holding the
        # lock can call other helpers that may take it again
        self.plock = RLock()
        # shutdown switch: the monitor loop runs while this is False
        self.is_shutdown = False
        # set to True at the very end of _post_run()
        self.done = False
        # collection cleared by _post_run(); presumably the signals whose
        # handlers must be re-registered -- TODO confirm
        self.reacquire_signals = set()
        # #885: ensure core procs
        self.core_procs = []

    def register(self, p):
        """
        Register process with L{ProcessMonitor}
        @param p: Process
        @type  p: L{Process}
        @raise RLException: if process with same name is already
            registered, or if the process monitor has been shut down
        """
        logger.info("ProcessMonitor.register[%s]"%p.name)
        e = None
        with self.plock:
            if self.has_process(p.name):
                e = RLException("cannot add process with duplicate name '%s'"%p.name)
            elif self.is_shutdown:
                e = RLException("cannot add process [%s] after process monitor has been shut down"%p.name)
            else:
                # (2) register the process with the monitor, i.e. append
                # it to procs
                self.procs.append(p)
        # The failure is reported only after the lock has been released;
        # without this raise the error would be silently dropped.
        if e is not None:
            logger.error("ProcessMonitor.register[%s] failed %s"%(p.name, e))
            raise e
    # (3) thread function of the ProcessMonitor thread
    def run(self):
        """
        thread routine of the process monitor. NOTE: you must still
        call mainthread_spin or mainthread_spin_once() from the main
        thread in order to pick up main thread work from the process
        monitor.

        Runs _run() and always calls _post_run() afterwards, even when
        _run() fails.
        """
        try:
            #don't let exceptions bomb thread, interferes with exit
            try:
                self._run()
            except Exception:
                # log and swallow so the thread reaches _post_run() cleanly
                logger.error(traceback.format_exc())
                traceback.print_exc()
        finally:
            self._post_run()
 # (4) Main body of the ProcessMonitor thread function
  def _run(self):
        """
        Monitoring loop: repeats until self.is_shutdown becomes true,
        sleeping 0.1s between iterations.

        Each iteration checks the monitored processes; a dead process
        whose `required` flag is set switches the monitor to shutdown.
        Processes queued in `respawn` are stopped and started again once
        should_respawn() returns a value <= 0.0 (presumably the remaining
        respawn delay -- TODO confirm); the others stay queued.

        NOTE(review): this listing is an abridged excerpt. `procs` is
        never assigned, the first `try:` has no handler, `dead` is never
        used and nothing is ever added to `respawn` -- confirm the
        missing steps against the full pmon.py.
        """
        plock = self.plock
        dead = []
        respawn = []
        # main loop; is_shutdown is the monitor's off switch
        while not self.is_shutdown:
            # the processes being monitored
            # NOTE(review): presumably `procs` is a copy of self.procs taken
            # under plock -- not shown in this excerpt, confirm
            for p in procs:
                try:
                    if not p.is_alive():
                        logger.debug("Process[%s] has died, respawn=%s, required=%s, exit_code=%s",
                                p.name,
                                "True(%f)" % p.respawn_delay if p.respawn else p.respawn,
                                p.required, p.exit_code)
                        exit_code_str = p.get_exit_description()
                        # (5) this process is required: once a required
                        # process has died the monitor shuts itself down by
                        # setting self.is_shutdown to True
                        if p.required:
                            printerrlog('='*80+"REQUIRED process [%s] has died!\n%s\nInitiating shutdown!\n"%(p.name, exit_code_str)+'='*80)
                            self.is_shutdown = True

            # NOTE(review): code appears to have been elided here (handler
            # for the `try` above and the handling of `dead`) -- confirm



            _respawn=[]
            for r in respawn: 
                try:
                    if self.is_shutdown:
                        break
                    if r.should_respawn() <= 0.0:
                        printlog("[%s] restarting process" % r.name)
                        # stop process, don't accumulate errors
                        # (6) restart the process that needs respawning;
                        # this is what makes the monitor a process supervisor
                        r.stop([])
                        r.start()
                    else:
                        # not ready yet, keep it around
                        _respawn.append(r)
                except:
                    traceback.print_exc()
                    logger.error("Restart failed %s",traceback.format_exc())
            respawn = _respawn
            time.sleep(0.1) #yield thread
        #moved this to finally block of _post_run
        #self._post_run() #kill all processes

从上面的代码可以看出,self.is_shutdown 是 pmon 的关闭开关:当 is_shutdown 为 True 时,while 循环退出,随后 run() 的 finally 分支会执行 _post_run(),杀掉所有被监控的进程。杀进程是有顺序的:先由 10 个工作线程并行杀掉非核心进程,最后再按逆序逐个杀掉核心进程(core_procs)。

   def _post_run(self):
        """
        Shutdown routine of the monitor.

        Marks the monitor as shut down, hands every non-core process (in
        reverse order) to ten _ProcessKiller worker threads, then passes
        the core processes one by one, also in reverse order, to
        _kill_process. Afterwards the process lists are emptied,
        reacquire_signals is cleared, self.done is set and any collected
        shutdown errors are reported through printerrlog.
        """
        logger.info("ProcessMonitor._post_run %s"%self)
        # already true when we get here; set it again to be certain
        self.is_shutdown = True

        # work queue shared with the killer threads
        kill_queue = Queue()
        kill_queue.join()

        with self.plock:
            # snapshot of core_procs that stays usable outside the lock
            core_snapshot = self.core_procs[:]
            logger.info("ProcessMonitor._post_run %s: remaining procs are %s"%(self, self.procs))

            # #526/885: queue only the non-core procs, in reverse order,
            # so that they can be killed in parallel
            for proc in reversed(self.procs):
                if proc not in core_snapshot:
                    kill_queue.put(proc)

        # ten worker threads drain the queue
        workers = []
        for idx in range(10):
            worker = _ProcessKiller(kill_queue, idx)
            workers.append(worker)
            worker.start()

        # block until every queued process has been handled
        kill_queue.join()

        # gather the errors each worker recorded
        shutdown_errors = []
        for worker in workers:
            shutdown_errors += worker.errors
        del workers[:]

        # #526/885: core procs go last, strictly one after another,
        # because the master has to be the final one
        for core_proc in reversed(core_snapshot):
            _kill_process(core_proc, shutdown_errors)

        # drop everything except dead_list
        logger.info("ProcessMonitor exit: cleaning up data structures and signals")
        with self.plock:
            del core_snapshot[:]
            del self.procs[:]
            del self.core_procs[:]

        pending_signals = self.reacquire_signals
        if pending_signals:
            pending_signals.clear()
        logger.info("ProcessMonitor exit: pmon has shutdown")
        self.done = True

        if shutdown_errors:
            error_lines = '\n'.join(" * %s"%err for err in shutdown_errors)
            printerrlog("Shutdown errors:\n"+error_lines)

通过对 pmon.py 的代码分析可以看出:pmon.py 需要在进程的主线程中被 import 和使用(Process 的构造函数要求在主线程中调用,以便正确注册信号处理函数)。每调用一次 start_process_monitor() 就会创建并启动一个 pmon 线程,然后再通过 register() 把需要监控的进程注册到该 pmon 中。同一个进程中可以存在多个 pmon,它们都保存在全局列表 _pmons 中。