本文为项目开发总结的原创文档。

 

本项目,添加一个watchdog守护进程,用来监控环境的三大进程mozart、bitbox、mplayer,任何一个进程出现故障,整个环境进行重启。

 


首先,通过板级驱动目录 /arch/mips/xburst/soc-x1000/common 下的 reset.c(# vim reset.c),



找到与看门狗有关的code[同事发现,牛!];因此主要是将核心代码从内核空间搬移到用户空间,及如何监控应用层的进程。



整体实现思路:



1.创建一个进程作为守护进程:watchdog

  

进程的添加:



在configs下添加watchdog.mak;


在src下添加watchdog包,用于加入watchdog相关的code;Makefile;


 



进程的启动:在app.c中的startall中调用mozart_system("watchdog -b");


2.调整进程优先级:



1)如何查看进程的优先级



2)如何修改进程的优先级;


3.watchdog守护进程如何监控mozart和bitbox和mplayer

我们知道内核会通过/proc虚拟文件系统导出系统中正在运行的进程信息,每个进程都有一个/proc/<pid>目录。因此我们可以将检测进程是否存在转换为检测/proc/<pid>目录是否存在,这样就简单多了。


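进程存在性检测由下文详细代码中的processExists完成;需要注意的是,当前代码实际采用的是通过popen执行"ps | grep 进程名 | grep -v grep | wc -l"统计匹配进程数的方式,而不是直接检查/proc/<pid>目录。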

 

实现难点突破:



用户空间和内核空间操作的都是虚拟地址。



1)如果拿到的是物理地址,用户空间可以通过mmap的方式将物理地址映射成虚拟地址(每次映射得到的虚拟地址值可能都不一样),然后就可以直接对这个虚拟地址赋值。



如下:

static int dev_fd; 
  
 
  

        dev_fd = open("/dev/mem", O_RDWR | O_NDELAY);
if (dev_fd < 0) { 
  
 
  

            printf("open(/dev/mem) failed."); 
  
 
  

            return 0; 
  
 
  

        }
unsigned char *map_base = (unsigned char *)mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd,  
   WDT_IOBASE);
 
 

     
 
 
 
*( unsigned long*)(map_base + TCU_TSCR) = (1 << 16);
 
 

     
 
 
 

   close(dev_fd);


 





 


2)而内核空间,从物理地址转成虚拟地址,一般是固定的,例如物理地址0x10002000对应内核虚拟地址0xb0002000;



 



3)同一个物理地址转成虚拟地址,用户空间和内核空间是不相同的。



关于用户空间和内核空间:



 



物理地址在内核空间和用户空间映射地址不一样~~~


 

看门狗实际上就是一个定时器,其硬件内部维护了一个计数的寄存器。每当时钟信号到来时,计数寄存器减1。如果减到0,则重启系统。

如果减到0之前,系统又设置计数寄存器到一个较大的值,则系统永远不会重启。 

 

watchdog的基本实现原理是:

用户空间程序打开 /dev/mem设备(俗称“开门放狗”),

就会导致在内核中启动一个定时器(本项目wdt_start_count的入参是20000ms即20s),此后,用户空间程序需要保证在20秒之内向这个设备写入数据(俗称“定期喂狗”),每次写操作会导致重新设定定时器(本项目是每sleep 10s重新去设定)。如果用户空间程序在20秒之内没有写操作,定时器到期会导致一次系统Reboot操作(“狗咬人了”)。 


watchdog.c 内容如下:



#include <errno.h>
#include <execinfo.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>
#include <signal.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <linux/input.h>

#define WDT_IOBASE (0x10002000)
#define MAP_SIZE        0xFF



#define JZ_EXTAL_RTC      32768     /* RTC extal freq: 32.768 KHz */
#define TCU_IOBASE      0x10002000
#define TCU_TSCR   (0x3C)   /* Timer Stop Clear Register */

#define WDT_TCSR                (0x0c)  /* rw, 32, 0x???????? */
#define WDT_TCER                (0x04)  /* rw, 32, 0x???????? */
#define WDT_TDR                 (0x00)  /* rw, 32, 0x???????? */
#define WDT_TCNT                (0x08)  /* rw, 32, 0x???????? */
#define TCU_TSSR   (0x2C)   /* Timer Stop Set Register */

/*
 * Arm (or re-arm) the hardware watchdog with a timeout of `msecs`
 * milliseconds.
 *
 * The watchdog registers are reached from user space by mapping the physical
 * register block at WDT_IOBASE through /dev/mem. The timeout is converted to
 * ticks as JZ_EXTAL_RTC / 64 * msecs / 1000 and clamped to 65535.
 *
 * On failure to open or map /dev/mem a message is printed and the watchdog is
 * left untouched. The mapping is released before returning, so calling this
 * periodically does not leak address space.
 */
static void wdt_start_count(int msecs)
{
    int dev_fd = open("/dev/mem", O_RDWR | O_NDELAY);

    if (dev_fd < 0) {
        printf("open(/dev/mem) failed.");
        return;
    }

    void *mapping = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd, WDT_IOBASE);

    /* The mapping stays valid after the descriptor is closed. */
    close(dev_fd);

    if (mapping == MAP_FAILED) {
        printf("mmap(/dev/mem) failed.\n");
        return;
    }

    /* volatile: these are device registers, every store must reach the hardware. */
    volatile unsigned char *map_base = mapping;

    int ticks = JZ_EXTAL_RTC / 64 * msecs / 1000;
    if (ticks > 65535)
        ticks = 65535;

#if 0
        outl(1 << 16,TCU_IOBASE + TCU_TSCR);

        outl(0,WDT_IOBASE + WDT_TCNT);          //counter
        outl(ticks,WDT_IOBASE + WDT_TDR);       //data
        outl((3<<3 | 1<<1),WDT_IOBASE + WDT_TCSR);
        outl(0,WDT_IOBASE + WDT_TCER);
        outl(1,WDT_IOBASE + WDT_TCER);
#endif

    /*
     * The disabled block above is the kernel-space register access. Below is
     * the user-space equivalent; the key difference is that the physical
     * address has to be translated through mmap() first.
     */

//    printf("wdt_start_count  begin~~~. map_base = %p,time=%d\n",map_base,time);
    *(volatile uint32_t *)(map_base + TCU_TSCR) = (1 << 16);          /* presumably un-gates the watchdog clock - TODO confirm */
    *(volatile uint32_t *)(map_base + WDT_TCNT) = 0;                  /* counter */
    *(volatile uint32_t *)(map_base + WDT_TDR) = (uint32_t)ticks;     /* data */
    *(volatile uint32_t *)(map_base + WDT_TCSR) = (3 << 3 | 1 << 1);  /* presumably /64 prescaler on the RTC clock, matching the tick formula - TODO confirm */
    *(volatile uint32_t *)(map_base + WDT_TCER) = 0;                  /* written 0 then 1: presumably stop, then restart counting */
    *(volatile uint32_t *)(map_base + WDT_TCER) = 1;

    munmap(mapping, MAP_SIZE);
//    printf("wdt_start_count  end.\n");
}

/*
 * Stop the hardware watchdog.
 *
 * Maps the register block at WDT_IOBASE through /dev/mem, resets the counter,
 * sets the data register to its maximum (65535) and writes bit 16 of the
 * Timer Stop Set Register.
 *
 * On failure to open or map /dev/mem a message is printed and the watchdog is
 * left untouched. The mapping is released before returning.
 */
static void wdt_stop_count(void)
{
    int dev_fd = open("/dev/mem", O_RDWR | O_NDELAY);

    if (dev_fd < 0) {
        printf("open(/dev/mem) failed.");
        return;
    }

    void *mapping = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, dev_fd, WDT_IOBASE);

    /* The mapping stays valid after the descriptor is closed. */
    close(dev_fd);

    if (mapping == MAP_FAILED) {
        printf("mmap(/dev/mem) failed.\n");
        return;
    }

    /* volatile: these are device registers, every store must reach the hardware. */
    volatile unsigned char *map_base = mapping;

    printf("\033[1;33mwdt_stop_count  begin~~~. map_base = %p\n\033[m", mapping);
#if 0
        outl(1 << 16,TCU_IOBASE + TCU_TSCR);
        outl(0,WDT_IOBASE + WDT_TCNT);          //counter
        outl(65535,WDT_IOBASE + WDT_TDR);       //data
        outl(1 << 16,TCU_IOBASE + TCU_TSSR);
#endif

    /* User-space equivalent of the disabled kernel-space sequence above. */
    *(volatile uint32_t *)(map_base + TCU_TSCR) = (1 << 16);
    *(volatile uint32_t *)(map_base + WDT_TCNT) = 0;       /* counter */
    *(volatile uint32_t *)(map_base + WDT_TDR) = 65535;    /* data */
    *(volatile uint32_t *)(map_base + TCU_TSSR) = (1 << 16);  /* Timer Stop Set: presumably gates the watchdog clock - TODO confirm */

    munmap(mapping, MAP_SIZE);
    printf("wdt_stop_count  end.\n");
}


/*
 * Re-arm the watchdog with a 20000 ms timeout every 10 seconds, forever.
 * This function never returns.
 *
 * NOTE(review): the message below says "4 ms", but the loop arms a 20 s
 * timeout and refreshes it every 10 s, so the watchdog is kept fed rather
 * than allowed to expire. The text looks inherited from the kernel reset
 * code - confirm the intended behavior. No caller is visible in this file.
 *
 * The former trailing `while(1) printf("check wdt.\n");` loop sat after the
 * infinite loop, could never execute, and has been removed.
 */
void jz_wdt_restart(void)
{
    printf("Restarting after 4 ms\n");

    for (;;) {
        wdt_start_count(20000);
        sleep(10);
    }
}

/*
 * Check whether a process is running.
 *
 * Runs "ps | grep <name> | grep -v grep | wc -l" through popen() and parses
 * the resulting line count.
 *
 * process_name: text to look for in the `ps` output. It is pasted into a
 *               shell command unescaped, so it must come from trusted code.
 *               grep matches substrings, so any `ps` line containing the
 *               text counts as a match.
 *
 * Returns true when at least one matching line is found; false when none is
 * found, when the name is NULL/empty or too long for the command buffer, or
 * when the pipeline cannot be started or produces no output.
 */
bool processExists(char * process_name)
{
    char ps[128];
    char rebuff[32] = "";
    long count = 0;
    FILE *ptr;
    int len;

    if (process_name == NULL || process_name[0] == '\0')
        return false;

    len = snprintf(ps, sizeof(ps), "ps | grep %s |grep -v grep| wc -l", process_name);
    if (len < 0 || (size_t)len >= sizeof(ps)) {
        /* A truncated command line would run a broken pipeline. */
        printf("Current process %s is not Exist!!!!\n", process_name);
        return false;
    }

    ptr = popen(ps, "r");
    if (ptr == NULL) {
        /* Report the real cause; the check itself could not be performed. */
        perror("popen");
        printf("Current process %s is not Exist!!!!\n", process_name);
        return false;
    }

    /* Only parse when a line was actually read; otherwise count stays 0. */
    if (fgets(rebuff, sizeof(rebuff), ptr) != NULL)
        count = strtol(rebuff, NULL, 10);

    pclose(ptr);
    return count >= 1;
}



/*
 * Signal number -> signal name lookup table, indexed by signal number.
 * Indices 0, 32 and 33 have no entry and are NULL. The table is read-only,
 * so both the pointers and the strings are const-qualified.
 *
 * NOTE(review): the indices are hard-coded numbers. Linux signal numbering
 * is architecture-specific (MIPS numbers several signals differently from
 * x86/ARM) - confirm the table against <signal.h> on the target before
 * relying on it. In this file it is only referenced from disabled (#if 0)
 * code in sig_handler.
 */
static const char *const signal_str[] = {
    [1] = "SIGHUP",       [2] = "SIGINT",       [3] = "SIGQUIT",      [4] = "SIGILL",      [5] = "SIGTRAP",
    [6] = "SIGABRT",      [7] = "SIGBUS",       [8] = "SIGFPE",       [9] = "SIGKILL",     [10] = "SIGUSR1",
    [11] = "SIGSEGV",     [12] = "SIGUSR2",     [13] = "SIGPIPE",     [14] = "SIGALRM",    [15] = "SIGTERM",
    [16] = "SIGSTKFLT",   [17] = "SIGCHLD",     [18] = "SIGCONT",     [19] = "SIGSTOP",    [20] = "SIGTSTP",
    [21] = "SIGTTIN",     [22] = "SIGTTOU",     [23] = "SIGURG",      [24] = "SIGXCPU",    [25] = "SIGXFSZ",
    [26] = "SIGVTALRM",   [27] = "SIGPROF",     [28] = "SIGWINCH",    [29] = "SIGIO",      [30] = "SIGPWR",
    [31] = "SIGSYS",      [34] = "SIGRTMIN",    [35] = "SIGRTMIN+1",  [36] = "SIGRTMIN+2", [37] = "SIGRTMIN+3",
    [38] = "SIGRTMIN+4",  [39] = "SIGRTMIN+5",  [40] = "SIGRTMIN+6",  [41] = "SIGRTMIN+7", [42] = "SIGRTMIN+8",
    [43] = "SIGRTMIN+9",  [44] = "SIGRTMIN+10", [45] = "SIGRTMIN+11", [46] = "SIGRTMIN+12", [47] = "SIGRTMIN+13",
    [48] = "SIGRTMIN+14", [49] = "SIGRTMIN+15", [50] = "SIGRTMAX-14", [51] = "SIGRTMAX-13", [52] = "SIGRTMAX-12",
    [53] = "SIGRTMAX-11", [54] = "SIGRTMAX-10", [55] = "SIGRTMAX-9",  [56] = "SIGRTMAX-8", [57] = "SIGRTMAX-7",
    [58] = "SIGRTMAX-6",  [59] = "SIGRTMAX-5",  [60] = "SIGRTMAX-4",  [61] = "SIGRTMAX-3", [62] = "SIGRTMAX-2",
    [63] = "SIGRTMAX-1",  [64] = "SIGRTMAX",
};


/*
 * Print the command-line help to stdout.
 *
 * app_name: program name shown at the start of the synopsis line.
 *
 * The option list mirrors the getopt string "bBf:h" used by main(); -b/-B
 * were previously missing from the help text.
 */
static void usage(const char *app_name)
{
    printf("%s [-b] [-f file] [-h]\n"
           " -b run in the background (-B is a synonym)\n"
           " -f file (accepted but currently ignored)\n"
           " -h help (show this usage text)\n", app_name);
}


/*
 * Signal handler installed by main(): stops the hardware watchdog and
 * terminates the process with exit(-1).
 *
 * signo: number of the delivered signal; only used by the disabled
 *        diagnostic branch.
 *
 * The disabled (#if 0) branch is an alternative that dumps a backtrace and
 * the process memory map instead. Its locals now live inside that branch so
 * the active build carries no unused variables.
 *
 * NOTE(review): wdt_stop_count() calls printf(), and exit() runs atexit
 * handlers; neither is async-signal-safe. Consider setting a flag here and
 * doing the work from the main loop.
 */
void sig_handler(int signo)
{
#if 0
    char cmd[64] = {0};
    void *array[10];
    int size = 0;
    char **strings = NULL;
    int i = 0;

    printf("\n\n[%s: %d] bitbox crashed by signal %s.\n", __func__, __LINE__, signal_str[signo]);

    printf("Call Trace:\n");
    size = backtrace(array, 10);
    strings = backtrace_symbols(array, size);
    if (strings) {
        for (i = 0; i < size; i++)
            printf ("  %s\n", strings[i]);
        free (strings);
    } else {
        printf("Not Found\n\n");
    }

    if (signo == SIGSEGV || signo == SIGBUS ||
        signo == SIGTRAP || signo == SIGABRT) {
        snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
        printf("Process maps:\n");
        system(cmd);
    }
#else
    (void)signo;
    /* Disarm the watchdog so that terminating this daemon does not reboot the board. */
    wdt_stop_count();
#endif

    exit(-1);
}

/*
 * Watchdog daemon entry point.
 *
 * Options (getopt string "bBf:h"):
 *   -b / -B  detach and run in the background via daemon(0, 1)
 *   -f file  accepted, currently ignored
 *   -h       print usage and exit with 0
 * Any other option prints usage and exits with -1.
 *
 * Main loop: while "mozart", "bitbox" and "mplayer" are all reported as
 * running by processExists(), the hardware watchdog is re-armed with a
 * 20000 ms timeout and the daemon sleeps 10 s. As soon as one is missing,
 * the state of all three is printed and the loop ends without feeding the
 * watchdog again.
 *
 * NOTE(review): the daemon does not trigger a reboot itself; it relies on
 * the previously armed watchdog expiring. If a process is already missing on
 * the very first pass, the watchdog was never armed by this program, so no
 * reboot would follow - confirm whether that is intended.
 */
int main(int argc, char **argv)
{
    int c = -1;
    int daemonize = 0;
    printf("watchdog V1.7 start!!!!@_@\n");

    signal(SIGPIPE, SIG_IGN);
    signal(SIGINT, sig_handler);
    signal(SIGTERM, sig_handler);
    signal(SIGBUS, sig_handler);
    signal(SIGSEGV, sig_handler);
    signal(SIGABRT, sig_handler);

    while ((c = getopt(argc, argv, "bBf:h")) >= 0) {
        switch (c) {
            case 'b':
            case 'B':
                daemonize = 1;
                break;
            case 'f':
                break;
            case 'h':
                usage(argv[0]);
                return 0;
            default:
                usage(argv[0]);
                return -1;
        }
    }

    /* run in the background */
    if (daemonize) {
        if (daemon(0, 1)) {
            perror("daemon");
            return -1;
        }
    }

    for (;;) {
        /* Sample each process once so the report below shows exactly the
         * state that triggered the decision. */
        bool mozart_ok = processExists("mozart");
        bool bitbox_ok = processExists("bitbox");
        bool mplayer_ok = processExists("mplayer");

        if (!(mozart_ok && bitbox_ok && mplayer_ok)) {
            printf(" mozart or  bitbox or mplayer is  not exist, Reboot!!!!! \n");
            printf("Mozart process  exist ???: %d\n", mozart_ok);
            printf("BitBox process  exist ???: %d\n", bitbox_ok);
            printf("Mplayer process  exist ???: %d\n", mplayer_ok);
            break;
        }

//        printf("Both mozart and bitbox and mplayer are exists!!!!!\n");
        wdt_start_count(20000);
        sleep(10);
    }

    return 0;
}


 

 

程序运行起来后,通过ps可查看到:

S 0 241 1 7868 716 0:0 13:16 00:00:01 watchdog -b