Guide Line

  1. 目前的项目里需要一个比较安全的log记录机制来写文件,通过比较,选择了 syslog
  2. 整个 syslog 机制由 syslog() 调用、syslogd 守护进程和 /etc/syslog.conf 配置文件组成。系统内核及工具产生信息时,通过调用 syslog() 把信息送往 syslogd,syslogd 再根据 /etc/syslog.conf 中的配置,灵活地控制信息的发送和保存,比如:
  • 记录到系统日志中
  • 输出到系统控制台上
  • 转发给指定的用户
  • 通过网络转发给其他主机上的 syslogd(在 syslog.conf 中用 @host 指定远程主机,例如 @loghost)
  3. 这篇博客记录一下通过阅读 syslog、syslogd 以及 newsyslog 的源码(并结合 syslog.conf 的配置),对整个 log 模块的理解。
  4. 对 syslog 生成的日志文件的管理(裁剪、压缩)是通过 newsyslog.conf 来控制的,之后将详细说明


  • syslogd:守护进程,接收 syslog()(userland)/ log()(kernel)write 的 message,根据 syslog.conf 去记录文件
  • syslog():用户调用,根据 syslog.conf 去发送 message,例如 syslog(LOG_DDOS, "%s", msg_buf);
  • newsyslog:根据 newsyslog.conf 来处理 /var/log 底下的文件(trim file and compress file)

流程图(文字版):syslogd 读取 /etc/syslog.conf → 根据配置执行 socksetup → bind(fx->s, (struct sockaddr *)&sunx, SUN_LEN(&sunx));syslog() → connectlog() → _connect(LogFile) → send log。


code of syslogd

int main()
    if (madvise(NULL, 0, MADV_PROTECT) != 0) -- #define MADV_PROTECT 10 /* protect process from pageout kill */ The madvise() system call is used to give advice or directions to the kernel about the address range beginning at address addr and with a size of length bytes.
     while ((ch = getopt(argc, argv, "468Aa:b:cCdf:kl:m:nNop:P:sS:Tuv"))
        case 'b':
            bindhostname = optarg;
            break;              
        case 'l':  -- mode : path
            {
                long    perml;
                mode_t  mode;
                char    *name, *ep;
                if() {
                } else if ((name = strchr(optarg, ':')) != NULL) { -- name now points at ":path"
                    if (isdigit(*optarg)) {
                        perml = strtol(optarg, &ep, 8); -- string to long base为8时,合法字符为‘0’,‘1’,……‘7’
                }
                STAILQ_INSERT_TAIL(&funixes, fx, next); -- 加到 unix socket 队列里
                break;
            }

    ppid = waitdaemon(0, 0, 30); -- turn the process into a daemon process
    (void)signal(SIGTERM, dodie); -- SIGTERM:kill(1) 默认发送的终止信号
    (void)signal(SIGINT, Debug ? dodie : SIG_IGN); -- SIGINT:中断信号(Delete 或 Ctrl+C);回调函数是 dodie 或 SIG_IGN(内核忽略此信号)
    (void)signal(SIGQUIT, Debug ? dodie : SIG_IGN); -- SIGQUIT:Ctrl+\,中止前台进程并产生一个 core 文件
    sigemptyset(&mask); --函数sigemptyset初始化由set指向的信号集,使排除其中所有信号
    sigaddset(&mask, SIGHUP); -- SIGHUP 如果终端界面检测到一个连接断开,则将此信号送给与该终端相关的控制进程(对话期首进程)

    (void)signal(SIGALRM, domark); -- SIGALRM 超过用alarm函数设置的时间时产生此信号。MarkSet = 1;

    STAILQ_FOREACH_SAFE(fx, &funixes, next, fx1) {
        (void)unlink(fx->name); -- if already exist, then delete it first
        memset(&sunx, 0, sizeof(sunx));
        sunx.sun_family = AF_LOCAL; --  local socket. that means file
        (void)strlcpy(sunx.sun_path, fx->name, sizeof(sunx.sun_path));
        fx->s = socket(PF_LOCAL, SOCK_DGRAM, 0);  -- PF_LOCAL <==> AF_UNIX  the same machine, AF_INET for TCP/UDP based on ipv4
        if (fx->s < 0 || bind(fx->s, (struct sockaddr *)&sunx, SUN_LEN(&sunx)) < 0 ||
            chmod(fx->name, fx->mode) < 0) { -- mode 是通过  mode = (mode_t )perml 得到的
            (void)snprintf(line, sizeof line,"cannot create %s", fx->name);
            if (fx == &funix_default || fx == &funix_secure)
                die(0);
            else {
                STAILQ_REMOVE(&funixes, fx, funix, next);
                continue;
            }
        } -- 创建失败时的处理:默认的 socket 失败就退出,其他 socket 则从队列里删除
    increase_rcvbuf(fx->s); 
        : if (getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &len, &slen) == 0) {
            if (len < RCVBUF_MINSIZE) { -- #define RCVBUF_MINSIZE (80 * 1024) /* minimum size of dgram rcv buffer */
                len = RCVBUF_MINSIZE;
                setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &len, sizeof(len));  
            }
        }
    if (SecureMode <= 1)
        finet = socksetup(family, bindhostname);    -- 选项 -b bind_address[:service] 用来指定绑定地址和 service name,默认的 service 是 "syslog";finet 是一个 socket 描述符的集合

if ((fklog = open(_PATH_KLOG, O_RDONLY, 0)) >= 0)
        if (fcntl(fklog, F_SETFL, O_NONBLOCK) < 0) -- 把 klog 的文件状态设为 O_NONBLOCK;感觉内核的 log() 是通过 klog 来实现的
            fklog = -1;

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sact.sa_handler = init;
    sact.sa_mask = mask;
    sact.sa_flags = SA_RESTART;
    (void)sigaction(SIGHUP, &sact, NULL);-- prevent SIGHUP and SIGCHLD handlers from running in parallel  处理一个给定的信号,如果这个信号再次发生,那么它会阻塞到对前一个信号的处理结束为止
    ... set fdsrmax based on finet
for (;;) { 
    for (i = 0; i < *finet; i++) {
        if (finet[i+1] != -1)
            FD_SET(finet[i+1], fdsr); -- note finet[0] is num of sockets counter ,
        }
    i = select(fdsrmax+1, fdsr, NULL, NULL, needdofsync ? &tv : tvp);
    switch (i) {
        case 0: -- select timed out: no descriptor became ready
            dofsync();
        case -1 :
            continue
    }
    for (i = 0; i < *finet; i++) {  --check socket
        if (FD_ISSET(finet[i+1], fdsr)) {
            len = sizeof(frominet);
            l = recvfrom(finet[i+1], line, MAXLINE,  0, (struct sockaddr *)&frominet,&len);
            if (l > 0) {
            ...
            if (validate((struct sockaddr *)&frominet, hname))
            printline(hname, line, RemoteAddDate ? ADDDATE : 0);    -- check and write the log; the log format is specified here
            }
}
STAILQ_FOREACH(fx, &funixes, next) { -- 遍历 funixes 队列里的 unix socket(接收本机 syslog() 发来的消息)
        if (FD_ISSET(fx->s, fdsr)) {
            l = recvfrom(fx->s, line, MAXLINE, 0, -- from syslog()
                    (struct sockaddr *)&fromunix, &len);
                if (l > 0) {
                    line[l] = '\0';
                    printline(LocalHostName, line, 0);
if (fdsr)
        free(fdsr); <--最后free
}

code of syslog

/*
 * syslog --
 *	User-space entry point: gathers the variadic arguments into a
 *	va_list and forwards them to vsyslog(), which does the real work.
 *
 *	pri - priority value, passed through to vsyslog() unchanged
 *	fmt - format string, passed through to vsyslog() unchanged
 *	...  - arguments consumed by fmt
 */
void
syslog(int pri, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vsyslog(pri, fmt, ap);
    va_end(ap);    /* every va_start() must be paired with va_end() */
}


--------------------------------------------------

void vsyslog(int pri, const char *fmt, va_list ap)
    THREAD_LOCK();
    /* Create the primary stdio hook */
    tbuf_cookie.base = tbuf;
    tbuf_cookie.left = sizeof(tbuf);
    fp = fwopen(&tbuf_cookie, writehook);
    -- fwopen() is defined as a call to funopen() with only a write function specified. The funopen() function associates a stream with up to four I/O functions. Either readfn or writefn must be specified; the others can be given as an appropriately typed NULL pointer. These I/O functions will be used to read, write, seek and close the new stream. Read and write I/O functions are allowed to change the underlying buffer on fully buffered or line buffered streams by calling setvbuf(3). -- 看起来像是新建了一个 I/O stream。

    if (fp == NULL) {
        THREAD_UNLOCK();
        return;
    }
    LogTag = _getprogname(); -- process name
    if (LogStat & LOG_PID) -- LogStat 是在 openlog 时确定的选项标志;设置了 LOG_PID 时,message 里会带上 pid
        (void)fprintf(fp, "[%d]", getpid());

    if (strstr(fmt, "%m")) {

    (void)vfprintf(fp, fmt, ap);
    (void)fclose(fp);
    -- Get connected, output the message to the local logger.
    if (!opened)
        openlog_unlocked(LogTag, LogStat | LOG_NDELAY, 0);
    connectlog();
    --> as following
    if (status == NOCONN) {
            /*
             * Try the old "/dev/log" path, for backward
             * compatibility.
             */
        (void)strncpy(SyslogAddr.sun_path, _PATH_OLDLOG,
                sizeof SyslogAddr.sun_path);
        if (_connect(LogFile, (struct sockaddr *)&SyslogAddr,
                sizeof(SyslogAddr)) != -1)
            status = CONNDEF;
        }
-- 类似代码走了三次,connect三个log, 分别是 "/var/run/log" "/var/run/logpriv" "/dev/log"

-- send() 如果失败,可能有两种情况
     * If the send() failed, there are two likely scenarios: 
     *  1) syslogd was restarted
     *  2) /var/run/log is out of socket buffer space, which
     *     in most cases means local DoS.
     * We attempt to reconnect to /var/run/log[priv] to take care of
     * case #1 and keep send()ing data to cover case #2
     * to give syslogd a chance to empty its socket buffer.
     *
     * If we are working with a privileged socket, then take
     * only one attempt, because we don't want to freeze a
     * critical application like su(1) or sshd(8).
     *
     */

if (send(LogFile, tbuf, cnt, 0) < 0) {
    if (errno != ENOBUFS) {
            disconnectlog();
            connectlog(); --attempt to reconnect
        } else {
            do { --keep send()ing data to cover this case
                if (send(LogFile, tbuf, cnt, 0) >= 0) {
                    THREAD_UNLOCK();
                    return;
                }
            }while(0)
        }
}...

(void)_writev(fd, iov, 2); <-- writev 函数用于在一个函数调用中写多个非连续缓存:聚集写(gather write)。writev 按 iov[0]、iov[1] 直至 iov[iovcnt-1] 的顺序从缓存中聚集输出数据。writev 返回输出的字节总数,它应等于所有缓存长度之和。此处需要 include <sys/uio.h>
THREAD_UNLOCK();