Nginx的高可用、高可靠性体现在它的平滑升级上——在升级过程中能够保证业务不间断。那它是如何实现的呢?接下来我们一起探讨它的实现原理。
一、升级过程
1.1、查看进程信息
先通过命令行感性地了解一下Nginx是如何进行平滑升级的。首先通过命令行查看进程信息,可知当前有一个master进程、两个worker进程,如下所示:
[root@localhost ~]# ps -ef | grep nginx | grep -v grep
root 2850 1 0 08:24 ? 00:00:00 nginx: master process /usr/local/nginx/sbin/nginx
root 2851 2850 0 08:24 ? 00:00:00 nginx: worker process
root 2852 2850 0 08:24 ? 00:00:00 nginx: worker process
[root@localhost ~]#
地址监听信息如下:
[root@localhost ~]# netstat -apn | grep tcp| grep nginx
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN 2850/nginx
[root@localhost ~]#
1.2、升级版本
通过命令行,发送SIGUSR2给master进程(Nginx定义SIGUSR2是版本升级),进行版本升级。
[root@localhost ~]# kill -USR2 `cat /usr/local/nginx/logs/nginx.pid`
[root@localhost ~]#
[root@localhost ~]# ps -ef | grep nginx | grep -v grep
root 2850 1 0 08:24 ? 00:00:00 nginx: master process /usr/local/nginx/sbin/nginx
root 2851 2850 0 08:24 ? 00:00:00 nginx: worker process
root 2852 2850 0 08:24 ? 00:00:00 nginx: worker process
root 2942 2850 0 08:39 ? 00:00:00 nginx: master process /usr/local/nginx/sbin/nginx
root 2943 2942 0 08:39 ? 00:00:00 nginx: worker process
root 2944 2942 0 08:39 ? 00:00:00 nginx: worker process
[root@localhost ~]#
变化内容:
1)会发现多出一组master和worker进程(新master进程id是2942,新worker进程id是2943、2944),其中新master进程的父进程是2850,即旧master进程。
2)nginx.pid文件发生改变,变成新的master进程id,这点很重要,后面再操作nginx.pid时,应该警惕,如下:
[root@localhost ~]#
[root@localhost ~]# cat /usr/local/nginx/logs/nginx.pid
2942
[root@localhost ~]#
地址监听信息,没有变化:
[root@localhost ~]# netstat -apn | grep tcp| grep nginx
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN 2850/nginx
[root@localhost ~]#
此时我们访问Nginx服务(在浏览器中输入地址),然后查看连接信息:
[root@localhost ~]#
[root@localhost ~]# netstat -apn | grep tcp| grep nginx
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN 2850/nginx
tcp 0 0 192.168.12.129:80 192.168.12.1:52980 ESTABLISHED 2944/nginx
tcp 0 0 192.168.12.129:80 192.168.12.1:52979 ESTABLISHED 2944/nginx
tcp 0 0 192.168.12.129:80 192.168.12.1:52983 ESTABLISHED 2944/nginx
tcp 0 0 192.168.12.129:80 192.168.12.1:52981 ESTABLISHED 2944/nginx
tcp 0 0 192.168.12.129:80 192.168.12.1:52982 ESTABLISHED 2851/nginx
tcp 0 0 192.168.12.129:80 192.168.12.1:52984 ESTABLISHED 2943/nginx
[root@localhost ~]#
发现提供服务worker进程分别是2944、2851、2943,其中2851是旧worker进程。
1.3、旧版本服务下线
通过发送WINCH信号,使旧版本worker进程不再处理新请求,并在处理完现有请求后退出。注意:发送WINCH时不能使用nginx.pid文件(此时其中记录的已经是新master的进程id),必须直接使用旧master的进程id。
[root@localhost ~]#
[root@localhost ~]# kill -WINCH 2850
[root@localhost ~]#
[root@localhost ~]# ps -ef|grep nginx |grep -v grep
root 2850 1 0 09:04 ? 00:00:00 nginx: master process /usr/local/nginx/sbin/nginx
root 2942 2850 0 09:06 ? 00:00:00 nginx: master process /usr/local/nginx/sbin/nginx
root 2943 2942 0 09:06 ? 00:00:00 nginx: worker process
root 2944 2942 0 09:06 ? 00:00:00 nginx: worker process
[root@localhost ~]#
[root@localhost ~]# netstat -apn | grep nginx |grep tcp
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN 2850/nginx
[root@localhost ~]#
通过查看命令行可知,旧worker进程都已经退出,但是旧的master进程并没有退出,此时监听地址信息,进程id号仍然是旧master进程id号。
1.4、关闭旧master进程
关闭旧master进程,可以直接通过kill命令行,将其杀死,如下:
[root@localhost ~]# kill 2850
[root@localhost ~]# ps -ef|grep nginx |grep -v grep
root 2942 1 0 09:06 ? 00:00:00 nginx: master process /usr/local/nginx/sbin/nginx
root 2943 2942 0 09:06 ? 00:00:00 nginx: worker process
root 2944 2942 0 09:06 ? 00:00:00 nginx: worker process
[root@localhost ~]#
最后查看,监听信息,进程信息变成新master进程id:
[root@localhost ~]# netstat -apn | grep nginx |grep tcp
tcp 0 0 0.0.0.0:80 0.0.0.0:* LISTEN 2942/nginx
[root@localhost ~]#
[root@localhost ~]#
二、升级实现原理
上面只是演示如何进行平滑升级,这里再从源码级别深入探讨平滑升级的流程。
通过上面演示,可知Nginx进行平滑升级采用的是进程间通信方式--信号。通过命令行kill(也是一个进程)发送信号给master进程,那么master进程才可以针对不同信号进行处理。既然是通过信号方式,必然要先注册信号,否则应用程序是无法处理的。
2.1、信号注册
信号注册是在main函数中调用的。信号注册方式一般有两种:signal和sigaction,nginx采用的是sigaction,如下:
/*
 * Install the disposition for every entry of the signals[] table.
 *
 * The table is walked until its terminating entry (signo == 0).  Each
 * entry's handler is registered with sigaction() using a zeroed
 * struct sigaction with an empty sa_mask.
 *
 * log - log used to report sigaction() failures
 *
 * Returns NGX_OK when the whole table has been processed.  When a
 * sigaction() call fails, the error is logged and NGX_ERROR is returned;
 * in NGX_VALGRIND builds the failure is only logged and the loop continues.
 */
ngx_int_t
ngx_init_signals(ngx_log_t *log)
{
    ngx_signal_t      *entry;
    struct sigaction   act;

    entry = signals;

    while (entry->signo != 0) {

        ngx_memzero(&act, sizeof(act));
        sigemptyset(&act.sa_mask);
        act.sa_handler = entry->handler;

        /* from now on entry->handler is invoked when entry->signo arrives */
        if (sigaction(entry->signo, &act, NULL) == -1) {
#if (NGX_VALGRIND)
            ngx_log_error(NGX_LOG_ALERT, log, ngx_errno,
                          "sigaction(%s) failed, ignored", entry->signame);
#else
            ngx_log_error(NGX_LOG_EMERG, log, ngx_errno,
                          "sigaction(%s) failed", entry->signame);
            return NGX_ERROR;
#endif
        }

        entry++;
    }

    return NGX_OK;
}
信号表signals主要定义信号和处理函数映射关系,具体定义如下:
/*
 * Table of the signals nginx installs a disposition for.
 *
 * Each entry holds, in order: the signal number, the signal name used in
 * log messages, a short command string ("reload", "reopen", "stop", "quit",
 * or "" when there is none -- presumably the names accepted on the command
 * line; confirm against the caller), and the handler to install.
 *
 * Every signal is routed to ngx_signal_handler except SIGSYS and SIGPIPE,
 * which are set to SIG_IGN.  The entry with signo == 0 terminates the table;
 * loops over signals[] stop there.
 */
ngx_signal_t signals[] = {
{ ngx_signal_value(NGX_RECONFIGURE_SIGNAL),
"SIG" ngx_value(NGX_RECONFIGURE_SIGNAL),
"reload",
ngx_signal_handler },
{ ngx_signal_value(NGX_REOPEN_SIGNAL),
"SIG" ngx_value(NGX_REOPEN_SIGNAL),
"reopen",
ngx_signal_handler },
{ ngx_signal_value(NGX_NOACCEPT_SIGNAL),
"SIG" ngx_value(NGX_NOACCEPT_SIGNAL),
"",
ngx_signal_handler },
{ ngx_signal_value(NGX_TERMINATE_SIGNAL),
"SIG" ngx_value(NGX_TERMINATE_SIGNAL),
"stop",
ngx_signal_handler },
{ ngx_signal_value(NGX_SHUTDOWN_SIGNAL),
"SIG" ngx_value(NGX_SHUTDOWN_SIGNAL),
"quit",
ngx_signal_handler },
{ ngx_signal_value(NGX_CHANGEBIN_SIGNAL),
"SIG" ngx_value(NGX_CHANGEBIN_SIGNAL),
"",
ngx_signal_handler },
{ SIGALRM, "SIGALRM", "", ngx_signal_handler },
{ SIGINT, "SIGINT", "", ngx_signal_handler },
{ SIGIO, "SIGIO", "", ngx_signal_handler },
{ SIGCHLD, "SIGCHLD", "", ngx_signal_handler },
{ SIGSYS, "SIGSYS, SIG_IGN", "", SIG_IGN },
{ SIGPIPE, "SIGPIPE, SIG_IGN", "", SIG_IGN },
{ 0, NULL, "", NULL }
};
2.2 、信号处理
由上一小节可知,除SIGSYS、SIGPIPE被直接忽略(SIG_IGN)外,其余信号的处理函数均是ngx_signal_handler方法:
/*
 * Common handler for the signals registered from the signals[] table.
 *
 * It looks up the table entry for signo, refreshes the cached time, and
 * then, depending on the role of the current process (ngx_process), turns
 * the signal into one of the global flags (ngx_quit, ngx_terminate,
 * ngx_noaccept, ngx_reconfigure, ngx_reopen, ngx_change_binary, ngx_sigalrm,
 * ngx_sigio, ngx_reap).  No real work is done here; the handler only sets
 * flags, logs the signal and, for SIGCHLD, calls ngx_process_get_status().
 *
 * errno is saved on entry and restored before returning.
 *
 * signo - number of the delivered signal
 */
static void
ngx_signal_handler(int signo)
{
char *action;
ngx_int_t ignore;
ngx_err_t err;
ngx_signal_t *sig;
ignore = 0;
err = ngx_errno;
for (sig = signals; sig->signo != 0; sig++) {// find the table entry that describes this signal
if (sig->signo == signo) {
break;
}
}
ngx_time_sigsafe_update();// refresh the cached time
action = "";
switch (ngx_process) {// ngx_process identifies the role of the current process
/* master process (or single-process mode) */
case NGX_PROCESS_MASTER:
case NGX_PROCESS_SINGLE:
switch (signo) {
case ngx_signal_value(NGX_SHUTDOWN_SIGNAL):
ngx_quit = 1;
action = ", shutting down";
break;
case ngx_signal_value(NGX_TERMINATE_SIGNAL):
case SIGINT:
ngx_terminate = 1;
action = ", exiting";
break;
case ngx_signal_value(NGX_NOACCEPT_SIGNAL):
/* honoured only when running as a daemon */
if (ngx_daemonized) {
ngx_noaccept = 1;
action = ", stop accepting connections";
}
break;
case ngx_signal_value(NGX_RECONFIGURE_SIGNAL):
ngx_reconfigure = 1;
action = ", reconfiguring";
break;
case ngx_signal_value(NGX_REOPEN_SIGNAL):
ngx_reopen = 1;
action = ", reopening logs";
break;
/* signal requesting a smooth upgrade (binary change) */
case ngx_signal_value(NGX_CHANGEBIN_SIGNAL):
if (getppid() > 1 || ngx_new_binary > 0) {
/*
 * Ignore the signal in the new binary if its parent is
 * not the init process, i.e. the old binary's process
 * is still running. Or ignore the signal in the old binary's
 * process if the new binary's process is already running.
 */
action = ", ignoring";
ignore = 1;
break;
}
ngx_change_binary = 1;// only the flag is set here; nothing is executed in the handler
action = ", changing binary";
break;
case SIGALRM:
ngx_sigalrm = 1;
break;
case SIGIO:
ngx_sigio = 1;
break;
/**
 * SIGCHLD is delivered to the parent when a child process changes state
 * (for example, a worker exits).  Only the ngx_reap flag is set here.
 * NOTE(review): presumably the master loop reacts to ngx_reap by
 * respawning workers -- that code is not part of this function; confirm.
 */
case SIGCHLD:
ngx_reap = 1;
break;
}
break;
/* worker (and helper) processes */
case NGX_PROCESS_WORKER:
case NGX_PROCESS_HELPER:
switch (signo) {
case ngx_signal_value(NGX_NOACCEPT_SIGNAL):
if (!ngx_daemonized) {
break;
}
ngx_debug_quit = 1;
/* fall through */
case ngx_signal_value(NGX_SHUTDOWN_SIGNAL):
ngx_quit = 1;
action = ", shutting down";
break;
case ngx_signal_value(NGX_TERMINATE_SIGNAL):
case SIGINT:
ngx_terminate = 1;
action = ", exiting";
break;
case ngx_signal_value(NGX_REOPEN_SIGNAL):
ngx_reopen = 1;
action = ", reopening logs";
break;
/* these signals are meaningful only to the master; workers just log them */
case ngx_signal_value(NGX_RECONFIGURE_SIGNAL):
case ngx_signal_value(NGX_CHANGEBIN_SIGNAL):
case SIGIO:
action = ", ignoring";
break;
}
break;
}
ngx_log_error(NGX_LOG_NOTICE, ngx_cycle->log, 0,
"signal %d (%s) received%s", signo, sig->signame, action);
if (ignore) {
ngx_log_error(NGX_LOG_CRIT, ngx_cycle->log, 0,
"the changing binary signal is ignored: "
"you should shutdown or terminate "
"before either old or new binary's process");
}
if (signo == SIGCHLD) {// a child changed state: collect its status
ngx_process_get_status();
}
/* restore the errno value saved on entry */
ngx_set_errno(err);
}
2.3、信号产生方式
信号的产生方式有两种:一种是nginx自身发起信号,一种是通过kill命令行发送。针对平滑升级来说,我们通过kill方式发送USR2信号给master进程,因此上面的流程会进入:
/* signal requesting a smooth upgrade (binary change) */
case ngx_signal_value(NGX_CHANGEBIN_SIGNAL):
if (getppid() > 1 || ngx_new_binary > 0) {
/*
 * Ignore the signal in the new binary if its parent is
 * not the init process, i.e. the old binary's process
 * is still running. Or ignore the signal in the old binary's
 * process if the new binary's process is already running.
 */
action = ", ignoring";
ignore = 1;
break;
}
ngx_change_binary = 1;// only the flag is set here; nothing is executed in the handler
action = ", changing binary";
break;
此时退出该函数表示,信号中断处理函数已经结束,那么master进程应该如何处理呢?
三、信号事件后续处理
我们在《菜鸟学习Nginx之启动流程2》已经说明了,master进程会阻塞在sigsuspend函数这里,当有信号产生时,进程会被中断,转而调用信号处理函数。当信号处理函数结束后sigsuspend阻塞函数返回,执行后续代码流程。这段流程需要谨记于心。
/**
 * ngx_change_binary is set by the signal handler when the master receives
 * the change-binary (USR2) signal, i.e. a smooth upgrade was requested.
 * The flag is cleared, and the value returned by ngx_exec_new_binary()
 * is remembered in ngx_new_binary.
 */
if (ngx_change_binary)
{
ngx_change_binary = 0;
ngx_log_error(NGX_LOG_NOTICE, cycle->log, 0, "changing binary");
ngx_new_binary = ngx_exec_new_binary(cycle, ngx_argv);
}
进入ngx_exec_new_binary函数:
/*
 * Start a new nginx binary that is handed the current listening sockets.
 *
 * The function builds an environment with an extra NGINX_VAR variable that
 * lists the descriptor of every socket in cycle->listening as "fd;fd;...",
 * renames the pid file to the old-pid path, and then runs argv[0] through
 * ngx_execute().
 *
 * cycle - current cycle; supplies the log, the listening array and the
 *         core module configuration (pid and oldpid paths)
 * argv  - argument vector for the new process; argv[0] is used as the
 *         path of the executable
 *
 * Returns the value returned by ngx_execute().  NGX_INVALID_PID is
 * returned when the environment or the variable cannot be allocated, or
 * when the pid file cannot be renamed.  If ngx_execute() itself fails, the
 * pid file is renamed back to its original path.
 */
ngx_pid_t
ngx_exec_new_binary(ngx_cycle_t *cycle, char *const *argv)
{
    char             **env, *var;
    u_char            *pos;
    ngx_uint_t         idx, nenv;
    ngx_pid_t          pid;
    ngx_exec_ctx_t     ctx;
    ngx_core_conf_t   *ccf;
    ngx_listening_t   *sockets;

    ngx_memzero(&ctx, sizeof(ngx_exec_ctx_t));

    ctx.argv = argv;
    ctx.name = "new binary process";
    ctx.path = argv[0];

    /*
     * NOTE(review): presumably the initial value 2 asks
     * ngx_set_environment() to leave room for the entries appended
     * below -- confirm against ngx_set_environment().
     */
    nenv = 2;

    env = ngx_set_environment(cycle, &nenv);
    if (env == NULL) {
        return NGX_INVALID_PID;
    }

    var = ngx_alloc(sizeof(NGINX_VAR)
                    + cycle->listening.nelts * (NGX_INT32_LEN + 1) + 2,
                    cycle->log);
    if (var == NULL) {
        ngx_free(env);
        return NGX_INVALID_PID;
    }

    /* "NGINX_VAR=" followed by one "fd;" item per listening socket */
    pos = ngx_cpymem(var, NGINX_VAR "=", sizeof(NGINX_VAR));

    sockets = cycle->listening.elts;
    idx = 0;

    while (idx < cycle->listening.nelts) {
        pos = ngx_sprintf(pos, "%ud;", sockets[idx].fd);
        idx++;
    }

    *pos = '\0';

    env[nenv] = var;
    nenv++;

#if (NGX_SETPROCTITLE_USES_ENV)

    /* allocate the spare 300 bytes for the new binary process title */

    env[nenv] = "SPARE=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
                "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
    nenv++;

#endif

    env[nenv] = NULL;

#if (NGX_DEBUG)
    {
        char  **cur;

        cur = env;
        while (*cur) {
            ngx_log_debug1(NGX_LOG_DEBUG_CORE, cycle->log, 0, "env: %s", *cur);
            cur++;
        }
    }
#endif

    ctx.envp = (char *const *) env;

    ccf = (ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module);

    pid = NGX_INVALID_PID;

    /* move the current pid file out of the way: pid -> oldpid */
    if (ngx_rename_file(ccf->pid.data, ccf->oldpid.data) == NGX_FILE_ERROR) {
        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                      ngx_rename_file_n " %s to %s failed "
                      "before executing new binary process \"%s\"",
                      ccf->pid.data, ccf->oldpid.data, argv[0]);
        goto cleanup;
    }

    /* spawn a process that runs the new binary */
    pid = ngx_execute(cycle, &ctx);

    /* the spawn failed: try to put the pid file back */
    if (pid == NGX_INVALID_PID
        && ngx_rename_file(ccf->oldpid.data, ccf->pid.data) == NGX_FILE_ERROR)
    {
        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                      ngx_rename_file_n " %s back to %s failed after "
                      "an attempt to execute new binary process \"%s\"",
                      ccf->oldpid.data, ccf->pid.data, argv[0]);
    }

cleanup:

    ngx_free(env);
    ngx_free(var);

    return pid;
}
/*
 * Spawn a detached process that runs the program described by ctx.
 *
 * The work is delegated to ngx_spawn_process() with ngx_execute_proc as
 * the callback, ctx as its data, ctx->name as the process name and the
 * NGX_PROCESS_DETACHED mode.
 *
 * Returns whatever ngx_spawn_process() returns.
 */
ngx_pid_t
ngx_execute(ngx_cycle_t *cycle, ngx_exec_ctx_t *ctx)
{
    ngx_pid_t  child;

    child = ngx_spawn_process(cycle, ngx_execute_proc, ctx, ctx->name,
                              NGX_PROCESS_DETACHED);

    return child;
}
/*
 * Process callback used by ngx_execute(): replace the current process
 * image with the program described by data (an ngx_exec_ctx_t).
 *
 * execve() does not return on success, so the code after it only runs
 * when the call failed: the error is logged and the process exits with
 * status 1.
 */
static void
ngx_execute_proc(ngx_cycle_t *cycle, void *data)
{
    int              rc;
    ngx_exec_ctx_t  *exec_ctx;

    exec_ctx = data;

    rc = execve(exec_ctx->path, exec_ctx->argv, exec_ctx->envp);

    if (rc == -1) {
        /* the reason for the failure is in errno */
        ngx_log_error(NGX_LOG_ALERT, cycle->log, ngx_errno,
                      "execve() failed while executing %s \"%s\"",
                      exec_ctx->name, exec_ctx->path);
    }

    exit(1);
}
四、总结
这里有一个问题:新进程是如何继续监听原先的listening socket的呢?主要分为两步:
1、启动平滑升级时,旧master会把监听socket的文件句柄(数字)写到环境变量中。
2、旧master fork出子进程,子进程随即执行exec函数,替换为新的二进制程序;新程序启动后从环境变量中读取这些listen socket句柄并继续使用。这里需要提示:fork出的子进程会继承父进程已打开的文件描述符,并且这些描述符(在未设置close-on-exec的情况下)在exec之后仍然有效,所以新进程可以直接操作原来的listen socket。
Nginx这种升级方式是比较经典的升级方式,为日后的工作提供了指导方向。但是我个人感觉Nginx的升级方式有点不完美,不完美之处在于需要敲三次命令行才能完成升级。