一、 主函数流程
/*
* 记录pg启动时间
*/
PgStartTime = GetCurrentTimestamp();
/*
* 在postmaster.pid文件中记录postmaster状态,以告知pg_ctl
*/
AddToDataDirLockFile(LOCK_FILE_LINE_PM_STATUS, PM_STATUS_STARTING);
/*
* 启动数据库并记录pid、设置状态
*/
StartupPID = StartupDataBase();
Assert(StartupPID != 0);
StartupStatus = STARTUP_RUNNING;
pmState = PM_STARTUP;
/* Some workers may be scheduled to start now,启动部分后台进程 */
maybe_start_bgworkers();
/* 循环等待客户连接请求 */
status = ServerLoop();
/*
* ServerLoop正常不应该返回(死循环),如果返回了,说明pg已关闭,需要退出postmaster
*/
ExitPostmaster(status != STATUS_OK);
abort(); /* not reached */
}
二、 启动数据库 StartupDataBase()
Startup进程的3大功能——崩溃恢复、从库日志应用、PITR(基于时间点的恢复)
/* Launch the startup process: expands to StartChildProcess(StartupProcess). */
#define StartupDataBase() StartChildProcess(StartupProcess)
从下面的宏定义也可以看到,启动其他各类后台进程时调用的同样是StartChildProcess()函数,只是传入的进程类型参数不同。
/*
 * Convenience wrappers for launching the other child processes: each macro
 * expands to StartChildProcess() with the matching process-type argument.
 */
#define StartArchiver() StartChildProcess(ArchiverProcess)
#define StartBackgroundWriter() StartChildProcess(BgWriterProcess)
#define StartCheckpointer() StartChildProcess(CheckpointerProcess)
#define StartWalWriter() StartChildProcess(WalWriterProcess)
#define StartWalReceiver() StartChildProcess(WalReceiverProcess)
真正发挥作用的主要是StartupProcessMain和StartupXLOG,调用链如下:
PostmasterMain()
->StartupDataBase()
->StartChildProcess()
->AuxiliaryProcessMain()
->StartupProcessMain()
->StartupXLOG()
StartupProcessMain主要负责设置对各种信号的处理方式(处理或者忽略),并注册若干超时处理函数,其核心是调用StartupXLOG函数。这个函数非常长,在崩溃恢复系列中有过简单学习,可以参考:
三、 循环等待客户连接请求 ServerLoop
主要函数调用
PostmasterMain()
|->ServerLoop()
|->initMasks()
|->for(;;)
|->select() <--监听端口
|->ConnCreate() <--建立connection相关的数据结构
|->BackendStartup() <--创建后端进程backend process
|->PostmasterRandom()
|->fork_process()
|->InitPostmasterChild()
|->ClosePostmasterPorts()
|->BackendInitialize()
|->ProcessStartupPacket()
|->BackendRun()
|->PostgresMain()
|->ConnFree() <--释放connection相关的数据结构
关于socket和信号处理:
/*
 * Main idle loop of postmaster
 *
 * Each iteration waits (via select()) for a connection request on the
 * listening sockets, hands every accepted connection to BackendStartup(),
 * then restarts any missing helper processes and performs time-based
 * housekeeping (SIGKILL escalation, lock-file recheck, socket-file touch).
 *
 * Returns STATUS_ERROR if select() fails with an errno other than EINTR or
 * EWOULDBLOCK; there is no other exit from the loop in this function.
 *
 * NB: Needs to be called with signals blocked
 */
static int
ServerLoop(void)
{
fd_set readmask;
int nSockets;
time_t last_lockfile_recheck_time,
last_touch_time;
last_lockfile_recheck_time = last_touch_time = time(NULL);
nSockets = initMasks(&readmask);
/* Loop forever */
for (;;)
{
fd_set rmask;
int selres;
time_t now;
/*
 * Wait for a connection request to arrive.
 *
 * In PM_WAIT_DEAD_END state we do not want to accept any new
 * connections, so select() is not called; we just sleep instead.
 */
memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
/* PM_WAIT_DEAD_END state: sleep with signals unblocked, then re-block */
if (pmState == PM_WAIT_DEAD_END)
{
PG_SETMASK(&UnBlockSig);
pg_usleep(100000L); /* 100 msec seems reasonable */
/* behave as if select() had reported no ready sockets */
selres = 0;
PG_SETMASK(&BlockSig);
}
else
{
/* must set timeout each time; some OSes change it! */
struct timeval timeout;
/* Needs to run with blocked signals! */
DetermineSleepTime(&timeout);
PG_SETMASK(&UnBlockSig);
/*
 * Call select() to wait for a client connection request; signals are
 * unblocked only around this call.
 */
selres = select(nSockets, &rmask, NULL, NULL, &timeout);
PG_SETMASK(&BlockSig);
}
/*
 * Now check the select() result. A negative result is reported and
 * ends the loop unless errno is EINTR or EWOULDBLOCK.
 */
if (selres < 0)
{
if (errno != EINTR && errno != EWOULDBLOCK)
{
ereport(LOG,
(errcode_for_socket_access(),
errmsg("select() failed in postmaster: %m")));
return STATUS_ERROR;
}
}
/*
 * New connection pending on any of our sockets? If so, fork a child
 * process to deal with it. A positive result means at least one
 * descriptor is ready.
 */
if (selres > 0)
{
int i;
/*
 * Scan the ListenSocket array and check every listening socket
 * descriptor; the first PGINVALID_SOCKET entry ends the scan.
 */
for (i = 0; i < MAXLISTEN; i++)
{
if (ListenSocket[i] == PGINVALID_SOCKET)
break;
/* If this socket has a pending connection request, build a Port for it */
if (FD_ISSET(ListenSocket[i], &rmask))
{
Port *port;
/*
 * ConnCreate (defined elsewhere) is described as: allocate a Port
 * struct in CurrentMemoryContext, accept() the first request queued
 * on this listening socket to obtain a connected socket, and store
 * that socket and related information in the Port. For TCP
 * connections it also sets connection options such as timeouts.
 */
port = ConnCreate(ListenSocket[i]);
/* The Port was created successfully */
if (port)
{
/*
 * BackendStartup starts a backend process that takes over the
 * conversation with the client. The postmaster then closes its own
 * copy of the connection socket and frees the Port. Once the scan is
 * finished, the loop goes on to check that helper processes are
 * running and then waits for the next connection request.
 */
BackendStartup(port);
StreamClose(port->sock);
ConnFree(port);
}
}
}
}
四、 启动各后台进程
接上面ServerLoop函数
/* If the SysLogger process has been lost, try to start a new one */
if (SysLoggerPID == 0 && Logging_collector)
SysLoggerPID = SysLogger_Start();
/*
 * If no checkpointer or background writer is running, and the state is
 * PM_RUN, PM_RECOVERY or PM_HOT_STANDBY, try to start one. It does not
 * matter if this fails; the next loop iteration will try again.
 */
if (pmState == PM_RUN || pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY)
{
if (CheckpointerPID == 0)
CheckpointerPID = StartCheckpointer();
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
}
/*
 * Likewise, if no WalWriter is running, try to start one; this is done
 * only in the normal running state (PM_RUN).
 */
if (WalWriterPID == 0 && pmState == PM_RUN)
WalWriterPID = StartWalWriter();
/*
 * If no autovacuum launcher is running, try to start one as well. It is
 * not started in binary upgrade mode.
 */
if (!IsBinaryUpgrade && AutoVacPID == 0 &&
(AutoVacuumingActive() || start_autovac_launcher) &&
pmState == PM_RUN)
{
AutoVacPID = StartAutoVacLauncher();
if (AutoVacPID != 0)
start_autovac_launcher = false; /* signal processed */
}
/* If no stats collector is running (PM_RUN or PM_HOT_STANDBY), try to start one */
if (PgStatPID == 0 &&
(pmState == PM_RUN || pmState == PM_HOT_STANDBY))
PgStatPID = pgstat_start();
/* If no archiver is running and starting one is allowed, try to start it */
if (PgArchPID == 0 && PgArchStartupAllowed())
PgArchPID = StartArchiver();
/* If we need to signal the autovacuum launcher, do so now */
if (avlauncher_needs_signal)
{
avlauncher_needs_signal = false;
if (AutoVacPID != 0)
kill(AutoVacPID, SIGUSR2);
}
/* If a WAL receiver has been requested, try to start it now */
if (WalReceiverRequested)
MaybeStartWalReceiver();
/* Get other worker processes running, if needed */
if (StartWorkerNeeded || HaveCrashedWorker)
maybe_start_bgworkers();
#ifdef HAVE_PTHREAD_IS_THREADED_NP
/*
 * With assertions enabled, check regularly for appearance of
 * additional threads. All builds check at start and exit.
 */
Assert(pthread_is_threaded_np() == 0);
#endif
/*
 * Some operations do not need to run on every loop iteration; the
 * current time `now` is used to decide when they are due.
 */
now = time(NULL);
/*
 * If an immediate shutdown is in progress, or a fatal error occurred
 * (and SendStop is not set), and at least SIGKILL_CHILDREN_AFTER_SECS
 * have passed since AbortStartTime, escalate to SIGKILL for the
 * children that are still around.
 */
if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
AbortStartTime != 0 &&
(now - AbortStartTime) >= SIGKILL_CHILDREN_AFTER_SECS)
{
/* We were gentle with them before. Not anymore */
ereport(LOG,
(errmsg("issuing SIGKILL to recalcitrant children")));
TerminateChildren(SIGKILL);
/* reset flag so we don't SIGKILL again */
AbortStartTime = 0;
}
/*
 * Once a minute, verify that postmaster.pid has not been removed or
 * overwritten. If it has, force an immediate shutdown. This avoids
 * leaving a postmaster and its children hanging around after their
 * database is gone, where they could cause trouble if a new cluster is
 * created in the same place.
 *
 * It also gives some protection against a DBA mistakenly removing
 * postmaster.pid and starting a new postmaster (which is likely to
 * corrupt data); aborting quickly keeps the damage as small as possible.
 */
if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
{
if (!RecheckDataDirLockFile())
{
ereport(LOG,
(errmsg("performing immediate shutdown because data directory lock file is invalid")));
kill(MyProcPid, SIGQUIT);
}
last_lockfile_recheck_time = now;
}
/*
 * Every 58 minutes, touch the Unix socket files and their lock files so
 * that tmp-cleaning tasks do not remove them (this assumes no cleaner
 * uses a cutoff shorter than one hour).
 */
if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
{
TouchSocketFiles();
TouchSocketLockFiles();
last_touch_time = now;
}
}
}
参考
《PostgreSQL数据库内核分析》第二章