Postmaster启动

第一阶段启动

Postgres可以通过以下三种模式启动:

  • Bootstrap模式:从无到有创建数据库的模式,postgres --boot -x1 -k -F
  • Single模式:单用户模式,只允许单个用户执行SQL命令。Bootstrap创建了最核心的元数据之后使用single模式创建其他数据,对应着postbootstrap阶段
  • Normal模式:多用户的正常模式
/src/backend/main/main.c
/* Any Postgres server process begins execution here. */
/*
 * Selects the startup mode from argv[1] and hands control to the matching
 * entry point. Every branch is annotated as not returning, so the trailing
 * abort() is only reached if one of them unexpectedly comes back.
 * (Earlier setup code is elided in this excerpt.)
 */
int main(int argc, char *argv[]) {
...
/* "--boot": bootstrap mode, handled by AuxiliaryProcessMain() */
if (argc > 1 && strcmp(argv[1], "--boot") == 0)
AuxiliaryProcessMain(argc, argv); /* does not return */
/* "--describe-config": handled by GucInfoMain() */
else if (argc > 1 && strcmp(argv[1], "--describe-config") == 0)
GucInfoMain(); /* does not return */
/*
 * "--single": single-user mode. PostgresMain() is called directly with no
 * database name (NULL) and a strdup'd copy of the user name obtained from
 * get_user_name_or_exit(progname).
 */
else if (argc > 1 && strcmp(argv[1], "--single") == 0)
PostgresMain(argc, argv, NULL, /* no dbname */strdup(get_user_name_or_exit(progname)));/* does not return */
/* Anything else (including no arguments at all): run the postmaster */
else
PostmasterMain(argc, argv); /* does not return */
abort(); /* should not get here */
}

第二阶段Normal模式

Normal模式下,main函数调用PostmasterMain(argc, argv)

/src/backend/postmaster/postmaster.c
/* Postmaster main entry point */
/*
 * The visible tail of this function (earlier initialization is elided in
 * this excerpt) launches the startup process, records that startup is in
 * progress, gives background workers a chance to start, and then enters
 * ServerLoop(). If ServerLoop() ever returns, the postmaster exits with a
 * status derived from its return value.
 */
void PostmasterMain(int argc, char *argv[]){
...
/* We're ready to rock and roll... */
/* Launch the startup process and remember its PID; 0 is treated as impossible here */
StartupPID = StartupDataBase();
Assert(StartupPID != 0);
/* Record that the startup process is running and move the postmaster state machine to PM_STARTUP */
StartupStatus = STARTUP_RUNNING;
pmState = PM_STARTUP;
/* Some workers may be scheduled to start now */
maybe_start_bgworkers();
/* Enter the main service loop; its return value decides the exit code below */
status = ServerLoop();
/* ServerLoop probably shouldn't ever return, but if it does, close down. */
/* Exit code is 0 when status == STATUS_OK, 1 otherwise */
ExitPostmaster(status != STATUS_OK);
abort(); /* not reached */
}

第三阶段服务循环

ServerLoop在循环中调用select函数,阻塞等待客户端的连接请求到达

/*
 * ServerLoop -- main idle loop of the postmaster.
 *
 * Repeatedly waits (via select(), or a plain sleep in PM_WAIT_DEAD_END
 * state) for activity on the listen sockets, and calls BackendStartup()
 * for every socket that has a pending connection.
 *
 * Returns STATUS_ERROR if select() fails with an errno other than EINTR or
 * EWOULDBLOCK; no other return path is visible in this excerpt (part of the
 * loop body is elided).
 */
static int ServerLoop(void) {
fd_set readmask;
int nSockets;
time_t last_lockfile_recheck_time, last_touch_time;
/* Both timestamps start at "now"; their later use is in the elided part of the loop */
last_lockfile_recheck_time = last_touch_time = time(NULL);
/* Build the master read set once; the return value is passed to select() as its nfds argument */
nSockets = initMasks(&readmask);
for (;;) {
fd_set rmask;
int selres;
time_t now;
/* Wait for a connection request to arrive.
* We block all signals except while sleeping. That makes it safe for
* signal handlers, which again block all signals while executing, to
* do nontrivial work.
* If we are in PM_WAIT_DEAD_END state, then we don't want to accept
* any new connections, so we don't call select(), and just sleep.
*/
/* Work on a fresh copy each iteration: select() overwrites the set it is given */
memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));

if (pmState == PM_WAIT_DEAD_END) {
/* Signals are unblocked only for the duration of the sleep */
PG_SETMASK(&UnBlockSig);
pg_usleep(100000L); /* 100 msec seems reasonable */
/* Pretend select() timed out so no sockets are examined below */
selres = 0;
PG_SETMASK(&BlockSig);
}else{
/* must set timeout each time; some OSes change it! */
struct timeval timeout;
/* Needs to run with blocked signals! */
DetermineSleepTime(&timeout);
/* Signals are unblocked only while waiting inside select() */
PG_SETMASK(&UnBlockSig);
selres = select(nSockets, &rmask, NULL, NULL, &timeout);
PG_SETMASK(&BlockSig);
}

/* Now check the select() result */
if (selres < 0){
/* EINTR and EWOULDBLOCK are tolerated and fall through; anything else is logged and ends the loop */
if (errno != EINTR && errno != EWOULDBLOCK){
ereport(LOG,(errcode_for_socket_access(),errmsg("select() failed in postmaster: %m")));
return STATUS_ERROR;
}
}

如果有客户端连接进来,fork子进程来处理该连接。

/* New connection pending on any of our sockets? If so, fork a child process to deal with it. */
if (selres > 0) {
int i;
for (i = 0; i < MAXLISTEN; i++) {
/* The first invalid entry marks the end of the in-use listen sockets */
if (ListenSocket[i] == PGINVALID_SOCKET)
break;
if (FD_ISSET(ListenSocket[i], &rmask)) {
Port *port;
/* ConnCreate() may return NULL, in which case this socket is skipped */
port = ConnCreate(ListenSocket[i]);
if (port) {
/* The return status of BackendStartup() is not inspected here */
BackendStartup(port);
/* We no longer need the open socket or port structure in this process */
StreamClose(port->sock);
ConnFree(port);
}
}
}
}
...
}
}

第四阶段启动postgres子进程

/*
 * BackendStartup -- launch a backend process for one client connection.
 *
 * port: the connection the new backend will serve.
 *
 * Returns STATUS_OK when the child was forked, STATUS_ERROR when the
 * bookkeeping entry could not be allocated, no cancel key could be
 * generated, or the fork itself failed.
 *
 * Note: if you change this code, also consider StartAutovacuumWorker.
 */
static int BackendStartup(Port *port) {
    Backend *backend;       /* postmaster-side record of the child */
    pid_t child_pid;

    /*
     * Allocate the bookkeeping entry before forking so that an allocation
     * failure can be reported without a half-started child.
     */
    backend = malloc(sizeof *backend);
    if (backend == NULL) {
        ereport(LOG,(errcode(ERRCODE_OUT_OF_MEMORY),errmsg("out of memory")));
        return STATUS_ERROR;
    }

    /*
     * Pick the cancel key for this backend. It is written to MyCancelKey so
     * the forked child inherits its own copy and can send it to the
     * frontend.
     */
    if (!RandomCancelKey(&MyCancelKey)) {
        free(backend);
        ereport(LOG,(errcode(ERRCODE_INTERNAL_ERROR),errmsg("could not generate random cancel key")));
        return STATUS_ERROR;
    }
    backend->cancel_key = MyCancelKey;

    /* Hand the current connection-acceptance state down through the port */
    port->canAcceptConnections = canAcceptConnections(BACKEND_TYPE_NORMAL);

    /* Any state other than CAC_OK / CAC_SUPERUSER makes this a dead-end child */
    backend->dead_end = !(port->canAcceptConnections == CAC_OK ||
                          port->canAcceptConnections == CAC_SUPERUSER);

    /* Only children that are not dead-end get a child slot number */
    if (backend->dead_end)
        backend->child_slot = 0;
    else
        backend->child_slot = MyPMChildSlot = AssignPostmasterChildSlot();

    /* No bgworker notifications have been requested yet */
    backend->bgworker_notify = false;

#ifdef EXEC_BACKEND
    child_pid = backend_forkexec(port);
#else /* !EXEC_BACKEND */
    child_pid = fork_process();
    if (child_pid == 0) {
        /* Child process: the postmaster's bookkeeping entry is of no use here */
        free(backend);

        /* Detangle from postmaster */
        InitPostmasterChild();

        /* Close the postmaster's sockets */
        ClosePostmasterPorts(false);

        /* Perform additional initialization and collect startup packet */
        BackendInitialize(port);

        /* And run the backend */
        BackendRun(port);
    }
#endif /* EXEC_BACKEND */

    if (child_pid < 0) {
        /* Parent process, fork failed: keep errno intact across the cleanup */
        int fork_errno = errno;

        if (!backend->dead_end)
            (void) ReleasePostmasterChildSlot(backend->child_slot);
        free(backend);
        errno = fork_errno;
        ereport(LOG, (errmsg("could not fork new process for connection: %m")));
        report_fork_failure_to_client(port, fork_errno);
        return STATUS_ERROR;
    }

    /* Parent process, fork succeeded */
    ereport(DEBUG2,(errmsg_internal("forked new backend, pid=%d socket=%d",(int) child_pid, (int) port->sock)));

    /* Nothing can fail any more, so register the child in the backend list */
    backend->pid = child_pid;
    backend->bkend_type = BACKEND_TYPE_NORMAL; /* Can change later to WALSND */
    dlist_push_head(&BackendList, &backend->elem);

#ifdef EXEC_BACKEND
    if (!backend->dead_end)
        ShmemBackendArrayAdd(backend);
#endif

    return STATUS_OK;
}

第五阶段启动postgres子进程执行PostgresMain

/*
 * BackendRun -- assemble the backend's argument vector and call
 * PostgresMain().
 *
 * port: the connection being served; its database_name and user_name are
 * passed on to PostgresMain().
 *
 * This function is declared void and is not expected to return.
 */
static void BackendRun(Port *port) {
    char **args;
    int capacity;
    int nargs = 0;
    int idx;

    /*
     * Size the vector that will be handed to PostgresMain: two fixed
     * entries supplied below, plus at most (strlen(ExtraOptions) + 1) / 2
     * entries that can be produced from ExtraOptions; see pg_split_opts().
     */
    capacity = 2 + (strlen(ExtraOptions) + 1) / 2;
    args = (char **) MemoryContextAlloc(TopMemoryContext, capacity * sizeof(char *));

    args[nargs++] = "postgres";

    /*
     * Append the backend switches given with -o on the postmaster's own
     * command line. We assume these are secure.
     */
    pg_split_opts(args, &nargs, ExtraOptions);

    args[nargs] = NULL;
    Assert(nargs < capacity);

    /* Debug: print arguments being passed to backend */
    ereport(DEBUG3,(errmsg_internal("%s child[%d]: starting with (",progname, (int) getpid())));
    idx = 0;
    while (idx < nargs) {
        ereport(DEBUG3,(errmsg_internal("\t%s", args[idx])));
        ++idx;
    }
    ereport(DEBUG3,(errmsg_internal(")")));

    /*
     * Leave PostmasterContext before running the backend. (It can't be
     * deleted just yet, though, because InitPostgres will need the HBA
     * data.)
     */
    MemoryContextSwitchTo(TopMemoryContext);

    PostgresMain(nargs, args, port->database_name, port->user_name);
}