/*
 * pgstat_init
 *
 * Set up the UDP socket used to exchange statistics messages.
 *
 * Resolves "localhost", then walks the returned addresses until one yields
 * a socket that can be created, bound, connected to itself, and can
 * actually carry a one-byte test message.  On success the socket is left in
 * pgStatSock (non-blocking) with its address in pgStatAddr, and the
 * descriptor is registered via ReserveExternalFD().
 *
 * On failure nothing is thrown: the problem is reported at LOG level,
 * pgStatSock is left as PGINVALID_SOCKET, and track_counts is forced off.
 */
void
pgstat_init(void)
{
	struct addrinfo hints;
	struct addrinfo *addr_list = NULL;
	struct addrinfo *cand;
	ACCEPT_TYPE_ARG3 addr_len;
	fd_set		read_fds;
	struct timeval timeout;
	char		probe;
	int			gai_rc;
	int			nready;
	int			attempts = 0;

#define TESTBYTEVAL ((char) 199)

	/*
	 * Compile-time cross-check of the maximum payload size calculations for
	 * our UDP messages.  Overrunning PGSTAT_MAX_MSG_SIZE would only cause a
	 * silent performance loss from fragmentation, which is exactly why it is
	 * worth catching here rather than at run time.
	 */
	StaticAssertStmt(sizeof(PgStat_Msg) <= PGSTAT_MAX_MSG_SIZE,
					 "maximum stats message size exceeds PGSTAT_MAX_MSG_SIZE");

	/*
	 * Ask for datagram addresses of any family for "localhost".
	 */
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_DGRAM;
	hints.ai_protocol = 0;
	hints.ai_flags = AI_PASSIVE;
	hints.ai_addrlen = 0;
	hints.ai_addr = NULL;
	hints.ai_canonname = NULL;
	hints.ai_next = NULL;

	gai_rc = pg_getaddrinfo_all("localhost", NULL, &hints, &addr_list);
	if (gai_rc != 0 || addr_list == NULL)
	{
		ereport(LOG,
				(errmsg("could not resolve \"localhost\": %s",
						gai_strerror(gai_rc))));
		goto startup_failed;
	}

	/*
	 * On some platforms the resolver may hand back several addresses of
	 * which only one really works (for example both IPv6 and IPv4 when the
	 * kernel rejects IPv6), and the failure may not show up until bind() or
	 * even connect().  So try each candidate in turn until one passes every
	 * step.  Rejected candidates produce LOG messages only, never an error.
	 *
	 * Every failure after the socket exists jumps to discard_socket at the
	 * bottom of the loop body, which closes it and moves on to the next
	 * candidate.  The message is always emitted before that jump so that %m
	 * still sees the errno of the failing call.
	 */
	for (cand = addr_list; cand != NULL; cand = cand->ai_next)
	{
#ifdef HAVE_UNIX_SOCKETS
		/* Skip AF_UNIX entries, should any be returned. */
		if (cand->ai_family == AF_UNIX)
			continue;
#endif

		attempts++;
		if (attempts > 1)
			ereport(LOG,
					(errmsg("trying another address for the statistics collector")));

		/* Create the socket; nothing to clean up if this fails. */
		pgStatSock = socket(cand->ai_family, SOCK_DGRAM, 0);
		if (pgStatSock == PGINVALID_SOCKET)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not create socket for statistics collector: %m")));
			continue;
		}

		/*
		 * Bind to a kernel-assigned port on localhost, then read back the
		 * address that was actually assigned.
		 */
		if (bind(pgStatSock, cand->ai_addr, cand->ai_addrlen) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not bind socket for statistics collector: %m")));
			goto discard_socket;
		}

		addr_len = sizeof(pgStatAddr);
		if (getsockname(pgStatSock, (struct sockaddr *) &pgStatAddr, &addr_len) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not get address of socket for statistics collector: %m")));
			goto discard_socket;
		}

		/*
		 * Connect the socket to its own address.  That avoids respecifying
		 * the destination on every send, and gives a kernel-level guarantee
		 * that only packets from this same address are received.
		 */
		if (connect(pgStatSock, (struct sockaddr *) &pgStatAddr, addr_len) < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not connect socket for statistics collector: %m")));
			goto discard_socket;
		}

		/*
		 * Push a single test byte through the socket.  This catches setups
		 * where the socket can be created but will not pass data, e.g.
		 * because of kernel packet filtering rules.
		 */
		probe = TESTBYTEVAL;
		while (send(pgStatSock, &probe, 1, 0) != 1)
		{
			if (errno == EINTR)
				continue;		/* interrupted: simply send again */

			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not send test message on socket for statistics collector: %m")));
			goto discard_socket;
		}

		/*
		 * The byte may take a moment to become readable.  Wait for it, with
		 * an arbitrary limit of half a second per select() call; the wait is
		 * restarted from scratch whenever select() is interrupted.
		 */
		do
		{
			FD_ZERO(&read_fds);
			FD_SET(pgStatSock, &read_fds);
			timeout.tv_sec = 0;
			timeout.tv_usec = 500000;
			nready = select(pgStatSock + 1, &read_fds, NULL, NULL, &timeout);
		} while (nready < 0 && errno == EINTR);

		if (nready < 0)
		{
			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("select() failed in statistics collector: %m")));
			goto discard_socket;
		}

		if (nready == 0 || !FD_ISSET(pgStatSock, &read_fds))
		{
			/*
			 * This is the failure considered most likely, so it gets its
			 * own specific message.  errno is not meaningful here and is
			 * deliberately not reported.
			 */
			ereport(LOG,
					(errcode(ERRCODE_CONNECTION_FAILURE),
					 errmsg("test message did not get through on socket for statistics collector")));
			goto discard_socket;
		}

		/* Perturb the buffer so a successful recv() must overwrite it. */
		probe++;
		while (recv(pgStatSock, &probe, 1, 0) != 1)
		{
			if (errno == EINTR)
				continue;		/* interrupted: simply receive again */

			ereport(LOG,
					(errcode_for_socket_access(),
					 errmsg("could not receive test message on socket for statistics collector: %m")));
			goto discard_socket;
		}

		/* Pure paranoia: the byte read back must be the byte sent. */
		if (probe != TESTBYTEVAL)
		{
			ereport(LOG,
					(errcode(ERRCODE_INTERNAL_ERROR),
					 errmsg("incorrect test message transmission on socket for statistics collector")));
			goto discard_socket;
		}

		/* Every check passed: this socket works, stop searching. */
		break;

discard_socket:
		closesocket(pgStatSock);
		pgStatSock = PGINVALID_SOCKET;
	}

	/* Did any candidate address produce a working socket? */
	if (cand == NULL || pgStatSock == PGINVALID_SOCKET)
		goto startup_failed;

	/*
	 * Switch the socket to non-blocking I/O.  If the collector falls behind,
	 * statistics messages are then discarded instead of making backends
	 * block while sending to it.
	 */
	if (!pg_set_noblock(pgStatSock))
	{
		ereport(LOG,
				(errcode_for_socket_access(),
				 errmsg("could not set statistics collector socket to nonblocking mode: %m")));
		goto startup_failed;
	}

	/*
	 * Try to make the receive buffer at least PGSTAT_MIN_RCVBUF bytes so it
	 * does not overflow easily.  Using UDP means accepting data loss under
	 * heavy load, but not merely because of a very small default buffer
	 * (such as 8KB on older Windows versions).  Failures here are logged
	 * and otherwise ignored.
	 */
	{
		int			cur_rcvbuf;
		int			want_rcvbuf = PGSTAT_MIN_RCVBUF;
		ACCEPT_TYPE_ARG3 optlen = sizeof(cur_rcvbuf);

		if (getsockopt(pgStatSock, SOL_SOCKET, SO_RCVBUF,
					   (char *) &cur_rcvbuf, &optlen) < 0)
		{
			ereport(LOG,
					(errmsg("%s(%s) failed: %m", "getsockopt", "SO_RCVBUF")));
			/* Current size unknown: treat it as too small and try to set it. */
			cur_rcvbuf = 0;
		}

		if (cur_rcvbuf < want_rcvbuf &&
			setsockopt(pgStatSock, SOL_SOCKET, SO_RCVBUF,
					   (char *) &want_rcvbuf, sizeof(want_rcvbuf)) < 0)
			ereport(LOG,
					(errmsg("%s(%s) failed: %m", "setsockopt", "SO_RCVBUF")));
	}

	pg_freeaddrinfo_all(hints.ai_family, addr_list);

	/* The socket is long-lived from here on, so let fd.c account for it. */
	ReserveExternalFD();

	return;

startup_failed:
	ereport(LOG,
			(errmsg("disabling statistics collector for lack of working socket")));

	if (addr_list != NULL)
		pg_freeaddrinfo_all(hints.ai_family, addr_list);

	if (pgStatSock != PGINVALID_SOCKET)
		closesocket(pgStatSock);
	pgStatSock = PGINVALID_SOCKET;

	/*
	 * Turn track_counts off, both to suppress useless activity and as a
	 * debugging hint (seeing it off is a clue that we failed here).
	 * PGC_S_OVERRIDE is used because there is no point in trying to turn it
	 * back on from postgresql.conf without a restart.
	 */
	SetConfigOption("track_counts", "off", PGC_INTERNAL, PGC_S_OVERRIDE);
}