FinishPreparedTransaction --> RecordTransactionCommitPrepared --> XactLogCommitRecord
CommitTransaction --> RecordTransactionCommit --> XactLogCommitRecord


/*
 * XactLogCommitRecord
 *
 * Build and insert the WAL commit record for a plain or two-phase
 * transaction commit.  A two-phase (COMMIT PREPARED) record is emitted when
 * twophase_xid is valid, a plain commit record otherwise.
 *
 * Must be called inside a critical section (asserted below).
 *
 * Parameters:
 *   commit_time   - commit timestamp stored in the fixed part of the record
 *   nsubxacts     - number of entries in subxacts
 *   subxacts      - array of nsubxacts subtransaction XIDs
 *   nrels         - number of entries in rels
 *   rels          - array of nrels RelFileNodes
 *   nmsgs         - number of entries in msgs
 *   msgs          - array of nmsgs shared invalidation messages
 *   relcacheInval - sets XACT_COMPLETION_UPDATE_RELCACHE_FILE when true
 *   forceSync     - not read by this function body; see the NOTE below
 *   xactflags     - transaction flag bits; only the access-exclusive-lock
 *                   bit is examined here
 *   twophase_xid  - XID of the prepared transaction, or invalid for a plain
 *                   commit
 *   twophase_gid  - GID of the prepared transaction; must be non-NULL when
 *                   twophase_xid is valid
 *
 * Returns the XLogRecPtr handed back by XLogInsert().
 */
XLogRecPtr XactLogCommitRecord(TimestampTz commit_time,
                               int nsubxacts, TransactionId *subxacts,
                               int nrels, RelFileNode *rels,
                               int nmsgs, SharedInvalidationMessage *msgs,
                               bool relcacheInval, bool forceSync,
                               int xactflags, TransactionId twophase_xid,
                               const char *twophase_gid) {
    xl_xact_commit xlrec;
    xl_xact_xinfo xl_xinfo;
    xl_xact_dbinfo xl_dbinfo;
    xl_xact_subxacts xl_subxacts;
    xl_xact_relfilenodes xl_relfilenodes;
    xl_xact_invals xl_invals;
    xl_xact_twophase xl_twophase;
    xl_xact_origin xl_origin;
    uint8 info;

    Assert(CritSectionCount > 0);

    xl_xinfo.xinfo = 0;

    /*
     * Decide between a plain and 2pc commit.  Both branches are required:
     * info is OR-ed into below and passed to XLogInsert(), so it must never
     * be left uninitialized.
     */
    if (!TransactionIdIsValid(twophase_xid))
        info = XLOG_XACT_COMMIT;
    else
        info = XLOG_XACT_COMMIT_PREPARED;

    /* First figure out and collect all the information needed */
    xlrec.xact_time = commit_time;

    if (relcacheInval)
        xl_xinfo.xinfo |= XACT_COMPLETION_UPDATE_RELCACHE_FILE;

    /*
     * NOTE(review): this tests forceSyncCommit, not the forceSync parameter.
     * Presumably forceSyncCommit is a file-level flag, as in upstream
     * xact.c -- confirm before changing it.
     */
    if (forceSyncCommit)
        xl_xinfo.xinfo |= XACT_COMPLETION_FORCE_SYNC_COMMIT;

    if ((xactflags & XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK))
        xl_xinfo.xinfo |= XACT_XINFO_HAS_AE_LOCKS;

    /*
     * Check if the caller would like to ask standbys for immediate feedback
     * once this commit is applied.
     */
    if (synchronous_commit >= SYNCHRONOUS_COMMIT_REMOTE_APPLY)
        xl_xinfo.xinfo |= XACT_COMPLETION_APPLY_FEEDBACK;

    /*
     * Relcache invalidations require information about the current database
     * and so does logical decoding.
     */
    if (nmsgs > 0 || XLogLogicalInfoActive()) {
        xl_xinfo.xinfo |= XACT_XINFO_HAS_DBINFO;
        xl_dbinfo.dbId = MyDatabaseId;
        xl_dbinfo.tsId = MyDatabaseTableSpace;
    }

    if (nsubxacts > 0) {
        xl_xinfo.xinfo |= XACT_XINFO_HAS_SUBXACTS;
        xl_subxacts.nsubxacts = nsubxacts;
    }

    if (nrels > 0) {
        /* Without this flag the relfilenode list is never registered below. */
        xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES;
        xl_relfilenodes.nrels = nrels;
    }

    if (nmsgs > 0) {
        xl_xinfo.xinfo |= XACT_XINFO_HAS_INVALS;
        xl_invals.nmsgs = nmsgs;
    }

    if (TransactionIdIsValid(twophase_xid)) {
        xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE;
        xl_twophase.xid = twophase_xid;
        Assert(twophase_gid != NULL);

        /* The GID is only written when logical WAL information is active. */
        if (XLogLogicalInfoActive())
            xl_xinfo.xinfo |= XACT_XINFO_HAS_GID;
    }

    /* dump transaction origin information */
    if (replorigin_session_origin != InvalidRepOriginId) {
        xl_xinfo.xinfo |= XACT_XINFO_HAS_ORIGIN;
        xl_origin.origin_lsn = replorigin_session_origin_lsn;
        xl_origin.origin_timestamp = replorigin_session_origin_timestamp;
    }

    if (xl_xinfo.xinfo != 0)
        info |= XLOG_XACT_HAS_INFO;

    /*
     * Then include all the collected data into the commit record.  The
     * pieces are registered in the order in which they appear in the record.
     * XLogInsert() raises an error unless XLogBeginInsert() was called first.
     */
    XLogBeginInsert();

    XLogRegisterData((char *) (&xlrec), sizeof(xl_xact_commit));

    if (xl_xinfo.xinfo != 0)
        XLogRegisterData((char *) (&xl_xinfo.xinfo), sizeof(xl_xinfo.xinfo));

    if (xl_xinfo.xinfo & XACT_XINFO_HAS_DBINFO)
        XLogRegisterData((char *) (&xl_dbinfo), sizeof(xl_dbinfo));

    if (xl_xinfo.xinfo & XACT_XINFO_HAS_SUBXACTS) {
        XLogRegisterData((char *) (&xl_subxacts), MinSizeOfXactSubxacts);
        XLogRegisterData((char *) subxacts, nsubxacts * sizeof(TransactionId));
    }

    if (xl_xinfo.xinfo & XACT_XINFO_HAS_RELFILENODES) {
        XLogRegisterData((char *) (&xl_relfilenodes), MinSizeOfXactRelfilenodes);
        XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
    }

    if (xl_xinfo.xinfo & XACT_XINFO_HAS_INVALS) {
        XLogRegisterData((char *) (&xl_invals), MinSizeOfXactInvals);
        XLogRegisterData((char *) msgs, nmsgs * sizeof(SharedInvalidationMessage));
    }

    if (xl_xinfo.xinfo & XACT_XINFO_HAS_TWOPHASE) {
        XLogRegisterData((char *) (&xl_twophase), sizeof(xl_xact_twophase));

        /* The GID is stored including its terminating NUL byte. */
        if (xl_xinfo.xinfo & XACT_XINFO_HAS_GID)
            XLogRegisterData(unconstify(char *, twophase_gid), strlen(twophase_gid) + 1);
    }

    if (xl_xinfo.xinfo & XACT_XINFO_HAS_ORIGIN)
        XLogRegisterData((char *) (&xl_origin), sizeof(xl_xact_origin));

    /* we allow filtering by xacts */
    XLogSetRecordFlags(XLOG_INCLUDE_ORIGIN);

    return XLogInsert(RM_XACT_ID, info);
}

/*
 * Fixed-size head of a transaction commit record.
 *
 * Only the commit timestamp is always present.  The optional parts listed
 * below are appended after it, in this order, each one only when the
 * corresponding flag is set:
 *
 *   xl_xact_xinfo         - XLOG_XACT_HAS_INFO
 *   xl_xact_dbinfo        - XACT_XINFO_HAS_DBINFO
 *   xl_xact_subxacts      - XACT_XINFO_HAS_SUBXACTS
 *   xl_xact_relfilenodes  - XACT_XINFO_HAS_RELFILENODES
 *   xl_xact_invals        - XACT_XINFO_HAS_INVALS
 *   xl_xact_twophase      - XACT_XINFO_HAS_TWOPHASE
 *   twophase_gid          - XACT_XINFO_HAS_GID (null-terminated string)
 *   xl_xact_origin        - XACT_XINFO_HAS_ORIGIN (stored unaligned!)
 */
typedef struct xl_xact_commit {
    TimestampTz xact_time;      /* time of commit */
} xl_xact_commit;
/*
 * Flag word that follows xl_xact_commit when XLOG_XACT_HAS_INFO is set.
 *
 * Only one byte of flag space is needed at the moment, but four bytes are
 * used so that the parts following it do not have to care about alignment.
 * Commit records can be large, so copying large portions of them just to
 * realign the data would be unattractive.
 */
typedef struct xl_xact_xinfo {
    uint32 xinfo;
} xl_xact_xinfo;
/*
 * Database identification stored in a commit record when
 * XACT_XINFO_HAS_DBINFO is set.
 */
typedef struct xl_xact_dbinfo {
    Oid dbId;                   /* filled from MyDatabaseId */
    Oid tsId;                   /* filled from MyDatabaseTableSpace */
} xl_xact_dbinfo;



/* Array of XLogRecData structs that holds the registered data. */
static XLogRecData *rdatas;
static int num_rdatas;          /* number of entries currently used */
static int max_rdatas;          /* number of entries allocated */
/*
 * One link in a chain of data pieces registered for a WAL record.  Each link
 * points at a span of rmgr data and at the following link.
 */
typedef struct XLogRecData {
    struct XLogRecData *next;   /* following link in the chain, or NULL */
    char *data;                 /* start of the rmgr data to include */
    uint32 len;                 /* length of the rmgr data to include */
} XLogRecData;


/*
 * Bookkeeping for one buffer (block) registered for the WAL record that is
 * currently being constructed.
 */
typedef struct {
    bool in_use;                /* is this slot in use? */
    uint8 flags;                /* REGBUF_* flags */

    /* rnode, forkno and block together identify the relation and block */
    RelFileNode rnode;
    ForkNumber forkno;
    BlockNumber block;

    Page page;                  /* page content */

    uint32 rdata_len;           /* total length of data in the rdata chain */
    XLogRecData *rdata_head;    /* head of the chain of data registered
                                 * with this block */
    XLogRecData *rdata_tail;    /* last entry in the chain, or &rdata_head
                                 * if the chain is empty */

    /*
     * Temporary rdatas used to hold references to backup block data in
     * XLogRecordAssemble().
     */
    XLogRecData bkp_rdatas[2];

    /* Buffer to store a compressed version of the backup block image. */
    char compressed_page[PGLZ_MAX_BLCKSZ];
} registered_buffer;

hdr_scratch是组装日志记录时使用的临时内存,长度为HEADER_SCRATCH_SIZE,用于保存日志记录的Header部分:
static char *hdr_scratch = NULL;

/* Memory context that owns the WAL record construction working areas. */
static MemoryContext xloginsert_cxt;

/*
 * InitXLogInsert
 *
 * Allocate the working buffers needed for WAL record construction.
 *
 * Each working area is created only if it does not exist yet, so calling
 * this function again does not allocate anything twice.  All areas are
 * allocated in xloginsert_cxt, which is created as a child of
 * TopMemoryContext.
 */
void InitXLogInsert(void) {
    /* Initialize the working areas */
    if (xloginsert_cxt == NULL) {
        xloginsert_cxt = AllocSetContextCreate(TopMemoryContext,
                                               "WAL record construction",
                                               ALLOCSET_DEFAULT_SIZES);
    }

    /* Zero-filled array of block slots for ids 0..XLR_NORMAL_MAX_BLOCK_ID. */
    if (registered_buffers == NULL) {
        registered_buffers = (registered_buffer *)
            MemoryContextAllocZero(xloginsert_cxt,
                                   sizeof(registered_buffer) * (XLR_NORMAL_MAX_BLOCK_ID + 1));
        max_registered_buffers = XLR_NORMAL_MAX_BLOCK_ID + 1;
    }

    /* Pool of XLogRecData entries handed out by the XLogRegister* calls. */
    if (rdatas == NULL) {
        rdatas = MemoryContextAlloc(xloginsert_cxt,
                                    sizeof(XLogRecData) * XLR_NORMAL_RDATAS);
        max_rdatas = XLR_NORMAL_RDATAS;
    }

    /* Allocate a buffer to hold the header information for a WAL record. */
    if (hdr_scratch == NULL)
        hdr_scratch = MemoryContextAllocZero(xloginsert_cxt, HEADER_SCRATCH_SIZE);
}


事务日志不直接写入WAL Buffer,而是先组成XLogRecData链表,然后再将这个链表转化为一条事务日志。





void XLogRegisterData(char *data, int len) {
XLogRecData *rdata;
if (num_rdatas >= max_rdatas) elog(ERROR, "too much WAL data");
rdata = &rdatas[num_rdatas++];
rdata->data = data;
rdata->len = len;
/* we use the mainrdata_last pointer to track the end of the chain, so no need to clear 'next' here. */
mainrdata_last->next = rdata;
mainrdata_last = rdata;
mainrdata_len += len;




  • XLOG_INCLUDE_ORIGIN replication origin需要包含在该record中
  • XLOG_MARK_UNIMPORTANT 该记录对于持久化不重要,允许避免触发WAL archiving和其他后台工作
/* flags for the in-progress insertion */
static uint8 curinsert_flags = 0;

/*
 * XLogSetRecordFlags
 *
 * Set insert status flags for the upcoming WAL record.  The given value
 * replaces whatever flags were set before.
 *
 * The flags that can be used here are:
 * - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be
 *   included in the record.
 * - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for
 *   durability, which allows to avoid triggering WAL archiving and other
 *   background activity.
 */
void XLogSetRecordFlags(uint8 flags) {
    curinsert_flags = flags;
}



/*
 * Pointer to a location in the XLOG.
 *
 * These pointers are 64 bits wide, because we don't want them ever to
 * overflow.
 */
typedef uint64 XLogRecPtr;
/*
 * XLogInsert
 *
 * Insert an XLOG record having the specified RMID and info bytes, with the
 * body of the record being the data and buffer references registered earlier
 * with XLogRegister* calls.
 *
 *   rmid - resource manager the record belongs to
 *   info - info byte for the record; only the rmgr bits,
 *          XLR_SPECIAL_REL_UPDATE and XLR_CHECK_CONSISTENCY may be set
 *
 * Returns XLOG pointer to end of record (beginning of next record).
 * This can be used as LSN for data pages affected by the logged action.
 * (LSN is the XLOG point up to which the XLOG must be flushed to disk
 * before the data page can be written out.  This implements the basic
 * WAL rule "write the log before the data".)
 *
 * Raises ERROR if XLogBeginInsert() was not called beforehand, and PANIC if
 * info carries a reserved bit.  The registration state is reset before
 * returning, so each record needs its own XLogBeginInsert() call.
 */
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info) {
    XLogRecPtr EndPos;

    /* XLogBeginInsert() must have been called. */
    if (!begininsert_called)
        elog(ERROR, "XLogBeginInsert was not called");

    /*
     * The caller can set rmgr bits, XLR_SPECIAL_REL_UPDATE and
     * XLR_CHECK_CONSISTENCY; the rest are reserved for use by me.
     */
    if ((info & ~(XLR_RMGR_INFO_MASK |
                  XLR_SPECIAL_REL_UPDATE |
                  XLR_CHECK_CONSISTENCY)) != 0)
        elog(PANIC, "invalid xlog info mask %02X", info);

    /*
     * In bootstrap mode, we don't actually log anything but XLOG resources;
     * return a phony record pointer.
     * (SizeOfXLogLongPHD is MAXALIGN(sizeof(XLogLongPageHeaderData)).)
     */
    if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID) {
        XLogResetInsertion();
        EndPos = SizeOfXLogLongPHD; /* start of 1st chkpt record */
        return EndPos;
    }

    /* Assemble and insert; repeat while the insertion is rejected. */
    do {
        XLogRecPtr RedoRecPtr;
        bool doPageWrites;
        XLogRecPtr fpw_lsn;
        XLogRecData *rdt;

        /*
         * Get values needed to decide whether to do full-page writes. Since
         * we don't yet have an insertion lock, these could change under us,
         * but XLogInsertRecord will recheck them once it has a lock.
         */
        GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);

        rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, &fpw_lsn);

        EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags);
    } while (EndPos == InvalidXLogRecPtr);

    /* Clear the registrations so the next record starts from a clean slate. */
    XLogResetInsertion();

    return EndPos;
}


/*
 * XLogResetInsertion
 *
 * Reset WAL record construction buffers.
 *
 * Marks the registered buffer slots below max_registered_block_id as unused,
 * empties the rdatas pool and the main data chain, clears the record flags
 * and forgets that XLogBeginInsert() was called.
 */
void XLogResetInsertion(void) {
    int i;

    for (i = 0; i < max_registered_block_id; i++)
        registered_buffers[i].in_use = false;

    num_rdatas = 0;
    max_registered_block_id = 0;
    mainrdata_len = 0;
    /* An empty chain is represented by the tail pointing at the head. */
    mainrdata_last = (XLogRecData *) &mainrdata_head;
    curinsert_flags = 0;
    begininsert_called = false;
}

XLogInsert首先调用GetFullPageWriteInfo函数获取当前是否开启了full-page write。日志的注册阶段只是将WAL日志所需的信息保存在内存中(由mainrdata_last指向链尾的XLogRecData链表),真正的组装由XLogRecordAssemble函数完成:它主要处理日志记录中与页面Block相关的部分,即对registered_buffers数组中的数据进行二次加工,例如判断是否需要做Full Page Write、是否需要压缩页面等。

/* Excerpt: the assemble-and-insert retry loop, repeated from XLogInsert().
 * rmid, info and EndPos are the enclosing function's parameters and local.
 * The record is re-assembled and re-submitted for as long as
 * XLogInsertRecord() hands back InvalidXLogRecPtr. */
do {
XLogRecPtr RedoRecPtr;
bool doPageWrites;
XLogRecPtr fpw_lsn;
XLogRecData *rdt;
/* Get values needed to decide whether to do full-page writes. Since
* we don't yet have an insertion lock, these could change under us,
* but XLogInsertRecord will recheck them once it has a lock. */
GetFullPageWriteInfo(&RedoRecPtr, &doPageWrites);
/* Turn the registered data and buffers into the rdata chain to insert. */
rdt = XLogRecordAssemble(rmid, info, RedoRecPtr, doPageWrites, &fpw_lsn);
/* NOTE(review): an invalid result presumably means the full-page-write
 * inputs changed before the insertion lock was taken -- confirm against
 * XLogInsertRecord(). */
EndPos = XLogInsertRecord(rdt, fpw_lsn, curinsert_flags);
} while (EndPos == InvalidXLogRecPtr);