PostgreSQL xlog (WAL) generation and cleanup logic

0 Introduction

Part 2 analyzes the XLOG generation and cleanup logic. If you just need to deal with a sudden pile-up of XLOG files, go straight to Part 3.

1 WAL archiving

# Maximum number of log file segments between automatic WAL checkpoints
checkpoint_segments = 
# Maximum time between automatic WAL checkpoints
checkpoint_timeout = 
# Relieves I/O pressure (spreads checkpoint writes over the checkpoint interval)
checkpoint_completion_target = 
# Minimum number of log file segments to retain, so that more segments are kept for standby servers
wal_keep_segments = 
# Completed WAL segments are sent to archive storage via archive_command
archive_mode = 
# Force a switch to a new WAL segment file after this timeout
archive_timeout = 


max_wal_size = 
min_wal_size =

1.1 Archiving disabled

The number of WAL segment files is governed by the following parameters and is normally no more than

(2 + checkpoint_completion_target) * checkpoint_segments + 1

or

checkpoint_segments + wal_keep_segments + 1 files, whichever is larger.

When an old segment file is no longer needed, it is renamed and recycled for reuse. If a short-term peak in log output results in more than

3 * checkpoint_segments + 1 files, the surplus files are deleted instead of being recycled.

1.2 Archiving enabled

Number of files: a segment file is deleted only after it has been archived successfully.

In the abstract, a running PG instance produces an indefinitely long sequence of WAL records. This sequence is divided into segment files of 16 MB each, and each segment file is given a numeric name that reflects its position in the WAL sequence. When WAL archiving is not used, the system normally creates just a few segment files and then recycles them by renaming segment files that are no longer needed to higher segment numbers.

The archive command must return zero if and only if it succeeds. On receiving a zero result, PostgreSQL assumes that the WAL segment file has been archived successfully, and the segment file will be deleted (or recycled) later. A non-zero result tells PostgreSQL that the file has not been archived; it retries periodically until the command succeeds.

2 PG source code analysis

2.1 Delete logic

Callers that trigger deletion

RemoveOldXlogFiles
> CreateCheckPoint
> CreateRestartPoint

wal_keep_segments check (this function is called to adjust _logSegNo, which is then passed to RemoveOldXlogFiles)

/*
 * KeepLogSeg
 *
 * Lower *logSegNo, if necessary, so that it does not exceed the oldest
 * segment that must be retained.  That segment is derived from recptr,
 * moved back by wal_keep_segments (never below segment 1), and then moved
 * back further if the minimum LSN reported for replication slots lies in
 * an even older segment.  *logSegNo is never raised.
 */
static void
KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
{
	XLogSegNo	oldestKept;
	XLogRecPtr	slotMinLSN;

	XLByteToSeg(recptr, oldestKept);
	slotMinLSN = XLogGetReplicationSlotMinimumLSN();

	/* wal_keep_segments limit: step back, clamping at segment 1 */
	if (wal_keep_segments > 0)
		oldestKept = (oldestKept <= wal_keep_segments)
			? 1
			: oldestKept - wal_keep_segments;

	/* replication slots may require keeping even older segments */
	if (max_replication_slots > 0 && slotMinLSN != InvalidXLogRecPtr)
	{
		XLogSegNo	slotSeg;

		XLByteToSeg(slotMinLSN, slotSeg);

		if (slotSeg <= 0)
			oldestKept = 1;
		else if (slotSeg < oldestKept)
			oldestKept = slotSeg;
	}

	/* never let the caller remove segments newer than the computed one */
	if (*logSegNo > oldestKept)
		*logSegNo = oldestKept;
}

Delete logic

/*
 * RemoveOldXlogFiles (excerpt)
 *
 * Walk the XLOG directory and, for every segment file whose name (ignoring
 * the timeline part) sorts at or before the cutoff name "lastoff", recycle
 * or remove it once XLogArchiveCheckDone() reports that the archiver no
 * longer needs it.
 *
 * The "..." lines mark parts of the function omitted from this excerpt.
 */
static void
RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr endptr)
{
    ...
    ...
	while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
	{
		/* Ignore files that are not XLOG segments */
		if (strlen(xlde->d_name) != 24 ||
			strspn(xlde->d_name, "0123456789ABCDEF") != 24)
			continue;

		/*
		 * We ignore the timeline part of the XLOG segment identifiers in
		 * deciding whether a segment is still needed.  This ensures that we
		 * won't prematurely remove a segment from a parent timeline. We could
		 * probably be a little more proactive about removing segments of
		 * non-parent timelines, but that would be a whole lot more
		 * complicated.
		 *
		 * We use the alphanumeric sorting property of the filenames to decide
		 * which ones are earlier than the lastoff segment.
		 */
		if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
		{
			/*
			 * XLogArchiveCheckDone() results:
			 *   - archiving is disabled              -> true
			 *   - a .done file exists                -> true
			 *   - a .ready file exists               -> false
			 *   - on recheck, a .done file exists    -> true
			 *   - otherwise the .ready file is
			 *     recreated                          -> false
			 */
			if (XLogArchiveCheckDone(xlde->d_name))
			{
				/* Update the last removed location in shared memory first */
				UpdateLastRemovedPtr(xlde->d_name);

				/*
				 * Recycle the segment or delete it outright, and clean up
				 * its .done and .ready files.
				 */
				RemoveXlogFile(xlde->d_name, endptr);
			}
		}
	}
    ...
    ...
}

2.2 Archive logic

/*
 * pgarch_ArchiverCopyLoop
 *
 * Archive every xlog file that pgarch_readyXlog() hands back.  Each file is
 * retried until pgarch_archiveXlog() succeeds or NUM_ARCHIVE_RETRIES
 * failures have accumulated, pausing via pg_usleep() between attempts.
 *
 * Returns early when SIGTERM has been received, the postmaster is gone,
 * archive_command is not set, or a file has failed too many times.
 */
static void
pgarch_ArchiverCopyLoop(void)
{
	char		xlog[MAX_XFN_CHARS + 1];

	/* Fetch the name of the oldest xlog file that has not been archived yet */
	while (pgarch_readyXlog(xlog))
	{
		int			failures = 0;

		for (;;)
		{
			/*
			 * Do not initiate any more archive commands after receiving
			 * SIGTERM, nor after the postmaster has died unexpectedly. The
			 * first condition is to try to keep from having init SIGKILL the
			 * command, and the second is to avoid conflicts with another
			 * archiver spawned by a newer postmaster.
			 */
			if (got_SIGTERM || !PostmasterIsAlive())
				return;

			/*
			 * Check for config update.  This is so that we'll adopt a new
			 * setting for archive_command as soon as possible, even if there
			 * is a backlog of files to be archived.
			 */
			if (got_SIGHUP)
			{
				got_SIGHUP = false;
				ProcessConfigFile(PGC_SIGHUP);
			}

			/*
			 * Do not go any further if archive_command is not set.  In our
			 * deployment archive_command is left unset, so this is the
			 * branch that is taken.
			 */
			if (!XLogArchiveCommandSet())
			{
				/*
				 * Report at DEBUG1 instead of WARNING, because we
				 * deliberately leave archive_command empty and let external
				 * tools manage archiving.
				 */
				ereport(DEBUG1,
						(errmsg("archive_mode enabled, yet archive_command is not set")));
				return;
			}

			/* Run the archive command */
			if (pgarch_archiveXlog(xlog))
			{
				/* Success: rename the .ready file to .done */
				pgarch_archiveDone(xlog);

				/*
				 * Tell the collector about the WAL file that we successfully
				 * archived
				 */
				pgstat_send_archiver(xlog, false);

				break;			/* out of inner retry loop */
			}
			else
			{
				/*
				 * Tell the collector about the WAL file that we failed to
				 * archive
				 */
				pgstat_send_archiver(xlog, true);

				if (++failures >= NUM_ARCHIVE_RETRIES)
				{
					ereport(WARNING,
							(errmsg("archiving transaction log file \"%s\" failed too many times, will try again later",
									xlog)));
					return;		/* give up archiving for now */
				}
				pg_usleep(1000000L);	/* wait a bit before retrying */
			}
		}
	}
}

2.3 .ready file generation logic

static void
XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
{
...
            if (finishing_seg)
			{
				issue_xlog_fsync(openLogFile, openLogSegNo);

				/* signal that we need to wakeup walsenders later */
				WalSndWakeupRequest();

				LogwrtResult.Flush = LogwrtResult.Write;		/* end of page */

                /* archiving is enabled && wal_level >= archive */
				if (XLogArchivingActive())
                    /* create the .ready file */
					XLogArchiveNotifySeg(openLogSegNo);

				XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
...
}

2.4 summary

  • .ready files are always generated as long as archive_mode = on and wal_level >= archive (they are created from within XLogWrite)
    • Because archive_command is left empty, consuming the .ready files is entirely up to an external program
  • .done files are processed by PG itself; two events trigger this processing: checkpoints and restartpoints
    • How many .done files are processed is controlled by wal_keep_segments and replication slots (the KeepLogSeg function)

3 Reasons why WAL segments accumulate (TL;DR)

  • Note: never delete xlog files manually, under any circumstances

  • Note: the log written by a checkpoint does not produce a .ready file immediately; the .ready file is generated together with the next xlog segment

3.1 ReplicationSlot

Streaming replication slots are in use

-- Streaming replication slots.
-- If restart_lsn is a very large number of bytes behind the current XLOG
-- position, check whether the slot's subscriber is able to receive XLOG
-- and whether the subscriber itself is healthy. If a slot's data is not
-- consumed for a long time, the pg_xlog directory may fill up.
SELECT
    pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn),
    *
FROM pg_replication_slots;

Drop the slot:

select pg_drop_replication_slot('xxx');

After the slot is dropped, PG cleans up the xlog files at the next checkpoint.

3.2 wal_keep_segments set too large

Check the parameter setting. Note that enabling this parameter introduces a certain delay between xlog segments and their .ready files.

3.3 recycling problems

If PG's automatic recycling mechanism is not used and the database relies on an external program to process the .ready files, check that the external recycling process is working.

(archive_mode=on archive_command='')

3.4 checkpoint interval is too long

Check the configuration parameters

Published 27 original articles · won praise 2 · views 50000 +

Guess you like

Origin blog.csdn.net/jackgo73/article/details/90108958