前言
今天来对ltp MM模块的oom01进行源码分析。但是说起OOM机制,就不得不讲一些内存回收方面的知识。
应用程序通过malloc函数申请内存的时候,实际上申请的是虚拟内存,此时并不会分配物理内存。当应用程序读写了虚拟内存时,CPU就会去访问这个虚拟内存, 这时会发现这个虚拟内存没有映射到物理内存, CPU就会产生缺页中断,进程会从用户态切换到内核态,并将缺页中断交给内核的Page Fault Handler(缺页中断函数)处理。缺页中断处理函数会看是否有空闲的物理内存,如果有,就直接分配物理内存,并建立虚拟内存与物理内存之间的映射关系。如果没有空闲的物理内存,那么内核就会开始进行回收内存的工作,回收的方式主要是两种:直接内存回收和后台内存回收。
- 后台内存回收(kswapd):在物理内存紧张的时候,会唤醒kswapd内核线程来回收内存,这个回收内存的过程是异步的,不会阻塞进程的执行。
- 直接内存回收(direct reclaim):如果后台异步回收跟不上进程内存申请的速度,就会开始直接回收,这个回收内存的过程是同步的,会阻塞进程的执行。
如果直接内存回收后,空闲的物理内存仍然无法满足此次物理内存的申请,那么内核就会放最后的大招了 ——触发OOM(Out of Memory)机制。而ltp的oom*系列测试,就是测试系统的OOM机制功能是否正常。
1.源码分析
1.1函数调用关系
下图便是oom01.c的函数调用关系,牵扯到两个.c文件:oom01.c和mem.c。
1.2源码分析
我们首先从入口函数verify_oom()开始看起,可以看到oom测试不支持32位ABI:一旦定义了TST_ABI32,测试会直接以TCONF退出。接下来的源码分析我们便从第一组测试,即overcommit_memory被设置为2(overcommit_memory内存管理策略之前的文章已经详细介绍过,不再赘述)的这组测试开始,结合1.1小节的函数调用关系图逐级进行分析。
oom01.c
verify_oom():
/*
 * verify_oom - test entry point.
 *
 * Runs the OOM scenario three times, once for each overcommit_memory
 * setting (2, then 0, then 1). Only the first run forbids the child from
 * being killed by SIGKILL (allow_sigkill == 0); the other two accept
 * either an ENOMEM exit code or a SIGKILL.
 */
static void verify_oom(void)
{
#if defined(TST_ABI32)
	/* Bail out with TCONF when built for a 32-bit ABI. */
	tst_brk(TCONF, "test is not designed for 32-bit system.");
#endif

	/*
	 * overcommit_memory = 2: we expect mmap to fail before OOM is hit,
	 * so the child must exit with ENOMEM and SIGKILL is not accepted.
	 */
	set_sys_tune("overcommit_memory", 2, 1);
	oom(NORMAL, 0, ENOMEM, 0);

	/*
	 * With overcommit_memory set to 0 or 1 there is no guarantee that
	 * mmap fails before OOM, hence SIGKILL is accepted as well.
	 */
	set_sys_tune("overcommit_memory", 0, 1);
	oom(NORMAL, 0, ENOMEM, 1);

	/* testoom() is defined outside this excerpt. */
	set_sys_tune("overcommit_memory", 1, 1);
	testoom(0, 0, ENOMEM, 1);
}
mem.c
oom()的参数,在此就不再进行注释,因为原coder已经给我们写好了详细的函数注释:)。
/*
 * oom - fork a child that allocates memory for the given testcase and
 *       judge the way that child ended.
 *
 * @testcase:      selects how the child allocates memory
 *                 (valid choices are NORMAL, MLOCK and KSM)
 * @lite:          non-zero: the child performs one single allocation of
 *                 TESTMEM * 2 + MB bytes (see child_alloc());
 *                 zero: the child keeps allocating until it gets killed
 *                 or some operation fails
 * @retcode:       exit code the child is expected to return; a match is
 *                 reported as PASS, anything else as FAIL
 * @allow_sigkill: zero: a child terminated by any signal is a FAIL;
 *                 non-zero: termination by SIGKILL counts as PASS
 */
void oom(int testcase, int lite, int retcode, int allow_sigkill)
{
	pid_t pid;
	int wstatus, nthreads;

	/*
	 * Shield the calling process from the OOM killer (the article notes
	 * this sets its oom_score_adj to -1000).
	 */
	tst_enable_oom_protection(0);

	pid = SAFE_FORK();
	if (pid == 0) {
		/*
		 * Child: drop the protection again (oom_score_adj back to 0
		 * according to the article) so it can be picked as the victim.
		 */
		tst_disable_oom_protection(0);
		/* One allocating thread per CPU minus one, but at least one. */
		nthreads = MAX(1, tst_ncpus() - 1);
		/* Allocates until something fails; ends the child via exit(). */
		child_alloc(testcase, lite, nthreads);
	}

	tst_res(TINFO, "expected victim is %d.", pid);

	/* Block until a child terminates and collect its wait status. */
	SAFE_WAITPID(-1, &wstatus, 0);

	/*
	 * Decode the wait status:
	 *   WIFSIGNALED() - terminated by a signal, WTERMSIG() gives the signal
	 *   WIFEXITED()   - exited normally, WEXITSTATUS() gives the low 8 bits
	 *                   of the value passed to exit()/_exit()
	 */
	if (WIFSIGNALED(wstatus)) {
		/* Child was terminated by a signal. */
		if (allow_sigkill && WTERMSIG(wstatus) == SIGKILL) {
			tst_res(TPASS, "victim signalled: (%d) %s", SIGKILL, tst_strsig(SIGKILL));
		} else {
			tst_res(TFAIL, "victim signalled: (%d) %s", WTERMSIG(wstatus), tst_strsig(WTERMSIG(wstatus)));
		}
		return;
	}

	if (!WIFEXITED(wstatus)) {
		/* Neither signalled nor exited normally. */
		tst_res(TFAIL, "victim unexpectedly ended");
		return;
	}

	/* Child exited normally: compare its exit code with the expectation. */
	if (WEXITSTATUS(wstatus) == retcode) {
		tst_res(TPASS, "victim retcode: (%d) %s", retcode, strerror(retcode));
	} else {
		tst_res(TFAIL, "victim unexpectedly ended with " "retcode: %d, expected: %d", WEXITSTATUS(wstatus), retcode);
	}
}
child_alloc()
/*
 * child_alloc - allocation driver executed inside the forked child.
 *
 * @testcase: allocation mode, handed through to alloc_mem()
 * @lite:     non-zero: do one TESTMEM * 2 + MB allocation and exit with
 *            its result; zero: spawn allocating threads
 * @threads:  number of threads to start when @lite is zero
 *
 * Never returns: every path ends in exit(), either here or in one of the
 * threads running child_alloc_thread().
 */
static void child_alloc(int testcase, int lite, int threads)
{
	pthread_t *tids;
	int idx;

	if (lite) {
		/* Single-shot mode: the allocation result is the exit code. */
		exit(alloc_mem(TESTMEM * 2 + MB, testcase));
	}

	/* Storage for the thread identifiers. */
	tids = malloc(sizeof(pthread_t) * threads);
	if (tids == NULL) {
		tst_res(TINFO | TERRNO, "malloc");
		exit(1);
	}

	for (idx = 0; idx < threads; idx++) {
		/* Each thread runs child_alloc_thread(), which mmap()s in a loop. */
		TEST(pthread_create(&tids[idx], NULL, child_alloc_thread, (void *)((long)testcase)));
		if (!TST_RET)
			continue;

		tst_res(TINFO | TRERRNO, "pthread_create");
		/*
		 * Keep going if a thread other than the first fails to
		 * spawn due to lack of resources; give up otherwise.
		 */
		if (idx == 0 || TST_RET != EAGAIN)
			exit(1);
	}

	/* Wait for one of the threads to exit the whole process. */
	for (;;)
		sleep(1);
}
child_alloc_thread():
/*
 * child_alloc_thread - thread routine that allocates until a failure.
 *
 * @args: the testcase number, smuggled through the void pointer
 *
 * Calls alloc_mem() with LENGTH bytes (the article quotes
 * "#define LENGTH (3UL<<30)") over and over. The first non-zero result
 * is used as the exit code of the whole process, so this never returns.
 */
static void *child_alloc_thread(void *args)
{
	int err;

	/* Keep allocating until alloc_mem() reports an error. */
	do {
		err = alloc_mem(LENGTH, (long)args);
	} while (err == 0);

	exit(err);
}
alloc_mem():
/*
 * alloc_mem - map an anonymous private region and touch every page of it.
 *
 * @length:   size of the mapping in bytes
 * @testcase: MLOCK additionally mlock()s the region, KSM additionally
 *            marks it MADV_MERGEABLE (when available); any other value
 *            only maps and writes
 *
 * Returns 0 on success, or the errno of the failing mmap()/mlock()/
 * madvise() call. The mapping is not unmapped on any path.
 */
static int alloc_mem(long int length, int testcase)
{
	char *region;
	long off, page = getpagesize();
	int retries;

	tst_res(TINFO, "thread (%lx), allocating %ld bytes.", (unsigned long) pthread_self(), length);

	/* Reserve @length bytes of anonymous, private, read/write memory. */
	region = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (region == MAP_FAILED)
		return errno;

	if (testcase == MLOCK) {
		/*
		 * Retry mlock() while it fails with EAGAIN, pausing 300 ms
		 * between attempts; any other error is returned right away.
		 * Once the retry budget is used up the code simply carries on.
		 */
		for (retries = 10; mlock(region, length) == -1 && retries > 0; retries--) {
			if (errno != EAGAIN)
				return errno;
			usleep(300000);
		}
	}

#ifdef HAVE_DECL_MADV_MERGEABLE
	if (testcase == KSM) {
		if (madvise(region, length, MADV_MERGEABLE) == -1)
			return errno;
	}
#endif

	/*
	 * Write one byte per page so that physical memory actually gets
	 * allocated behind the virtual mapping.
	 */
	off = 0;
	while (off < length) {
		region[off] = '\a';
		off += page;
	}

	return 0;
}
1.3综述
结合1.2小节对各个函数的分析,可以得出:第一组测试验证的是在"overcommit_memory=2"的情况下,子进程不断申请内存时,mmap()会先于OOM机制触发而失败。只有子进程以ENOMEM作为退出码正常退出,测试才认为是PASS;如果子进程被信号杀死(包括SIGKILL,因为这一组测试的allow_sigkill为0),或者以其他退出码结束,测试就是FAIL!这是因为"overcommit_memory=2"时,内存管理策略是“When this flag is 2, the kernel uses a "never overcommit" policy that attempts to prevent any overcommit of memory.”。
尾言
温故知新,岁岁常新!:) 因博主水平能力有限,如果有大佬在阅读过程中发现其中的谬误,希望可以不吝赐教,3Q。