Home | History | Annotate | Download | only in madvise
      1 /*
      2  * Copyright (c) 2016 Red Hat, Inc.
      3  *
      4  * This program is free software: you can redistribute it and/or modify
      5  * it under the terms of the GNU General Public License as published by
      6  * the Free Software Foundation, either version 3 of the License, or
      7  * (at your option) any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful,
     10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  * GNU General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16  */
     17 
     18 /*
     19  * DESCRIPTION
     20  *
 *   A page fault occurs even though madvise(MADV_WILLNEED) was called to
 *   prefetch the page. The issue is reproduced by a program that
 *   sequentially accesses shared memory and calls madvise(MADV_WILLNEED)
 *   on the next page whenever a page fault occurs.
     25  *
     26  *   This bug is present in all RHEL7 versions. It looks like this was fixed in
     27  *   mainline kernel > v3.15 by the following patch:
     28  *
     29  *   commit 55231e5c898c5c03c14194001e349f40f59bd300
     30  *   Author: Johannes Weiner <hannes (at) cmpxchg.org>
     31  *   Date:   Thu May 22 11:54:17 2014 -0700
     32  *
     33  *       mm: madvise: fix MADV_WILLNEED on shmem swapouts
     34  */
     35 
     36 #include <errno.h>
     37 #include <stdio.h>
     38 #include <sys/mount.h>
     39 #include <sys/sysinfo.h>
     40 #include "tst_test.h"
     41 
     42 #define CHUNK_SZ (400*1024*1024L)
     43 #define CHUNK_PAGES (CHUNK_SZ / pg_sz)
     44 #define PASS_THRESHOLD (CHUNK_SZ / 4)
     45 
     46 #define MNT_NAME "memory"
     47 #define GROUP_NAME "madvise06"
     48 
     49 static const char drop_caches_fname[] = "/proc/sys/vm/drop_caches";
     50 static int pg_sz;
     51 
     52 static void check_path(const char *path)
     53 {
     54 	if (access(path, R_OK | W_OK))
     55 		tst_brk(TCONF, "file needed: %s\n", path);
     56 }
     57 
     58 static void setup(void)
     59 {
     60 	struct sysinfo sys_buf_start;
     61 
     62 	pg_sz = getpagesize();
     63 
     64 	check_path(drop_caches_fname);
     65 	tst_res(TINFO, "dropping caches");
     66 	sync();
     67 	SAFE_FILE_PRINTF(drop_caches_fname, "3");
     68 
     69 	sysinfo(&sys_buf_start);
     70 	if (sys_buf_start.freeram < 2 * CHUNK_SZ) {
     71 		tst_brk(TCONF, "System RAM is too small (%li bytes needed)",
     72 			2 * CHUNK_SZ);
     73 	}
     74 	if (sys_buf_start.freeswap < 2 * CHUNK_SZ) {
     75 		tst_brk(TCONF, "System swap is too small (%li bytes needed)",
     76 			2 * CHUNK_SZ);
     77 	}
     78 
     79 	SAFE_MKDIR(MNT_NAME, 0700);
     80 	if (mount("memory", MNT_NAME, "cgroup", 0, "memory") == -1) {
     81 		if (errno == ENODEV || errno == ENOENT)
     82 			tst_brk(TCONF, "memory cgroup needed");
     83 	}
     84 	SAFE_MKDIR(MNT_NAME"/"GROUP_NAME, 0700);
     85 
     86 	check_path("/proc/self/oom_score_adj");
     87 	check_path(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes");
     88 	check_path(MNT_NAME"/"GROUP_NAME"/memory.swappiness");
     89 	check_path(MNT_NAME"/"GROUP_NAME"/tasks");
     90 
     91 	SAFE_FILE_PRINTF("/proc/self/oom_score_adj", "%d", -1000);
     92 	SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes", "%ld\n",
     93 		PASS_THRESHOLD);
     94 	SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.swappiness", "60");
     95 	SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/tasks", "%d\n", getpid());
     96 }
     97 
     98 static void cleanup(void)
     99 {
    100 	if (!access(MNT_NAME"/tasks", F_OK)) {
    101 		SAFE_FILE_PRINTF(MNT_NAME"/tasks", "%d\n", getpid());
    102 		SAFE_RMDIR(MNT_NAME"/"GROUP_NAME);
    103 		SAFE_UMOUNT(MNT_NAME);
    104 	}
    105 }
    106 
    107 static void dirty_pages(char *ptr, long size)
    108 {
    109 	long i;
    110 	long pages = size / pg_sz;
    111 
    112 	for (i = 0; i < pages; i++)
    113 		ptr[i * pg_sz] = 'x';
    114 }
    115 
    116 static int get_page_fault_num(void)
    117 {
    118 	int pg;
    119 
    120 	SAFE_FILE_SCANF("/proc/self/stat",
    121 			"%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d",
    122 			&pg);
    123 	return pg;
    124 }
    125 
    126 static void test_advice_willneed(void)
    127 {
    128 	int loops = 50;
    129 	char *target;
    130 	long swapcached_start, swapcached;
    131 	int page_fault_num_1, page_fault_num_2;
    132 
    133 	target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE,
    134 			MAP_SHARED | MAP_ANONYMOUS,
    135 			-1, 0);
    136 	dirty_pages(target, CHUNK_SZ);
    137 
    138 	SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
    139 		&swapcached_start);
    140 	tst_res(TINFO, "SwapCached (before madvise): %ld", swapcached_start);
    141 
    142 	TEST(madvise(target, CHUNK_SZ, MADV_WILLNEED));
    143 	if (TST_RET == -1)
    144 		tst_brk(TBROK | TERRNO, "madvise failed");
    145 
    146 	do {
    147 		loops--;
    148 		usleep(100000);
    149 		SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld",
    150 			&swapcached);
    151 	} while (swapcached < swapcached_start + PASS_THRESHOLD / 1024
    152 		&& loops > 0);
    153 
    154 	tst_res(TINFO, "SwapCached (after madvise): %ld", swapcached);
    155 	if (swapcached > swapcached_start + PASS_THRESHOLD / 1024) {
    156 		tst_res(TPASS, "Regression test pass");
    157 		SAFE_MUNMAP(target, CHUNK_SZ);
    158 		return;
    159 	}
    160 
    161 	/*
    162 	 * We may have hit a bug or we just have slow I/O,
    163 	 * try accessing first page.
    164 	 */
    165 	page_fault_num_1 = get_page_fault_num();
    166 	tst_res(TINFO, "PageFault(madvice / no mem access): %d",
    167 			page_fault_num_1);
    168 	target[0] = 'a';
    169 	page_fault_num_2 = get_page_fault_num();
    170 	tst_res(TINFO, "PageFault(madvice / mem access): %d",
    171 			page_fault_num_2);
    172 
    173 	if (page_fault_num_1 != page_fault_num_2)
    174 		tst_res(TFAIL, "Bug has been reproduced");
    175 	else
    176 		tst_res(TPASS, "Regression test pass");
    177 
    178 	SAFE_MUNMAP(target, CHUNK_SZ);
    179 }
    180 
    181 static struct tst_test test = {
    182 	.test_all = test_advice_willneed,
    183 	.setup = setup,
    184 	.cleanup = cleanup,
    185 	.min_kver = "3.10.0",
    186 	.needs_tmpdir = 1,
    187 	.needs_root = 1,
    188 };
    189