1 /* 2 * Copyright (c) 2016 Red Hat, Inc. 3 * 4 * This program is free software: you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation, either version 3 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17 18 /* 19 * DESCRIPTION 20 * 21 * Page fault occurs in spite that madvise(WILLNEED) system call is called 22 * to prefetch the page. This issue is reproduced by running a program 23 * which sequentially accesses to a shared memory and calls madvise(WILLNEED) 24 * to the next page on a page fault. 25 * 26 * This bug is present in all RHEL7 versions. It looks like this was fixed in 27 * mainline kernel > v3.15 by the following patch: 28 * 29 * commit 55231e5c898c5c03c14194001e349f40f59bd300 30 * Author: Johannes Weiner <hannes (at) cmpxchg.org> 31 * Date: Thu May 22 11:54:17 2014 -0700 32 * 33 * mm: madvise: fix MADV_WILLNEED on shmem swapouts 34 */ 35 36 #include <errno.h> 37 #include <stdio.h> 38 #include <sys/mount.h> 39 #include <sys/sysinfo.h> 40 #include "tst_test.h" 41 42 #define CHUNK_SZ (400*1024*1024L) 43 #define CHUNK_PAGES (CHUNK_SZ / pg_sz) 44 #define PASS_THRESHOLD (CHUNK_SZ / 4) 45 46 #define MNT_NAME "memory" 47 #define GROUP_NAME "madvise06" 48 49 static const char drop_caches_fname[] = "/proc/sys/vm/drop_caches"; 50 static int pg_sz; 51 52 static void check_path(const char *path) 53 { 54 if (access(path, R_OK | W_OK)) 55 tst_brk(TCONF, "file needed: %s\n", path); 56 } 57 58 static void setup(void) 59 { 60 struct sysinfo sys_buf_start; 61 62 pg_sz = getpagesize(); 63 64 check_path(drop_caches_fname); 65 tst_res(TINFO, "dropping caches"); 66 sync(); 67 SAFE_FILE_PRINTF(drop_caches_fname, "3"); 68 69 sysinfo(&sys_buf_start); 70 if (sys_buf_start.freeram < 2 * CHUNK_SZ) { 71 tst_brk(TCONF, "System RAM is too small (%li bytes needed)", 72 2 * CHUNK_SZ); 73 } 74 if (sys_buf_start.freeswap < 2 * CHUNK_SZ) { 75 tst_brk(TCONF, "System swap is too small (%li bytes needed)", 76 2 * CHUNK_SZ); 77 } 78 79 SAFE_MKDIR(MNT_NAME, 0700); 80 if (mount("memory", MNT_NAME, "cgroup", 0, "memory") == -1) { 81 if (errno == ENODEV || errno == ENOENT) 82 tst_brk(TCONF, "memory cgroup needed"); 83 } 84 SAFE_MKDIR(MNT_NAME"/"GROUP_NAME, 0700); 85 86 check_path("/proc/self/oom_score_adj"); 87 check_path(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes"); 88 check_path(MNT_NAME"/"GROUP_NAME"/memory.swappiness"); 89 check_path(MNT_NAME"/"GROUP_NAME"/tasks"); 90 91 SAFE_FILE_PRINTF("/proc/self/oom_score_adj", "%d", -1000); 92 SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.limit_in_bytes", "%ld\n", 93 PASS_THRESHOLD); 94 SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/memory.swappiness", "60"); 95 SAFE_FILE_PRINTF(MNT_NAME"/"GROUP_NAME"/tasks", "%d\n", getpid()); 96 } 97 98 static void cleanup(void) 99 { 100 if (!access(MNT_NAME"/tasks", F_OK)) { 101 SAFE_FILE_PRINTF(MNT_NAME"/tasks", "%d\n", getpid()); 102 SAFE_RMDIR(MNT_NAME"/"GROUP_NAME); 103 SAFE_UMOUNT(MNT_NAME); 104 } 105 } 106 107 static void dirty_pages(char *ptr, long size) 108 { 109 long i; 110 long pages = size / pg_sz; 111 112 for (i = 0; i < pages; i++) 113 ptr[i * pg_sz] = 'x'; 114 } 115 116 static int get_page_fault_num(void) 117 { 118 int pg; 119 120 SAFE_FILE_SCANF("/proc/self/stat", 121 "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %d", 122 &pg); 123 return pg; 124 } 125 126 static void test_advice_willneed(void) 127 { 128 int loops = 50; 129 char *target; 130 long swapcached_start, swapcached; 131 int page_fault_num_1, page_fault_num_2; 132 133 target = SAFE_MMAP(NULL, CHUNK_SZ, PROT_READ | PROT_WRITE, 134 MAP_SHARED | MAP_ANONYMOUS, 135 -1, 0); 136 dirty_pages(target, CHUNK_SZ); 137 138 SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", 139 &swapcached_start); 140 tst_res(TINFO, "SwapCached (before madvise): %ld", swapcached_start); 141 142 TEST(madvise(target, CHUNK_SZ, MADV_WILLNEED)); 143 if (TST_RET == -1) 144 tst_brk(TBROK | TERRNO, "madvise failed"); 145 146 do { 147 loops--; 148 usleep(100000); 149 SAFE_FILE_LINES_SCANF("/proc/meminfo", "SwapCached: %ld", 150 &swapcached); 151 } while (swapcached < swapcached_start + PASS_THRESHOLD / 1024 152 && loops > 0); 153 154 tst_res(TINFO, "SwapCached (after madvise): %ld", swapcached); 155 if (swapcached > swapcached_start + PASS_THRESHOLD / 1024) { 156 tst_res(TPASS, "Regression test pass"); 157 SAFE_MUNMAP(target, CHUNK_SZ); 158 return; 159 } 160 161 /* 162 * We may have hit a bug or we just have slow I/O, 163 * try accessing first page. 164 */ 165 page_fault_num_1 = get_page_fault_num(); 166 tst_res(TINFO, "PageFault(madvice / no mem access): %d", 167 page_fault_num_1); 168 target[0] = 'a'; 169 page_fault_num_2 = get_page_fault_num(); 170 tst_res(TINFO, "PageFault(madvice / mem access): %d", 171 page_fault_num_2); 172 173 if (page_fault_num_1 != page_fault_num_2) 174 tst_res(TFAIL, "Bug has been reproduced"); 175 else 176 tst_res(TPASS, "Regression test pass"); 177 178 SAFE_MUNMAP(target, CHUNK_SZ); 179 } 180 181 static struct tst_test test = { 182 .test_all = test_advice_willneed, 183 .setup = setup, 184 .cleanup = cleanup, 185 .min_kver = "3.10.0", 186 .needs_tmpdir = 1, 187 .needs_root = 1, 188 }; 189