1 /* 2 * Copyright (C) 2012 Linux Test Project, Inc. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it would be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 11 * 12 * Further, this software is distributed without any warranty that it 13 * is free of the rightful claim of any third person regarding 14 * infringement or the like. Any license provided herein, whether 15 * implied or otherwise, applies only to this software file. Patent 16 * licenses, if any, provided herein do not apply to combinations of 17 * this program with other software, or any other product whatsoever. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write the Free Software 21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 22 * 02110-1301, USA. 23 */ 24 25 /* 26 * use migrate_pages() and check that address is on correct node 27 * 1. process A can migrate its non-shared mem with CAP_SYS_NICE 28 * 2. process A can migrate its non-shared mem without CAP_SYS_NICE 29 * 3. process A can migrate shared mem only with CAP_SYS_NICE 30 * 4. process A can migrate non-shared mem in process B with same effective uid 31 * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE 32 */ 33 #include <sys/types.h> 34 #include <sys/syscall.h> 35 #include <sys/wait.h> 36 #include <sys/mman.h> 37 #include <sys/prctl.h> 38 #include <errno.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <unistd.h> 42 #include <pwd.h> 43 44 #include "tst_test.h" 45 #include "lapi/syscalls.h" 46 #include "numa_helper.h" 47 #include "migrate_pages_common.h" 48 49 /* 50 * This is an estimated minimum of free mem required to migrate this 51 * process to another node as migrate_pages will fail if there is not 52 * enough free space on node. While running this test on x86_64 53 * it used ~2048 pages (total VM, not just RSS). Considering ia64 as 54 * architecture with largest (non-huge) page size (16k), this limit 55 * is set to 2048*16k == 32M. 56 */ 57 #define NODE_MIN_FREEMEM (32*1024*1024) 58 59 #ifdef HAVE_NUMA_V2 60 61 static const char nobody_uid[] = "nobody"; 62 static struct passwd *ltpuser; 63 static int *nodes, nodeA, nodeB; 64 static int num_nodes; 65 66 static const char * const save_restore[] = { 67 "?/proc/sys/kernel/numa_balancing", 68 NULL, 69 }; 70 71 static void print_mem_stats(pid_t pid, int node) 72 { 73 char s[64]; 74 long long node_size, freep; 75 76 if (pid == 0) 77 pid = getpid(); 78 79 tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node); 80 81 /* dump pid's VM info */ 82 sprintf(s, "cat /proc/%d/status", pid); 83 system(s); 84 sprintf(s, "cat /proc/%d/numa_maps", pid); 85 system(s); 86 87 /* dump node free mem */ 88 node_size = numa_node_size64(node, &freep); 89 tst_res(TINFO, "Node id: %d, size: %lld, free: %lld", 90 node, node_size, freep); 91 } 92 93 static int migrate_to_node(pid_t pid, int node) 94 { 95 unsigned long nodemask_size, max_node; 96 unsigned long *old_nodes, *new_nodes; 97 int i; 98 99 tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d", 100 getpid(), pid, node); 101 max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8); 102 nodemask_size = max_node / 8; 103 old_nodes = SAFE_MALLOC(nodemask_size); 104 new_nodes = SAFE_MALLOC(nodemask_size); 105 106 memset(old_nodes, 0, nodemask_size); 107 memset(new_nodes, 0, nodemask_size); 108 for (i = 0; i < num_nodes; i++) 109 set_bit(old_nodes, nodes[i], 1); 110 set_bit(new_nodes, node, 1); 111 112 TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes, 113 new_nodes)); 114 if (TST_RET != 0) { 115 if (TST_RET < 0) { 116 tst_res(TFAIL | TERRNO, "migrate_pages failed " 117 "ret: %ld, ", TST_RET); 118 print_mem_stats(pid, node); 119 } else { 120 tst_res(TINFO, "migrate_pages could not migrate all " 121 "pages, not migrated: %ld", TST_RET); 122 } 123 } 124 free(old_nodes); 125 free(new_nodes); 126 return TST_RET; 127 } 128 129 static int addr_on_node(void *addr) 130 { 131 int node; 132 int ret; 133 134 ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0, 135 (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR); 136 if (ret == -1) { 137 tst_res(TBROK | TERRNO, "error getting memory policy " 138 "for page %p", addr); 139 } 140 return node; 141 } 142 143 static int check_addr_on_node(void *addr, int exp_node) 144 { 145 int node; 146 147 node = addr_on_node(addr); 148 if (node == exp_node) { 149 tst_res(TPASS, "pid(%d) addr %p is on expected node: %d", 150 getpid(), addr, exp_node); 151 return TPASS; 152 } else { 153 tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d " 154 ", expected %d", getpid(), addr, node, exp_node); 155 print_mem_stats(0, exp_node); 156 return TFAIL; 157 } 158 } 159 160 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice) 161 { 162 char *private, *shared; 163 int ret; 164 pid_t child; 165 166 /* parent can migrate its non-shared memory */ 167 tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice); 168 private = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE, 169 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 170 private[0] = 0; 171 tst_res(TINFO, "private anonymous: %p", private); 172 173 migrate_to_node(0, node2); 174 check_addr_on_node(private, node2); 175 migrate_to_node(0, node1); 176 check_addr_on_node(private, node1); 177 SAFE_MUNMAP(private, getpagesize()); 178 179 /* parent can migrate shared memory with CAP_SYS_NICE */ 180 shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE, 181 MAP_ANONYMOUS | MAP_SHARED, 0, 0); 182 shared[0] = 1; 183 tst_res(TINFO, "shared anonymous: %p", shared); 184 migrate_to_node(0, node2); 185 check_addr_on_node(shared, node2); 186 187 /* shared mem is on node2, try to migrate in child to node1 */ 188 fflush(stdout); 189 child = SAFE_FORK(); 190 if (child == 0) { 191 tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d", 192 cap_sys_nice); 193 private = SAFE_MMAP(NULL, getpagesize(), 194 PROT_READ | PROT_WRITE, 195 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 196 private[0] = 1; 197 shared[0] = 1; 198 if (!cap_sys_nice) 199 SAFE_SETEUID(ltpuser->pw_uid); 200 201 migrate_to_node(0, node1); 202 /* child can migrate non-shared memory */ 203 ret = check_addr_on_node(private, node1); 204 205 exit(ret); 206 } 207 208 SAFE_WAITPID(child, NULL, 0); 209 if (cap_sys_nice) 210 /* child can migrate shared memory only 211 * with CAP_SYS_NICE */ 212 check_addr_on_node(shared, node1); 213 else 214 check_addr_on_node(shared, node2); 215 SAFE_MUNMAP(shared, getpagesize()); 216 } 217 218 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice) 219 { 220 char *private; 221 int ret; 222 pid_t child1, child2; 223 224 tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice); 225 226 fflush(stdout); 227 child1 = SAFE_FORK(); 228 if (child1 == 0) { 229 private = SAFE_MMAP(NULL, getpagesize(), 230 PROT_READ | PROT_WRITE, 231 MAP_ANONYMOUS | MAP_PRIVATE, 0, 0); 232 private[0] = 0; 233 234 /* make sure we are on node1 */ 235 migrate_to_node(0, node1); 236 check_addr_on_node(private, node1); 237 238 SAFE_SETUID(ltpuser->pw_uid); 239 240 /* commit_creds() will clear dumpable, restore it */ 241 if (prctl(PR_SET_DUMPABLE, 1)) 242 tst_brk(TBROK | TERRNO, "prctl"); 243 244 /* signal child2 it's OK to migrate child1 and wait */ 245 TST_CHECKPOINT_WAKE(0); 246 TST_CHECKPOINT_WAIT(1); 247 248 /* child2 can migrate child1 process if it's privileged */ 249 /* child2 can migrate child1 process if it has same uid */ 250 ret = check_addr_on_node(private, node2); 251 252 exit(ret); 253 } 254 255 fflush(stdout); 256 child2 = SAFE_FORK(); 257 if (child2 == 0) { 258 if (!cap_sys_nice) 259 SAFE_SETUID(ltpuser->pw_uid); 260 261 /* wait until child1 is ready on node1, then migrate and 262 * signal to check current node */ 263 TST_CHECKPOINT_WAIT(0); 264 migrate_to_node(child1, node2); 265 TST_CHECKPOINT_WAKE(1); 266 267 exit(TPASS); 268 } 269 270 SAFE_WAITPID(child1, NULL, 0); 271 SAFE_WAITPID(child2, NULL, 0); 272 } 273 274 static void run(void) 275 { 276 test_migrate_current_process(nodeA, nodeB, 1); 277 test_migrate_current_process(nodeA, nodeB, 0); 278 test_migrate_other_process(nodeA, nodeB, 1); 279 test_migrate_other_process(nodeA, nodeB, 0); 280 } 281 282 static void setup(void) 283 { 284 int ret, i, j; 285 int pagesize = getpagesize(); 286 void *p; 287 288 tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL); 289 290 if (numa_available() == -1) 291 tst_brk(TCONF, "NUMA not available"); 292 293 ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes); 294 if (ret < 0) 295 tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret); 296 297 if (num_nodes < 2) 298 tst_brk(TCONF, "at least 2 allowed NUMA nodes" 299 " are required"); 300 else if (tst_kvercmp(2, 6, 18) < 0) 301 tst_brk(TCONF, "2.6.18 or greater kernel required"); 302 303 FILE_PRINTF("/proc/sys/kernel/numa_balancing", "0"); 304 /* 305 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes 306 * The reason is that: 307 * 1. migrate_pages() is expected to succeed 308 * 2. this test avoids hitting: 309 * Bug 870326 - migrate_pages() reports success, but pages are 310 * not moved to desired node 311 * https://bugzilla.redhat.com/show_bug.cgi?id=870326 312 */ 313 nodeA = nodeB = -1; 314 for (i = 0; i < num_nodes; i++) { 315 p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]); 316 if (p == NULL) 317 break; 318 memset(p, 0xff, NODE_MIN_FREEMEM); 319 320 j = 0; 321 while (j < NODE_MIN_FREEMEM) { 322 if (addr_on_node(p + j) != nodes[i]) 323 break; 324 j += pagesize; 325 } 326 numa_free(p, NODE_MIN_FREEMEM); 327 328 if (j >= NODE_MIN_FREEMEM) { 329 if (nodeA == -1) 330 nodeA = nodes[i]; 331 else if (nodeB == -1) 332 nodeB = nodes[i]; 333 else 334 break; 335 } 336 } 337 338 if (nodeA == -1 || nodeB == -1) 339 tst_brk(TCONF, "at least 2 NUMA nodes with " 340 "free mem > %d are needed", NODE_MIN_FREEMEM); 341 tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB); 342 343 ltpuser = getpwnam(nobody_uid); 344 if (ltpuser == NULL) 345 tst_brk(TBROK | TERRNO, "getpwnam failed"); 346 } 347 348 static struct tst_test test = { 349 .needs_root = 1, 350 .needs_checkpoints = 1, 351 .forks_child = 1, 352 .test_all = run, 353 .setup = setup, 354 .save_restore = save_restore, 355 }; 356 #else 357 TST_TEST_TCONF(NUMA_ERROR_MSG); 358 #endif 359