Home | History | Annotate | Download | only in migrate_pages
      1 /*
      2  * Copyright (C) 2012 Linux Test Project, Inc.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of version 2 of the GNU General Public
      6  * License as published by the Free Software Foundation.
      7  *
      8  * This program is distributed in the hope that it would be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     11  *
     12  * Further, this software is distributed without any warranty that it
     13  * is free of the rightful claim of any third person regarding
     14  * infringement or the like.  Any license provided herein, whether
     15  * implied or otherwise, applies only to this software file.  Patent
     16  * licenses, if any, provided herein do not apply to combinations of
     17  * this program with other software, or any other product whatsoever.
     18  *
     19  * You should have received a copy of the GNU General Public License
     20  * along with this program; if not, write the Free Software
     21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     22  * 02110-1301, USA.
     23  */
     24 
     25 /*
     26  * use migrate_pages() and check that address is on correct node
     27  * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
     28  * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
     29  * 3. process A can migrate shared mem only with CAP_SYS_NICE
     30  * 4. process A can migrate non-shared mem in process B with same effective uid
     31  * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
     32  */
     33 #include <sys/types.h>
     34 #include <sys/syscall.h>
     35 #include <sys/wait.h>
     36 #include <sys/mman.h>
     37 #include <errno.h>
     38 #if HAVE_NUMA_H
     39 #include <numa.h>
     40 #endif
     41 #if HAVE_NUMAIF_H
     42 #include <numaif.h>
     43 #endif
     44 #include <stdio.h>
     45 #include <stdlib.h>
     46 #include <unistd.h>
     47 #include <pwd.h>
     48 #include "config.h"
     49 #include "test.h"
     50 #include "safe_macros.h"
     51 #include "linux_syscall_numbers.h"
     52 #include "numa_helper.h"
     53 #include "migrate_pages_common.h"
     54 
     55 /*
     56  * This is an estimated minimum of free mem required to migrate this
     57  * process to another node as migrate_pages will fail if there is not
     58  * enough free space on node. While running this test on x86_64
     59  * it used ~2048 pages (total VM, not just RSS). Considering ia64 as
     60  * architecture with largest (non-huge) page size (16k), this limit
     61  * is set to 2048*16k == 32M.
     62  */
     63 #define NODE_MIN_FREEMEM (32*1024*1024)
     64 
     65 char *TCID = "migrate_pages02";
     66 int TST_TOTAL = 1;
     67 
     68 #if defined(__NR_migrate_pages) && HAVE_NUMA_H && HAVE_NUMAIF_H
     69 static const char nobody_uid[] = "nobody";
     70 static struct passwd *ltpuser;
     71 static int *nodes, nodeA, nodeB;
     72 static int num_nodes;
     73 
     74 static void setup(void);
     75 static void cleanup(void);
     76 
     77 option_t options[] = {
     78 	{NULL, NULL, NULL}
     79 };
     80 
     81 static void print_mem_stats(pid_t pid, int node)
     82 {
     83 	char s[64];
     84 	long long node_size, freep;
     85 
     86 	if (pid == 0)
     87 		pid = getpid();
     88 
     89 	tst_resm(TINFO, "mem_stats pid: %d, node: %d", pid, node);
     90 
     91 	/* dump pid's VM info */
     92 	sprintf(s, "cat /proc/%d/status", pid);
     93 	system(s);
     94 	sprintf(s, "cat /proc/%d/numa_maps", pid);
     95 	system(s);
     96 
     97 	/* dump node free mem */
     98 	node_size = numa_node_size64(node, &freep);
     99 	tst_resm(TINFO, "Node id: %d, size: %lld, free: %lld",
    100 		 node, node_size, freep);
    101 }
    102 
    103 static int migrate_to_node(pid_t pid, int node)
    104 {
    105 	unsigned long nodemask_size, max_node;
    106 	unsigned long *old_nodes, *new_nodes;
    107 	int i;
    108 
    109 	tst_resm(TINFO, "pid(%d) migrate pid %d to node -> %d",
    110 		 getpid(), pid, node);
    111 	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
    112 	nodemask_size = max_node / 8;
    113 	old_nodes = SAFE_MALLOC(NULL, nodemask_size);
    114 	new_nodes = SAFE_MALLOC(NULL, nodemask_size);
    115 
    116 	memset(old_nodes, 0, nodemask_size);
    117 	memset(new_nodes, 0, nodemask_size);
    118 	for (i = 0; i < num_nodes; i++)
    119 		set_bit(old_nodes, nodes[i], 1);
    120 	set_bit(new_nodes, node, 1);
    121 
    122 	TEST(ltp_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
    123 		new_nodes));
    124 	if (TEST_RETURN != 0) {
    125 		if (TEST_RETURN < 0)
    126 			tst_resm(TFAIL | TERRNO, "migrate_pages failed "
    127 				 "ret: %ld, ", TEST_RETURN);
    128 		else
    129 			tst_resm(TWARN, "migrate_pages could not migrate all "
    130 				 "pages, not migrated: %ld", TEST_RETURN);
    131 		print_mem_stats(pid, node);
    132 	}
    133 	free(old_nodes);
    134 	free(new_nodes);
    135 	return TEST_RETURN;
    136 }
    137 
    138 static int addr_on_node(void *addr)
    139 {
    140 	int node;
    141 	int ret;
    142 
    143 	ret = ltp_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
    144 		      (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
    145 	if (ret == -1) {
    146 		tst_resm(TBROK | TERRNO, "error getting memory policy "
    147 			 "for page %p", addr);
    148 	}
    149 	return node;
    150 }
    151 
    152 static int check_addr_on_node(void *addr, int exp_node)
    153 {
    154 	int node;
    155 
    156 	node = addr_on_node(addr);
    157 	if (node == exp_node) {
    158 		tst_resm(TPASS, "pid(%d) addr %p is on expected node: %d",
    159 			 getpid(), addr, exp_node);
    160 		return 0;
    161 	} else {
    162 		tst_resm(TFAIL, "pid(%d) addr %p not on expected node: %d "
    163 			 ", expected %d", getpid(), addr, node, exp_node);
    164 		print_mem_stats(0, exp_node);
    165 		return 1;
    166 	}
    167 }
    168 
    169 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
    170 {
    171 	char *testp, *testp2;
    172 	int ret, status;
    173 	pid_t child;
    174 
    175 	/* parent can migrate its non-shared memory */
    176 	tst_resm(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
    177 	testp = SAFE_MALLOC(NULL, getpagesize());
    178 	testp[0] = 0;
    179 	tst_resm(TINFO, "private anonymous: %p", testp);
    180 	migrate_to_node(0, node2);
    181 	check_addr_on_node(testp, node2);
    182 	migrate_to_node(0, node1);
    183 	check_addr_on_node(testp, node1);
    184 	free(testp);
    185 
    186 	/* parent can migrate shared memory with CAP_SYS_NICE */
    187 	testp2 = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
    188 		      MAP_ANONYMOUS | MAP_SHARED, 0, 0);
    189 	if (testp2 == MAP_FAILED)
    190 		tst_brkm(TBROK | TERRNO, cleanup, "mmap failed");
    191 	testp2[0] = 1;
    192 	tst_resm(TINFO, "shared anonymous: %p", testp2);
    193 	migrate_to_node(0, node2);
    194 	check_addr_on_node(testp2, node2);
    195 
    196 	/* shared mem is on node2, try to migrate in child to node1 */
    197 	fflush(stdout);
    198 	child = fork();
    199 	switch (child) {
    200 	case -1:
    201 		tst_brkm(TBROK | TERRNO, cleanup, "fork");
    202 		break;
    203 	case 0:
    204 		tst_resm(TINFO, "child shared anonymous, cap_sys_nice: %d",
    205 			 cap_sys_nice);
    206 		testp = SAFE_MALLOC(NULL, getpagesize());
    207 		testp[0] = 1;
    208 		testp2[0] = 1;
    209 		if (!cap_sys_nice)
    210 			if (seteuid(ltpuser->pw_uid) == -1)
    211 				tst_brkm(TBROK | TERRNO, NULL,
    212 					 "seteuid failed");
    213 
    214 		migrate_to_node(0, node1);
    215 		/* child can migrate non-shared memory */
    216 		ret = check_addr_on_node(testp, node1);
    217 
    218 		free(testp);
    219 		munmap(testp2, getpagesize());
    220 		exit(ret);
    221 	default:
    222 		if (waitpid(child, &status, 0) == -1)
    223 			tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
    224 		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
    225 			tst_resm(TFAIL, "child returns %d", status);
    226 		if (cap_sys_nice)
    227 			/* child can migrate shared memory only
    228 			 * with CAP_SYS_NICE */
    229 			check_addr_on_node(testp2, node1);
    230 		else
    231 			check_addr_on_node(testp2, node2);
    232 		munmap(testp2, getpagesize());
    233 	}
    234 }
    235 
    236 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
    237 {
    238 	char *testp;
    239 	int status, ret, tmp;
    240 	pid_t child;
    241 	int child_ready[2];
    242 	int pages_migrated[2];
    243 
    244 	/* setup pipes to synchronize child/parent */
    245 	if (pipe(child_ready) == -1)
    246 		tst_resm(TBROK | TERRNO, "pipe #1 failed");
    247 	if (pipe(pages_migrated) == -1)
    248 		tst_resm(TBROK | TERRNO, "pipe #2 failed");
    249 
    250 	tst_resm(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
    251 
    252 	fflush(stdout);
    253 	child = fork();
    254 	switch (child) {
    255 	case -1:
    256 		tst_brkm(TBROK | TERRNO, cleanup, "fork");
    257 		break;
    258 	case 0:
    259 		close(child_ready[0]);
    260 		close(pages_migrated[1]);
    261 
    262 		testp = SAFE_MALLOC(NULL, getpagesize());
    263 		testp[0] = 0;
    264 
    265 		/* make sure we are on node1 */
    266 		migrate_to_node(0, node1);
    267 		check_addr_on_node(testp, node1);
    268 
    269 		if (seteuid(ltpuser->pw_uid) == -1)
    270 			tst_brkm(TBROK | TERRNO, NULL, "seteuid failed");
    271 
    272 		/* signal parent it's OK to migrate child and wait */
    273 		if (write(child_ready[1], &tmp, 1) != 1)
    274 			tst_brkm(TBROK | TERRNO, NULL, "write #1 failed");
    275 		if (read(pages_migrated[0], &tmp, 1) != 1)
    276 			tst_brkm(TBROK | TERRNO, NULL, "read #1 failed");
    277 
    278 		/* parent can migrate child process with same euid */
    279 		/* parent can migrate child process with CAP_SYS_NICE */
    280 		ret = check_addr_on_node(testp, node2);
    281 
    282 		free(testp);
    283 		close(child_ready[1]);
    284 		close(pages_migrated[0]);
    285 		exit(ret);
    286 	default:
    287 		close(child_ready[1]);
    288 		close(pages_migrated[0]);
    289 
    290 		if (!cap_sys_nice)
    291 			if (seteuid(ltpuser->pw_uid) == -1)
    292 				tst_brkm(TBROK | TERRNO, NULL,
    293 					 "seteuid failed");
    294 
    295 		/* wait until child is ready on node1, then migrate and
    296 		 * signal to check current node */
    297 		if (read(child_ready[0], &tmp, 1) != 1)
    298 			tst_brkm(TBROK | TERRNO, NULL, "read #2 failed");
    299 		migrate_to_node(child, node2);
    300 		if (write(pages_migrated[1], &tmp, 1) != 1)
    301 			tst_brkm(TBROK | TERRNO, NULL, "write #2 failed");
    302 
    303 		if (waitpid(child, &status, 0) == -1)
    304 			tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
    305 		if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
    306 			tst_resm(TFAIL, "child returns %d", status);
    307 		close(child_ready[0]);
    308 		close(pages_migrated[1]);
    309 
    310 		/* reset euid, so this testcase can be used in loop */
    311 		if (!cap_sys_nice)
    312 			if (seteuid(0) == -1)
    313 				tst_brkm(TBROK | TERRNO, NULL,
    314 					 "seteuid failed");
    315 	}
    316 }
    317 
    318 int main(int argc, char *argv[])
    319 {
    320 	int lc;
    321 
    322 	tst_parse_opts(argc, argv, options, NULL);
    323 
    324 	setup();
    325 	for (lc = 0; TEST_LOOPING(lc); lc++) {
    326 		tst_count = 0;
    327 		test_migrate_current_process(nodeA, nodeB, 1);
    328 		test_migrate_current_process(nodeA, nodeB, 0);
    329 		test_migrate_other_process(nodeA, nodeB, 1);
    330 		test_migrate_other_process(nodeA, nodeB, 0);
    331 	}
    332 	cleanup();
    333 	tst_exit();
    334 }
    335 
    336 static void setup(void)
    337 {
    338 	int ret, i, j;
    339 	int pagesize = getpagesize();
    340 	void *p;
    341 
    342 	tst_require_root();
    343 	TEST(ltp_syscall(__NR_migrate_pages, 0, 0, NULL, NULL));
    344 
    345 	if (numa_available() == -1)
    346 		tst_brkm(TCONF, NULL, "NUMA not available");
    347 
    348 	ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
    349 	if (ret < 0)
    350 		tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes(): %d", ret);
    351 
    352 	if (num_nodes < 2)
    353 		tst_brkm(TCONF, NULL, "at least 2 allowed NUMA nodes"
    354 			 " are required");
    355 	else if (tst_kvercmp(2, 6, 18) < 0)
    356 		tst_brkm(TCONF, NULL, "2.6.18 or greater kernel required");
    357 
    358 	/*
    359 	 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
    360 	 * The reason is that:
    361 	 * 1. migrate_pages() is expected to succeed
    362 	 * 2. this test avoids hitting:
    363 	 *    Bug 870326 - migrate_pages() reports success, but pages are
    364 	 *                 not moved to desired node
    365 	 *    https://bugzilla.redhat.com/show_bug.cgi?id=870326
    366 	 */
    367 	nodeA = nodeB = -1;
    368 	for (i = 0; i < num_nodes; i++) {
    369 		p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
    370 		if (p == NULL)
    371 			break;
    372 		memset(p, 0xff, NODE_MIN_FREEMEM);
    373 
    374 		j = 0;
    375 		while (j < NODE_MIN_FREEMEM) {
    376 			if (addr_on_node(p + j) != nodes[i])
    377 				break;
    378 			j += pagesize;
    379 		}
    380 		numa_free(p, NODE_MIN_FREEMEM);
    381 
    382 		if (j >= NODE_MIN_FREEMEM) {
    383 			if (nodeA == -1)
    384 				nodeA = nodes[i];
    385 			else if (nodeB == -1)
    386 				nodeB = nodes[i];
    387 			else
    388 				break;
    389 		}
    390 	}
    391 
    392 	if (nodeA == -1 || nodeB == -1)
    393 		tst_brkm(TCONF, NULL, "at least 2 NUMA nodes with "
    394 			 "free mem > %d are needed", NODE_MIN_FREEMEM);
    395 	tst_resm(TINFO, "Using nodes: %d %d", nodeA, nodeB);
    396 
    397 	ltpuser = getpwnam(nobody_uid);
    398 	if (ltpuser == NULL)
    399 		tst_brkm(TBROK | TERRNO, NULL, "getpwnam failed");
    400 
    401 	TEST_PAUSE;
    402 }
    403 
    404 static void cleanup(void)
    405 {
    406 	free(nodes);
    407 }
    408 
    409 #else /* __NR_migrate_pages */
    410 int main(void)
    411 {
    412 	tst_brkm(TCONF, NULL, "System doesn't support __NR_migrate_pages"
    413 		 " or libnuma is not available");
    414 }
    415 #endif
    416