Home | History | Annotate | Download | only in migrate_pages
      1 /*
      2  * Copyright (C) 2012 Linux Test Project, Inc.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of version 2 of the GNU General Public
      6  * License as published by the Free Software Foundation.
      7  *
      8  * This program is distributed in the hope that it would be useful,
      9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     11  *
     12  * Further, this software is distributed without any warranty that it
     13  * is free of the rightful claim of any third person regarding
     14  * infringement or the like.  Any license provided herein, whether
     15  * implied or otherwise, applies only to this software file.  Patent
     16  * licenses, if any, provided herein do not apply to combinations of
     17  * this program with other software, or any other product whatsoever.
     18  *
     19  * You should have received a copy of the GNU General Public License
     20  * along with this program; if not, write the Free Software
     21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
     22  * 02110-1301, USA.
     23  */
     24 
     25 /*
     26  * use migrate_pages() and check that address is on correct node
     27  * 1. process A can migrate its non-shared mem with CAP_SYS_NICE
     28  * 2. process A can migrate its non-shared mem without CAP_SYS_NICE
     29  * 3. process A can migrate shared mem only with CAP_SYS_NICE
     30  * 4. process A can migrate non-shared mem in process B with same effective uid
     31  * 5. process A can migrate non-shared mem in process B with CAP_SYS_NICE
     32  */
     33 #include <sys/types.h>
     34 #include <sys/syscall.h>
     35 #include <sys/wait.h>
     36 #include <sys/mman.h>
     37 #include <sys/prctl.h>
     38 #include <errno.h>
     39 #include <stdio.h>
     40 #include <stdlib.h>
     41 #include <unistd.h>
     42 #include <pwd.h>
     43 
     44 #include "tst_test.h"
     45 #include "lapi/syscalls.h"
     46 #include "numa_helper.h"
     47 #include "migrate_pages_common.h"
     48 
     49 /*
     50  * This is an estimated minimum of free mem required to migrate this
     51  * process to another node as migrate_pages will fail if there is not
     52  * enough free space on node. While running this test on x86_64
     53  * it used ~2048 pages (total VM, not just RSS). Considering ia64 as
     54  * architecture with largest (non-huge) page size (16k), this limit
     55  * is set to 2048*16k == 32M.
     56  */
     57 #define NODE_MIN_FREEMEM (32*1024*1024)
     58 
     59 #ifdef HAVE_NUMA_V2
     60 
     61 static const char nobody_uid[] = "nobody";
     62 static struct passwd *ltpuser;
     63 static int *nodes, nodeA, nodeB;
     64 static int num_nodes;
     65 
     66 static const char * const save_restore[] = {
     67 	"?/proc/sys/kernel/numa_balancing",
     68 	NULL,
     69 };
     70 
     71 static void print_mem_stats(pid_t pid, int node)
     72 {
     73 	char s[64];
     74 	long long node_size, freep;
     75 
     76 	if (pid == 0)
     77 		pid = getpid();
     78 
     79 	tst_res(TINFO, "mem_stats pid: %d, node: %d", pid, node);
     80 
     81 	/* dump pid's VM info */
     82 	sprintf(s, "cat /proc/%d/status", pid);
     83 	system(s);
     84 	sprintf(s, "cat /proc/%d/numa_maps", pid);
     85 	system(s);
     86 
     87 	/* dump node free mem */
     88 	node_size = numa_node_size64(node, &freep);
     89 	tst_res(TINFO, "Node id: %d, size: %lld, free: %lld",
     90 		 node, node_size, freep);
     91 }
     92 
     93 static int migrate_to_node(pid_t pid, int node)
     94 {
     95 	unsigned long nodemask_size, max_node;
     96 	unsigned long *old_nodes, *new_nodes;
     97 	int i;
     98 
     99 	tst_res(TINFO, "pid(%d) migrate pid %d to node -> %d",
    100 		 getpid(), pid, node);
    101 	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
    102 	nodemask_size = max_node / 8;
    103 	old_nodes = SAFE_MALLOC(nodemask_size);
    104 	new_nodes = SAFE_MALLOC(nodemask_size);
    105 
    106 	memset(old_nodes, 0, nodemask_size);
    107 	memset(new_nodes, 0, nodemask_size);
    108 	for (i = 0; i < num_nodes; i++)
    109 		set_bit(old_nodes, nodes[i], 1);
    110 	set_bit(new_nodes, node, 1);
    111 
    112 	TEST(tst_syscall(__NR_migrate_pages, pid, max_node, old_nodes,
    113 		new_nodes));
    114 	if (TST_RET != 0) {
    115 		if (TST_RET < 0) {
    116 			tst_res(TFAIL | TERRNO, "migrate_pages failed "
    117 				 "ret: %ld, ", TST_RET);
    118 			print_mem_stats(pid, node);
    119 		} else {
    120 			tst_res(TINFO, "migrate_pages could not migrate all "
    121 				 "pages, not migrated: %ld", TST_RET);
    122 		}
    123 	}
    124 	free(old_nodes);
    125 	free(new_nodes);
    126 	return TST_RET;
    127 }
    128 
    129 static int addr_on_node(void *addr)
    130 {
    131 	int node;
    132 	int ret;
    133 
    134 	ret = tst_syscall(__NR_get_mempolicy, &node, NULL, (unsigned long)0,
    135 		      (unsigned long)addr, MPOL_F_NODE | MPOL_F_ADDR);
    136 	if (ret == -1) {
    137 		tst_res(TBROK | TERRNO, "error getting memory policy "
    138 			 "for page %p", addr);
    139 	}
    140 	return node;
    141 }
    142 
    143 static int check_addr_on_node(void *addr, int exp_node)
    144 {
    145 	int node;
    146 
    147 	node = addr_on_node(addr);
    148 	if (node == exp_node) {
    149 		tst_res(TPASS, "pid(%d) addr %p is on expected node: %d",
    150 			 getpid(), addr, exp_node);
    151 		return TPASS;
    152 	} else {
    153 		tst_res(TFAIL, "pid(%d) addr %p not on expected node: %d "
    154 			 ", expected %d", getpid(), addr, node, exp_node);
    155 		print_mem_stats(0, exp_node);
    156 		return TFAIL;
    157 	}
    158 }
    159 
    160 static void test_migrate_current_process(int node1, int node2, int cap_sys_nice)
    161 {
    162 	char *private, *shared;
    163 	int ret;
    164 	pid_t child;
    165 
    166 	/* parent can migrate its non-shared memory */
    167 	tst_res(TINFO, "current_process, cap_sys_nice: %d", cap_sys_nice);
    168 	private =  SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
    169 		MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
    170 	private[0] = 0;
    171 	tst_res(TINFO, "private anonymous: %p", private);
    172 
    173 	migrate_to_node(0, node2);
    174 	check_addr_on_node(private, node2);
    175 	migrate_to_node(0, node1);
    176 	check_addr_on_node(private, node1);
    177 	SAFE_MUNMAP(private, getpagesize());
    178 
    179 	/* parent can migrate shared memory with CAP_SYS_NICE */
    180 	shared = SAFE_MMAP(NULL, getpagesize(), PROT_READ | PROT_WRITE,
    181 		      MAP_ANONYMOUS | MAP_SHARED, 0, 0);
    182 	shared[0] = 1;
    183 	tst_res(TINFO, "shared anonymous: %p", shared);
    184 	migrate_to_node(0, node2);
    185 	check_addr_on_node(shared, node2);
    186 
    187 	/* shared mem is on node2, try to migrate in child to node1 */
    188 	fflush(stdout);
    189 	child = SAFE_FORK();
    190 	if (child == 0) {
    191 		tst_res(TINFO, "child shared anonymous, cap_sys_nice: %d",
    192 			 cap_sys_nice);
    193 		private =  SAFE_MMAP(NULL, getpagesize(),
    194 			PROT_READ | PROT_WRITE,
    195 			MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
    196 		private[0] = 1;
    197 		shared[0] = 1;
    198 		if (!cap_sys_nice)
    199 			SAFE_SETEUID(ltpuser->pw_uid);
    200 
    201 		migrate_to_node(0, node1);
    202 		/* child can migrate non-shared memory */
    203 		ret = check_addr_on_node(private, node1);
    204 
    205 		exit(ret);
    206 	}
    207 
    208 	SAFE_WAITPID(child, NULL, 0);
    209 	if (cap_sys_nice)
    210 		/* child can migrate shared memory only
    211 		 * with CAP_SYS_NICE */
    212 		check_addr_on_node(shared, node1);
    213 	else
    214 		check_addr_on_node(shared, node2);
    215 	SAFE_MUNMAP(shared, getpagesize());
    216 }
    217 
    218 static void test_migrate_other_process(int node1, int node2, int cap_sys_nice)
    219 {
    220 	char *private;
    221 	int ret;
    222 	pid_t child1, child2;
    223 
    224 	tst_res(TINFO, "other_process, cap_sys_nice: %d", cap_sys_nice);
    225 
    226 	fflush(stdout);
    227 	child1 = SAFE_FORK();
    228 	if (child1 == 0) {
    229 		private =  SAFE_MMAP(NULL, getpagesize(),
    230 			PROT_READ | PROT_WRITE,
    231 			MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
    232 		private[0] = 0;
    233 
    234 		/* make sure we are on node1 */
    235 		migrate_to_node(0, node1);
    236 		check_addr_on_node(private, node1);
    237 
    238 		SAFE_SETUID(ltpuser->pw_uid);
    239 
    240 		/* commit_creds() will clear dumpable, restore it */
    241 		if (prctl(PR_SET_DUMPABLE, 1))
    242 			tst_brk(TBROK | TERRNO, "prctl");
    243 
    244 		/* signal child2 it's OK to migrate child1 and wait */
    245 		TST_CHECKPOINT_WAKE(0);
    246 		TST_CHECKPOINT_WAIT(1);
    247 
    248 		/* child2 can migrate child1 process if it's privileged */
    249 		/* child2 can migrate child1 process if it has same uid */
    250 		ret = check_addr_on_node(private, node2);
    251 
    252 		exit(ret);
    253 	}
    254 
    255 	fflush(stdout);
    256 	child2 = SAFE_FORK();
    257 	if (child2 == 0) {
    258 		if (!cap_sys_nice)
    259 			SAFE_SETUID(ltpuser->pw_uid);
    260 
    261 		/* wait until child1 is ready on node1, then migrate and
    262 		 * signal to check current node */
    263 		TST_CHECKPOINT_WAIT(0);
    264 		migrate_to_node(child1, node2);
    265 		TST_CHECKPOINT_WAKE(1);
    266 
    267 		exit(TPASS);
    268 	}
    269 
    270 	SAFE_WAITPID(child1, NULL, 0);
    271 	SAFE_WAITPID(child2, NULL, 0);
    272 }
    273 
    274 static void run(void)
    275 {
    276 	test_migrate_current_process(nodeA, nodeB, 1);
    277 	test_migrate_current_process(nodeA, nodeB, 0);
    278 	test_migrate_other_process(nodeA, nodeB, 1);
    279 	test_migrate_other_process(nodeA, nodeB, 0);
    280 }
    281 
    282 static void setup(void)
    283 {
    284 	int ret, i, j;
    285 	int pagesize = getpagesize();
    286 	void *p;
    287 
    288 	tst_syscall(__NR_migrate_pages, 0, 0, NULL, NULL);
    289 
    290 	if (numa_available() == -1)
    291 		tst_brk(TCONF, "NUMA not available");
    292 
    293 	ret = get_allowed_nodes_arr(NH_MEMS, &num_nodes, &nodes);
    294 	if (ret < 0)
    295 		tst_brk(TBROK | TERRNO, "get_allowed_nodes(): %d", ret);
    296 
    297 	if (num_nodes < 2)
    298 		tst_brk(TCONF, "at least 2 allowed NUMA nodes"
    299 			 " are required");
    300 	else if (tst_kvercmp(2, 6, 18) < 0)
    301 		tst_brk(TCONF, "2.6.18 or greater kernel required");
    302 
    303 	FILE_PRINTF("/proc/sys/kernel/numa_balancing", "0");
    304 	/*
    305 	 * find 2 nodes, which can hold NODE_MIN_FREEMEM bytes
    306 	 * The reason is that:
    307 	 * 1. migrate_pages() is expected to succeed
    308 	 * 2. this test avoids hitting:
    309 	 *    Bug 870326 - migrate_pages() reports success, but pages are
    310 	 *                 not moved to desired node
    311 	 *    https://bugzilla.redhat.com/show_bug.cgi?id=870326
    312 	 */
    313 	nodeA = nodeB = -1;
    314 	for (i = 0; i < num_nodes; i++) {
    315 		p = numa_alloc_onnode(NODE_MIN_FREEMEM, nodes[i]);
    316 		if (p == NULL)
    317 			break;
    318 		memset(p, 0xff, NODE_MIN_FREEMEM);
    319 
    320 		j = 0;
    321 		while (j < NODE_MIN_FREEMEM) {
    322 			if (addr_on_node(p + j) != nodes[i])
    323 				break;
    324 			j += pagesize;
    325 		}
    326 		numa_free(p, NODE_MIN_FREEMEM);
    327 
    328 		if (j >= NODE_MIN_FREEMEM) {
    329 			if (nodeA == -1)
    330 				nodeA = nodes[i];
    331 			else if (nodeB == -1)
    332 				nodeB = nodes[i];
    333 			else
    334 				break;
    335 		}
    336 	}
    337 
    338 	if (nodeA == -1 || nodeB == -1)
    339 		tst_brk(TCONF, "at least 2 NUMA nodes with "
    340 			 "free mem > %d are needed", NODE_MIN_FREEMEM);
    341 	tst_res(TINFO, "Using nodes: %d %d", nodeA, nodeB);
    342 
    343 	ltpuser = getpwnam(nobody_uid);
    344 	if (ltpuser == NULL)
    345 		tst_brk(TBROK | TERRNO, "getpwnam failed");
    346 }
    347 
    348 static struct tst_test test = {
    349 	.needs_root = 1,
    350 	.needs_checkpoints = 1,
    351 	.forks_child = 1,
    352 	.test_all = run,
    353 	.setup = setup,
    354 	.save_restore = save_restore,
    355 };
    356 #else
    357 TST_TEST_TCONF(NUMA_ERROR_MSG);
    358 #endif
    359