#define TST_NO_DEFAULT_MAIN

#include "config.h"
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/param.h>
#include <errno.h>
#include <fcntl.h>
#if HAVE_NUMA_H
#include <numa.h>
#endif
#if HAVE_NUMAIF_H
#include <numaif.h>
#endif
#include <pthread.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

#include "mem.h"
#include "numa_helper.h"

/* OOM */

long overcommit = -1;

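/*
 * Map an anonymous private region of 'length' bytes and dirty every page
 * so the memory is actually committed.  For MLOCK the region is also
 * mlock()ed (retrying a few times on EAGAIN), for KSM it is marked
 * MADV_MERGEABLE.  Returns 0 on success or the failing errno.
 */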
static int alloc_mem(long int length, int testcase)
{
	char *s;
	long i, pagesz = getpagesize();
	int loop = 10;

	tst_res(TINFO, "thread (%lx), allocating %ld bytes.",
		(unsigned long) pthread_self(), length);

	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (s == MAP_FAILED)
		return errno;

	if (testcase == MLOCK) {
		while (mlock(s, length) == -1 && loop > 0) {
			if (EAGAIN != errno)
				return errno;
			usleep(300000);
			loop--;
		}
	}

#ifdef HAVE_MADV_MERGEABLE
	if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
		return errno;
#endif
	for (i = 0; i < length; i += pagesz)
		s[i] = '\a';

	return 0;
}

static void *child_alloc_thread(void *args)
{
	int ret = 0;

	/* keep allocating until there's an error */
	while (!ret)
		ret = alloc_mem(LENGTH, (long)args);
	exit(ret);
}

static void child_alloc(int testcase, int lite, int threads)
{
	int i;
	pthread_t *th;

	if (lite) {
		int ret = alloc_mem(TESTMEM + MB, testcase);
		exit(ret);
	}

	th = malloc(sizeof(pthread_t) * threads);
	if (!th) {
		tst_res(TINFO | TERRNO, "malloc");
		goto out;
	}

	for (i = 0; i < threads; i++) {
		TEST(pthread_create(&th[i], NULL, child_alloc_thread,
			(void *)((long)testcase)));
		if (TST_RET) {
			tst_res(TINFO | TRERRNO, "pthread_create");
			/*
			 * Keep going if a thread other than the first fails
			 * to spawn due to lack of resources.
			 */
			if (i == 0 || TST_RET != EAGAIN)
				goto out;
		}
	}

	/* wait for one of the threads to exit the whole process */
	while (1)
		sleep(1);
out:
	exit(1);
}

/*
 * oom - allocates memory according to the specified testcase and checks
 *       the desired outcome (e.g. child killed, operation failed with ENOMEM)
 * @testcase: selects how the child allocates memory
 *            valid choices are: NORMAL, MLOCK and KSM
 * @lite: if non-zero, the child makes only a single TESTMEM+MB allocation
 *        if zero, the child keeps allocating memory until it gets killed
 *        or some operation fails
 * @retcode: expected return code of the child process
 *           if it matches the child's return code, this function reports
 *           PASS, otherwise it reports FAIL
 * @allow_sigkill: if zero and the child is killed, this function reports
 *                 FAIL; if non-zero and the child is killed by SIGKILL,
 *                 it is considered a PASS
 */
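/*
 * Illustrative call only (parameters vary between testcases): run the
 * full allocation loop and accept either an OOM kill or ENOMEM:
 *
 *	oom(NORMAL, 0, ENOMEM, 1);
 */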
void oom(int testcase, int lite, int retcode, int allow_sigkill)
{
	pid_t pid;
	int status, threads;

	switch (pid = SAFE_FORK()) {
	case 0:
		threads = MAX(1, tst_ncpus() - 1);
		child_alloc(testcase, lite, threads);
	default:
		break;
	}

	tst_res(TINFO, "expected victim is %d.", pid);
	SAFE_WAITPID(-1, &status, 0);

	if (WIFSIGNALED(status)) {
		if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
			tst_res(TPASS, "victim signalled: (%d) %s",
				SIGKILL,
				tst_strsig(SIGKILL));
		} else {
			tst_res(TFAIL, "victim signalled: (%d) %s",
				WTERMSIG(status),
				tst_strsig(WTERMSIG(status)));
		}
	} else if (WIFEXITED(status)) {
		if (WEXITSTATUS(status) == retcode) {
			tst_res(TPASS, "victim retcode: (%d) %s",
				retcode, strerror(retcode));
		} else {
			tst_res(TFAIL, "victim unexpectedly ended with "
				"retcode: %d, expected: %d",
				WEXITSTATUS(status), retcode);
		}
	} else {
		tst_res(TFAIL, "victim unexpectedly ended");
	}
}

#ifdef HAVE_NUMA_V2
static void set_global_mempolicy(int mempolicy)
{
	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
	int num_nodes, *nodes;
	int ret;

	if (mempolicy) {
		ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
		if (ret != 0)
			tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
		if (num_nodes < 2) {
			tst_res(TINFO, "mempolicy requires a NUMA system with at least 2 nodes");
			free(nodes);
			return;
		}
		switch (mempolicy) {
		case MPOL_BIND:
			/* bind to the second node */
			set_node(nmask, nodes[1]);
			break;
		case MPOL_INTERLEAVE:
		case MPOL_PREFERRED:
			if (num_nodes == 2) {
				tst_res(TINFO, "This mempolicy requires "
					 "more than 2 NUMA nodes");
				free(nodes);
				return;
			} else {
				/* use the 2nd and 3rd nodes */
				set_node(nmask, nodes[1]);
				set_node(nmask, nodes[2]);
			}
			break;
		default:
			tst_brk(TBROK|TERRNO, "Bad mempolicy mode");
		}
		if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
			tst_brk(TBROK|TERRNO, "set_mempolicy");
	}
}
#else
static void set_global_mempolicy(int mempolicy LTP_ATTRIBUTE_UNUSED) { }
#endif

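/*
 * Run the OOM test for NORMAL, MLOCK and (when the KSM sysfs interface
 * is present and lite == 0) KSM pages, optionally under the given NUMA
 * mempolicy.
 */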
void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
{
	int ksm_run_orig;

	set_global_mempolicy(mempolicy);

	tst_res(TINFO, "start normal OOM testing.");
	oom(NORMAL, lite, retcode, allow_sigkill);

	tst_res(TINFO, "start OOM testing for mlocked pages.");
	oom(MLOCK, lite, retcode, allow_sigkill);

	/*
	 * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
	 * run to run, which isn't reliable for the oom03 cgroup test.
	 */
	if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
		tst_res(TINFO, "KSM is not configured or lite == 1, "
			 "skipping OOM test for KSM pages");
	} else {
		tst_res(TINFO, "start OOM testing for KSM pages.");
		SAFE_FILE_SCANF(PATH_KSM "run", "%d", &ksm_run_orig);
		SAFE_FILE_PRINTF(PATH_KSM "run", "1");
		oom(KSM, lite, retcode, allow_sigkill);
		SAFE_FILE_PRINTF(PATH_KSM "run", "%d", ksm_run_orig);
	}
}

/* KSM */

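/* Compare one KSM sysfs knob under PATH_KSM with its expected value. */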
static void check(char *path, long int value)
{
	char fullpath[BUFSIZ];
	long actual_val;

	snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
	SAFE_FILE_SCANF(fullpath, "%ld", &actual_val);

	if (actual_val != value)
		tst_res(TFAIL, "%s is not %ld but %ld.", path, value,
			actual_val);
	else
		tst_res(TPASS, "%s is %ld.", path, actual_val);
}

static void final_group_check(int run, int pages_shared, int pages_sharing,
			  int pages_volatile, int pages_unshared,
			  int sleep_millisecs, int pages_to_scan)
{
	tst_res(TINFO, "checking KSM sysfs values");
	check("run", run);
	check("pages_shared", pages_shared);
	check("pages_sharing", pages_sharing);
	check("pages_volatile", pages_volatile);
	check("pages_unshared", pages_unshared);
	check("sleep_millisecs", sleep_millisecs);
	check("pages_to_scan", pages_to_scan);
}

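/*
 * Like final_group_check(), but first waits for ksmd to complete a full
 * scan so the counters have settled.
 */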
static void group_check(int run, int pages_shared, int pages_sharing,
			int pages_volatile, int pages_unshared,
			int sleep_millisecs, int pages_to_scan)
{
	if (run != 1) {
		tst_res(TFAIL, "group_check run is not 1, %d.", run);
	} else {
		/* wait for ksm daemon to scan all mergeable pages. */
		wait_ksmd_full_scan();
	}

	final_group_check(run, pages_shared, pages_sharing,
			  pages_volatile, pages_unshared,
			  sleep_millisecs, pages_to_scan);
}

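/*
 * Check that memory[start..end)[start2..end2) is filled with 'value' and
 * report TFAIL for every byte that does not match.
 */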
static void verify(char **memory, char value, int proc,
		    int start, int end, int start2, int end2)
{
	int i, j;
	void *s = NULL;

	s = SAFE_MALLOC((end - start) * (end2 - start2));

	tst_res(TINFO, "child %d verifies memory content.", proc);
	memset(s, value, (end - start) * (end2 - start2));
	if (memcmp(memory[start], s, (end - start) * (end2 - start2))
	    != 0)
		for (j = start; j < end; j++)
			for (i = start2; i < end2; i++)
				if (memory[j][i] != value)
					tst_res(TFAIL, "child %d has %c at "
						 "%d,%d,%d.",
						 proc, memory[j][i], proc,
						 j, i);
	free(s);
}

void check_hugepage(void)
{
	if (access(PATH_HUGEPAGES, F_OK))
		tst_brk(TCONF, "Huge page is not supported.");
}

void write_memcg(void)
{
	SAFE_FILE_PRINTF(MEMCG_LIMIT, "%ld", TESTMEM);

	SAFE_FILE_PRINTF(MEMCG_PATH_NEW "/tasks", "%d", getpid());
}

struct ksm_merge_data {
	char data;
	unsigned int mergeable_size;
};

static void ksm_child_memset(int child_num, int size, int total_unit,
		 struct ksm_merge_data ksm_merge_data, char **memory)
{
	int i = 0, j;
	int unit = size / total_unit;

	tst_res(TINFO, "child %d continues...", child_num);

	if (ksm_merge_data.mergeable_size == size * MB) {
		tst_res(TINFO, "child %d allocates %d MB filled with '%c'",
			child_num, size, ksm_merge_data.data);

	} else {
		tst_res(TINFO, "child %d allocates %d MB filled with '%c'"
				" except one page with 'e'",
				child_num, size, ksm_merge_data.data);
	}

	for (j = 0; j < total_unit; j++) {
		for (i = 0; (unsigned int)i < unit * MB; i++)
			memory[j][i] = ksm_merge_data.data;
	}

	/*
	 * If the region should contain an unshared page, write an 'e'
	 * into the last byte of the last page.
	 */
	if (ksm_merge_data.mergeable_size < size * MB)
		memory[j-1][i-1] = 'e';
}

static void create_ksm_child(int child_num, int size, int unit,
		       struct ksm_merge_data *ksm_merge_data)
{
	int j, total_unit;
	char **memory;

	/* total number of units */
	total_unit = size / unit;

	/* allocate the array of per-unit memory pointers */
	memory = SAFE_MALLOC(total_unit * sizeof(char *));
	for (j = 0; j < total_unit; j++) {
		memory[j] = SAFE_MMAP(NULL, unit * MB, PROT_READ|PROT_WRITE,
			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#ifdef HAVE_MADV_MERGEABLE
		if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
			tst_brk(TBROK|TERRNO, "madvise");
#endif
	}

	tst_res(TINFO, "child %d stops.", child_num);
	if (raise(SIGSTOP) == -1)
		tst_brk(TBROK|TERRNO, "raise");
	fflush(stdout);

	for (j = 0; j < 4; j++) {

		ksm_child_memset(child_num, size, total_unit,
				  ksm_merge_data[j], memory);

		fflush(stdout);

		tst_res(TINFO, "child %d stops.", child_num);
		if (raise(SIGSTOP) == -1)
			tst_brk(TBROK|TERRNO, "raise");

		if (ksm_merge_data[j].mergeable_size < size * MB) {
			verify(memory, 'e', child_num, total_unit - 1,
				total_unit, unit * MB - 1, unit * MB);
			verify(memory, ksm_merge_data[j].data, child_num,
				0, total_unit, 0, unit * MB - 1);
		} else {
			verify(memory, ksm_merge_data[j].data, child_num,
				0, total_unit, 0, unit * MB);
		}
	}

	tst_res(TINFO, "child %d finished.", child_num);
}

static void stop_ksm_children(int *child, int num)
{
	int k, status;

	tst_res(TINFO, "wait for all children to stop.");
	for (k = 0; k < num; k++) {
		SAFE_WAITPID(child[k], &status, WUNTRACED);
		if (!WIFSTOPPED(status))
			tst_brk(TBROK, "child %d was not stopped", k);
	}
}

static void resume_ksm_children(int *child, int num)
{
	int k;

	tst_res(TINFO, "resume all children.");
	for (k = 0; k < num; k++)
		SAFE_KILL(child[k], SIGCONT);

	fflush(stdout);
}

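/*
 * Fork 'num' children that each map 'size' MB of mergeable memory in
 * 'unit' MB chunks, then step them through four fill patterns and check
 * the KSM counters after every merge stage, after unmerging (run=2) and
 * after stopping ksmd (run=0).
 */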
void create_same_memory(int size, int num, int unit)
{
	int i, j, status, *child;
	unsigned long ps, pages;
	struct ksm_merge_data **ksm_data;

	struct ksm_merge_data ksm_data0[] = {
	       {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
	};
	struct ksm_merge_data ksm_data1[] = {
	       {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
	};
	struct ksm_merge_data ksm_data2[] = {
	       {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
	};

	ps = sysconf(_SC_PAGE_SIZE);
	pages = MB / ps;

	ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
	/* children after the first three use the same data as the first child */
	for (i = 0; i < num - 3; i++) {
		ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
		for (j = 0; j < 4; j++) {
			ksm_data[i][j].data = ksm_data0[j].data;
			ksm_data[i][j].mergeable_size =
				ksm_data0[j].mergeable_size;
		}
	}

	child = SAFE_MALLOC(num * sizeof(int));

	for (i = 0; i < num; i++) {
		fflush(stdout);
		switch (child[i] = SAFE_FORK()) {
		case 0:
			if (i == 0) {
				create_ksm_child(i, size, unit, ksm_data0);
				exit(0);
			} else if (i == 1) {
				create_ksm_child(i, size, unit, ksm_data1);
				exit(0);
			} else if (i == 2) {
				create_ksm_child(i, size, unit, ksm_data2);
				exit(0);
			} else {
				create_ksm_child(i, size, unit, ksm_data[i-3]);
				exit(0);
			}
		}
	}

	stop_ksm_children(child, num);

	tst_res(TINFO, "KSM merging...");
	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing", "%ld", size * pages * num);
	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld", size * pages * num);
	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");

	resume_ksm_children(child, num);
	stop_ksm_children(child, num);
	group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);

	resume_ksm_children(child, num);
	stop_ksm_children(child, num);
	group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);

	resume_ksm_children(child, num);
	stop_ksm_children(child, num);
	group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);

	resume_ksm_children(child, num);
	stop_ksm_children(child, num);
	group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);

	tst_res(TINFO, "KSM unmerging...");
	SAFE_FILE_PRINTF(PATH_KSM "run", "2");

	resume_ksm_children(child, num);
	final_group_check(2, 0, 0, 0, 0, 0, size * pages * num);

	tst_res(TINFO, "stop KSM.");
	SAFE_FILE_PRINTF(PATH_KSM "run", "0");
	final_group_check(0, 0, 0, 0, 0, 0, size * pages * num);

	while (waitpid(-1, &status, 0) > 0)
		if (WEXITSTATUS(status) != 0)
			tst_res(TFAIL, "child exit status is %d",
				 WEXITSTATUS(status));
}

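/*
 * Fill nr_pages of identical data on each allowed NUMA node and check
 * the KSM counters with merge_across_nodes set to 1 and then to 0.
 */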
void test_ksm_merge_across_nodes(unsigned long nr_pages)
{
	char **memory;
	int i, ret;
	int num_nodes, *nodes;
	unsigned long length;
	unsigned long pagesize;

#ifdef HAVE_NUMA_V2
	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
#endif

	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
	if (ret != 0)
		tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
	if (num_nodes < 2) {
		tst_res(TINFO, "need a NUMA system with at least 2 nodes");
		free(nodes);
		return;
	}

	pagesize = sysconf(_SC_PAGE_SIZE);
	length = nr_pages * pagesize;

	memory = SAFE_MALLOC(num_nodes * sizeof(char *));
	for (i = 0; i < num_nodes; i++) {
		memory[i] = SAFE_MMAP(NULL, length, PROT_READ|PROT_WRITE,
			    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#ifdef HAVE_MADV_MERGEABLE
		if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
			tst_brk(TBROK|TERRNO, "madvise");
#endif

#ifdef HAVE_NUMA_V2
		clean_node(nmask);
		set_node(nmask, nodes[i]);
		/*
		 * Use mbind() to make sure each node holds 'length'
		 * bytes of this memory.
		 */
		ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
		if (ret == -1)
			tst_brk(TBROK|TERRNO, "mbind");
#endif

		memset(memory[i], 10, length);
	}

	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld",
			 nr_pages * num_nodes);
	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing",
			"%ld", nr_pages * num_nodes);
	/*
	 * The merge_across_nodes setting can be changed only when there
	 * are no KSM shared pages in the system, so set run to 2 to
	 * unmerge pages first, then to 1 after changing
	 * merge_across_nodes, to remerge according to the new setting.
	 */
	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
	tst_res(TINFO, "Start to test KSM with merge_across_nodes=1");
	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "1");
	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
	group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
		    nr_pages * num_nodes);

	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
	tst_res(TINFO, "Start to test KSM with merge_across_nodes=0");
	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "0");
	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
	group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
		    0, 0, 0, nr_pages * num_nodes);

	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
}

/* THP */

/* cpuset/memcg */

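/*
 * Build a comma-separated list of the online CPUs belonging to NUMA
 * node 'nd' in 'cpus'.
 */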
static void gather_node_cpus(char *cpus, long nd)
{
	int ncpus = 0;
	int i;
	long online;
	char buf[BUFSIZ];
	char path[BUFSIZ], path1[BUFSIZ];

	while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
		ncpus++;

	for (i = 0; i < ncpus; i++) {
		snprintf(path, BUFSIZ,
			 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
		if (path_exist(path)) {
			snprintf(path1, BUFSIZ, "%s/online", path);
			/*
			 * if there is no online knob, then the cpu cannot
			 * be taken offline
			 */
			if (path_exist(path1)) {
				SAFE_FILE_SCANF(path1, "%ld", &online);
				if (online == 0)
					continue;
			}
			sprintf(buf, "%d,", i);
			strcat(cpus, buf);
		}
	}
	/* Remove the trailing comma. */
	cpus[strlen(cpus) - 1] = '\0';
}

void read_cpuset_files(char *prefix, char *filename, char *retbuf)
{
	int fd;
	char path[BUFSIZ];

	/*
	 * Try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX';
	 * see Documentation/cgroups/cpusets.txt in the kernel source
	 * for details.
	 */
	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
	fd = open(path, O_RDONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			snprintf(path, BUFSIZ, "%s/cpuset.%s",
				 prefix, filename);
			fd = SAFE_OPEN(path, O_RDONLY);
		} else
			tst_brk(TBROK | TERRNO, "open %s", path);
	}
	if (read(fd, retbuf, BUFSIZ) < 0)
		tst_brk(TBROK | TERRNO, "read %s", path);
	close(fd);
}

void write_cpuset_files(char *prefix, char *filename, char *buf)
{
	int fd;
	char path[BUFSIZ];

	/*
	 * Try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX';
	 * see Documentation/cgroups/cpusets.txt in the kernel source
	 * for details.
	 */
	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
	fd = open(path, O_WRONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			snprintf(path, BUFSIZ, "%s/cpuset.%s",
				 prefix, filename);
			fd = SAFE_OPEN(path, O_WRONLY);
		} else
			tst_brk(TBROK | TERRNO, "open %s", path);
	}
	SAFE_WRITE(1, fd, buf, strlen(buf));
	close(fd);
}

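/*
 * Restrict the new cpuset to NUMA node 'nd' and its CPUs (falling back
 * to CPU0 if the node has none), then move the current process into it.
 */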
void write_cpusets(long nd)
{
	char buf[BUFSIZ];
	char cpus[BUFSIZ] = "";

	snprintf(buf, BUFSIZ, "%ld", nd);
	write_cpuset_files(CPATH_NEW, "mems", buf);

	gather_node_cpus(cpus, nd);
	/*
	 * If node 'nd' doesn't contain any CPUs, fall back to CPU0
	 * as the value of cpuset.cpus.
	 */
	if (strlen(cpus) != 0) {
		write_cpuset_files(CPATH_NEW, "cpus", cpus);
	} else {
		tst_res(TINFO, "No CPUs in node%ld; "
				"using only CPU0", nd);
		write_cpuset_files(CPATH_NEW, "cpus", "0");
	}

	SAFE_FILE_PRINTF(CPATH_NEW "/tasks", "%d", getpid());
}

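/*
 * Move all tasks from the new group back to the parent, remove the new
 * group's directory and unmount/remove the mount point.
 */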
void umount_mem(char *path, char *path_new)
{
	FILE *fp;
	int fd;
	char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];

	/* Move all processes in tasks back to the parent group. */
	sprintf(s, "%s/tasks", path);
	fd = open(s, O_WRONLY);
	if (fd == -1)
		tst_res(TWARN | TERRNO, "open %s", s);

	snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
	fp = fopen(s_new, "r");
	if (fp == NULL)
		tst_res(TWARN | TERRNO, "fopen %s", s_new);
	if ((fd != -1) && (fp != NULL)) {
		while (fgets(value, BUFSIZ, fp) != NULL)
			if (write(fd, value, strlen(value) - 1)
			    != (ssize_t)strlen(value) - 1)
				tst_res(TWARN | TERRNO, "write %s", s);
	}
	if (fd != -1)
		close(fd);
	if (fp != NULL)
		fclose(fp);
	if (rmdir(path_new) == -1)
		tst_res(TWARN | TERRNO, "rmdir %s", path_new);
	if (umount(path) == -1)
		tst_res(TWARN | TERRNO, "umount %s", path);
	if (rmdir(path) == -1)
		tst_res(TWARN | TERRNO, "rmdir %s", path);
}

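/*
 * Mount filesystem 'fs' at 'path' and create a child group 'path_new'
 * inside it; exits with TCONF if 'fs' is not configured in the kernel.
 */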
void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
{
	SAFE_MKDIR(path, 0777);
	if (mount(name, path, fs, 0, options) == -1) {
		if (errno == ENODEV) {
			if (rmdir(path) == -1)
				tst_res(TWARN | TERRNO, "rmdir %s failed",
					 path);
			tst_brk(TCONF,
				 "file system %s is not configured in kernel",
				 fs);
		}
		tst_brk(TBROK | TERRNO, "mount %s", path);
	}
	SAFE_MKDIR(path_new, 0777);
}

/* shared */

/* Warning: *DO NOT* use this function in a child process */
unsigned int get_a_numa_node(void)
{
	unsigned int nd1, nd2;
	int ret;

	ret = get_allowed_nodes(0, 2, &nd1, &nd2);
	switch (ret) {
	case 0:
		break;
	case -3:
		tst_brk(TCONF, "requires a NUMA system.");
	default:
		tst_brk(TBROK | TERRNO, "1st get_allowed_nodes");
	}

	ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
	switch (ret) {
	case 0:
		tst_res(TINFO, "get node%u.", nd1);
		return nd1;
	case -3:
		tst_brk(TCONF, "requires a NUMA system that has "
			 "at least one node with both memory and CPU "
			 "available.");
	default:
		tst_brk(TBROK | TERRNO, "2nd get_allowed_nodes");
	}

	/* not reached */
	abort();
}

int path_exist(const char *path, ...)
{
	va_list ap;
	char pathbuf[PATH_MAX];

	va_start(ap, path);
	vsnprintf(pathbuf, sizeof(pathbuf), path, ap);
	va_end(ap);

	return access(pathbuf, F_OK) == 0;
}

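/*
 * Write 'tune' to the PATH_SYSVM sysctl 'sys_file' and, if 'check' is
 * non-zero, read it back and TBROK if the value did not stick.
 */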
void set_sys_tune(char *sys_file, long tune, int check)
{
	long val;
	char path[BUFSIZ];

	tst_res(TINFO, "set %s to %ld", sys_file, tune);

	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
	SAFE_FILE_PRINTF(path, "%ld", tune);

	if (check) {
		val = get_sys_tune(sys_file);
		if (val != tune)
			tst_brk(TBROK, "%s = %ld, but expected %ld",
				 sys_file, val, tune);
	}
}

long get_sys_tune(char *sys_file)
{
	char path[BUFSIZ];
	long tune;

	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
	SAFE_FILE_SCANF(path, "%ld", &tune);

	return tune;
}

void update_shm_size(size_t * shm_size)
{
	size_t shmmax;

	SAFE_FILE_SCANF(PATH_SHMMAX, "%zu", &shmmax);
	if (*shm_size > shmmax) {
		tst_res(TINFO, "Set shm_size to shmmax: %zu", shmmax);
		*shm_size = shmmax;
	}
}

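/*
 * Return 1 if any mapping in /proc/self/maps starts or ends inside
 * [low, high), 0 otherwise.
 */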
int range_is_mapped(unsigned long low, unsigned long high)
{
	FILE *fp;

	fp = fopen("/proc/self/maps", "r");
	if (fp == NULL)
		tst_brk(TBROK | TERRNO, "Failed to open /proc/self/maps.");

	while (!feof(fp)) {
		unsigned long start, end;
		int ret;

		ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
		if (ret != 2) {
			fclose(fp);
			tst_brk(TBROK | TERRNO, "Couldn't parse /proc/self/maps line.");
		}

		if ((start >= low) && (start < high)) {
			fclose(fp);
			return 1;
		}
		if ((end >= low) && (end < high)) {
			fclose(fp);
			return 1;
		}
	}

	fclose(fp);
	return 0;
}
    880