Home | History | Annotate | Download | only in lib
      1 #define TST_NO_DEFAULT_MAIN
      2 
      3 #include "config.h"
      4 #include <sys/types.h>
      5 #include <sys/mman.h>
      6 #include <sys/mount.h>
      7 #include <sys/stat.h>
      8 #include <sys/wait.h>
      9 #include <sys/param.h>
     10 #include <errno.h>
     11 #include <fcntl.h>
     12 #if HAVE_NUMA_H
     13 #include <numa.h>
     14 #endif
     15 #if HAVE_NUMAIF_H
     16 #include <numaif.h>
     17 #endif
     18 #include <pthread.h>
     19 #include <stdarg.h>
     20 #include <stdio.h>
     21 #include <string.h>
     22 #include <stdlib.h>
     23 #include <unistd.h>
     24 
     25 #include "mem.h"
     26 #include "numa_helper.h"
     27 
     28 /* OOM */
     29 
/*
 * alloc_mem - mmap @length bytes of anonymous memory and touch every page
 * @length:   number of bytes to map
 * @testcase: NORMAL, MLOCK (pages additionally mlock()ed) or KSM (pages
 *            marked MADV_MERGEABLE so ksmd can merge them)
 *
 * Returns 0 on success, otherwise the errno of the failing syscall.
 * The mapping is intentionally never unmapped: OOM children keep
 * allocating until the kernel kills them or an allocation fails.
 */
static int alloc_mem(long int length, int testcase)
{
	char *s;
	long i, pagesz = getpagesize();
	int loop = 10;	/* number of mlock() retries on transient EAGAIN */

	tst_res(TINFO, "thread (%lx), allocating %ld bytes.",
		(unsigned long) pthread_self(), length);

	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (s == MAP_FAILED)
		return errno;

	if (testcase == MLOCK) {
		/*
		 * Retry while the kernel reports transient EAGAIN.  NOTE:
		 * if all 10 retries fail with EAGAIN the loop falls through
		 * and the failure is silently ignored — the pages are
		 * dirtied anyway (long-standing behavior of this helper).
		 */
		while (mlock(s, length) == -1 && loop > 0) {
			if (EAGAIN != errno)
				return errno;
			usleep(300000);
			loop--;
		}
	}

#ifdef HAVE_MADV_MERGEABLE
	if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
		return errno;
#endif
	/* Write one byte per page to force actual physical allocation. */
	for (i = 0; i < length; i += pagesz)
		s[i] = '\a';

	return 0;
}
     62 
     63 static void *child_alloc_thread(void *args)
     64 {
     65 	int ret = 0;
     66 
     67 	/* keep allocating until there's an error */
     68 	while (!ret)
     69 		ret = alloc_mem(LENGTH, (long)args);
     70 	exit(ret);
     71 }
     72 
/*
 * child_alloc - allocation driver run in the forked OOM child
 * @testcase: passed through to alloc_mem()
 * @lite: non-zero => do a single TESTMEM + MB allocation and exit with
 *        its result; zero => spawn @threads allocator threads that keep
 *        allocating until one of them exit()s the whole process
 * @threads: number of allocator threads in the non-lite mode
 *
 * Exits 1 on setup failure (malloc or a fatal pthread_create error).
 */
static void child_alloc(int testcase, int lite, int threads)
{
	int i;
	pthread_t *th;

	if (lite) {
		int ret = alloc_mem(TESTMEM + MB, testcase);
		exit(ret);
	}

	th = malloc(sizeof(pthread_t) * threads);
	if (!th) {
		tst_res(TINFO | TERRNO, "malloc");
		goto out;
	}

	for (i = 0; i < threads; i++) {
		/* TEST() captures the pthread_create return in TEST_RETURN */
		TEST(pthread_create(&th[i], NULL, child_alloc_thread,
			(void *)((long)testcase)));
		if (TEST_RETURN) {
			tst_res(TINFO | TRERRNO, "pthread_create");
			/*
			 * Keep going if thread other than first fails to
			 * spawn due to lack of resources.
			 */
			if (i == 0 || TEST_RETURN != EAGAIN)
				goto out;
		}
	}

	/* wait for one of threads to exit whole process */
	while (1)
		sleep(1);
out:
	exit(1);
}
    109 
    110 /*
    111  * oom - allocates memory according to specified testcase and checks
    112  *       desired outcome (e.g. child killed, operation failed with ENOMEM)
    113  * @testcase: selects how child allocates memory
    114  *            valid choices are: NORMAL, MLOCK and KSM
    115  * @lite: if non-zero, child makes only single TESTMEM+MB allocation
    116  *        if zero, child keeps allocating memory until it gets killed
    117  *        or some operation fails
    118  * @retcode: expected return code of child process
    119  *           if matches child ret code, this function reports PASS,
    120  *           otherwise it reports FAIL
    121  * @allow_sigkill: if zero and child is killed, this function reports FAIL
    122  *                 if non-zero, then if child is killed by SIGKILL
    123  *                 it is considered as PASS
    124  */
void oom(int testcase, int lite, int retcode, int allow_sigkill)
{
	pid_t pid;
	int status, threads;

	switch (pid = SAFE_FORK()) {
	case 0:
		/* child: leave one CPU's worth of headroom for the parent */
		threads = MAX(1, tst_ncpus() - 1);
		child_alloc(testcase, lite, threads);
	default:
		break;
	}

	tst_res(TINFO, "expected victim is %d.", pid);
	SAFE_WAITPID(-1, &status, 0);

	if (WIFSIGNALED(status)) {
		/* SIGKILL is how the kernel OOM killer terminates victims */
		if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
			tst_res(TPASS, "victim signalled: (%d) %s",
				SIGKILL,
				tst_strsig(SIGKILL));
		} else {
			tst_res(TFAIL, "victim signalled: (%d) %s",
				WTERMSIG(status),
				tst_strsig(WTERMSIG(status)));
		}
	} else if (WIFEXITED(status)) {
		/* child exited on its own; compare against expected errno */
		if (WEXITSTATUS(status) == retcode) {
			tst_res(TPASS, "victim retcode: (%d) %s",
				retcode, strerror(retcode));
		} else {
			tst_res(TFAIL, "victim unexpectedly ended with "
				"retcode: %d, expected: %d",
				WEXITSTATUS(status), retcode);
		}
	} else {
		tst_res(TFAIL, "victim unexpectedly ended");
	}
}
    164 
    165 #ifdef HAVE_NUMA_V2
    166 static void set_global_mempolicy(int mempolicy)
    167 {
    168 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
    169 	int num_nodes, *nodes;
    170 	int ret;
    171 
    172 	if (mempolicy) {
    173 		ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
    174 		if (ret != 0)
    175 			tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
    176 		if (num_nodes < 2) {
    177 			tst_res(TINFO, "mempolicy need NUMA system support");
    178 			free(nodes);
    179 			return;
    180 		}
    181 		switch(mempolicy) {
    182 		case MPOL_BIND:
    183 			/* bind the second node */
    184 			set_node(nmask, nodes[1]);
    185 			break;
    186 		case MPOL_INTERLEAVE:
    187 		case MPOL_PREFERRED:
    188 			if (num_nodes == 2) {
    189 				tst_res(TINFO, "The mempolicy need "
    190 					 "more than 2 numa nodes");
    191 				free(nodes);
    192 				return;
    193 			} else {
    194 				/* Using the 2nd,3rd node */
    195 				set_node(nmask, nodes[1]);
    196 				set_node(nmask, nodes[2]);
    197 			}
    198 			break;
    199 		default:
    200 			tst_brk(TBROK|TERRNO, "Bad mempolicy mode");
    201 		}
    202 		if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
    203 			tst_brk(TBROK|TERRNO, "set_mempolicy");
    204 	}
    205 }
    206 #else
    207 static void set_global_mempolicy(int mempolicy LTP_ATTRIBUTE_UNUSED) { }
    208 #endif
    209 
    210 void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
    211 {
    212 	int ksm_run_orig;
    213 
    214 	set_global_mempolicy(mempolicy);
    215 
    216 	tst_res(TINFO, "start normal OOM testing.");
    217 	oom(NORMAL, lite, retcode, allow_sigkill);
    218 
    219 	tst_res(TINFO, "start OOM testing for mlocked pages.");
    220 	oom(MLOCK, lite, retcode, allow_sigkill);
    221 
    222 	/*
    223 	 * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
    224 	 * run to run, which isn't reliable for oom03 cgroup test.
    225 	 */
    226 	if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
    227 		tst_res(TINFO, "KSM is not configed or lite == 1, "
    228 			 "skip OOM test for KSM pags");
    229 	} else {
    230 		tst_res(TINFO, "start OOM testing for KSM pages.");
    231 		SAFE_FILE_SCANF(PATH_KSM "run", "%d", &ksm_run_orig);
    232 		SAFE_FILE_PRINTF(PATH_KSM "run", "1");
    233 		oom(KSM, lite, retcode, allow_sigkill);
    234 		SAFE_FILE_PRINTF(PATH_KSM "run", "%d", ksm_run_orig);
    235 	}
    236 }
    237 
    238 /* KSM */
    239 
    240 static int max_page_sharing;
    241 
    242 void save_max_page_sharing(void)
    243 {
    244 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    245 		SAFE_FILE_SCANF(PATH_KSM "max_page_sharing",
    246 				"%d", &max_page_sharing);
    247 }
    248 
    249 void restore_max_page_sharing(void)
    250 {
    251 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    252 	        FILE_PRINTF(PATH_KSM "max_page_sharing",
    253 	                         "%d", max_page_sharing);
    254 }
    255 
    256 static void check(char *path, long int value)
    257 {
    258 	char fullpath[BUFSIZ];
    259 	long actual_val;
    260 
    261 	snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
    262 	SAFE_FILE_SCANF(fullpath, "%ld", &actual_val);
    263 
    264 	if (actual_val != value)
    265 		tst_res(TFAIL, "%s is not %ld but %ld.", path, value,
    266 			actual_val);
    267 	else
    268 		tst_res(TPASS, "%s is %ld.", path, actual_val);
    269 }
    270 
    271 static void wait_ksmd_full_scan(void)
    272 {
    273 	unsigned long full_scans, at_least_one_full_scan;
    274 	int count = 0;
    275 
    276 	SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
    277 	/*
    278 	 * The current scan is already in progress so we can't guarantee that
    279 	 * the get_user_pages() is called on every existing rmap_item if we
    280 	 * only waited for the remaining part of the scan.
    281 	 *
    282 	 * The actual merging happens after the unstable tree has been built so
    283 	 * we need to wait at least two full scans to guarantee merging, hence
    284 	 * wait full_scans to increment by 3 so that at least two full scans
    285 	 * will run.
    286 	 */
    287 	at_least_one_full_scan = full_scans + 3;
    288 	while (full_scans < at_least_one_full_scan) {
    289 		sleep(1);
    290 		count++;
    291 		SAFE_FILE_SCANF(PATH_KSM "full_scans", "%lu", &full_scans);
    292 	}
    293 
    294 	tst_res(TINFO, "ksm daemon takes %ds to run two full scans",
    295 		count);
    296 }
    297 
    298 static void final_group_check(int run, int pages_shared, int pages_sharing,
    299 			  int pages_volatile, int pages_unshared,
    300 			  int sleep_millisecs, int pages_to_scan)
    301 {
    302 	tst_res(TINFO, "check!");
    303 	check("run", run);
    304 	check("pages_shared", pages_shared);
    305 	check("pages_sharing", pages_sharing);
    306 	check("pages_volatile", pages_volatile);
    307 	check("pages_unshared", pages_unshared);
    308 	check("sleep_millisecs", sleep_millisecs);
    309 	check("pages_to_scan", pages_to_scan);
    310 }
    311 
    312 static void group_check(int run, int pages_shared, int pages_sharing,
    313 			int pages_volatile, int pages_unshared,
    314 			int sleep_millisecs, int pages_to_scan)
    315 {
    316 	if (run != 1) {
    317 		tst_res(TFAIL, "group_check run is not 1, %d.", run);
    318 	} else {
    319 		/* wait for ksm daemon to scan all mergeable pages. */
    320 		wait_ksmd_full_scan();
    321 	}
    322 
    323 	final_group_check(run, pages_shared, pages_sharing,
    324 			  pages_volatile, pages_unshared,
    325 			  sleep_millisecs, pages_to_scan);
    326 }
    327 
    328 static void verify(char **memory, char value, int proc,
    329 		    int start, int end, int start2, int end2)
    330 {
    331 	int i, j;
    332 	void *s = NULL;
    333 
    334 	s = SAFE_MALLOC((end - start) * (end2 - start2));
    335 
    336 	tst_res(TINFO, "child %d verifies memory content.", proc);
    337 	memset(s, value, (end - start) * (end2 - start2));
    338 	if (memcmp(memory[start], s, (end - start) * (end2 - start2))
    339 	    != 0)
    340 		for (j = start; j < end; j++)
    341 			for (i = start2; i < end2; i++)
    342 				if (memory[j][i] != value)
    343 					tst_res(TFAIL, "child %d has %c at "
    344 						 "%d,%d,%d.",
    345 						 proc, memory[j][i], proc,
    346 						 j, i);
    347 	free(s);
    348 }
    349 
    350 void check_hugepage(void)
    351 {
    352 	if (access(PATH_HUGEPAGES, F_OK))
    353 		tst_brk(TCONF, "Huge page is not supported.");
    354 }
    355 
/*
 * write_memcg - cap the test memory cgroup at TESTMEM bytes and move the
 * current process into that cgroup.
 */
void write_memcg(void)
{
	/* set limit_in_bytes of the test cgroup */
	SAFE_FILE_PRINTF(MEMCG_LIMIT, "%ld", TESTMEM);

	/* attach ourselves to the cgroup */
	SAFE_FILE_PRINTF(MEMCG_PATH_NEW "/tasks", "%d", getpid());
}
    362 
/* Description of one memset round performed by a KSM test child. */
struct ksm_merge_data {
	char data;			/* byte value written to the buffers */
	unsigned int mergeable_size;	/* bytes that stay mergeable; when it is
					 * less than the full buffer size, the
					 * last byte of the last page is made
					 * unique (set to 'e') */
};
    367 
/*
 * ksm_child_memset - fill one child's buffers with a single byte value
 * @child_num:  child index, only used for log messages
 * @size:       total size in MB across all buffers
 * @total_unit: number of buffers; each is (size/total_unit) MB
 * @ksm_merge_data: the byte to write plus how many bytes must remain
 *                  mergeable for this round
 * @memory:     array of @total_unit buffers
 *
 * When mergeable_size is smaller than the full size, the very last byte
 * of the last buffer is overwritten with 'e' so that one page stays
 * unmerged.
 */
static void ksm_child_memset(int child_num, int size, int total_unit,
		 struct ksm_merge_data ksm_merge_data, char **memory)
{
	int i = 0, j;
	int unit = size / total_unit;

	tst_res(TINFO, "child %d continues...", child_num);

	if (ksm_merge_data.mergeable_size == size * MB) {
		tst_res(TINFO, "child %d allocates %d MB filled with '%c'",
			child_num, size, ksm_merge_data.data);

	} else {
		tst_res(TINFO, "child %d allocates %d MB filled with '%c'"
				" except one page with 'e'",
				child_num, size, ksm_merge_data.data);
	}

	for (j = 0; j < total_unit; j++) {
		for (i = 0; (unsigned int)i < unit * MB; i++)
			memory[j][i] = ksm_merge_data.data;
	}

	/* if it contains unshared page, then set 'e' char
	 * at the end of the last page
	 * (relies on j == total_unit and i == unit * MB being left over
	 * from the loops above)
	 */
	if (ksm_merge_data.mergeable_size < size * MB)
		memory[j-1][i-1] = 'e';
}
    397 
/*
 * create_ksm_child - body of one KSM test child
 * @child_num: index of this child (log messages only)
 * @size: total memory in MB, mapped as size/unit mappings of @unit MB
 * @unit: size of each mapping in MB
 * @ksm_merge_data: the four memset rounds this child performs
 *
 * The child maps its buffers (marked MADV_MERGEABLE when available) and
 * then alternates with the parent: it raises SIGSTOP to hand control
 * back, runs one memset round when resumed, stops again, and finally
 * verifies the buffer contents survived the parent's merge/unmerge
 * cycle.
 */
static void create_ksm_child(int child_num, int size, int unit,
		       struct ksm_merge_data *ksm_merge_data)
{
	int j, total_unit;
	char **memory;

	/* The total units in all */
	total_unit = size / unit;

	/* Apply for the space for memory */
	memory = SAFE_MALLOC(total_unit * sizeof(char *));
	for (j = 0; j < total_unit; j++) {
		memory[j] = SAFE_MMAP(NULL, unit * MB, PROT_READ|PROT_WRITE,
			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
#ifdef HAVE_MADV_MERGEABLE
		if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
			tst_brk(TBROK|TERRNO, "madvise");
#endif
	}

	/* hand control back to the parent before the first round */
	tst_res(TINFO, "child %d stops.", child_num);
	if (raise(SIGSTOP) == -1)
		tst_brk(TBROK|TERRNO, "kill");
	fflush(stdout);

	/* four rounds, one per ksm_merge_data[] entry */
	for (j = 0; j < 4; j++) {

		ksm_child_memset(child_num, size, total_unit,
				  ksm_merge_data[j], memory);

		fflush(stdout);

		tst_res(TINFO, "child %d stops.", child_num);
		if (raise(SIGSTOP) == -1)
			tst_brk(TBROK|TERRNO, "kill");

		/* a partially-mergeable round placed the unique 'e' marker
		 * in the last byte of the last page; check it separately */
		if (ksm_merge_data[j].mergeable_size < size * MB) {
			verify(memory, 'e', child_num, total_unit - 1,
				total_unit, unit * MB - 1, unit * MB);
			verify(memory, ksm_merge_data[j].data, child_num,
				0, total_unit, 0, unit * MB - 1);
		} else {
			verify(memory, ksm_merge_data[j].data, child_num,
				0, total_unit, 0, unit * MB);
		}
	}

	tst_res(TINFO, "child %d finished.", child_num);
}
    447 
    448 static void stop_ksm_children(int *child, int num)
    449 {
    450 	int k, status;
    451 
    452 	tst_res(TINFO, "wait for all children to stop.");
    453 	for (k = 0; k < num; k++) {
    454 		SAFE_WAITPID(child[k], &status, WUNTRACED);
    455 		if (!WIFSTOPPED(status))
    456 			tst_brk(TBROK, "child %d was not stopped", k);
    457 	}
    458 }
    459 
    460 static void resume_ksm_children(int *child, int num)
    461 {
    462 	int k;
    463 
    464 	tst_res(TINFO, "resume all children.");
    465 	for (k = 0; k < num; k++)
    466 		SAFE_KILL(child[k], SIGCONT);
    467 
    468 	fflush(stdout);
    469 }
    470 
    471 void create_same_memory(int size, int num, int unit)
    472 {
    473 	int i, j, status, *child;
    474 	unsigned long ps, pages;
    475 	struct ksm_merge_data **ksm_data;
    476 
    477 	struct ksm_merge_data ksm_data0[] = {
    478 	       {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
    479 	};
    480 	struct ksm_merge_data ksm_data1[] = {
    481 	       {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
    482 	};
    483 	struct ksm_merge_data ksm_data2[] = {
    484 	       {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
    485 	};
    486 
    487 	ps = sysconf(_SC_PAGE_SIZE);
    488 	pages = MB / ps;
    489 
    490 	ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
    491 	/* Since from third child, the data is same with the first child's */
    492 	for (i = 0; i < num - 3; i++) {
    493 		ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
    494 		for (j = 0; j < 4; j++) {
    495 			ksm_data[i][j].data = ksm_data0[j].data;
    496 			ksm_data[i][j].mergeable_size =
    497 				ksm_data0[j].mergeable_size;
    498 		}
    499 	}
    500 
    501 	child = SAFE_MALLOC(num * sizeof(int));
    502 
    503 	for (i = 0; i < num; i++) {
    504 		fflush(stdout);
    505 		switch (child[i] = SAFE_FORK()) {
    506 		case 0:
    507 			if (i == 0) {
    508 				create_ksm_child(i, size, unit, ksm_data0);
    509 				exit(0);
    510 			} else if (i == 1) {
    511 				create_ksm_child(i, size, unit, ksm_data1);
    512 				exit(0);
    513 			} else if (i == 2) {
    514 				create_ksm_child(i, size, unit, ksm_data2);
    515 				exit(0);
    516 			} else {
    517 				create_ksm_child(i, size, unit, ksm_data[i-3]);
    518 				exit(0);
    519 			}
    520 		}
    521 	}
    522 
    523 	stop_ksm_children(child, num);
    524 
    525 	tst_res(TINFO, "KSM merging...");
    526 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    527 		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing", "%ld", size * pages * num);
    528 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
    529 	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld", size * pages * num);
    530 	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
    531 
    532 	resume_ksm_children(child, num);
    533 	stop_ksm_children(child, num);
    534 	group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);
    535 
    536 	resume_ksm_children(child, num);
    537 	stop_ksm_children(child, num);
    538 	group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);
    539 
    540 	resume_ksm_children(child, num);
    541 	stop_ksm_children(child, num);
    542 	group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);
    543 
    544 	resume_ksm_children(child, num);
    545 	stop_ksm_children(child, num);
    546 	group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);
    547 
    548 	tst_res(TINFO, "KSM unmerging...");
    549 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
    550 
    551 	resume_ksm_children(child, num);
    552 	final_group_check(2, 0, 0, 0, 0, 0, size * pages * num);
    553 
    554 	tst_res(TINFO, "stop KSM.");
    555 	SAFE_FILE_PRINTF(PATH_KSM "run", "0");
    556 	final_group_check(0, 0, 0, 0, 0, 0, size * pages * num);
    557 
    558 	while (waitpid(-1, &status, 0) > 0)
    559 		if (WEXITSTATUS(status) != 0)
    560 			tst_res(TFAIL, "child exit status is %d",
    561 				 WEXITSTATUS(status));
    562 }
    563 
    564 void test_ksm_merge_across_nodes(unsigned long nr_pages)
    565 {
    566 	char **memory;
    567 	int i, ret;
    568 	int num_nodes, *nodes;
    569 	unsigned long length;
    570 	unsigned long pagesize;
    571 
    572 #ifdef HAVE_NUMA_V2
    573 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
    574 #endif
    575 
    576 	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
    577 	if (ret != 0)
    578 		tst_brk(TBROK|TERRNO, "get_allowed_nodes_arr");
    579 	if (num_nodes < 2) {
    580 		tst_res(TINFO, "need NUMA system support");
    581 		free(nodes);
    582 		return;
    583 	}
    584 
    585 	pagesize = sysconf(_SC_PAGE_SIZE);
    586 	length = nr_pages * pagesize;
    587 
    588 	memory = SAFE_MALLOC(num_nodes * sizeof(char *));
    589 	for (i = 0; i < num_nodes; i++) {
    590 		memory[i] = SAFE_MMAP(NULL, length, PROT_READ|PROT_WRITE,
    591 			    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    592 #ifdef HAVE_MADV_MERGEABLE
    593 		if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
    594 			tst_brk(TBROK|TERRNO, "madvise");
    595 #endif
    596 
    597 #ifdef HAVE_NUMA_V2
    598 		clean_node(nmask);
    599 		set_node(nmask, nodes[i]);
    600 		/*
    601 		 * Use mbind() to make sure each node contains
    602 		 * length size memory.
    603 		 */
    604 		ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
    605 		if (ret == -1)
    606 			tst_brk(TBROK|TERRNO, "mbind");
    607 #endif
    608 
    609 		memset(memory[i], 10, length);
    610 	}
    611 
    612 	SAFE_FILE_PRINTF(PATH_KSM "sleep_millisecs", "0");
    613 	SAFE_FILE_PRINTF(PATH_KSM "pages_to_scan", "%ld",
    614 			 nr_pages * num_nodes);
    615 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    616 		SAFE_FILE_PRINTF(PATH_KSM "max_page_sharing",
    617 			"%ld", nr_pages * num_nodes);
    618 	/*
    619 	 * merge_across_nodes setting can be changed only when there
    620 	 * are no ksm shared pages in system, so set run 2 to unmerge
    621 	 * pages first, then to 1 after changing merge_across_nodes,
    622 	 * to remerge according to the new setting.
    623 	 */
    624 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
    625 	tst_res(TINFO, "Start to test KSM with merge_across_nodes=1");
    626 	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "1");
    627 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
    628 	group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
    629 		    nr_pages * num_nodes);
    630 
    631 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
    632 	tst_res(TINFO, "Start to test KSM with merge_across_nodes=0");
    633 	SAFE_FILE_PRINTF(PATH_KSM "merge_across_nodes", "0");
    634 	SAFE_FILE_PRINTF(PATH_KSM "run", "1");
    635 	group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
    636 		    0, 0, 0, nr_pages * num_nodes);
    637 
    638 	SAFE_FILE_PRINTF(PATH_KSM "run", "2");
    639 }
    640 
    641 /* THP */
    642 
    643 /* cpuset/memcg */
    644 
    645 static void gather_node_cpus(char *cpus, long nd)
    646 {
    647 	int ncpus = 0;
    648 	int i;
    649 	long online;
    650 	char buf[BUFSIZ];
    651 	char path[BUFSIZ], path1[BUFSIZ];
    652 
    653 	while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
    654 		ncpus++;
    655 
    656 	for (i = 0; i < ncpus; i++) {
    657 		snprintf(path, BUFSIZ,
    658 			 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
    659 		if (path_exist(path)) {
    660 			snprintf(path1, BUFSIZ, "%s/online", path);
    661 			/*
    662 			 * if there is no online knob, then the cpu cannot
    663 			 * be taken offline
    664 			 */
    665 			if (path_exist(path1)) {
    666 				SAFE_FILE_SCANF(path1, "%ld", &online);
    667 				if (online == 0)
    668 					continue;
    669 			}
    670 			sprintf(buf, "%d,", i);
    671 			strcat(cpus, buf);
    672 		}
    673 	}
    674 	/* Remove the trailing comma. */
    675 	cpus[strlen(cpus) - 1] = '\0';
    676 }
    677 
    678 void read_cpuset_files(char *prefix, char *filename, char *retbuf)
    679 {
    680 	int fd;
    681 	char path[BUFSIZ];
    682 
    683 	/*
    684 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
    685 	 * please see Documentation/cgroups/cpusets.txt from kernel src
    686 	 * for details
    687 	 */
    688 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
    689 	fd = open(path, O_RDONLY);
    690 	if (fd == -1) {
    691 		if (errno == ENOENT) {
    692 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
    693 				 prefix, filename);
    694 			fd = SAFE_OPEN(path, O_RDONLY);
    695 		} else
    696 			tst_brk(TBROK | TERRNO, "open %s", path);
    697 	}
    698 	if (read(fd, retbuf, BUFSIZ) < 0)
    699 		tst_brk(TBROK | TERRNO, "read %s", path);
    700 	close(fd);
    701 }
    702 
/*
 * write_cpuset_files - write @buf to a cpuset control file
 * @prefix:   cpuset mount point
 * @filename: knob name ("mems", "cpus", ...)
 * @buf:      NUL-terminated string to write
 *
 * Tries '<prefix>/<filename>' first, then '<prefix>/cpuset.<filename>';
 * please see Documentation/cgroups/cpusets.txt from kernel src
 * for details.
 */
void write_cpuset_files(char *prefix, char *filename, char *buf)
{
	int fd;
	char path[BUFSIZ];

	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
	fd = open(path, O_WRONLY);
	if (fd == -1) {
		if (errno == ENOENT) {
			/* newer kernels prefix the knobs with "cpuset." */
			snprintf(path, BUFSIZ, "%s/cpuset.%s",
				 prefix, filename);
			fd = SAFE_OPEN(path, O_WRONLY);
		} else
			tst_brk(TBROK | TERRNO, "open %s", path);
	}
	SAFE_WRITE(1, fd, buf, strlen(buf));
	close(fd);
}
    726 
/*
 * write_cpusets - confine the current process to NUMA node @nd via the
 * test cpuset: set cpuset.mems to @nd, cpuset.cpus to the node's online
 * CPUs (or CPU 0 if the node has none), and attach ourselves to the
 * cpuset's tasks file.
 */
void write_cpusets(long nd)
{
	char buf[BUFSIZ];
	char cpus[BUFSIZ] = "";

	snprintf(buf, BUFSIZ, "%ld", nd);
	write_cpuset_files(CPATH_NEW, "mems", buf);

	gather_node_cpus(cpus, nd);
	/*
	 * If the 'nd' node doesn't contain any CPUs,
	 * the first ID of CPU '0' will be used as
	 * the value of cpuset.cpus.
	 */
	if (strlen(cpus) != 0) {
		write_cpuset_files(CPATH_NEW, "cpus", cpus);
	} else {
		tst_res(TINFO, "No CPUs in the node%ld; "
				"using only CPU0", nd);
		write_cpuset_files(CPATH_NEW, "cpus", "0");
	}

	/* finally move this process into the new cpuset */
	SAFE_FILE_PRINTF(CPATH_NEW "/tasks", "%d", getpid());
}
    751 
/*
 * umount_mem - tear down a cgroup/cpuset hierarchy set up by mount_mem()
 * @path:     mount point
 * @path_new: test subdirectory inside the mount
 *
 * Moves every task listed in @path_new/tasks back to @path/tasks,
 * removes the subdirectory, then unmounts and removes the mount point.
 * Every failure is reported as TWARN and the teardown continues
 * best-effort.
 */
void umount_mem(char *path, char *path_new)
{
	FILE *fp;
	int fd;
	char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];

	/* Move all processes in task to its parent node. */
	sprintf(s, "%s/tasks", path);
	fd = open(s, O_WRONLY);
	if (fd == -1)
		tst_res(TWARN | TERRNO, "open %s", s);

	snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
	fp = fopen(s_new, "r");
	if (fp == NULL)
		tst_res(TWARN | TERRNO, "fopen %s", s_new);
	if ((fd != -1) && (fp != NULL)) {
		/* write each pid, dropping the trailing newline */
		while (fgets(value, BUFSIZ, fp) != NULL)
			if (write(fd, value, strlen(value) - 1)
			    != (ssize_t)strlen(value) - 1)
				tst_res(TWARN | TERRNO, "write %s", s);
	}
	if (fd != -1)
		close(fd);
	if (fp != NULL)
		fclose(fp);
	if (rmdir(path_new) == -1)
		tst_res(TWARN | TERRNO, "rmdir %s", path_new);
	if (umount(path) == -1)
		tst_res(TWARN | TERRNO, "umount %s", path);
	if (rmdir(path) == -1)
		tst_res(TWARN | TERRNO, "rmdir %s", path);
}
    785 
/*
 * mount_mem - mount filesystem @fs (cgroup/cpuset) at @path and create
 *             the test subdirectory @path_new inside it
 *
 * ENODEV from mount() means the filesystem is not configured in this
 * kernel, which ends the test with TCONF instead of TBROK.
 */
void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
{
	SAFE_MKDIR(path, 0777);
	if (mount(name, path, fs, 0, options) == -1) {
		if (errno == ENODEV) {
			/* clean up the mount point we just created */
			if (rmdir(path) == -1)
				tst_res(TWARN | TERRNO, "rmdir %s failed",
					 path);
			tst_brk(TCONF,
				 "file system %s is not configured in kernel",
				 fs);
		}
		tst_brk(TBROK | TERRNO, "mount %s", path);
	}
	SAFE_MKDIR(path_new, 0777);
}
    802 
    803 /* shared */
    804 
    805 /* Warning: *DO NOT* use this function in child */
/*
 * get_a_numa_node - pick an allowed NUMA node that has both memory and
 *                   CPUs available
 *
 * First requires the system to have at least two allowed nodes (TCONF
 * otherwise), then returns the first node providing both memory and
 * CPUs.  tst_brk() never returns, so the switch cases below that lack
 * a break cannot fall through in practice.
 */
unsigned int get_a_numa_node(void)
{
	unsigned int nd1, nd2;
	int ret;

	/* make sure this is a NUMA machine with >= 2 allowed nodes */
	ret = get_allowed_nodes(0, 2, &nd1, &nd2);
	switch (ret) {
	case 0:
		break;
	case -3:
		tst_brk(TCONF, "requires a NUMA system.");
	default:
		tst_brk(TBROK | TERRNO, "1st get_allowed_nodes");
	}

	/* pick a node that has both memory and CPUs */
	ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
	switch (ret) {
	case 0:
		tst_res(TINFO, "get node%u.", nd1);
		return nd1;
	case -3:
		tst_brk(TCONF, "requires a NUMA system that has "
			 "at least one node with both memory and CPU "
			 "available.");
	default:
		tst_brk(TBROK | TERRNO, "2nd get_allowed_nodes");
	}

	/* not reached */
	abort();
}
    837 
    838 int path_exist(const char *path, ...)
    839 {
    840 	va_list ap;
    841 	char pathbuf[PATH_MAX];
    842 
    843 	va_start(ap, path);
    844 	vsnprintf(pathbuf, sizeof(pathbuf), path, ap);
    845 	va_end(ap);
    846 
    847 	return access(pathbuf, F_OK) == 0;
    848 }
    849 
    850 void set_sys_tune(char *sys_file, long tune, int check)
    851 {
    852 	long val;
    853 	char path[BUFSIZ];
    854 
    855 	tst_res(TINFO, "set %s to %ld", sys_file, tune);
    856 
    857 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
    858 	SAFE_FILE_PRINTF(path, "%ld", tune);
    859 
    860 	if (check) {
    861 		val = get_sys_tune(sys_file);
    862 		if (val != tune)
    863 			tst_brk(TBROK, "%s = %ld, but expect %ld",
    864 				 sys_file, val, tune);
    865 	}
    866 }
    867 
    868 long get_sys_tune(char *sys_file)
    869 {
    870 	char path[BUFSIZ];
    871 	long tune;
    872 
    873 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
    874 	SAFE_FILE_SCANF(path, "%ld", &tune);
    875 
    876 	return tune;
    877 }
    878 
    879 void update_shm_size(size_t * shm_size)
    880 {
    881 	size_t shmmax;
    882 
    883 	SAFE_FILE_SCANF(PATH_SHMMAX, "%zu", &shmmax);
    884 	if (*shm_size > shmmax) {
    885 		tst_res(TINFO, "Set shm_size to shmmax: %zu", shmmax);
    886 		*shm_size = shmmax;
    887 	}
    888 }
    889 
    890 int range_is_mapped(unsigned long low, unsigned long high)
    891 {
    892 	FILE *fp;
    893 
    894 	fp = fopen("/proc/self/maps", "r");
    895 	if (fp == NULL)
    896 		tst_brk(TBROK | TERRNO, "Failed to open /proc/self/maps.");
    897 
    898 	while (!feof(fp)) {
    899 		unsigned long start, end;
    900 		int ret;
    901 
    902 		ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
    903 		if (ret != 2) {
    904 			fclose(fp);
    905 			tst_brk(TBROK | TERRNO, "Couldn't parse /proc/self/maps line.");
    906 		}
    907 
    908 		if ((start >= low) && (start < high)) {
    909 			fclose(fp);
    910 			return 1;
    911 		}
    912 		if ((end >= low) && (end < high)) {
    913 			fclose(fp);
    914 			return 1;
    915 		}
    916 	}
    917 
    918 	fclose(fp);
    919 	return 0;
    920 }
    921