Home | History | Annotate | Download | only in lib
      1 #include "config.h"
      2 #include <sys/types.h>
      3 #include <sys/mman.h>
      4 #include <sys/mount.h>
      5 #include <sys/stat.h>
      6 #include <sys/wait.h>
      7 #include <errno.h>
      8 #include <fcntl.h>
      9 #if HAVE_NUMA_H
     10 #include <numa.h>
     11 #endif
     12 #if HAVE_NUMAIF_H
     13 #include <numaif.h>
     14 #endif
     15 #include <pthread.h>
     16 #include <stdarg.h>
     17 #include <stdio.h>
     18 #include <string.h>
     19 #include <unistd.h>
     20 
     21 #include "test.h"
     22 #include "safe_macros.h"
     23 #include "mem.h"
     24 #include "numa_helper.h"
     25 
     26 /* OOM */
     27 
     28 static int alloc_mem(long int length, int testcase)
     29 {
     30 	char *s;
     31 	long i, pagesz = getpagesize();
     32 	int loop = 10;
     33 
     34 	tst_resm(TINFO, "thread (%lx), allocating %ld bytes.",
     35 		(unsigned long) pthread_self(), length);
     36 
     37 	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
     38 		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
     39 	if (s == MAP_FAILED)
     40 		return errno;
     41 
     42 	if (testcase == MLOCK) {
     43 		while (mlock(s, length) == -1 && loop > 0) {
     44 			if (EAGAIN != errno)
     45 				return errno;
     46 			usleep(300000);
     47 			loop--;
     48 		}
     49 	}
     50 
     51 #ifdef HAVE_MADV_MERGEABLE
     52 	if (testcase == KSM && madvise(s, length, MADV_MERGEABLE) == -1)
     53 		return errno;
     54 #endif
     55 	for (i = 0; i < length; i += pagesz)
     56 		s[i] = '\a';
     57 
     58 	return 0;
     59 }
     60 
     61 static void *child_alloc_thread(void *args)
     62 {
     63 	int ret = 0;
     64 
     65 	/* keep allocating until there's an error */
     66 	while (!ret)
     67 		ret = alloc_mem(LENGTH, (long)args);
     68 	exit(ret);
     69 }
     70 
     71 static void child_alloc(int testcase, int lite, int threads)
     72 {
     73 	int i;
     74 	pthread_t *th;
     75 
     76 	if (lite) {
     77 		int ret = alloc_mem(TESTMEM + MB, testcase);
     78 		exit(ret);
     79 	}
     80 
     81 	th = malloc(sizeof(pthread_t) * threads);
     82 	if (!th) {
     83 		tst_resm(TINFO | TERRNO, "malloc");
     84 		goto out;
     85 	}
     86 
     87 	for (i = 0; i < threads; i++) {
     88 		TEST(pthread_create(&th[i], NULL, child_alloc_thread,
     89 			(void *)((long)testcase)));
     90 		if (TEST_RETURN) {
     91 			tst_resm(TINFO | TRERRNO, "pthread_create");
     92 			/*
     93 			 * Keep going if thread other than first fails to
     94 			 * spawn due to lack of resources.
     95 			 */
     96 			if (i == 0 || TEST_RETURN != EAGAIN)
     97 				goto out;
     98 		}
     99 	}
    100 
    101 	/* wait for one of threads to exit whole process */
    102 	while (1)
    103 		sleep(1);
    104 out:
    105 	exit(1);
    106 }
    107 
    108 /*
    109  * oom - allocates memory according to specified testcase and checks
    110  *       desired outcome (e.g. child killed, operation failed with ENOMEM)
    111  * @testcase: selects how child allocates memory
    112  *            valid choices are: NORMAL, MLOCK and KSM
    113  * @lite: if non-zero, child makes only single TESTMEM+MB allocation
    114  *        if zero, child keeps allocating memory until it gets killed
    115  *        or some operation fails
    116  * @retcode: expected return code of child process
    117  *           if matches child ret code, this function reports PASS,
    118  *           otherwise it reports FAIL
    119  * @allow_sigkill: if zero and child is killed, this function reports FAIL
    120  *                 if non-zero, then if child is killed by SIGKILL
    121  *                 it is considered as PASS
    122  */
    123 void oom(int testcase, int lite, int retcode, int allow_sigkill)
    124 {
    125 	pid_t pid;
    126 	int status, threads;
    127 
    128 	switch (pid = fork()) {
    129 	case -1:
    130 		if (errno == retcode) {
    131 			tst_resm(TPASS | TERRNO, "fork");
    132 			return;
    133 		}
    134 		tst_brkm(TBROK | TERRNO, cleanup, "fork");
    135 	case 0:
    136 		threads = MAX(1, tst_ncpus() - 1);
    137 		child_alloc(testcase, lite, threads);
    138 	default:
    139 		break;
    140 	}
    141 
    142 	tst_resm(TINFO, "expected victim is %d.", pid);
    143 	if (waitpid(-1, &status, 0) == -1)
    144 		tst_brkm(TBROK | TERRNO, cleanup, "waitpid");
    145 
    146 	if (WIFSIGNALED(status)) {
    147 		if (allow_sigkill && WTERMSIG(status) == SIGKILL) {
    148 			tst_resm(TPASS, "victim signalled: (%d) %s",
    149 				SIGKILL,
    150 				tst_strsig(SIGKILL));
    151 		} else {
    152 			tst_resm(TFAIL, "victim signalled: (%d) %s",
    153 				WTERMSIG(status),
    154 				tst_strsig(WTERMSIG(status)));
    155 		}
    156 	} else if (WIFEXITED(status)) {
    157 		if (WEXITSTATUS(status) == retcode) {
    158 			tst_resm(TPASS, "victim retcode: (%d) %s",
    159 				retcode, strerror(retcode));
    160 		} else {
    161 			tst_resm(TFAIL, "victim unexpectedly ended with "
    162 				"retcode: %d, expected: %d",
    163 				WEXITSTATUS(status), retcode);
    164 		}
    165 	} else {
    166 		tst_resm(TFAIL, "victim unexpectedly ended");
    167 	}
    168 }
    169 
    170 static void set_global_mempolicy(int mempolicy)
    171 {
    172 #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
    173 	&& HAVE_MPOL_CONSTANTS
    174 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
    175 	int num_nodes, *nodes;
    176 	int ret;
    177 
    178 	if (mempolicy) {
    179 		ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
    180 		if (ret != 0)
    181 			tst_brkm(TBROK|TERRNO, cleanup,
    182 				 "get_allowed_nodes_arr");
    183 		if (num_nodes < 2) {
    184 			tst_resm(TINFO, "mempolicy need NUMA system support");
    185 			free(nodes);
    186 			return;
    187 		}
    188 		switch(mempolicy) {
    189 		case MPOL_BIND:
    190 			/* bind the second node */
    191 			set_node(nmask, nodes[1]);
    192 			break;
    193 		case MPOL_INTERLEAVE:
    194 		case MPOL_PREFERRED:
    195 			if (num_nodes == 2) {
    196 				tst_resm(TINFO, "The mempolicy need "
    197 					 "more than 2 numa nodes");
    198 				free(nodes);
    199 				return;
    200 			} else {
    201 				/* Using the 2nd,3rd node */
    202 				set_node(nmask, nodes[1]);
    203 				set_node(nmask, nodes[2]);
    204 			}
    205 			break;
    206 		default:
    207 			tst_brkm(TBROK|TERRNO, cleanup, "Bad mempolicy mode");
    208 		}
    209 		if (set_mempolicy(mempolicy, nmask, MAXNODES) == -1)
    210 			tst_brkm(TBROK|TERRNO, cleanup, "set_mempolicy");
    211 	}
    212 #endif
    213 }
    214 
    215 void testoom(int mempolicy, int lite, int retcode, int allow_sigkill)
    216 {
    217 	int ksm_run_orig;
    218 
    219 	set_global_mempolicy(mempolicy);
    220 
    221 	tst_resm(TINFO, "start normal OOM testing.");
    222 	oom(NORMAL, lite, retcode, allow_sigkill);
    223 
    224 	tst_resm(TINFO, "start OOM testing for mlocked pages.");
    225 	oom(MLOCK, lite, retcode, allow_sigkill);
    226 
    227 	/*
    228 	 * Skip oom(KSM) if lite == 1, since limit_in_bytes may vary from
    229 	 * run to run, which isn't reliable for oom03 cgroup test.
    230 	 */
    231 	if (access(PATH_KSM, F_OK) == -1 || lite == 1) {
    232 		tst_resm(TINFO, "KSM is not configed or lite == 1, "
    233 			 "skip OOM test for KSM pags");
    234 	} else {
    235 		tst_resm(TINFO, "start OOM testing for KSM pages.");
    236 		SAFE_FILE_SCANF(cleanup, PATH_KSM "run", "%d", &ksm_run_orig);
    237 		SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
    238 		oom(KSM, lite, retcode, allow_sigkill);
    239 		SAFE_FILE_PRINTF(cleanup,PATH_KSM "run", "%d", ksm_run_orig);
    240 	}
    241 }
    242 
    243 /* KSM */
    244 
/*
 * Value of the KSM "max_page_sharing" knob as read by
 * save_max_page_sharing(); written back by restore_max_page_sharing().
 */
static int max_page_sharing;
    246 
    247 void save_max_page_sharing(void)
    248 {
    249 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    250 	        SAFE_FILE_SCANF(NULL, PATH_KSM "max_page_sharing",
    251 	                        "%d", &max_page_sharing);
    252 }
    253 
    254 void restore_max_page_sharing(void)
    255 {
    256 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    257 	        FILE_PRINTF(PATH_KSM "max_page_sharing",
    258 	                         "%d", max_page_sharing);
    259 }
    260 
    261 static void check(char *path, long int value)
    262 {
    263 	char fullpath[BUFSIZ];
    264 	long actual_val;
    265 
    266 	snprintf(fullpath, BUFSIZ, PATH_KSM "%s", path);
    267 	SAFE_FILE_SCANF(cleanup, fullpath, "%ld", &actual_val);
    268 
    269 	tst_resm(TINFO, "%s is %ld.", path, actual_val);
    270 	if (actual_val != value)
    271 		tst_resm(TFAIL, "%s is not %ld.", path, value);
    272 }
    273 
    274 static void wait_ksmd_done(void)
    275 {
    276 	long pages_shared, pages_sharing, pages_volatile, pages_unshared;
    277 	long old_pages_shared = 0, old_pages_sharing = 0;
    278 	long old_pages_volatile = 0, old_pages_unshared = 0;
    279 	int changing = 1, count = 0;
    280 
    281 	while (changing) {
    282 		sleep(10);
    283 		count++;
    284 
    285 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_shared",
    286 				"%ld", &pages_shared);
    287 
    288 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_sharing",
    289 				"%ld", &pages_sharing);
    290 
    291 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_volatile",
    292 				"%ld", &pages_volatile);
    293 
    294 		SAFE_FILE_SCANF(cleanup, PATH_KSM "pages_unshared",
    295 				"%ld", &pages_unshared);
    296 
    297 		if (pages_shared != old_pages_shared ||
    298 		    pages_sharing != old_pages_sharing ||
    299 		    pages_volatile != old_pages_volatile ||
    300 		    pages_unshared != old_pages_unshared) {
    301 			old_pages_shared = pages_shared;
    302 			old_pages_sharing = pages_sharing;
    303 			old_pages_volatile = pages_volatile;
    304 			old_pages_unshared = pages_unshared;
    305 		} else {
    306 			changing = 0;
    307 		}
    308 	}
    309 
    310 	tst_resm(TINFO, "ksm daemon takes %ds to scan all mergeable pages",
    311 		 count * 10);
    312 }
    313 
    314 static void group_check(int run, int pages_shared, int pages_sharing,
    315 			 int pages_volatile, int pages_unshared,
    316 			 int sleep_millisecs, int pages_to_scan)
    317 {
    318 	/* wait for ksm daemon to scan all mergeable pages. */
    319 	wait_ksmd_done();
    320 
    321 	tst_resm(TINFO, "check!");
    322 	check("run", run);
    323 	check("pages_shared", pages_shared);
    324 	check("pages_sharing", pages_sharing);
    325 	check("pages_volatile", pages_volatile);
    326 	check("pages_unshared", pages_unshared);
    327 	check("sleep_millisecs", sleep_millisecs);
    328 	check("pages_to_scan", pages_to_scan);
    329 }
    330 
    331 static void verify(char **memory, char value, int proc,
    332 		    int start, int end, int start2, int end2)
    333 {
    334 	int i, j;
    335 	void *s = NULL;
    336 
    337 	s = malloc((end - start) * (end2 - start2));
    338 	if (s == NULL)
    339 		tst_brkm(TBROK | TERRNO, tst_exit, "malloc");
    340 
    341 	tst_resm(TINFO, "child %d verifies memory content.", proc);
    342 	memset(s, value, (end - start) * (end2 - start2));
    343 	if (memcmp(memory[start], s, (end - start) * (end2 - start2))
    344 	    != 0)
    345 		for (j = start; j < end; j++)
    346 			for (i = start2; i < end2; i++)
    347 				if (memory[j][i] != value)
    348 					tst_resm(TFAIL, "child %d has %c at "
    349 						 "%d,%d,%d.",
    350 						 proc, memory[j][i], proc,
    351 						 j, i);
    352 	free(s);
    353 }
    354 
    355 void write_memcg(void)
    356 {
    357 	SAFE_FILE_PRINTF(NULL, MEMCG_LIMIT, "%ld", TESTMEM);
    358 
    359 	SAFE_FILE_PRINTF(NULL, MEMCG_PATH_NEW "/tasks", "%d", getpid());
    360 }
    361 
/*
 * Describes one fill step of a KSM child (see ksm_child_memset()).
 */
struct ksm_merge_data {
	/* byte value the child's buffers are filled with */
	char data;
	/*
	 * Compared against size * MB: when smaller, the last byte of the
	 * last buffer is set to 'e' instead of the fill value.
	 */
	unsigned int mergeable_size;
};
    366 
    367 static void ksm_child_memset(int child_num, int size, int total_unit,
    368 		 struct ksm_merge_data ksm_merge_data, char **memory)
    369 {
    370 	int i = 0, j;
    371 	int unit = size / total_unit;
    372 
    373 	tst_resm(TINFO, "child %d continues...", child_num);
    374 
    375 	if (ksm_merge_data.mergeable_size == size * MB) {
    376 		tst_resm(TINFO, "child %d allocates %d MB filled with '%c'",
    377 			child_num, size, ksm_merge_data.data);
    378 
    379 	} else {
    380 		tst_resm(TINFO, "child %d allocates %d MB filled with '%c'"
    381 				" except one page with 'e'",
    382 				child_num, size, ksm_merge_data.data);
    383 	}
    384 
    385 	for (j = 0; j < total_unit; j++) {
    386 		for (i = 0; (unsigned int)i < unit * MB; i++)
    387 			memory[j][i] = ksm_merge_data.data;
    388 	}
    389 
    390 	/* if it contains unshared page, then set 'e' char
    391 	 * at the end of the last page
    392 	 */
    393 	if (ksm_merge_data.mergeable_size < size * MB)
    394 		memory[j-1][i-1] = 'e';
    395 }
    396 
    397 static void create_ksm_child(int child_num, int size, int unit,
    398 		       struct ksm_merge_data *ksm_merge_data)
    399 {
    400 	int j, total_unit;
    401 	char **memory;
    402 
    403 	/* The total units in all */
    404 	total_unit = size / unit;
    405 
    406 	/* Apply for the space for memory */
    407 	memory = malloc(total_unit * sizeof(char *));
    408 	for (j = 0; j < total_unit; j++) {
    409 		memory[j] = mmap(NULL, unit * MB, PROT_READ|PROT_WRITE,
    410 			MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    411 		if (memory[j] == MAP_FAILED)
    412 			tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
    413 #ifdef HAVE_MADV_MERGEABLE
    414 		if (madvise(memory[j], unit * MB, MADV_MERGEABLE) == -1)
    415 			tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
    416 #endif
    417 	}
    418 
    419 	tst_resm(TINFO, "child %d stops.", child_num);
    420 	if (raise(SIGSTOP) == -1)
    421 		tst_brkm(TBROK|TERRNO, tst_exit, "kill");
    422 	fflush(stdout);
    423 
    424 	for (j = 0; j < 4; j++) {
    425 
    426 		ksm_child_memset(child_num, size, total_unit,
    427 				  ksm_merge_data[j], memory);
    428 
    429 		fflush(stdout);
    430 
    431 		tst_resm(TINFO, "child %d stops.", child_num);
    432 		if (raise(SIGSTOP) == -1)
    433 			tst_brkm(TBROK|TERRNO, tst_exit, "kill");
    434 
    435 		if (ksm_merge_data[j].mergeable_size < size * MB) {
    436 			verify(memory, 'e', child_num, total_unit - 1,
    437 				total_unit, unit * MB - 1, unit * MB);
    438 			verify(memory, ksm_merge_data[j].data, child_num,
    439 				0, total_unit, 0, unit * MB - 1);
    440 		} else {
    441 			verify(memory, ksm_merge_data[j].data, child_num,
    442 				0, total_unit, 0, unit * MB);
    443 		}
    444 	}
    445 
    446 	tst_resm(TINFO, "child %d finished.", child_num);
    447 }
    448 
    449 static void stop_ksm_children(int *child, int num)
    450 {
    451 	int k, status;
    452 
    453 	tst_resm(TINFO, "wait for all children to stop.");
    454 	for (k = 0; k < num; k++) {
    455 		if (waitpid(child[k], &status, WUNTRACED) == -1)
    456 			tst_brkm(TBROK|TERRNO, cleanup, "waitpid");
    457 		if (!WIFSTOPPED(status))
    458 			tst_brkm(TBROK, cleanup, "child %d was not stopped", k);
    459 	}
    460 }
    461 
    462 static void resume_ksm_children(int *child, int num)
    463 {
    464 	int k;
    465 
    466 	tst_resm(TINFO, "resume all children.");
    467 	for (k = 0; k < num; k++) {
    468 		if (kill(child[k], SIGCONT) == -1)
    469 			tst_brkm(TBROK|TERRNO, cleanup, "kill child[%d]", k);
    470 	}
    471 	fflush(stdout);
    472 }
    473 
    474 void create_same_memory(int size, int num, int unit)
    475 {
    476 	int i, j, status, *child;
    477 	unsigned long ps, pages;
    478 	struct ksm_merge_data **ksm_data;
    479 
    480 	struct ksm_merge_data ksm_data0[] = {
    481 	       {'c', size*MB}, {'c', size*MB}, {'d', size*MB}, {'d', size*MB},
    482 	};
    483 	struct ksm_merge_data ksm_data1[] = {
    484 	       {'a', size*MB}, {'b', size*MB}, {'d', size*MB}, {'d', size*MB-1},
    485 	};
    486 	struct ksm_merge_data ksm_data2[] = {
    487 	       {'a', size*MB}, {'a', size*MB}, {'d', size*MB}, {'d', size*MB},
    488 	};
    489 
    490 	ps = sysconf(_SC_PAGE_SIZE);
    491 	pages = MB / ps;
    492 
    493 	ksm_data = malloc((num - 3) * sizeof(struct ksm_merge_data *));
    494 	/* Since from third child, the data is same with the first child's */
    495 	for (i = 0; i < num - 3; i++) {
    496 		ksm_data[i] = malloc(4 * sizeof(struct ksm_merge_data));
    497 		for (j = 0; j < 4; j++) {
    498 			ksm_data[i][j].data = ksm_data0[j].data;
    499 			ksm_data[i][j].mergeable_size =
    500 				ksm_data0[j].mergeable_size;
    501 		}
    502 	}
    503 
    504 	child = malloc(num * sizeof(int));
    505 	if (child == NULL)
    506 		tst_brkm(TBROK | TERRNO, cleanup, "malloc");
    507 
    508 	for (i = 0; i < num; i++) {
    509 		fflush(stdout);
    510 		switch (child[i] = fork()) {
    511 		case -1:
    512 			tst_brkm(TBROK|TERRNO, cleanup, "fork");
    513 		case 0:
    514 			if (i == 0) {
    515 				create_ksm_child(i, size, unit, ksm_data0);
    516 				exit(0);
    517 			} else if (i == 1) {
    518 				create_ksm_child(i, size, unit, ksm_data1);
    519 				exit(0);
    520 			} else if (i == 2) {
    521 				create_ksm_child(i, size, unit, ksm_data2);
    522 				exit(0);
    523 			} else {
    524 				create_ksm_child(i, size, unit, ksm_data[i-3]);
    525 				exit(0);
    526 			}
    527 		}
    528 	}
    529 
    530 	stop_ksm_children(child, num);
    531 
    532 	tst_resm(TINFO, "KSM merging...");
    533 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    534 		SAFE_FILE_PRINTF(cleanup, PATH_KSM "max_page_sharing",
    535 				"%ld", size * pages * num);
    536 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
    537 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
    538 			 size * pages * num);
    539 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");
    540 
    541 	resume_ksm_children(child, num);
    542 	group_check(1, 2, size * num * pages - 2, 0, 0, 0, size * pages * num);
    543 
    544 	stop_ksm_children(child, num);
    545 	resume_ksm_children(child, num);
    546 	group_check(1, 3, size * num * pages - 3, 0, 0, 0, size * pages * num);
    547 
    548 	stop_ksm_children(child, num);
    549 	resume_ksm_children(child, num);
    550 	group_check(1, 1, size * num * pages - 1, 0, 0, 0, size * pages * num);
    551 
    552 	stop_ksm_children(child, num);
    553 	resume_ksm_children(child, num);
    554 	group_check(1, 1, size * num * pages - 2, 0, 1, 0, size * pages * num);
    555 
    556 	stop_ksm_children(child, num);
    557 
    558 	tst_resm(TINFO, "KSM unmerging...");
    559 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
    560 
    561 	resume_ksm_children(child, num);
    562 	group_check(2, 0, 0, 0, 0, 0, size * pages * num);
    563 
    564 	tst_resm(TINFO, "stop KSM.");
    565 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "0");
    566 	group_check(0, 0, 0, 0, 0, 0, size * pages * num);
    567 
    568 	while (waitpid(-1, &status, WUNTRACED | WCONTINUED) > 0)
    569 		if (WEXITSTATUS(status) != 0)
    570 			tst_resm(TFAIL, "child exit status is %d",
    571 				 WEXITSTATUS(status));
    572 }
    573 
    574 void test_ksm_merge_across_nodes(unsigned long nr_pages)
    575 {
    576 	char **memory;
    577 	int i, ret;
    578 	int num_nodes, *nodes;
    579 	unsigned long length;
    580 	unsigned long pagesize;
    581 
    582 #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
    583 	&& HAVE_MPOL_CONSTANTS
    584 	unsigned long nmask[MAXNODES / BITS_PER_LONG] = { 0 };
    585 #endif
    586 
    587 	ret = get_allowed_nodes_arr(NH_MEMS|NH_CPUS, &num_nodes, &nodes);
    588 	if (ret != 0)
    589 		tst_brkm(TBROK|TERRNO, cleanup, "get_allowed_nodes_arr");
    590 	if (num_nodes < 2) {
    591 		tst_resm(TINFO, "need NUMA system support");
    592 		free(nodes);
    593 		return;
    594 	}
    595 
    596 	pagesize = sysconf(_SC_PAGE_SIZE);
    597 	length = nr_pages * pagesize;
    598 
    599 	memory = malloc(num_nodes * sizeof(char *));
    600 	for (i = 0; i < num_nodes; i++) {
    601 		memory[i] = mmap(NULL, length, PROT_READ|PROT_WRITE,
    602 			    MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
    603 		if (memory[i] == MAP_FAILED)
    604 			tst_brkm(TBROK|TERRNO, tst_exit, "mmap");
    605 #ifdef HAVE_MADV_MERGEABLE
    606 		if (madvise(memory[i], length, MADV_MERGEABLE) == -1)
    607 			tst_brkm(TBROK|TERRNO, tst_exit, "madvise");
    608 #endif
    609 
    610 #if HAVE_NUMA_H && HAVE_LINUX_MEMPOLICY_H && HAVE_NUMAIF_H \
    611 	&& HAVE_MPOL_CONSTANTS
    612 		clean_node(nmask);
    613 		set_node(nmask, nodes[i]);
    614 		/*
    615 		 * Use mbind() to make sure each node contains
    616 		 * length size memory.
    617 		 */
    618 		ret = mbind(memory[i], length, MPOL_BIND, nmask, MAXNODES, 0);
    619 		if (ret == -1)
    620 			tst_brkm(TBROK|TERRNO, tst_exit, "mbind");
    621 #endif
    622 
    623 		memset(memory[i], 10, length);
    624 	}
    625 
    626 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "sleep_millisecs", "0");
    627 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "pages_to_scan", "%ld",
    628 			 nr_pages * num_nodes);
    629 	if (access(PATH_KSM "max_page_sharing", F_OK) == 0)
    630 		SAFE_FILE_PRINTF(cleanup, PATH_KSM "max_page_sharing",
    631 			"%ld", nr_pages * num_nodes);
    632 	/*
    633 	 * merge_across_nodes setting can be changed only when there
    634 	 * are no ksm shared pages in system, so set run 2 to unmerge
    635 	 * pages first, then to 1 after changing merge_across_nodes,
    636 	 * to remerge according to the new setting.
    637 	 */
    638 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
    639 	wait_ksmd_done();
    640 	tst_resm(TINFO, "Start to test KSM with merge_across_nodes=1");
    641 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "1");
    642 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
    643 	group_check(1, 1, nr_pages * num_nodes - 1, 0, 0, 0,
    644 		    nr_pages * num_nodes);
    645 
    646 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
    647 	wait_ksmd_done();
    648 	tst_resm(TINFO, "Start to test KSM with merge_across_nodes=0");
    649 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "merge_across_nodes", "0");
    650 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "1");
    651 	group_check(1, num_nodes, nr_pages * num_nodes - num_nodes,
    652 		    0, 0, 0, nr_pages * num_nodes);
    653 
    654 	SAFE_FILE_PRINTF(cleanup, PATH_KSM "run", "2");
    655 	wait_ksmd_done();
    656 }
    657 
    658 void check_ksm_options(int *size, int *num, int *unit)
    659 {
    660 	if (opt_size) {
    661 		*size = atoi(opt_sizestr);
    662 		if (*size < 1)
    663 			tst_brkm(TBROK, cleanup, "size cannot be less than 1.");
    664 	}
    665 	if (opt_unit) {
    666 		*unit = atoi(opt_unitstr);
    667 		if (*unit > *size)
    668 			tst_brkm(TBROK, cleanup,
    669 				 "unit cannot be greater than size.");
    670 		if (*size % *unit != 0)
    671 			tst_brkm(TBROK, cleanup,
    672 				 "the remainder of division of size by unit is "
    673 				 "not zero.");
    674 	}
    675 	if (opt_num) {
    676 		*num = atoi(opt_numstr);
    677 		if (*num < 3)
    678 			tst_brkm(TBROK, cleanup,
    679 				 "process number cannot be less 3.");
    680 	}
    681 }
    682 
    683 void ksm_usage(void)
    684 {
    685 	printf("  -n      Number of processes\n");
    686 	printf("  -s      Memory allocation size in MB\n");
    687 	printf("  -u      Memory allocation unit in MB\n");
    688 }
    689 
    690 /* THP */
    691 
    692 /* cpuset/memcg */
    693 
    694 static void gather_node_cpus(char *cpus, long nd)
    695 {
    696 	int ncpus = 0;
    697 	int i;
    698 	long online;
    699 	char buf[BUFSIZ];
    700 	char path[BUFSIZ], path1[BUFSIZ];
    701 
    702 	while (path_exist(PATH_SYS_SYSTEM "/cpu/cpu%d", ncpus))
    703 		ncpus++;
    704 
    705 	for (i = 0; i < ncpus; i++) {
    706 		snprintf(path, BUFSIZ,
    707 			 PATH_SYS_SYSTEM "/node/node%ld/cpu%d", nd, i);
    708 		if (path_exist(path)) {
    709 			snprintf(path1, BUFSIZ, "%s/online", path);
    710 			/*
    711 			 * if there is no online knob, then the cpu cannot
    712 			 * be taken offline
    713 			 */
    714 			if (path_exist(path1)) {
    715 				SAFE_FILE_SCANF(cleanup, path1, "%ld", &online);
    716 				if (online == 0)
    717 					continue;
    718 			}
    719 			sprintf(buf, "%d,", i);
    720 			strcat(cpus, buf);
    721 		}
    722 	}
    723 	/* Remove the trailing comma. */
    724 	cpus[strlen(cpus) - 1] = '\0';
    725 }
    726 
    727 void read_cpuset_files(char *prefix, char *filename, char *retbuf)
    728 {
    729 	int fd;
    730 	char path[BUFSIZ];
    731 
    732 	/*
    733 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
    734 	 * please see Documentation/cgroups/cpusets.txt from kernel src
    735 	 * for details
    736 	 */
    737 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
    738 	fd = open(path, O_RDONLY);
    739 	if (fd == -1) {
    740 		if (errno == ENOENT) {
    741 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
    742 				 prefix, filename);
    743 			fd = open(path, O_RDONLY);
    744 			if (fd == -1)
    745 				tst_brkm(TBROK | TERRNO, cleanup,
    746 					 "open %s", path);
    747 		} else
    748 			tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
    749 	}
    750 	if (read(fd, retbuf, BUFSIZ) < 0)
    751 		tst_brkm(TBROK | TERRNO, cleanup, "read %s", path);
    752 	close(fd);
    753 }
    754 
    755 void write_cpuset_files(char *prefix, char *filename, char *buf)
    756 {
    757 	int fd;
    758 	char path[BUFSIZ];
    759 
    760 	/*
    761 	 * try either '/dev/cpuset/XXXX' or '/dev/cpuset/cpuset.XXXX'
    762 	 * please see Documentation/cgroups/cpusets.txt from kernel src
    763 	 * for details
    764 	 */
    765 	snprintf(path, BUFSIZ, "%s/%s", prefix, filename);
    766 	fd = open(path, O_WRONLY);
    767 	if (fd == -1) {
    768 		if (errno == ENOENT) {
    769 			snprintf(path, BUFSIZ, "%s/cpuset.%s",
    770 				 prefix, filename);
    771 			fd = open(path, O_WRONLY);
    772 			if (fd == -1)
    773 				tst_brkm(TBROK | TERRNO, cleanup,
    774 					 "open %s", path);
    775 		} else
    776 			tst_brkm(TBROK | TERRNO, cleanup, "open %s", path);
    777 	}
    778 	if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
    779 		tst_brkm(TBROK | TERRNO, cleanup, "write %s", path);
    780 	close(fd);
    781 }
    782 
    783 void write_cpusets(long nd)
    784 {
    785 	char buf[BUFSIZ];
    786 	char cpus[BUFSIZ] = "";
    787 
    788 	snprintf(buf, BUFSIZ, "%ld", nd);
    789 	write_cpuset_files(CPATH_NEW, "mems", buf);
    790 
    791 	gather_node_cpus(cpus, nd);
    792 	/*
    793 	 * If the 'nd' node doesn't contain any CPUs,
    794 	 * the first ID of CPU '0' will be used as
    795 	 * the value of cpuset.cpus.
    796 	 */
    797 	if (strlen(cpus) != 0) {
    798 		write_cpuset_files(CPATH_NEW, "cpus", cpus);
    799 	} else {
    800 		tst_resm(TINFO, "No CPUs in the node%ld; "
    801 				"using only CPU0", nd);
    802 		write_cpuset_files(CPATH_NEW, "cpus", "0");
    803 	}
    804 
    805 	SAFE_FILE_PRINTF(NULL, CPATH_NEW "/tasks", "%d", getpid());
    806 }
    807 
    808 void umount_mem(char *path, char *path_new)
    809 {
    810 	FILE *fp;
    811 	int fd;
    812 	char s_new[BUFSIZ], s[BUFSIZ], value[BUFSIZ];
    813 
    814 	/* Move all processes in task to its parent node. */
    815 	sprintf(s, "%s/tasks", path);
    816 	fd = open(s, O_WRONLY);
    817 	if (fd == -1)
    818 		tst_resm(TWARN | TERRNO, "open %s", s);
    819 
    820 	snprintf(s_new, BUFSIZ, "%s/tasks", path_new);
    821 	fp = fopen(s_new, "r");
    822 	if (fp == NULL)
    823 		tst_resm(TWARN | TERRNO, "fopen %s", s_new);
    824 	if ((fd != -1) && (fp != NULL)) {
    825 		while (fgets(value, BUFSIZ, fp) != NULL)
    826 			if (write(fd, value, strlen(value) - 1)
    827 			    != (ssize_t)strlen(value) - 1)
    828 				tst_resm(TWARN | TERRNO, "write %s", s);
    829 	}
    830 	if (fd != -1)
    831 		close(fd);
    832 	if (fp != NULL)
    833 		fclose(fp);
    834 	if (rmdir(path_new) == -1)
    835 		tst_resm(TWARN | TERRNO, "rmdir %s", path_new);
    836 	if (umount(path) == -1)
    837 		tst_resm(TWARN | TERRNO, "umount %s", path);
    838 	if (rmdir(path) == -1)
    839 		tst_resm(TWARN | TERRNO, "rmdir %s", path);
    840 }
    841 
    842 void mount_mem(char *name, char *fs, char *options, char *path, char *path_new)
    843 {
    844 	if (mkdir(path, 0777) == -1)
    845 		tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path);
    846 	if (mount(name, path, fs, 0, options) == -1) {
    847 		if (errno == ENODEV) {
    848 			if (rmdir(path) == -1)
    849 				tst_resm(TWARN | TERRNO, "rmdir %s failed",
    850 					 path);
    851 			tst_brkm(TCONF, NULL,
    852 				 "file system %s is not configured in kernel",
    853 				 fs);
    854 		}
    855 		tst_brkm(TBROK | TERRNO, cleanup, "mount %s", path);
    856 	}
    857 	if (mkdir(path_new, 0777) == -1)
    858 		tst_brkm(TBROK | TERRNO, cleanup, "mkdir %s", path_new);
    859 }
    860 
    861 /* shared */
    862 
    863 /* Warning: *DO NOT* use this function in child */
    864 unsigned int get_a_numa_node(void (*cleanup_fn) (void))
    865 {
    866 	unsigned int nd1, nd2;
    867 	int ret;
    868 
    869 	ret = get_allowed_nodes(0, 2, &nd1, &nd2);
    870 	switch (ret) {
    871 	case 0:
    872 		break;
    873 	case -3:
    874 		tst_brkm(TCONF, cleanup_fn, "requires a NUMA system.");
    875 	default:
    876 		tst_brkm(TBROK | TERRNO, cleanup_fn, "1st get_allowed_nodes");
    877 	}
    878 
    879 	ret = get_allowed_nodes(NH_MEMS | NH_CPUS, 1, &nd1);
    880 	switch (ret) {
    881 	case 0:
    882 		tst_resm(TINFO, "get node%u.", nd1);
    883 		return nd1;
    884 	case -3:
    885 		tst_brkm(TCONF, cleanup_fn, "requires a NUMA system that has "
    886 			 "at least one node with both memory and CPU "
    887 			 "available.");
    888 	default:
    889 		break;
    890 	}
    891 	tst_brkm(TBROK | TERRNO, cleanup_fn, "2nd get_allowed_nodes");
    892 }
    893 
    894 int path_exist(const char *path, ...)
    895 {
    896 	va_list ap;
    897 	char pathbuf[PATH_MAX];
    898 
    899 	va_start(ap, path);
    900 	vsnprintf(pathbuf, sizeof(pathbuf), path, ap);
    901 	va_end(ap);
    902 
    903 	return access(pathbuf, F_OK) == 0;
    904 }
    905 
    906 long read_meminfo(char *item)
    907 {
    908 	FILE *fp;
    909 	char line[BUFSIZ], buf[BUFSIZ];
    910 	long val;
    911 
    912 	fp = fopen(PATH_MEMINFO, "r");
    913 	if (fp == NULL)
    914 		tst_brkm(TBROK | TERRNO, cleanup, "fopen %s", PATH_MEMINFO);
    915 
    916 	while (fgets(line, BUFSIZ, fp) != NULL) {
    917 		if (sscanf(line, "%64s %ld", buf, &val) == 2)
    918 			if (strcmp(buf, item) == 0) {
    919 				fclose(fp);
    920 				return val;
    921 			}
    922 		continue;
    923 	}
    924 	fclose(fp);
    925 
    926 	tst_brkm(TBROK, cleanup, "cannot find \"%s\" in %s",
    927 		 item, PATH_MEMINFO);
    928 }
    929 
    930 void set_sys_tune(char *sys_file, long tune, int check)
    931 {
    932 	long val;
    933 	char path[BUFSIZ];
    934 
    935 	tst_resm(TINFO, "set %s to %ld", sys_file, tune);
    936 
    937 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
    938 	SAFE_FILE_PRINTF(NULL, path, "%ld", tune);
    939 
    940 	if (check) {
    941 		val = get_sys_tune(sys_file);
    942 		if (val != tune)
    943 			tst_brkm(TBROK, cleanup, "%s = %ld, but expect %ld",
    944 				 sys_file, val, tune);
    945 	}
    946 }
    947 
    948 long get_sys_tune(char *sys_file)
    949 {
    950 	char path[BUFSIZ];
    951 	long tune;
    952 
    953 	snprintf(path, BUFSIZ, PATH_SYSVM "%s", sys_file);
    954 	SAFE_FILE_SCANF(NULL, path, "%ld", &tune);
    955 
    956 	return tune;
    957 }
    958 
    959 void update_shm_size(size_t * shm_size)
    960 {
    961 	size_t shmmax;
    962 
    963 	SAFE_FILE_SCANF(cleanup, PATH_SHMMAX, "%ld", &shmmax);
    964 	if (*shm_size > shmmax) {
    965 		tst_resm(TINFO, "Set shm_size to shmmax: %ld", shmmax);
    966 		*shm_size = shmmax;
    967 	}
    968 }
    969 
    970 int range_is_mapped(void (*cleanup_fn) (void), unsigned long low, unsigned long high)
    971 {
    972 	FILE *fp;
    973 
    974 	fp = fopen("/proc/self/maps", "r");
    975 	if (fp == NULL)
    976 		tst_brkm(TBROK | TERRNO, cleanup_fn, "Failed to open /proc/self/maps.");
    977 
    978 	while (!feof(fp)) {
    979 		unsigned long start, end;
    980 		int ret;
    981 
    982 		ret = fscanf(fp, "%lx-%lx %*[^\n]\n", &start, &end);
    983 		if (ret != 2) {
    984 			fclose(fp);
    985 			tst_brkm(TBROK | TERRNO, cleanup_fn, "Couldn't parse /proc/self/maps line.");
    986 		}
    987 
    988 		if ((start >= low) && (start < high)) {
    989 			fclose(fp);
    990 			return 1;
    991 		}
    992 		if ((end >= low) && (end < high)) {
    993 			fclose(fp);
    994 			return 1;
    995 		}
    996 	}
    997 
    998 	fclose(fp);
    999 	return 0;
   1000 }
   1001