Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2012 Linux Test Project, Inc.
      3  *
      4  * This program is free software;  you can redistribute it and/or modify
      5  * it under the terms of the GNU General Public License as published by
      6  * the Free Software Foundation; either version 2 of the License, or
      7  * (at your option) any later version.
      8  *
      9  * This program is distributed in the hope that it will be useful,
     10  * but WITHOUT ANY WARRANTY;  without even the implied warranty of
     11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
     12  * the GNU General Public License for more details.
     13  *
     14  * You should have received a copy of the GNU General Public License
     15  * along with this program;  if not, write to the Free Software
     16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
     17  */
     18 
     19 #include "config.h"
     20 #include <errno.h>
     21 #if HAVE_NUMA_H
     22 #include <numa.h>
     23 #endif
     24 #if HAVE_NUMAIF_H
     25 #include <numaif.h>
     26 #endif
     27 #include <stdarg.h>
     28 #include <stdio.h>
     29 #include <string.h>
     30 #include <stdlib.h>
     31 #include <unistd.h>
     32 #include <errno.h>
     33 
     34 #include "test.h"
     35 #include "safe_macros.h"
     36 #include "numa_helper.h"
     37 #include "linux_syscall_numbers.h"
     38 
     39 unsigned long get_max_node(void)
     40 {
     41 	unsigned long max_node = 0;
     42 #if HAVE_NUMA_H
     43 #if !defined(LIBNUMA_API_VERSION) || LIBNUMA_API_VERSION < 2
     44 	max_node = NUMA_NUM_NODES;
     45 	/*
     46 	 * NUMA_NUM_NODES is not reliable, libnuma >=2 is looking
     47 	 * at /proc/self/status to figure out correct number.
     48 	 * If buffer is not large enough get_mempolicy will fail with EINVAL.
     49 	 */
     50 	if (max_node < 1024)
     51 		max_node = 1024;
     52 #else
     53 	max_node = numa_max_possible_node() + 1;
     54 #endif
     55 #endif /* HAVE_NUMA_H */
     56 	return max_node;
     57 }
     58 
     59 #if HAVE_NUMA_H
     60 static void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node)
     61 {
     62 	unsigned long nodemask_size = max_node / 8;
     63 	int i;
     64 	char fn[64];
     65 	struct stat st;
     66 
     67 	memset(nodemask, 0, nodemask_size);
     68 	for (i = 0; i < max_node; i++) {
     69 		sprintf(fn, "/sys/devices/system/node/node%d", i);
     70 		if (stat(fn, &st) == 0)
     71 			nodemask_set(nodemask, i);
     72 	}
     73 }
     74 
     75 static int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node)
     76 {
     77 #if MPOL_F_MEMS_ALLOWED
     78 	unsigned long nodemask_size = max_node / 8;
     79 	memset(nodemask, 0, nodemask_size);
     80 	/*
     81 	 * avoid numa_get_mems_allowed(), because of bug in getpol()
     82 	 * utility function in older versions:
     83 	 * http://www.spinics.net/lists/linux-numa/msg00849.html
     84 	 *
     85 	 * At the moment numa_available() implementation also uses
     86 	 * get_mempolicy, but let's make explicit check for ENOSYS
     87 	 * here as well in case it changes in future. Silent ignore
     88 	 * of ENOSYS is OK, because without NUMA caller gets empty
     89 	 * set of nodes anyway.
     90 	 */
     91 	if (syscall(__NR_get_mempolicy, NULL, nodemask->n,
     92 		    max_node, 0, MPOL_F_MEMS_ALLOWED) < 0) {
     93 		if (errno == ENOSYS)
     94 			return 0;
     95 		return -2;
     96 	}
     97 #else
     98 	int i;
     99 	/*
    100 	 * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume
    101 	 * that we can use any node with memory > 0
    102 	 */
    103 	for (i = 0; i < max_node; i++) {
    104 		if (!nodemask_isset(nodemask, i))
    105 			continue;
    106 		if (numa_node_size64(i, NULL) <= 0)
    107 			nodemask_clr(nodemask, i);
    108 	}
    109 #endif /* MPOL_F_MEMS_ALLOWED */
    110 	return 0;
    111 }
    112 
    113 static int cpumask_has_cpus(char *cpumask, size_t len)
    114 {
    115 	int j;
    116 	for (j = 0; j < len; j++)
    117 		if (cpumask[j] == '\0')
    118 			return 0;
    119 		else if ((cpumask[j] > '0' && cpumask[j] <= '9') ||
    120 			 (cpumask[j] >= 'a' && cpumask[j] <= 'f'))
    121 			return 1;
    122 	return 0;
    123 
    124 }
    125 
    126 static void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node)
    127 {
    128 	char *cpumask = NULL;
    129 	char fn[64];
    130 	FILE *f;
    131 	size_t len;
    132 	int i, ret;
    133 
    134 	for (i = 0; i < max_node; i++) {
    135 		if (!nodemask_isset(nodemask, i))
    136 			continue;
    137 		sprintf(fn, "/sys/devices/system/node/node%d/cpumap", i);
    138 		f = fopen(fn, "r");
    139 		if (f) {
    140 			ret = getdelim(&cpumask, &len, '\n', f);
    141 			if ((ret > 0) && (!cpumask_has_cpus(cpumask, len)))
    142 				nodemask_clr(nodemask, i);
    143 			fclose(f);
    144 		}
    145 	}
    146 	free(cpumask);
    147 }
    148 #endif /* HAVE_NUMA_H */
    149 
    150 /*
    151  * get_allowed_nodes_arr - get number and array of available nodes
    152  * @num_nodes: pointer where number of available nodes will be stored
    153  * @nodes: array of available node ids, this is MPOL_F_MEMS_ALLOWED
    154  *                 node bitmask compacted (without holes), so that each field
    155  *                 contains node number. If NULL only num_nodes is
    156  *                 returned, otherwise it cotains new allocated array,
    157  *                 which caller is responsible to free.
    158  * RETURNS:
    159  *     0 on success
    160  *    -1 on allocation failure
    161  *    -2 on get_mempolicy failure
    162  */
    163 int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes)
    164 {
    165 	int ret = 0;
    166 #if HAVE_NUMA_H
    167 	int i;
    168 	nodemask_t *nodemask = NULL;
    169 #endif
    170 	*num_nodes = 0;
    171 	if (nodes)
    172 		*nodes = NULL;
    173 
    174 #if HAVE_NUMA_H
    175 	unsigned long max_node, nodemask_size;
    176 
    177 	if (numa_available() == -1)
    178 		return 0;
    179 
    180 	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
    181 	nodemask_size = max_node / 8;
    182 
    183 	nodemask = malloc(nodemask_size);
    184 	if (nodes)
    185 		*nodes = malloc(sizeof(int) * max_node);
    186 
    187 	do {
    188 		if (nodemask == NULL || (nodes && (*nodes == NULL))) {
    189 			ret = -1;
    190 			break;
    191 		}
    192 
    193 		/* allow all nodes at start, then filter based on flags */
    194 		get_nodemask_allnodes(nodemask, max_node);
    195 		if ((flag & NH_MEMS) == NH_MEMS) {
    196 			ret = filter_nodemask_mem(nodemask, max_node);
    197 			if (ret < 0)
    198 				break;
    199 		}
    200 		if ((flag & NH_CPUS) == NH_CPUS)
    201 			filter_nodemask_cpu(nodemask, max_node);
    202 
    203 		for (i = 0; i < max_node; i++) {
    204 			if (nodemask_isset(nodemask, i)) {
    205 				if (nodes)
    206 					(*nodes)[*num_nodes] = i;
    207 				(*num_nodes)++;
    208 			}
    209 		}
    210 	} while (0);
    211 	free(nodemask);
    212 #endif
    213 	return ret;
    214 }
    215 
    216 /*
    217  * get_allowed_nodes - convenience function to get fixed number of nodes
    218  * @count: how many nodes to get
    219  * @...: int pointers, where node ids will be stored
    220  * RETURNS:
    221  *     0 on success
    222  *    -1 on allocation failure
    223  *    -2 on get_mempolicy failure
    224  *    -3 on not enough allowed nodes
    225  */
    226 int get_allowed_nodes(int flag, int count, ...)
    227 {
    228 	int ret;
    229 	int i, *nodep;
    230 	va_list ap;
    231 	int num_nodes = 0;
    232 	int *nodes = NULL;
    233 
    234 	ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes);
    235 	if (ret < 0)
    236 		return ret;
    237 
    238 	va_start(ap, count);
    239 	for (i = 0; i < count; i++) {
    240 		nodep = va_arg(ap, int *);
    241 		if (i < num_nodes) {
    242 			*nodep = nodes[i];
    243 		} else {
    244 			ret = -3;
    245 			errno = EINVAL;
    246 			break;
    247 		}
    248 	}
    249 	free(nodes);
    250 	va_end(ap);
    251 
    252 	return ret;
    253 }
    254 
    255 static void print_node_info(int flag)
    256 {
    257 	int *allowed_nodes = NULL;
    258 	int i, ret, num_nodes;
    259 
    260 	ret = get_allowed_nodes_arr(flag, &num_nodes, &allowed_nodes);
    261 	printf("nodes (flag=%d): ", flag);
    262 	if (ret == 0) {
    263 		for (i = 0; i < num_nodes; i++)
    264 			printf("%d ", allowed_nodes[i]);
    265 		printf("\n");
    266 	} else
    267 		printf("error(%d)\n", ret);
    268 	free(allowed_nodes);
    269 }
    270 
    271 /*
    272  * nh_dump_nodes - dump info about nodes to stdout
    273  */
    274 void nh_dump_nodes(void)
    275 {
    276 	print_node_info(0);
    277 	print_node_info(NH_MEMS);
    278 	print_node_info(NH_CPUS);
    279 	print_node_info(NH_MEMS | NH_CPUS);
    280 }
    281 
    282 /*
    283  * is_numa - judge a system is NUMA system or not
    284  * @flag: NH_MEMS and/or NH_CPUS
    285  * @min_nodes: find at least 'min_nodes' nodes with memory
    286  * NOTE: the function is designed to try to find at least 'min_nodes'
    287  * available nodes, where each node contains memory.
    288  * WARN: Don't use this func in child, as it calls tst_brkm()
    289  * RETURNS:
    290  *     0 - it's not a NUMA system
    291  *     1 - it's a NUMA system
    292  */
    293 int is_numa(void (*cleanup_fn)(void), int flag, int min_nodes)
    294 {
    295 	int ret;
    296 	int numa_nodes = 0;
    297 
    298 	ret = get_allowed_nodes_arr(flag, &numa_nodes, NULL);
    299 	if (ret < 0)
    300 		tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr");
    301 
    302 	if (numa_nodes >= min_nodes)
    303 		return 1;
    304 	else
    305 		return 0;
    306 }
    307