1 /* 2 * Copyright (C) 2012 Linux Test Project, Inc. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License as published by 6 * the Free Software Foundation; either version 2 of the License, or 7 * (at your option) any later version. 8 * 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 12 * the GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write to the Free Software 16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 19 #include "config.h" 20 #include <errno.h> 21 #if HAVE_NUMA_H 22 #include <numa.h> 23 #endif 24 #if HAVE_NUMAIF_H 25 #include <numaif.h> 26 #endif 27 #include <stdarg.h> 28 #include <stdio.h> 29 #include <string.h> 30 #include <stdlib.h> 31 #include <unistd.h> 32 #include <errno.h> 33 34 #include "test.h" 35 #include "safe_macros.h" 36 #include "numa_helper.h" 37 #include "linux_syscall_numbers.h" 38 39 unsigned long get_max_node(void) 40 { 41 unsigned long max_node = 0; 42 #if HAVE_NUMA_H 43 #if !defined(LIBNUMA_API_VERSION) || LIBNUMA_API_VERSION < 2 44 max_node = NUMA_NUM_NODES; 45 /* 46 * NUMA_NUM_NODES is not reliable, libnuma >=2 is looking 47 * at /proc/self/status to figure out correct number. 48 * If buffer is not large enough get_mempolicy will fail with EINVAL. 49 */ 50 if (max_node < 1024) 51 max_node = 1024; 52 #else 53 max_node = numa_max_possible_node() + 1; 54 #endif 55 #endif /* HAVE_NUMA_H */ 56 return max_node; 57 } 58 59 #if HAVE_NUMA_H 60 static void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node) 61 { 62 unsigned long nodemask_size = max_node / 8; 63 int i; 64 char fn[64]; 65 struct stat st; 66 67 memset(nodemask, 0, nodemask_size); 68 for (i = 0; i < max_node; i++) { 69 sprintf(fn, "/sys/devices/system/node/node%d", i); 70 if (stat(fn, &st) == 0) 71 nodemask_set(nodemask, i); 72 } 73 } 74 75 static int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node) 76 { 77 #if MPOL_F_MEMS_ALLOWED 78 unsigned long nodemask_size = max_node / 8; 79 memset(nodemask, 0, nodemask_size); 80 /* 81 * avoid numa_get_mems_allowed(), because of bug in getpol() 82 * utility function in older versions: 83 * http://www.spinics.net/lists/linux-numa/msg00849.html 84 * 85 * At the moment numa_available() implementation also uses 86 * get_mempolicy, but let's make explicit check for ENOSYS 87 * here as well in case it changes in future. Silent ignore 88 * of ENOSYS is OK, because without NUMA caller gets empty 89 * set of nodes anyway. 90 */ 91 if (syscall(__NR_get_mempolicy, NULL, nodemask->n, 92 max_node, 0, MPOL_F_MEMS_ALLOWED) < 0) { 93 if (errno == ENOSYS) 94 return 0; 95 return -2; 96 } 97 #else 98 int i; 99 /* 100 * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume 101 * that we can use any node with memory > 0 102 */ 103 for (i = 0; i < max_node; i++) { 104 if (!nodemask_isset(nodemask, i)) 105 continue; 106 if (numa_node_size64(i, NULL) <= 0) 107 nodemask_clr(nodemask, i); 108 } 109 #endif /* MPOL_F_MEMS_ALLOWED */ 110 return 0; 111 } 112 113 static int cpumask_has_cpus(char *cpumask, size_t len) 114 { 115 int j; 116 for (j = 0; j < len; j++) 117 if (cpumask[j] == '\0') 118 return 0; 119 else if ((cpumask[j] > '0' && cpumask[j] <= '9') || 120 (cpumask[j] >= 'a' && cpumask[j] <= 'f')) 121 return 1; 122 return 0; 123 124 } 125 126 static void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node) 127 { 128 char *cpumask = NULL; 129 char fn[64]; 130 FILE *f; 131 size_t len; 132 int i, ret; 133 134 for (i = 0; i < max_node; i++) { 135 if (!nodemask_isset(nodemask, i)) 136 continue; 137 sprintf(fn, "/sys/devices/system/node/node%d/cpumap", i); 138 f = fopen(fn, "r"); 139 if (f) { 140 ret = getdelim(&cpumask, &len, '\n', f); 141 if ((ret > 0) && (!cpumask_has_cpus(cpumask, len))) 142 nodemask_clr(nodemask, i); 143 fclose(f); 144 } 145 } 146 free(cpumask); 147 } 148 #endif /* HAVE_NUMA_H */ 149 150 /* 151 * get_allowed_nodes_arr - get number and array of available nodes 152 * @num_nodes: pointer where number of available nodes will be stored 153 * @nodes: array of available node ids, this is MPOL_F_MEMS_ALLOWED 154 * node bitmask compacted (without holes), so that each field 155 * contains node number. If NULL only num_nodes is 156 * returned, otherwise it cotains new allocated array, 157 * which caller is responsible to free. 158 * RETURNS: 159 * 0 on success 160 * -1 on allocation failure 161 * -2 on get_mempolicy failure 162 */ 163 int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes) 164 { 165 int ret = 0; 166 #if HAVE_NUMA_H 167 int i; 168 nodemask_t *nodemask = NULL; 169 #endif 170 *num_nodes = 0; 171 if (nodes) 172 *nodes = NULL; 173 174 #if HAVE_NUMA_H 175 unsigned long max_node, nodemask_size; 176 177 if (numa_available() == -1) 178 return 0; 179 180 max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8); 181 nodemask_size = max_node / 8; 182 183 nodemask = malloc(nodemask_size); 184 if (nodes) 185 *nodes = malloc(sizeof(int) * max_node); 186 187 do { 188 if (nodemask == NULL || (nodes && (*nodes == NULL))) { 189 ret = -1; 190 break; 191 } 192 193 /* allow all nodes at start, then filter based on flags */ 194 get_nodemask_allnodes(nodemask, max_node); 195 if ((flag & NH_MEMS) == NH_MEMS) { 196 ret = filter_nodemask_mem(nodemask, max_node); 197 if (ret < 0) 198 break; 199 } 200 if ((flag & NH_CPUS) == NH_CPUS) 201 filter_nodemask_cpu(nodemask, max_node); 202 203 for (i = 0; i < max_node; i++) { 204 if (nodemask_isset(nodemask, i)) { 205 if (nodes) 206 (*nodes)[*num_nodes] = i; 207 (*num_nodes)++; 208 } 209 } 210 } while (0); 211 free(nodemask); 212 #endif 213 return ret; 214 } 215 216 /* 217 * get_allowed_nodes - convenience function to get fixed number of nodes 218 * @count: how many nodes to get 219 * @...: int pointers, where node ids will be stored 220 * RETURNS: 221 * 0 on success 222 * -1 on allocation failure 223 * -2 on get_mempolicy failure 224 * -3 on not enough allowed nodes 225 */ 226 int get_allowed_nodes(int flag, int count, ...) 227 { 228 int ret; 229 int i, *nodep; 230 va_list ap; 231 int num_nodes = 0; 232 int *nodes = NULL; 233 234 ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes); 235 if (ret < 0) 236 return ret; 237 238 va_start(ap, count); 239 for (i = 0; i < count; i++) { 240 nodep = va_arg(ap, int *); 241 if (i < num_nodes) { 242 *nodep = nodes[i]; 243 } else { 244 ret = -3; 245 errno = EINVAL; 246 break; 247 } 248 } 249 free(nodes); 250 va_end(ap); 251 252 return ret; 253 } 254 255 static void print_node_info(int flag) 256 { 257 int *allowed_nodes = NULL; 258 int i, ret, num_nodes; 259 260 ret = get_allowed_nodes_arr(flag, &num_nodes, &allowed_nodes); 261 printf("nodes (flag=%d): ", flag); 262 if (ret == 0) { 263 for (i = 0; i < num_nodes; i++) 264 printf("%d ", allowed_nodes[i]); 265 printf("\n"); 266 } else 267 printf("error(%d)\n", ret); 268 free(allowed_nodes); 269 } 270 271 /* 272 * nh_dump_nodes - dump info about nodes to stdout 273 */ 274 void nh_dump_nodes(void) 275 { 276 print_node_info(0); 277 print_node_info(NH_MEMS); 278 print_node_info(NH_CPUS); 279 print_node_info(NH_MEMS | NH_CPUS); 280 } 281 282 /* 283 * is_numa - judge a system is NUMA system or not 284 * @flag: NH_MEMS and/or NH_CPUS 285 * @min_nodes: find at least 'min_nodes' nodes with memory 286 * NOTE: the function is designed to try to find at least 'min_nodes' 287 * available nodes, where each node contains memory. 288 * WARN: Don't use this func in child, as it calls tst_brkm() 289 * RETURNS: 290 * 0 - it's not a NUMA system 291 * 1 - it's a NUMA system 292 */ 293 int is_numa(void (*cleanup_fn)(void), int flag, int min_nodes) 294 { 295 int ret; 296 int numa_nodes = 0; 297 298 ret = get_allowed_nodes_arr(flag, &numa_nodes, NULL); 299 if (ret < 0) 300 tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr"); 301 302 if (numa_nodes >= min_nodes) 303 return 1; 304 else 305 return 0; 306 } 307