/*
 * Copyright (C) 2010 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* ChangeLog for this library:
 *
 * NDK r8d: Add android_setCpu().
 *
 * NDK r8c: Add new ARM CPU features: VFPv2, VFP_D32, VFP_FP16,
 *          VFP_FMA, NEON_FMA, IDIV_ARM, IDIV_THUMB2 and iWMMXt.
 *
 *          Rewrite the code to parse /proc/self/auxv instead of
 *          the "Features" field in /proc/cpuinfo.
 *
 *          Dynamically allocate the buffer that holds the content
 *          of /proc/cpuinfo to deal with newer hardware.
 *
 * NDK r7c: Fix CPU count computation.
The old method only reported the 43 * number of _active_ CPUs when the library was initialized, 44 * which could be less than the real total. 45 * 46 * NDK r5: Handle buggy kernels which report a CPU Architecture number of 7 47 * for an ARMv6 CPU (see below). 48 * 49 * Handle kernels that only report 'neon', and not 'vfpv3' 50 * (VFPv3 is mandated by the ARM architecture is Neon is implemented) 51 * 52 * Handle kernels that only report 'vfpv3d16', and not 'vfpv3' 53 * 54 * Fix x86 compilation. Report ANDROID_CPU_FAMILY_X86 in 55 * android_getCpuFamily(). 56 * 57 * NDK r4: Initial release 58 */ 59 #include <sys/system_properties.h> 60 #ifdef __arm__ 61 #include <machine/cpu-features.h> 62 #endif 63 #include <pthread.h> 64 #include "cpu-features.h" 65 #include <stdio.h> 66 #include <stdlib.h> 67 #include <fcntl.h> 68 #include <errno.h> 69 70 static pthread_once_t g_once; 71 static int g_inited; 72 static AndroidCpuFamily g_cpuFamily; 73 static uint64_t g_cpuFeatures; 74 static int g_cpuCount; 75 76 #ifdef __arm__ 77 static uint32_t g_cpuIdArm; 78 #endif 79 80 static const int android_cpufeatures_debug = 0; 81 82 #ifdef __arm__ 83 # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_ARM 84 #elif defined __i386__ 85 # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_X86 86 #else 87 # define DEFAULT_CPU_FAMILY ANDROID_CPU_FAMILY_UNKNOWN 88 #endif 89 90 #define D(...) 
\ 91 do { \ 92 if (android_cpufeatures_debug) { \ 93 printf(__VA_ARGS__); fflush(stdout); \ 94 } \ 95 } while (0) 96 97 #ifdef __i386__ 98 static __inline__ void x86_cpuid(int func, int values[4]) 99 { 100 int a, b, c, d; 101 /* We need to preserve ebx since we're compiling PIC code */ 102 /* this means we can't use "=b" for the second output register */ 103 __asm__ __volatile__ ( \ 104 "push %%ebx\n" 105 "cpuid\n" \ 106 "mov %%ebx, %1\n" 107 "pop %%ebx\n" 108 : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \ 109 : "a" (func) \ 110 ); 111 values[0] = a; 112 values[1] = b; 113 values[2] = c; 114 values[3] = d; 115 } 116 #endif 117 118 /* Get the size of a file by reading it until the end. This is needed 119 * because files under /proc do not always return a valid size when 120 * using fseek(0, SEEK_END) + ftell(). Nor can they be mmap()-ed. 121 */ 122 static int 123 get_file_size(const char* pathname) 124 { 125 int fd, ret, result = 0; 126 char buffer[256]; 127 128 fd = open(pathname, O_RDONLY); 129 if (fd < 0) { 130 D("Can't open %s: %s\n", pathname, strerror(errno)); 131 return -1; 132 } 133 134 for (;;) { 135 int ret = read(fd, buffer, sizeof buffer); 136 if (ret < 0) { 137 if (errno == EINTR) 138 continue; 139 D("Error while reading %s: %s\n", pathname, strerror(errno)); 140 break; 141 } 142 if (ret == 0) 143 break; 144 145 result += ret; 146 } 147 close(fd); 148 return result; 149 } 150 151 /* Read the content of /proc/cpuinfo into a user-provided buffer. 152 * Return the length of the data, or -1 on error. Does *not* 153 * zero-terminate the content. Will not read more 154 * than 'buffsize' bytes. 
/* read_file: read the content of 'pathname' into the user-provided 'buffer'.
 *
 * At most 'buffsize' bytes are read and the data is *not* zero-terminated.
 * Returns the number of bytes stored in 'buffer', or -1 if the file cannot
 * be opened or if the very first read fails. A read error that happens
 * after some data was received returns the partial length.
 */
static int
read_file(const char*  pathname, char*  buffer, size_t  buffsize)
{
    int  fd, count;

    fd = open(pathname, O_RDONLY);
    if (fd < 0) {
        D("Could not open %s: %s\n", pathname, strerror(errno));
        return -1;
    }
    count = 0;
    while (count < (int)buffsize) {
        int ret = read(fd, buffer + count, buffsize - count);
        if (ret < 0) {
            if (errno == EINTR)
                continue;
            D("Error while reading from %s: %s\n", pathname, strerror(errno));
            if (count == 0)
                count = -1;
            break;
        }
        if (ret == 0)
            break;
        count += ret;
    }
    close(fd);
    return count;
}

/* Extract the content of the first occurrence of a given field in
 * the content of /proc/cpuinfo and return it as a heap-allocated
 * string that must be freed by the caller.
 *
 * 'buffer' holds 'buflen' bytes and does not need to be zero-terminated;
 * no byte past 'buffer + buflen' is ever read. The field name must start
 * a line and be followed, later on, by ": "; the returned string is the
 * text after ": " up to (not including) the end of that line.
 *
 * Return NULL if not found, if the arguments are invalid, or if the
 * allocation fails.
 */
static char*
extract_cpuinfo_field(const char* buffer, int buflen, const char* field)
{
    int          fieldlen;
    const char*  bufend;
    char*        result = NULL;
    int          len;
    const char   *p, *q;

    if (buffer == NULL || field == NULL || buflen <= 0)
        goto EXIT;

    fieldlen = (int)strlen(field);
    bufend   = buffer + buflen;

    /* Look for first field occurrence, and ensure it starts the line. */
    p = buffer;
    for (;;) {
        p = memmem(p, bufend-p, field, fieldlen);
        if (p == NULL)
            goto EXIT;

        if (p == buffer || p[-1] == '\n')
            break;

        p += fieldlen;
    }

    /* Skip to the first colon followed by a space. The length check comes
     * first so that p[1] is only read when it lies inside the buffer.
     */
    p += fieldlen;
    p = memchr(p, ':', bufend-p);
    if (p == NULL || bufend - p < 2 || p[1] != ' ')
        goto EXIT;

    /* Find the end of the line */
    p += 2;
    q = memchr(p, '\n', bufend-p);
    if (q == NULL)
        q = bufend;

    /* Copy the line into a heap-allocated buffer */
    len = q-p;
    result = malloc(len+1);
    if (result == NULL)
        goto EXIT;

    memcpy(result, p, len);
    result[len] = '\0';

EXIT:
    return result;
}

/* Checks that a space-separated list of items contains one given 'item'. */
/* has_list_item: returns 1 if 'item' is found in 'list', 0 otherwise.
 *
 * Items in 'list' are separated by spaces or tabs and must match 'item'
 * exactly (same length, same bytes). A NULL 'list' or 'item' yields 0.
 */
static int
has_list_item(const char* list, const char* item)
{
    const char*  p = list;
    int          itemlen;

    if (list == NULL || item == NULL)
        return 0;

    itemlen = (int)strlen(item);

    while (*p) {
        const char*  q;

        /* skip spaces */
        while (*p == ' ' || *p == '\t')
            p++;

        /* find end of current list item */
        q = p;
        while (*q && *q != ' ' && *q != '\t')
            q++;

        if (itemlen == q-p && !memcmp(p, item, itemlen))
            return 1;

        /* skip to next item */
        p = q;
    }
    return 0;
}

/* Parse a number starting from 'input', but not going further
 * than 'limit'. Return the value into '*result'.
 *
 * Digits 0-9, a-f and A-F are recognized; parsing stops at the first
 * character that is not a digit valid in 'base'.
 *
 * NOTE: Does not skip over leading spaces, or deal with sign characters.
 * NOTE: Overflows are not reported. The value is accumulated with unsigned
 *       arithmetic so that an overflow wraps around instead of being
 *       undefined behaviour.
 *
 * The function returns NULL in case of error (bad format), in which case
 * '*result' is left untouched, or the new position after the number in
 * case of success (which will always be <= 'limit').
 */
static const char*
parse_number(const char* input, const char* limit, int base, int* result)
{
    const char*  p = input;
    unsigned     val = 0;

    while (p < limit) {
        int d = (*p - '0');
        if ((unsigned)d >= 10U) {
            /* Not a decimal digit: try lower-case, then upper-case hex. */
            d = (*p - 'a');
            if ((unsigned)d >= 6U)
                d = (*p - 'A');
            if ((unsigned)d >= 6U)
                break;
            d += 10;
        }
        if (d >= base)
            break;
        val = val*(unsigned)base + (unsigned)d;
        p++;
    }
    if (p == input)
        return NULL;

    *result = (int)val;
    return p;
}

/* Parse a decimal number in [input, limit); see parse_number(). */
static const char*
parse_decimal(const char* input, const char* limit, int* result)
{
    return parse_number(input, limit, 10, result);
}

/* Parse a hexadecimal number (without "0x" prefix) in [input, limit);
 * see parse_number().
 */
static const char*
parse_hexadecimal(const char* input, const char* limit, int* result)
{
    return parse_number(input, limit, 16, result);
}

/* This small data type is used to represent a CPU list / mask, as read
 * from sysfs on Linux.
 */
/* CpuList: a CPU list / mask, as read from sysfs on Linux.
 * See http://www.kernel.org/doc/Documentation/cputopology.txt
 *
 * For now, we don't expect more than 32 cores on mobile devices, so keep
 * everything simple.
 */
typedef struct {
    uint32_t mask;   /* bit N is set when CPU index N belongs to the list */
} CpuList;

/* Make 'list' empty. */
static __inline__ void
cpulist_init(CpuList* list) {
    list->mask = 0;
}

/* Replace 'list1' by the intersection of 'list1' and 'list2'. */
static __inline__ void
cpulist_and(CpuList* list1, CpuList* list2) {
    list1->mask &= list2->mask;
}

/* Add CPU 'index' to 'list'. Indices outside of 0..31 are ignored. */
static __inline__ void
cpulist_set(CpuList* list, int index) {
    if ((unsigned)index < 32) {
        list->mask |= (uint32_t)(1U << index);
    }
}

/* Return the number of CPUs in 'list'. */
static __inline__ int
cpulist_count(CpuList* list) {
    return __builtin_popcount(list->mask);
}

/* Parse a textual list of cpus and store the result inside a CpuList object.
 * Input format is the following:
 * - comma-separated list of items (no spaces)
 * - each item is either a single decimal number (cpu index), or a range made
 *   of two numbers separated by a single dash (-). Ranges are inclusive.
 *
 * Examples:   0
 *             2,4-127,128-143
 *             0-1
 *
 * Bits are added to 'list' (which is not cleared first). Parsing stops
 * silently at the first badly formatted item; items parsed before it are
 * kept.
 */
static void
cpulist_parse(CpuList* list, const char* line, int line_len)
{
    const char* p = line;
    const char* end = p + line_len;

    /* NOTE: the input line coming from sysfs typically contains a
     * trailing newline, so take care of it in the code below
     */
    while (p < end && *p != '\n')
    {
        int val, start_value, end_value;
        const char* q;

        /* Find the end of current item, and put it into 'q' */
        q = memchr(p, ',', end-p);
        if (q == NULL) {
            q = end;
        }

        /* Get first value */
        p = parse_decimal(p, q, &start_value);
        if (p == NULL)
            return;

        end_value = start_value;

        /* If we're not at the end of the item, expect a dash and
         * an integer; extract end value.
         */
        if (p < q && *p == '-') {
            p = parse_decimal(p+1, q, &end_value);
            if (p == NULL)
                return;
        }

        /* cpulist_set() only keeps indices 0..31, so restrict the range
         * to that window. This yields the same mask while keeping the
         * loop short and preventing 'val' from overflowing when the
         * range ends at INT_MAX.
         */
        if (start_value < 0)
            start_value = 0;
        if (end_value > 31)
            end_value = 31;

        /* Set the CPU list bits */
        for (val = start_value; val <= end_value; val++) {
            cpulist_set(list, val);
        }

        /* Jump to next item */
        p = q;
        if (p < end)
            p++;
    }
}

/* Read a CPU list from one sysfs file. 'list' is left empty if the file
 * cannot be read. Only the first 64 bytes of the file are considered.
 */
static void
cpulist_read_from(CpuList* list, const char* filename)
{
    char   file[64];
    int    filelen;

    cpulist_init(list);

    filelen = read_file(filename, file, sizeof file);
    if (filelen < 0) {
        D("Could not read %s: %s\n", filename, strerror(errno));
        return;
    }

    cpulist_parse(list, file, filelen);
}

// See <asm/hwcap.h> kernel header.
#define HWCAP_VFP       (1 << 6)
#define HWCAP_IWMMXT    (1 << 9)
#define HWCAP_NEON      (1 << 12)
#define HWCAP_VFPv3     (1 << 13)
#define HWCAP_VFPv3D16  (1 << 14)
#define HWCAP_VFPv4     (1 << 16)
#define HWCAP_IDIVA     (1 << 17)
#define HWCAP_IDIVT     (1 << 18)

#define AT_HWCAP 16

#if defined(__arm__)
/* Compute the ELF HWCAP flags.
 *
 * 'cpuinfo' / 'cpuinfo_len' describe the content of /proc/cpuinfo. The
 * result is a combination of the HWCAP_xxx bits defined above, or 0 when
 * no "Features" field is found.
 */
static uint32_t
get_elf_hwcap(const char* cpuinfo, int cpuinfo_len)
{
  /* IMPORTANT:
   *   Accessing /proc/self/auxv doesn't work anymore on all
   *   platform versions. More specifically, when running inside
   *   a regular application process, most of /proc/self/ will be
   *   non-readable, including /proc/self/auxv. This doesn't
   *   happen however if the application is debuggable, or when
   *   running under the "shell" UID, which is why this was not
   *   detected appropriately.
   */
#if 0
    uint32_t result = 0;
    const char filepath[] = "/proc/self/auxv";
    int fd = open(filepath, O_RDONLY);
    if (fd < 0) {
        D("Could not open %s: %s\n", filepath, strerror(errno));
        return 0;
    }

    struct { uint32_t tag; uint32_t value; } entry;

    for (;;) {
        int ret = read(fd, (char*)&entry, sizeof entry);
        if (ret < 0) {
            if (errno == EINTR)
                continue;
            D("Error while reading %s: %s\n", filepath, strerror(errno));
            break;
        }
        // Detect end of list.
        if (ret == 0 || (entry.tag == 0 && entry.value == 0))
          break;
        if (entry.tag == AT_HWCAP) {
          result = entry.value;
          break;
        }
    }
    close(fd);
    return result;
#else
    // Recreate ELF hwcaps by parsing /proc/cpuinfo Features tag.
    uint32_t hwcaps = 0;

    char* cpuFeatures = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "Features");

    if (cpuFeatures != NULL) {
        D("Found cpuFeatures = '%s'\n", cpuFeatures);

        if (has_list_item(cpuFeatures, "vfp"))
            hwcaps |= HWCAP_VFP;
        if (has_list_item(cpuFeatures, "vfpv3"))
            hwcaps |= HWCAP_VFPv3;
        if (has_list_item(cpuFeatures, "vfpv3d16"))
            hwcaps |= HWCAP_VFPv3D16;
        if (has_list_item(cpuFeatures, "vfpv4"))
            hwcaps |= HWCAP_VFPv4;
        if (has_list_item(cpuFeatures, "neon"))
            hwcaps |= HWCAP_NEON;
        if (has_list_item(cpuFeatures, "idiva"))
            hwcaps |= HWCAP_IDIVA;
        if (has_list_item(cpuFeatures, "idivt"))
            hwcaps |= HWCAP_IDIVT;
        /* A plain 'idiv' stands for both the ARM and Thumb variants. */
        if (has_list_item(cpuFeatures, "idiv"))
            hwcaps |= HWCAP_IDIVA | HWCAP_IDIVT;
        if (has_list_item(cpuFeatures, "iwmmxt"))
            hwcaps |= HWCAP_IWMMXT;

        free(cpuFeatures);
    }
    return hwcaps;
#endif
}
#endif  /* __arm__ */

/* Return the number of cpus present on a given device.
 *
 * To handle all weird kernel configurations, we need to compute the
 * intersection of the 'present' and 'possible' CPU lists and count
 * the result.
 */
527 */ 528 static int 529 get_cpu_count(void) 530 { 531 CpuList cpus_present[1]; 532 CpuList cpus_possible[1]; 533 534 cpulist_read_from(cpus_present, "/sys/devices/system/cpu/present"); 535 cpulist_read_from(cpus_possible, "/sys/devices/system/cpu/possible"); 536 537 /* Compute the intersection of both sets to get the actual number of 538 * CPU cores that can be used on this device by the kernel. 539 */ 540 cpulist_and(cpus_present, cpus_possible); 541 542 return cpulist_count(cpus_present); 543 } 544 545 static void 546 android_cpuInitFamily(void) 547 { 548 #if defined(__ARM_ARCH__) 549 g_cpuFamily = ANDROID_CPU_FAMILY_ARM; 550 #elif defined(__i386__) 551 g_cpuFamily = ANDROID_CPU_FAMILY_X86; 552 #elif defined(_MIPS_ARCH) 553 g_cpuFamily = ANDROID_CPU_FAMILY_MIPS; 554 #else 555 g_cpuFamily = ANDROID_CPU_FAMILY_UNKNOWN; 556 #endif 557 } 558 559 static void 560 android_cpuInit(void) 561 { 562 char* cpuinfo = NULL; 563 int cpuinfo_len; 564 565 android_cpuInitFamily(); 566 567 g_cpuFeatures = 0; 568 g_cpuCount = 1; 569 g_inited = 1; 570 571 cpuinfo_len = get_file_size("/proc/cpuinfo"); 572 if (cpuinfo_len < 0) { 573 D("cpuinfo_len cannot be computed!"); 574 return; 575 } 576 cpuinfo = malloc(cpuinfo_len); 577 if (cpuinfo == NULL) { 578 D("cpuinfo buffer could not be allocated"); 579 return; 580 } 581 cpuinfo_len = read_file("/proc/cpuinfo", cpuinfo, cpuinfo_len); 582 D("cpuinfo_len is (%d):\n%.*s\n", cpuinfo_len, 583 cpuinfo_len >= 0 ? cpuinfo_len : 0, cpuinfo); 584 585 if (cpuinfo_len < 0) /* should not happen */ { 586 free(cpuinfo); 587 return; 588 } 589 590 /* Count the CPU cores, the value may be 0 for single-core CPUs */ 591 g_cpuCount = get_cpu_count(); 592 if (g_cpuCount == 0) { 593 g_cpuCount = 1; 594 } 595 596 D("found cpuCount = %d\n", g_cpuCount); 597 598 #ifdef __ARM_ARCH__ 599 { 600 char* features = NULL; 601 char* architecture = NULL; 602 603 /* Extract architecture from the "CPU Architecture" field. 
604 * The list is well-known, unlike the the output of 605 * the 'Processor' field which can vary greatly. 606 * 607 * See the definition of the 'proc_arch' array in 608 * $KERNEL/arch/arm/kernel/setup.c and the 'c_show' function in 609 * same file. 610 */ 611 char* cpuArch = extract_cpuinfo_field(cpuinfo, cpuinfo_len, "CPU architecture"); 612 613 if (cpuArch != NULL) { 614 char* end; 615 long archNumber; 616 int hasARMv7 = 0; 617 618 D("found cpuArch = '%s'\n", cpuArch); 619 620 /* read the initial decimal number, ignore the rest */ 621 archNumber = strtol(cpuArch, &end, 10); 622 623 /* Here we assume that ARMv8 will be upwards compatible with v7 624 * in the future. Unfortunately, there is no 'Features' field to 625 * indicate that Thumb-2 is supported. 626 */ 627 if (end > cpuArch && archNumber >= 7) { 628 hasARMv7 = 1; 629 } 630 631 /* Unfortunately, it seems that certain ARMv6-based CPUs 632 * report an incorrect architecture number of 7! 633 * 634 * See http://code.google.com/p/android/issues/detail?id=10812 635 * 636 * We try to correct this by looking at the 'elf_format' 637 * field reported by the 'Processor' field, which is of the 638 * form of "(v7l)" for an ARMv7-based CPU, and "(v6l)" for 639 * an ARMv6-one. 
640 */ 641 if (hasARMv7) { 642 char* cpuProc = extract_cpuinfo_field(cpuinfo, cpuinfo_len, 643 "Processor"); 644 if (cpuProc != NULL) { 645 D("found cpuProc = '%s'\n", cpuProc); 646 if (has_list_item(cpuProc, "(v6l)")) { 647 D("CPU processor and architecture mismatch!!\n"); 648 hasARMv7 = 0; 649 } 650 free(cpuProc); 651 } 652 } 653 654 if (hasARMv7) { 655 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_ARMv7; 656 } 657 658 /* The LDREX / STREX instructions are available from ARMv6 */ 659 if (archNumber >= 6) { 660 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_LDREX_STREX; 661 } 662 663 free(cpuArch); 664 } 665 666 /* Extract the list of CPU features from ELF hwcaps */ 667 uint32_t hwcaps = get_elf_hwcap(cpuinfo, cpuinfo_len); 668 669 if (hwcaps != 0) { 670 int has_vfp = (hwcaps & HWCAP_VFP); 671 int has_vfpv3 = (hwcaps & HWCAP_VFPv3); 672 int has_vfpv3d16 = (hwcaps & HWCAP_VFPv3D16); 673 int has_vfpv4 = (hwcaps & HWCAP_VFPv4); 674 int has_neon = (hwcaps & HWCAP_NEON); 675 int has_idiva = (hwcaps & HWCAP_IDIVA); 676 int has_idivt = (hwcaps & HWCAP_IDIVT); 677 int has_iwmmxt = (hwcaps & HWCAP_IWMMXT); 678 679 // The kernel does a poor job at ensuring consistency when 680 // describing CPU features. So lots of guessing is needed. 681 682 // 'vfpv4' implies VFPv3|VFP_FMA|FP16 683 if (has_vfpv4) 684 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | 685 ANDROID_CPU_ARM_FEATURE_VFP_FP16 | 686 ANDROID_CPU_ARM_FEATURE_VFP_FMA; 687 688 // 'vfpv3' or 'vfpv3d16' imply VFPv3. Note that unlike GCC, 689 // a value of 'vfpv3' doesn't necessarily mean that the D32 690 // feature is present, so be conservative. All CPUs in the 691 // field that support D32 also support NEON, so this should 692 // not be a problem in practice. 693 if (has_vfpv3 || has_vfpv3d16) 694 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; 695 696 // 'vfp' is super ambiguous. Depending on the kernel, it can 697 // either mean VFPv2 or VFPv3. Make it depend on ARMv7. 
698 if (has_vfp) { 699 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_ARMv7) 700 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3; 701 else 702 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2; 703 } 704 705 // Neon implies VFPv3|D32, and if vfpv4 is detected, NEON_FMA 706 if (has_neon) { 707 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv3 | 708 ANDROID_CPU_ARM_FEATURE_NEON | 709 ANDROID_CPU_ARM_FEATURE_VFP_D32; 710 if (has_vfpv4) 711 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_NEON_FMA; 712 } 713 714 // VFPv3 implies VFPv2 and ARMv7 715 if (g_cpuFeatures & ANDROID_CPU_ARM_FEATURE_VFPv3) 716 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_VFPv2 | 717 ANDROID_CPU_ARM_FEATURE_ARMv7; 718 719 if (has_idiva) 720 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_ARM; 721 if (has_idivt) 722 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2; 723 724 if (has_iwmmxt) 725 g_cpuFeatures |= ANDROID_CPU_ARM_FEATURE_iWMMXt; 726 } 727 728 /* Extract the cpuid value from various fields */ 729 // The CPUID value is broken up in several entries in /proc/cpuinfo. 730 // This table is used to rebuild it from the entries. 
731 static const struct CpuIdEntry { 732 const char* field; 733 char format; 734 char bit_lshift; 735 char bit_length; 736 } cpu_id_entries[] = { 737 { "CPU implementer", 'x', 24, 8 }, 738 { "CPU variant", 'x', 20, 4 }, 739 { "CPU part", 'x', 4, 12 }, 740 { "CPU revision", 'd', 0, 4 }, 741 }; 742 size_t i; 743 D("Parsing /proc/cpuinfo to recover CPUID\n"); 744 for (i = 0; 745 i < sizeof(cpu_id_entries)/sizeof(cpu_id_entries[0]); 746 ++i) { 747 const struct CpuIdEntry* entry = &cpu_id_entries[i]; 748 char* value = extract_cpuinfo_field(cpuinfo, 749 cpuinfo_len, 750 entry->field); 751 if (value == NULL) 752 continue; 753 754 D("field=%s value='%s'\n", entry->field, value); 755 char* value_end = value + strlen(value); 756 int val = 0; 757 const char* start = value; 758 const char* p; 759 if (value[0] == '0' && (value[1] == 'x' || value[1] == 'X')) { 760 start += 2; 761 p = parse_hexadecimal(start, value_end, &val); 762 } else if (entry->format == 'x') 763 p = parse_hexadecimal(value, value_end, &val); 764 else 765 p = parse_decimal(value, value_end, &val); 766 767 if (p > (const char*)start) { 768 val &= ((1 << entry->bit_length)-1); 769 val <<= entry->bit_lshift; 770 g_cpuIdArm |= (uint32_t) val; 771 } 772 773 free(value); 774 } 775 776 // Handle kernel configuration bugs that prevent the correct 777 // reporting of CPU features. 778 static const struct CpuFix { 779 uint32_t cpuid; 780 uint64_t or_flags; 781 } cpu_fixes[] = { 782 /* The Nexus 4 (Qualcomm Krait) kernel configuration 783 * forgets to report IDIV support. 
*/ 784 { 0x510006f2, ANDROID_CPU_ARM_FEATURE_IDIV_ARM | 785 ANDROID_CPU_ARM_FEATURE_IDIV_THUMB2 }, 786 }; 787 size_t n; 788 for (n = 0; n < sizeof(cpu_fixes)/sizeof(cpu_fixes[0]); ++n) { 789 const struct CpuFix* entry = &cpu_fixes[n]; 790 791 if (g_cpuIdArm == entry->cpuid) 792 g_cpuFeatures |= entry->or_flags; 793 } 794 795 } 796 #endif /* __ARM_ARCH__ */ 797 798 #ifdef __i386__ 799 int regs[4]; 800 801 /* According to http://en.wikipedia.org/wiki/CPUID */ 802 #define VENDOR_INTEL_b 0x756e6547 803 #define VENDOR_INTEL_c 0x6c65746e 804 #define VENDOR_INTEL_d 0x49656e69 805 806 x86_cpuid(0, regs); 807 int vendorIsIntel = (regs[1] == VENDOR_INTEL_b && 808 regs[2] == VENDOR_INTEL_c && 809 regs[3] == VENDOR_INTEL_d); 810 811 x86_cpuid(1, regs); 812 if ((regs[2] & (1 << 9)) != 0) { 813 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_SSSE3; 814 } 815 if ((regs[2] & (1 << 23)) != 0) { 816 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_POPCNT; 817 } 818 if (vendorIsIntel && (regs[2] & (1 << 22)) != 0) { 819 g_cpuFeatures |= ANDROID_CPU_X86_FEATURE_MOVBE; 820 } 821 #endif 822 823 free(cpuinfo); 824 } 825 826 827 AndroidCpuFamily 828 android_getCpuFamily(void) 829 { 830 pthread_once(&g_once, android_cpuInit); 831 return g_cpuFamily; 832 } 833 834 835 uint64_t 836 android_getCpuFeatures(void) 837 { 838 pthread_once(&g_once, android_cpuInit); 839 return g_cpuFeatures; 840 } 841 842 843 int 844 android_getCpuCount(void) 845 { 846 pthread_once(&g_once, android_cpuInit); 847 return g_cpuCount; 848 } 849 850 static void 851 android_cpuInitDummy(void) 852 { 853 g_inited = 1; 854 } 855 856 int 857 android_setCpu(int cpu_count, uint64_t cpu_features) 858 { 859 /* Fail if the library was already initialized. */ 860 if (g_inited) 861 return 0; 862 863 android_cpuInitFamily(); 864 g_cpuCount = (cpu_count <= 0 ? 
1 : cpu_count); 865 g_cpuFeatures = cpu_features; 866 pthread_once(&g_once, android_cpuInitDummy); 867 868 return 1; 869 } 870 871 #ifdef __arm__ 872 uint32_t 873 android_getCpuIdArm(void) 874 { 875 pthread_once(&g_once, android_cpuInit); 876 return g_cpuIdArm; 877 } 878 879 int 880 android_setCpuArm(int cpu_count, uint64_t cpu_features, uint32_t cpu_id) 881 { 882 if (!android_setCpu(cpu_count, cpu_features)) 883 return 0; 884 885 g_cpuIdArm = cpu_id; 886 return 1; 887 } 888 #endif /* __arm__ */ 889 890 /* 891 * Technical note: Making sense of ARM's FPU architecture versions. 892 * 893 * FPA was ARM's first attempt at an FPU architecture. There is no Android 894 * device that actually uses it since this technology was already obsolete 895 * when the project started. If you see references to FPA instructions 896 * somewhere, you can be sure that this doesn't apply to Android at all. 897 * 898 * FPA was followed by "VFP", soon renamed "VFPv1" due to the emergence of 899 * new versions / additions to it. ARM considers this obsolete right now, 900 * and no known Android device implements it either. 901 * 902 * VFPv2 added a few instructions to VFPv1, and is an *optional* extension 903 * supported by some ARMv5TE, ARMv6 and ARMv6T2 CPUs. Note that a device 904 * supporting the 'armeabi' ABI doesn't necessarily support these. 905 * 906 * VFPv3-D16 adds a few instructions on top of VFPv2 and is typically used 907 * on ARMv7-A CPUs which implement a FPU. Note that it is also mandated 908 * by the Android 'armeabi-v7a' ABI. The -D16 suffix in its name means 909 * that it provides 16 double-precision FPU registers (d0-d15) and 32 910 * single-precision ones (s0-s31) which happen to be mapped to the same 911 * register banks. 912 * 913 * VFPv3-D32 is the name of an extension to VFPv3-D16 that provides 16 914 * additional double precision registers (d16-d31). Note that there are 915 * still only 32 single precision registers. 
 *
 * VFPv3xD is a *subset* of VFPv3-D16 that only provides single-precision
 * registers. It is only used on ARMv7-M (i.e. on micro-controllers) which
 * are not supported by Android. Note that it is not compatible with VFPv2.
 *
 * NOTE: The term 'VFPv3' usually designates either VFPv3-D16 or VFPv3-D32
 *       depending on context. For example GCC uses it for VFPv3-D32, but
 *       the Linux kernel code uses it for VFPv3-D16 (especially in
 *       /proc/cpuinfo). Always try to use the full designation when
 *       possible.
 *
 * NEON, a.k.a. "ARM Advanced SIMD" is an extension that provides
 * instructions to perform parallel computations on vectors of 8, 16,
 * 32, 64 and 128 bit quantities. NEON requires VFPv3-D32 since all
 * NEON registers are also mapped to the same register banks.
 *
 * VFPv4-D16 adds a few instructions on top of VFPv3-D16 in order to
 * perform fused multiply-accumulate on VFP registers, as well as
 * half-precision (16-bit) conversion operations.
 *
 * VFPv4-D32 is VFPv4-D16 with 32, instead of 16, FPU double precision
 * registers.
 *
 * VFPv4-NEON is VFPv4-D32 with NEON instructions. It also adds fused
 * multiply-accumulate instructions that work on the NEON registers.
 *
 * NOTE: Similarly, "VFPv4" might either reference VFPv4-D16 or VFPv4-D32
 *       depending on context.
 *
 * The following information was determined by scanning the binutils-2.22
 * sources:
 *
 * Basic VFP instruction subsets:
 *
 * #define FPU_VFP_EXT_V1xD 0x08000000     // Base VFP instruction set.
 * #define FPU_VFP_EXT_V1   0x04000000     // Double-precision insns.
 * #define FPU_VFP_EXT_V2   0x02000000     // ARM10E VFPr1.
 * #define FPU_VFP_EXT_V3xD 0x01000000     // VFPv3 single-precision.
 * #define FPU_VFP_EXT_V3   0x00800000     // VFPv3 double-precision.
 * #define FPU_NEON_EXT_V1  0x00400000     // Neon (SIMD) insns.
 * #define FPU_VFP_EXT_D32  0x00200000     // Registers D16-D31.
 * #define FPU_VFP_EXT_FP16 0x00100000     // Half-precision extensions.
 * #define FPU_NEON_EXT_FMA 0x00080000     // Neon fused multiply-add
 * #define FPU_VFP_EXT_FMA  0x00040000     // VFP fused multiply-add
 *
 * FPU types (excluding NEON)
 *
 * FPU_VFP_V1xD (EXT_V1xD)
 *    |
 *    +--------------------------+
 *    |                          |
 * FPU_VFP_V1 (+EXT_V1)       FPU_VFP_V3xD (+EXT_V2+EXT_V3xD)
 *    |                          |
 *    |                          |
 * FPU_VFP_V2 (+EXT_V2)       FPU_VFP_V4_SP_D16 (+EXT_FP16+EXT_FMA)
 *    |
 * FPU_VFP_V3D16 (+EXT_V3xD+EXT_V3)
 *    |
 *    +--------------------------+
 *    |                          |
 * FPU_VFP_V3 (+EXT_D32)      FPU_VFP_V4D16 (+EXT_FP16+EXT_FMA)
 *    |                          |
 *    |                       FPU_VFP_V4 (+EXT_D32)
 *    |
 * FPU_VFP_HARD (+EXT_FMA+NEON_EXT_FMA)
 *
 * VFP architectures:
 *
 * ARCH_VFP_V1xD  (EXT_V1xD)
 *   |
 *   +------------------+
 *   |                  |
 *   |             ARCH_VFP_V3xD (+EXT_V2+EXT_V3xD)
 *   |                  |
 *   |             ARCH_VFP_V3xD_FP16 (+EXT_FP16)
 *   |                  |
 *   |             ARCH_VFP_V4_SP_D16 (+EXT_FMA)
 *   |
 * ARCH_VFP_V1 (+EXT_V1)
 *   |
 * ARCH_VFP_V2 (+EXT_V2)
 *   |
 * ARCH_VFP_V3D16 (+EXT_V3xD+EXT_V3)
 *   |
 *   +-------------------+
 *   |                   |
 *   |              ARCH_VFP_V3D16_FP16  (+EXT_FP16)
 *   |
 *   +-------------------+
 *   |                   |
 *   |              ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
 *   |                   |
 *   |              ARCH_VFP_V4 (+EXT_D32)
 *   |                   |
 *   |              ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
 *   |
 * ARCH_VFP_V3 (+EXT_D32)
 *   |
 *   +-------------------+
 *   |                   |
 *   |              ARCH_VFP_V3_FP16 (+EXT_FP16)
 *   |
 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
 *   |
 * ARCH_NEON_FP16 (+EXT_FP16)
 *
 * -fpu=<name> values and their correspondence with FPU architectures above:
 *
 *   {"vfp",               FPU_ARCH_VFP_V2},
 *   {"vfp9",              FPU_ARCH_VFP_V2},
 *   {"vfp3",              FPU_ARCH_VFP_V3}, // For backwards compatibility.
 *   {"vfp10",             FPU_ARCH_VFP_V2},
 *   {"vfp10-r0",          FPU_ARCH_VFP_V1},
 *   {"vfpxd",             FPU_ARCH_VFP_V1xD},
 *   {"vfpv2",             FPU_ARCH_VFP_V2},
 *   {"vfpv3",             FPU_ARCH_VFP_V3},
 *   {"vfpv3-fp16",        FPU_ARCH_VFP_V3_FP16},
 *   {"vfpv3-d16",         FPU_ARCH_VFP_V3D16},
 *   {"vfpv3-d16-fp16",    FPU_ARCH_VFP_V3D16_FP16},
 *   {"vfpv3xd",           FPU_ARCH_VFP_V3xD},
 *   {"vfpv3xd-fp16",      FPU_ARCH_VFP_V3xD_FP16},
 *   {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
 *   {"neon-fp16",         FPU_ARCH_NEON_FP16},
 *   {"vfpv4",             FPU_ARCH_VFP_V4},
 *   {"vfpv4-d16",         FPU_ARCH_VFP_V4D16},
 *   {"fpv4-sp-d16",       FPU_ARCH_VFP_V4_SP_D16},
 *   {"neon-vfpv4",        FPU_ARCH_NEON_VFP_V4},
 *
 *
 * Simplified diagram that only includes FPUs supported by Android:
 * Only ARCH_VFP_V3D16 is actually mandated by the armeabi-v7a ABI,
 * all others are optional and must be probed at runtime.
 *
 * ARCH_VFP_V3D16 (EXT_V1xD+EXT_V1+EXT_V2+EXT_V3xD+EXT_V3)
 *   |
 *   +-------------------+
 *   |                   |
 *   |              ARCH_VFP_V3D16_FP16  (+EXT_FP16)
 *   |
 *   +-------------------+
 *   |                   |
 *   |              ARCH_VFP_V4_D16 (+EXT_FP16+EXT_FMA)
 *   |                   |
 *   |              ARCH_VFP_V4 (+EXT_D32)
 *   |                   |
 *   |              ARCH_NEON_VFP_V4 (+EXT_NEON+EXT_NEON_FMA)
 *   |
 * ARCH_VFP_V3 (+EXT_D32)
 *   |
 *   +-------------------+
 *   |                   |
 *   |              ARCH_VFP_V3_FP16 (+EXT_FP16)
 *   |
 * ARCH_VFP_V3_PLUS_NEON_V1 (+EXT_NEON)
 *   |
 * ARCH_NEON_FP16 (+EXT_FP16)
 *
 */