1 /* 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "libyuv/cpu_id.h" 12 13 #ifdef _MSC_VER 14 #include <intrin.h> // For __cpuid() 15 #endif 16 #if !defined(__CLR_VER) && defined(_M_X64) && \ 17 defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) 18 #include <immintrin.h> // For _xgetbv() 19 #endif 20 21 #include <stdlib.h> // For getenv() 22 23 // For ArmCpuCaps() but unittested on all platforms 24 #include <stdio.h> 25 #include <string.h> 26 27 #include "libyuv/basic_types.h" // For CPU_X86 28 29 // TODO(fbarchard): Use cpuid.h when gcc 4.4 is used on OSX and Linux. 30 #if (defined(__pic__) || defined(__APPLE__)) && defined(__i386__) 31 static __inline void __cpuid(int cpu_info[4], int info_type) { 32 asm volatile ( // NOLINT 33 "mov %%ebx, %%edi \n" 34 "cpuid \n" 35 "xchg %%edi, %%ebx \n" 36 : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 37 : "a"(info_type)); 38 } 39 #elif defined(__i386__) || defined(__x86_64__) 40 static __inline void __cpuid(int cpu_info[4], int info_type) { 41 asm volatile ( // NOLINT 42 "cpuid \n" 43 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 44 : "a"(info_type)); 45 } 46 #endif 47 48 #ifdef __cplusplus 49 namespace libyuv { 50 extern "C" { 51 #endif 52 53 // Low level cpuid for X86. Returns zeros on other CPUs. 54 #if !defined(__CLR_VER) && (defined(_M_IX86) || defined(_M_X64) || \ 55 defined(__i386__) || defined(__x86_64__)) 56 LIBYUV_API 57 void CpuId(int cpu_info[4], int info_type) { 58 __cpuid(cpu_info, info_type); 59 } 60 #else 61 LIBYUV_API 62 void CpuId(int cpu_info[4], int) { 63 cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0; 64 } 65 #endif 66 67 // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers. 68 #if !defined(__CLR_VER) && defined(_M_X64) && \ 69 defined(_MSC_VER) && (_MSC_FULL_VER >= 160040219) 70 #define HAS_XGETBV 71 static uint32 XGetBV(unsigned int xcr) { 72 return static_cast<uint32>(_xgetbv(xcr)); 73 } 74 #elif !defined(__CLR_VER) && defined(_M_IX86) 75 #define HAS_XGETBV 76 __declspec(naked) __declspec(align(16)) 77 static uint32 XGetBV(unsigned int xcr) { 78 __asm { 79 mov ecx, [esp + 4] // xcr 80 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 // xgetbv for vs2005. 81 ret 82 } 83 } 84 #elif defined(__i386__) || defined(__x86_64__) 85 #define HAS_XGETBV 86 static uint32 XGetBV(unsigned int xcr) { 87 uint32 xcr_feature_mask; 88 asm volatile ( // NOLINT 89 ".byte 0x0f, 0x01, 0xd0\n" 90 : "=a"(xcr_feature_mask) 91 : "c"(xcr) 92 : "memory", "cc", "edx"); // edx unused. 93 return xcr_feature_mask; 94 } 95 #endif 96 #ifdef HAS_XGETBV 97 static const int kXCR_XFEATURE_ENABLED_MASK = 0; 98 #endif 99 100 // based on libvpx arm_cpudetect.c 101 // For Arm, but public to allow testing on any CPU 102 LIBYUV_API 103 int ArmCpuCaps(const char* cpuinfo_name) { 104 int flags = 0; 105 FILE* fin = fopen(cpuinfo_name, "r"); 106 if (fin) { 107 char buf[512]; 108 while (fgets(buf, 511, fin)) { 109 if (memcmp(buf, "Features", 8) == 0) { 110 flags |= kCpuInitialized; 111 char* p = strstr(buf, " neon"); 112 if (p && (p[5] == ' ' || p[5] == '\n')) { 113 flags |= kCpuHasNEON; 114 break; 115 } 116 } 117 } 118 fclose(fin); 119 } 120 return flags; 121 } 122 123 // CPU detect function for SIMD instruction sets. 124 LIBYUV_API 125 int cpu_info_ = 0; 126 127 // Test environment variable for disabling CPU features. Any non-zero value 128 // to disable. Zero ignored to make it easy to set the variable on/off. 129 static bool TestEnv(const char* name) { 130 const char* var = getenv(name); 131 if (var) { 132 if (var[0] != '0') { 133 return true; 134 } 135 } 136 return false; 137 } 138 139 LIBYUV_API 140 int InitCpuFlags(void) { 141 #if !defined(__CLR_VER) && defined(CPU_X86) 142 int cpu_info[4]; 143 __cpuid(cpu_info, 1); 144 cpu_info_ = ((cpu_info[3] & 0x04000000) ? kCpuHasSSE2 : 0) | 145 ((cpu_info[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | 146 ((cpu_info[2] & 0x00080000) ? kCpuHasSSE41 : 0) | 147 ((cpu_info[2] & 0x00100000) ? kCpuHasSSE42 : 0) | 148 (((cpu_info[2] & 0x18000000) == 0x18000000) ? kCpuHasAVX : 0) | 149 kCpuInitialized | kCpuHasX86; 150 #ifdef HAS_XGETBV 151 if (cpu_info_ & kCpuHasAVX) { 152 __cpuid(cpu_info, 7); 153 if ((cpu_info[1] & 0x00000020) && 154 ((XGetBV(kXCR_XFEATURE_ENABLED_MASK) & 0x06) == 0x06)) { 155 cpu_info_ |= kCpuHasAVX2; 156 } 157 } 158 #endif 159 // environment variable overrides for testing. 160 if (TestEnv("LIBYUV_DISABLE_X86")) { 161 cpu_info_ &= ~kCpuHasX86; 162 } 163 if (TestEnv("LIBYUV_DISABLE_SSE2")) { 164 cpu_info_ &= ~kCpuHasSSE2; 165 } 166 if (TestEnv("LIBYUV_DISABLE_SSSE3")) { 167 cpu_info_ &= ~kCpuHasSSSE3; 168 } 169 if (TestEnv("LIBYUV_DISABLE_SSE41")) { 170 cpu_info_ &= ~kCpuHasSSE41; 171 } 172 if (TestEnv("LIBYUV_DISABLE_SSE42")) { 173 cpu_info_ &= ~kCpuHasSSE42; 174 } 175 if (TestEnv("LIBYUV_DISABLE_AVX")) { 176 cpu_info_ &= ~kCpuHasAVX; 177 } 178 if (TestEnv("LIBYUV_DISABLE_AVX2")) { 179 cpu_info_ &= ~kCpuHasAVX2; 180 } 181 if (TestEnv("LIBYUV_DISABLE_ASM")) { 182 cpu_info_ = kCpuInitialized; 183 } 184 #elif defined(__arm__) 185 #if defined(__linux__) && (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) 186 // linux arm parse text file for neon detect. 187 cpu_info_ = ArmCpuCaps("/proc/cpuinfo"); 188 #elif defined(__ARM_NEON__) 189 // gcc -mfpu=neon defines __ARM_NEON__ 190 // Enable Neon if you want support for Neon and Arm, and use MaskCpuFlags 191 // to disable Neon on devices that do not have it. 192 cpu_info_ = kCpuHasNEON; 193 #endif 194 cpu_info_ |= kCpuInitialized | kCpuHasARM; 195 if (TestEnv("LIBYUV_DISABLE_NEON")) { 196 cpu_info_ &= ~kCpuHasNEON; 197 } 198 if (TestEnv("LIBYUV_DISABLE_ASM")) { 199 cpu_info_ = kCpuInitialized; 200 } 201 #endif // __arm__ 202 return cpu_info_; 203 } 204 205 LIBYUV_API 206 void MaskCpuFlags(int enable_flags) { 207 InitCpuFlags(); 208 cpu_info_ = (cpu_info_ & enable_flags) | kCpuInitialized; 209 } 210 211 #ifdef __cplusplus 212 } // extern "C" 213 } // namespace libyuv 214 #endif 215