1 // Copyright 2011 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // CPU detection 11 // 12 // Author: Christian Duvivier (cduvivier (at) google.com) 13 14 #include "src/dsp/dsp.h" 15 16 #if defined(WEBP_HAVE_NEON_RTCD) 17 #include <stdio.h> 18 #include <string.h> 19 #endif 20 21 #if defined(WEBP_ANDROID_NEON) 22 #include <cpu-features.h> 23 #endif 24 25 //------------------------------------------------------------------------------ 26 // SSE2 detection. 27 // 28 29 // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC. 30 #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__) 31 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { 32 __asm__ volatile ( 33 "mov %%ebx, %%edi\n" 34 "cpuid\n" 35 "xchg %%edi, %%ebx\n" 36 : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 37 : "a"(info_type), "c"(0)); 38 } 39 #elif defined(__x86_64__) && \ 40 (defined(__code_model_medium__) || defined(__code_model_large__)) && \ 41 defined(__PIC__) 42 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { 43 __asm__ volatile ( 44 "xchg{q}\t{%%rbx}, %q1\n" 45 "cpuid\n" 46 "xchg{q}\t{%%rbx}, %q1\n" 47 : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]), 48 "=d"(cpu_info[3]) 49 : "a"(info_type), "c"(0)); 50 } 51 #elif defined(__i386__) || defined(__x86_64__) 52 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) { 53 __asm__ volatile ( 54 "cpuid\n" 55 : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3]) 56 : "a"(info_type), "c"(0)); 57 } 58 #elif (defined(_M_X64) || defined(_M_IX86)) && \ 59 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729 // >= VS2008 SP1 60 #include <intrin.h> 61 #define GetCPUInfo(info, type) __cpuidex(info, type, 0) // set ecx=0 62 #elif defined(WEBP_MSC_SSE2) 63 #define GetCPUInfo __cpuid 64 #endif 65 66 // NaCl has no support for xgetbv or the raw opcode. 67 #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) 68 static WEBP_INLINE uint64_t xgetbv(void) { 69 const uint32_t ecx = 0; 70 uint32_t eax, edx; 71 // Use the raw opcode for xgetbv for compatibility with older toolchains. 72 __asm__ volatile ( 73 ".byte 0x0f, 0x01, 0xd0\n" 74 : "=a"(eax), "=d"(edx) : "c" (ecx)); 75 return ((uint64_t)edx << 32) | eax; 76 } 77 #elif (defined(_M_X64) || defined(_M_IX86)) && \ 78 defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 79 #include <immintrin.h> 80 #define xgetbv() _xgetbv(0) 81 #elif defined(_MSC_VER) && defined(_M_IX86) 82 static WEBP_INLINE uint64_t xgetbv(void) { 83 uint32_t eax_, edx_; 84 __asm { 85 xor ecx, ecx // ecx = 0 86 // Use the raw opcode for xgetbv for compatibility with older toolchains. 87 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 88 mov eax_, eax 89 mov edx_, edx 90 } 91 return ((uint64_t)edx_ << 32) | eax_; 92 } 93 #else 94 #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains. 95 #endif 96 97 #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2) 98 99 // helper function for run-time detection of slow SSSE3 platforms 100 static int CheckSlowModel(int info) { 101 // Table listing display models with longer latencies for the bsr instruction 102 // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb. 103 // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual. 104 static const uint8_t kSlowModels[] = { 105 0x37, 0x4a, 0x4d, // Silvermont Microarchitecture 106 0x1c, 0x26, 0x27 // Atom Microarchitecture 107 }; 108 const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf); 109 const uint32_t family = (info >> 8) & 0xf; 110 if (family == 0x06) { 111 size_t i; 112 for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) { 113 if (model == kSlowModels[i]) return 1; 114 } 115 } 116 return 0; 117 } 118 119 static int x86CPUInfo(CPUFeature feature) { 120 int max_cpuid_value; 121 int cpu_info[4]; 122 int is_intel = 0; 123 124 // get the highest feature value cpuid supports 125 GetCPUInfo(cpu_info, 0); 126 max_cpuid_value = cpu_info[0]; 127 if (max_cpuid_value < 1) { 128 return 0; 129 } else { 130 const int VENDOR_ID_INTEL_EBX = 0x756e6547; // uneG 131 const int VENDOR_ID_INTEL_EDX = 0x49656e69; // Ieni 132 const int VENDOR_ID_INTEL_ECX = 0x6c65746e; // letn 133 is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX && 134 cpu_info[2] == VENDOR_ID_INTEL_ECX && 135 cpu_info[3] == VENDOR_ID_INTEL_EDX); // genuine Intel? 136 } 137 138 GetCPUInfo(cpu_info, 1); 139 if (feature == kSSE2) { 140 return !!(cpu_info[3] & (1 << 26)); 141 } 142 if (feature == kSSE3) { 143 return !!(cpu_info[2] & (1 << 0)); 144 } 145 if (feature == kSlowSSSE3) { 146 if (is_intel && (cpu_info[2] & (1 << 9))) { // SSSE3? 147 return CheckSlowModel(cpu_info[0]); 148 } 149 return 0; 150 } 151 152 if (feature == kSSE4_1) { 153 return !!(cpu_info[2] & (1 << 19)); 154 } 155 if (feature == kAVX) { 156 // bits 27 (OSXSAVE) & 28 (256-bit AVX) 157 if ((cpu_info[2] & 0x18000000) == 0x18000000) { 158 // XMM state and YMM state enabled by the OS. 159 return (xgetbv() & 0x6) == 0x6; 160 } 161 } 162 if (feature == kAVX2) { 163 if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) { 164 GetCPUInfo(cpu_info, 7); 165 return !!(cpu_info[1] & (1 << 5)); 166 } 167 } 168 return 0; 169 } 170 VP8CPUInfo VP8GetCPUInfo = x86CPUInfo; 171 #elif defined(WEBP_ANDROID_NEON) // NB: needs to be before generic NEON test. 172 static int AndroidCPUInfo(CPUFeature feature) { 173 const AndroidCpuFamily cpu_family = android_getCpuFamily(); 174 const uint64_t cpu_features = android_getCpuFeatures(); 175 if (feature == kNEON) { 176 return (cpu_family == ANDROID_CPU_FAMILY_ARM && 177 0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON)); 178 } 179 return 0; 180 } 181 VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo; 182 #elif defined(WEBP_USE_NEON) 183 // define a dummy function to enable turning off NEON at runtime by setting 184 // VP8DecGetCPUInfo = NULL 185 static int armCPUInfo(CPUFeature feature) { 186 if (feature != kNEON) return 0; 187 #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD) 188 { 189 int has_neon = 0; 190 char line[200]; 191 FILE* const cpuinfo = fopen("/proc/cpuinfo", "r"); 192 if (cpuinfo == NULL) return 0; 193 while (fgets(line, sizeof(line), cpuinfo)) { 194 if (!strncmp(line, "Features", 8)) { 195 if (strstr(line, " neon ") != NULL) { 196 has_neon = 1; 197 break; 198 } 199 } 200 } 201 fclose(cpuinfo); 202 return has_neon; 203 } 204 #else 205 return 1; 206 #endif 207 } 208 VP8CPUInfo VP8GetCPUInfo = armCPUInfo; 209 #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \ 210 defined(WEBP_USE_MSA) 211 static int mipsCPUInfo(CPUFeature feature) { 212 if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) { 213 return 1; 214 } else { 215 return 0; 216 } 217 218 } 219 VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo; 220 #else 221 VP8CPUInfo VP8GetCPUInfo = NULL; 222 #endif 223