Home | History | Annotate | Download | only in dsp
      1 // Copyright 2011 Google Inc. All Rights Reserved.
      2 //
      3 // Use of this source code is governed by a BSD-style license
      4 // that can be found in the COPYING file in the root of the source
      5 // tree. An additional intellectual property rights grant can be found
      6 // in the file PATENTS. All contributing project authors may
      7 // be found in the AUTHORS file in the root of the source tree.
      8 // -----------------------------------------------------------------------------
      9 //
     10 // CPU detection
     11 //
     12 // Author: Christian Duvivier (cduvivier (at) google.com)
     13 
     14 #include "src/dsp/dsp.h"
     15 
     16 #if defined(WEBP_HAVE_NEON_RTCD)
     17 #include <stdio.h>
     18 #include <string.h>
     19 #endif
     20 
     21 #if defined(WEBP_ANDROID_NEON)
     22 #include <cpu-features.h>
     23 #endif
     24 
     25 //------------------------------------------------------------------------------
     26 // SSE2 detection.
     27 //
     28 
     29 // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
     30 #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
     31 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
     32   __asm__ volatile (
     33     "mov %%ebx, %%edi\n"
     34     "cpuid\n"
     35     "xchg %%edi, %%ebx\n"
     36     : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
     37     : "a"(info_type), "c"(0));
     38 }
     39 #elif defined(__x86_64__) && \
     40       (defined(__code_model_medium__) || defined(__code_model_large__)) && \
     41       defined(__PIC__)
     42 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
     43   __asm__ volatile (
     44     "xchg{q}\t{%%rbx}, %q1\n"
     45     "cpuid\n"
     46     "xchg{q}\t{%%rbx}, %q1\n"
     47     : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
     48       "=d"(cpu_info[3])
     49     : "a"(info_type), "c"(0));
     50 }
     51 #elif defined(__i386__) || defined(__x86_64__)
     52 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
     53   __asm__ volatile (
     54     "cpuid\n"
     55     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
     56     : "a"(info_type), "c"(0));
     57 }
     58 #elif (defined(_M_X64) || defined(_M_IX86)) && \
     59       defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
     60 #include <intrin.h>
     61 #define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
     62 #elif defined(WEBP_MSC_SSE2)
     63 #define GetCPUInfo __cpuid
     64 #endif
     65 
     66 // NaCl has no support for xgetbv or the raw opcode.
     67 #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
     68 static WEBP_INLINE uint64_t xgetbv(void) {
     69   const uint32_t ecx = 0;
     70   uint32_t eax, edx;
     71   // Use the raw opcode for xgetbv for compatibility with older toolchains.
     72   __asm__ volatile (
     73     ".byte 0x0f, 0x01, 0xd0\n"
     74     : "=a"(eax), "=d"(edx) : "c" (ecx));
     75   return ((uint64_t)edx << 32) | eax;
     76 }
     77 #elif (defined(_M_X64) || defined(_M_IX86)) && \
     78       defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
     79 #include <immintrin.h>
     80 #define xgetbv() _xgetbv(0)
     81 #elif defined(_MSC_VER) && defined(_M_IX86)
     82 static WEBP_INLINE uint64_t xgetbv(void) {
     83   uint32_t eax_, edx_;
     84   __asm {
     85     xor ecx, ecx  // ecx = 0
     86     // Use the raw opcode for xgetbv for compatibility with older toolchains.
     87     __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
     88     mov eax_, eax
     89     mov edx_, edx
     90   }
     91   return ((uint64_t)edx_ << 32) | eax_;
     92 }
     93 #else
     94 #define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
     95 #endif
     96 
     97 #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_MSC_SSE2)
     98 
     99 // helper function for run-time detection of slow SSSE3 platforms
    100 static int CheckSlowModel(int info) {
    101   // Table listing display models with longer latencies for the bsr instruction
    102   // (ie 2 cycles vs 10/16 cycles) and some SSSE3 instructions like pshufb.
    103   // Refer to Intel 64 and IA-32 Architectures Optimization Reference Manual.
    104   static const uint8_t kSlowModels[] = {
    105     0x37, 0x4a, 0x4d,  // Silvermont Microarchitecture
    106     0x1c, 0x26, 0x27   // Atom Microarchitecture
    107   };
    108   const uint32_t model = ((info & 0xf0000) >> 12) | ((info >> 4) & 0xf);
    109   const uint32_t family = (info >> 8) & 0xf;
    110   if (family == 0x06) {
    111     size_t i;
    112     for (i = 0; i < sizeof(kSlowModels) / sizeof(kSlowModels[0]); ++i) {
    113       if (model == kSlowModels[i]) return 1;
    114     }
    115   }
    116   return 0;
    117 }
    118 
    119 static int x86CPUInfo(CPUFeature feature) {
    120   int max_cpuid_value;
    121   int cpu_info[4];
    122   int is_intel = 0;
    123 
    124   // get the highest feature value cpuid supports
    125   GetCPUInfo(cpu_info, 0);
    126   max_cpuid_value = cpu_info[0];
    127   if (max_cpuid_value < 1) {
    128     return 0;
    129   } else {
    130     const int VENDOR_ID_INTEL_EBX = 0x756e6547;  // uneG
    131     const int VENDOR_ID_INTEL_EDX = 0x49656e69;  // Ieni
    132     const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn
    133     is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
    134                 cpu_info[2] == VENDOR_ID_INTEL_ECX &&
    135                 cpu_info[3] == VENDOR_ID_INTEL_EDX);    // genuine Intel?
    136   }
    137 
    138   GetCPUInfo(cpu_info, 1);
    139   if (feature == kSSE2) {
    140     return !!(cpu_info[3] & (1 << 26));
    141   }
    142   if (feature == kSSE3) {
    143     return !!(cpu_info[2] & (1 << 0));
    144   }
    145   if (feature == kSlowSSSE3) {
    146     if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
    147       return CheckSlowModel(cpu_info[0]);
    148     }
    149     return 0;
    150   }
    151 
    152   if (feature == kSSE4_1) {
    153     return !!(cpu_info[2] & (1 << 19));
    154   }
    155   if (feature == kAVX) {
    156     // bits 27 (OSXSAVE) & 28 (256-bit AVX)
    157     if ((cpu_info[2] & 0x18000000) == 0x18000000) {
    158       // XMM state and YMM state enabled by the OS.
    159       return (xgetbv() & 0x6) == 0x6;
    160     }
    161   }
    162   if (feature == kAVX2) {
    163     if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
    164       GetCPUInfo(cpu_info, 7);
    165       return !!(cpu_info[1] & (1 << 5));
    166     }
    167   }
    168   return 0;
    169 }
    170 VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
    171 #elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
    172 static int AndroidCPUInfo(CPUFeature feature) {
    173   const AndroidCpuFamily cpu_family = android_getCpuFamily();
    174   const uint64_t cpu_features = android_getCpuFeatures();
    175   if (feature == kNEON) {
    176     return (cpu_family == ANDROID_CPU_FAMILY_ARM &&
    177             0 != (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON));
    178   }
    179   return 0;
    180 }
    181 VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
    182 #elif defined(WEBP_USE_NEON)
    183 // define a dummy function to enable turning off NEON at runtime by setting
    184 // VP8DecGetCPUInfo = NULL
    185 static int armCPUInfo(CPUFeature feature) {
    186   if (feature != kNEON) return 0;
    187 #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
    188   {
    189     int has_neon = 0;
    190     char line[200];
    191     FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
    192     if (cpuinfo == NULL) return 0;
    193     while (fgets(line, sizeof(line), cpuinfo)) {
    194       if (!strncmp(line, "Features", 8)) {
    195         if (strstr(line, " neon ") != NULL) {
    196           has_neon = 1;
    197           break;
    198         }
    199       }
    200     }
    201     fclose(cpuinfo);
    202     return has_neon;
    203   }
    204 #else
    205   return 1;
    206 #endif
    207 }
    208 VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
    209 #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
    210       defined(WEBP_USE_MSA)
    211 static int mipsCPUInfo(CPUFeature feature) {
    212   if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
    213     return 1;
    214   } else {
    215     return 0;
    216   }
    217 
    218 }
    219 VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
    220 #else
// None of the platform branches above was selected, so no detector is
// installed and the hook is left NULL.
VP8CPUInfo VP8GetCPUInfo = NULL;
    222 #endif
    223