Home | History | Annotate | Download | only in platform
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/platform/cpu_info.h"
     17 #include "tensorflow/core/platform/logging.h"
     18 #include "tensorflow/core/platform/platform.h"
     19 #include "tensorflow/core/platform/types.h"
     20 #if defined(PLATFORM_IS_X86)
     21 #include <mutex>  // NOLINT
     22 #endif
     23 
     // SIMD extension querying is only available on x86.
     //
     // GETCPUID(a, b, c, d, a_inp, c_inp) issues the CPUID instruction with a_inp
     // as the EAX input and c_inp as the ECX input, and stores the resulting EAX,
     // EBX, ECX and EDX values into a, b, c and d respectively.  The four output
     // arguments must be assignable lvalues supplied by the caller.
     #ifdef PLATFORM_IS_X86
     #ifdef PLATFORM_WINDOWS
     // Visual Studio defines a builtin function for CPUID, so use that if possible.
     // The builtin writes into the local cpu_info array, whose four elements are
     // then copied out to a, b, c and d.  Note that this variant expands to a
     // brace-enclosed block rather than a single expression/statement.
     #define GETCPUID(a, b, c, d, a_inp, c_inp) \
       {                                        \
         int cpu_info[4] = {-1};                \
         __cpuidex(cpu_info, a_inp, c_inp);     \
         a = cpu_info[0];                       \
         b = cpu_info[1];                       \
         c = cpu_info[2];                       \
         d = cpu_info[3];                       \
       }
     #else
     // Otherwise use gcc-format assembler to implement the underlying instructions.
     // RBX is copied into RDI before CPUID runs and the two are exchanged
     // afterwards, so RBX ends up holding its original value while the value CPUID
     // produced in RBX is delivered through RDI (the "=D" output bound to b).
     // The "2" input constraint places c_inp in the same register as output
     // operand 2, i.e. ECX.  The 64-bit register names mean this variant only
     // assembles for x86-64 targets.
     // NOTE(review): RBX is presumably preserved because the compiler may reserve
     // it (e.g. as the PIC base register) -- confirm.
     #define GETCPUID(a, b, c, d, a_inp, c_inp) \
       asm("mov %%rbx, %%rdi\n"                 \
           "cpuid\n"                            \
           "xchg %%rdi, %%rbx\n"                \
           : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
           : "a"(a_inp), "2"(c_inp))
     #endif
     #endif
     47 
     48 namespace tensorflow {
     49 namespace port {
     50 namespace {
     51 
     52 #ifdef PLATFORM_IS_X86
     // Forward declarations: both CPUIDInfo and InitCPUIDInfo() are defined further
     // down in this file but are referenced before those definitions.
     class CPUIDInfo;
     void InitCPUIDInfo();

     // File-level CPUID snapshot.  Stays null until CPUIDInfo::Initialize()
     // allocates the object and stores it here.
     CPUIDInfo *cpuid = nullptr;
     57 
     #ifdef PLATFORM_WINDOWS
     // Visual Studio ships an intrinsic for XGETBV; only the part of its result
     // that fits in an int is returned.
     int GetXCR0EAX() { return static_cast<int>(_xgetbv(0)); }
     #else
     // Executes XGETBV with ECX = 0 and returns the value left in EAX.  The value
     // left in EDX is captured only to satisfy the asm output list and is dropped.
     int GetXCR0EAX() {
       int low_half = 0;
       int high_half = 0;
       asm("XGETBV" : "=a"(low_half), "=d"(high_half) : "c"(0));
       return low_half;
     }
     #endif
     68 
     69 // Structure for basic CPUID info
     70 class CPUIDInfo {
     71  public:
     72   CPUIDInfo()
     73       : have_adx_(0),
     74         have_aes_(0),
     75         have_avx_(0),
     76         have_avx2_(0),
     77         have_avx512f_(0),
     78         have_avx512cd_(0),
     79         have_avx512er_(0),
     80         have_avx512pf_(0),
     81         have_avx512vl_(0),
     82         have_avx512bw_(0),
     83         have_avx512dq_(0),
     84         have_avx512vbmi_(0),
     85         have_avx512ifma_(0),
     86         have_avx512_4vnniw_(0),
     87         have_avx512_4fmaps_(0),
     88         have_bmi1_(0),
     89         have_bmi2_(0),
     90         have_cmov_(0),
     91         have_cmpxchg16b_(0),
     92         have_cmpxchg8b_(0),
     93         have_f16c_(0),
     94         have_fma_(0),
     95         have_mmx_(0),
     96         have_pclmulqdq_(0),
     97         have_popcnt_(0),
     98         have_prefetchw_(0),
     99         have_prefetchwt1_(0),
    100         have_rdrand_(0),
    101         have_rdseed_(0),
    102         have_smap_(0),
    103         have_sse_(0),
    104         have_sse2_(0),
    105         have_sse3_(0),
    106         have_sse4_1_(0),
    107         have_sse4_2_(0),
    108         have_ssse3_(0),
    109         have_hypervisor_(0) {}
    110 
    111   static void Initialize() {
    112     // Initialize cpuid struct
    113     CHECK(cpuid == nullptr) << __func__ << " ran more than once";
    114     cpuid = new CPUIDInfo;
    115 
    116     uint32 eax, ebx, ecx, edx;
    117 
    118     // Get vendor string (issue CPUID with eax = 0)
    119     GETCPUID(eax, ebx, ecx, edx, 0, 0);
    120     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
    121     cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
    122     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
    123 
    124     // To get general information and extended features we send eax = 1 and
    125     // ecx = 0 to cpuid.  The response is returned in eax, ebx, ecx and edx.
    126     // (See Intel 64 and IA-32 Architectures Software Developer's Manual
    127     // Volume 2A: Instruction Set Reference, A-M CPUID).
    128     GETCPUID(eax, ebx, ecx, edx, 1, 0);
    129 
    130     cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
    131     cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
    132 
    133     cpuid->have_aes_ = (ecx >> 25) & 0x1;
    134     cpuid->have_cmov_ = (edx >> 15) & 0x1;
    135     cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
    136     cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
    137     cpuid->have_mmx_ = (edx >> 23) & 0x1;
    138     cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
    139     cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
    140     cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
    141     cpuid->have_sse2_ = (edx >> 26) & 0x1;
    142     cpuid->have_sse3_ = ecx & 0x1;
    143     cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
    144     cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
    145     cpuid->have_sse_ = (edx >> 25) & 0x1;
    146     cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
    147     cpuid->have_hypervisor_ = (ecx >> 31) & 1;
    148 
    149     const uint64 xcr0_xmm_mask = 0x2;
    150     const uint64 xcr0_ymm_mask = 0x4;
    151     const uint64 xcr0_maskreg_mask = 0x20;
    152     const uint64 xcr0_zmm0_15_mask = 0x40;
    153     const uint64 xcr0_zmm16_31_mask = 0x80;
    154 
    155     const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
    156     const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
    157                                     xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
    158 
    159     const bool have_avx =
    160         // Does the OS support XGETBV instruction use by applications?
    161         ((ecx >> 27) & 0x1) &&
    162         // Does the OS save/restore XMM and YMM state?
    163         ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
    164         // Is AVX supported in hardware?
    165         ((ecx >> 28) & 0x1);
    166 
    167     const bool have_avx512 =
    168         // Does the OS support XGETBV instruction use by applications?
    169         ((ecx >> 27) & 0x1) &&
    170         // Does the OS save/restore ZMM state?
    171         ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
    172 
    173     cpuid->have_avx_ = have_avx;
    174     cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
    175     cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
    176 
    177     // Get standard level 7 structured extension features (issue CPUID with
    178     // eax = 7 and ecx= 0), which is required to check for AVX2 support as
    179     // well as other Haswell (and beyond) features.  (See Intel 64 and IA-32
    180     // Architectures Software Developer's Manual Volume 2A: Instruction Set
    181     // Reference, A-M CPUID).
    182     GETCPUID(eax, ebx, ecx, edx, 7, 0);
    183 
    184     cpuid->have_adx_ = (ebx >> 19) & 0x1;
    185     cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
    186     cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
    187     cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
    188     cpuid->have_prefetchwt1_ = ecx & 0x1;
    189     cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
    190     cpuid->have_smap_ = (ebx >> 20) & 0x1;
    191 
    192     cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
    193     cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
    194     cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
    195     cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
    196     cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
    197     cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
    198     cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
    199     cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
    200     cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
    201     cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
    202     cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
    203   }
    204 
    205   static bool TestFeature(CPUFeature feature) {
    206     InitCPUIDInfo();
    207     // clang-format off
    208     switch (feature) {
    209       case ADX:           return cpuid->have_adx_;
    210       case AES:           return cpuid->have_aes_;
    211       case AVX2:          return cpuid->have_avx2_;
    212       case AVX:           return cpuid->have_avx_;
    213       case AVX512F:       return cpuid->have_avx512f_;
    214       case AVX512CD:      return cpuid->have_avx512cd_;
    215       case AVX512PF:      return cpuid->have_avx512pf_;
    216       case AVX512ER:      return cpuid->have_avx512er_;
    217       case AVX512VL:      return cpuid->have_avx512vl_;
    218       case AVX512BW:      return cpuid->have_avx512bw_;
    219       case AVX512DQ:      return cpuid->have_avx512dq_;
    220       case AVX512VBMI:    return cpuid->have_avx512vbmi_;
    221       case AVX512IFMA:    return cpuid->have_avx512ifma_;
    222       case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
    223       case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
    224       case BMI1:          return cpuid->have_bmi1_;
    225       case BMI2:          return cpuid->have_bmi2_;
    226       case CMOV:          return cpuid->have_cmov_;
    227       case CMPXCHG16B:    return cpuid->have_cmpxchg16b_;
    228       case CMPXCHG8B:     return cpuid->have_cmpxchg8b_;
    229       case F16C:          return cpuid->have_f16c_;
    230       case FMA:           return cpuid->have_fma_;
    231       case MMX:           return cpuid->have_mmx_;
    232       case PCLMULQDQ:     return cpuid->have_pclmulqdq_;
    233       case POPCNT:        return cpuid->have_popcnt_;
    234       case PREFETCHW:     return cpuid->have_prefetchw_;
    235       case PREFETCHWT1:   return cpuid->have_prefetchwt1_;
    236       case RDRAND:        return cpuid->have_rdrand_;
    237       case RDSEED:        return cpuid->have_rdseed_;
    238       case SMAP:          return cpuid->have_smap_;
    239       case SSE2:          return cpuid->have_sse2_;
    240       case SSE3:          return cpuid->have_sse3_;
    241       case SSE4_1:        return cpuid->have_sse4_1_;
    242       case SSE4_2:        return cpuid->have_sse4_2_;
    243       case SSE:           return cpuid->have_sse_;
    244       case SSSE3:         return cpuid->have_ssse3_;
    245       case HYPERVISOR:    return cpuid->have_hypervisor_;
    246       default:
    247         break;
    248     }
    249     // clang-format on
    250     return false;
    251   }
    252 
    253   string vendor_str() const { return vendor_str_; }
    254   int family() const { return family_; }
    255   int model_num() { return model_num_; }
    256 
    257  private:
    258   int have_adx_ : 1;
    259   int have_aes_ : 1;
    260   int have_avx_ : 1;
    261   int have_avx2_ : 1;
    262   int have_avx512f_ : 1;
    263   int have_avx512cd_ : 1;
    264   int have_avx512er_ : 1;
    265   int have_avx512pf_ : 1;
    266   int have_avx512vl_ : 1;
    267   int have_avx512bw_ : 1;
    268   int have_avx512dq_ : 1;
    269   int have_avx512vbmi_ : 1;
    270   int have_avx512ifma_ : 1;
    271   int have_avx512_4vnniw_ : 1;
    272   int have_avx512_4fmaps_ : 1;
    273   int have_bmi1_ : 1;
    274   int have_bmi2_ : 1;
    275   int have_cmov_ : 1;
    276   int have_cmpxchg16b_ : 1;
    277   int have_cmpxchg8b_ : 1;
    278   int have_f16c_ : 1;
    279   int have_fma_ : 1;
    280   int have_mmx_ : 1;
    281   int have_pclmulqdq_ : 1;
    282   int have_popcnt_ : 1;
    283   int have_prefetchw_ : 1;
    284   int have_prefetchwt1_ : 1;
    285   int have_rdrand_ : 1;
    286   int have_rdseed_ : 1;
    287   int have_smap_ : 1;
    288   int have_sse_ : 1;
    289   int have_sse2_ : 1;
    290   int have_sse3_ : 1;
    291   int have_sse4_1_ : 1;
    292   int have_sse4_2_ : 1;
    293   int have_ssse3_ : 1;
    294   int have_hypervisor_ : 1;
    295   string vendor_str_;
    296   int family_;
    297   int model_num_;
    298 };
    299 
    300 std::once_flag cpuid_once_flag;
    301 
    302 void InitCPUIDInfo() {
    303   // This ensures that CPUIDInfo::Initialize() is called exactly
    304   // once regardless of how many threads concurrently call us
    305   std::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
    306 }
    307 
    308 #endif  // PLATFORM_IS_X86
    309 
    310 }  // namespace
    311 
    312 bool TestCPUFeature(CPUFeature feature) {
    313 #ifdef PLATFORM_IS_X86
    314   return CPUIDInfo::TestFeature(feature);
    315 #else
    316   return false;
    317 #endif
    318 }
    319 
    // Returns the vendor string captured from CPUID on x86, or an empty string on
    // every other platform.
    std::string CPUVendorIDString() {
    #if !defined(PLATFORM_IS_X86)
      return std::string();
    #else
      InitCPUIDInfo();
      return cpuid->vendor_str();
    #endif
    }
    328 
    // Returns the family value captured from CPUID on x86, or 0 on every other
    // platform.
    int CPUFamily() {
    #if !defined(PLATFORM_IS_X86)
      return 0;
    #else
      InitCPUIDInfo();
      const int family_value = cpuid->family();
      return family_value;
    #endif
    }
    337 
    // Returns the model number captured from CPUID on x86, or 0 on every other
    // platform.
    int CPUModelNum() {
    #if !defined(PLATFORM_IS_X86)
      return 0;
    #else
      InitCPUIDInfo();
      const int model_value = cpuid->model_num();
      return model_value;
    #endif
    }
    346 
    347 }  // namespace port
    348 }  // namespace tensorflow
    349