1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/platform/cpu_info.h" 17 #include "tensorflow/core/platform/logging.h" 18 #include "tensorflow/core/platform/platform.h" 19 #include "tensorflow/core/platform/types.h" 20 #if defined(PLATFORM_IS_X86) 21 #include <mutex> // NOLINT 22 #endif 23 24 // SIMD extension querying is only available on x86. 25 #ifdef PLATFORM_IS_X86 26 #ifdef PLATFORM_WINDOWS 27 // Visual Studio defines a builtin function for CPUID, so use that if possible. 28 #define GETCPUID(a, b, c, d, a_inp, c_inp) \ 29 { \ 30 int cpu_info[4] = {-1}; \ 31 __cpuidex(cpu_info, a_inp, c_inp); \ 32 a = cpu_info[0]; \ 33 b = cpu_info[1]; \ 34 c = cpu_info[2]; \ 35 d = cpu_info[3]; \ 36 } 37 #else 38 // Otherwise use gcc-format assembler to implement the underlying instructions. 39 #define GETCPUID(a, b, c, d, a_inp, c_inp) \ 40 asm("mov %%rbx, %%rdi\n" \ 41 "cpuid\n" \ 42 "xchg %%rdi, %%rbx\n" \ 43 : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \ 44 : "a"(a_inp), "2"(c_inp)) 45 #endif 46 #endif 47 48 namespace tensorflow { 49 namespace port { 50 namespace { 51 52 #ifdef PLATFORM_IS_X86 53 class CPUIDInfo; 54 void InitCPUIDInfo(); 55 56 CPUIDInfo *cpuid = nullptr; 57 58 #ifdef PLATFORM_WINDOWS 59 // Visual Studio defines a builtin function, so use that if possible. 60 int GetXCR0EAX() { return _xgetbv(0); } 61 #else 62 int GetXCR0EAX() { 63 int eax, edx; 64 asm("XGETBV" : "=a"(eax), "=d"(edx) : "c"(0)); 65 return eax; 66 } 67 #endif 68 69 // Structure for basic CPUID info 70 class CPUIDInfo { 71 public: 72 CPUIDInfo() 73 : have_adx_(0), 74 have_aes_(0), 75 have_avx_(0), 76 have_avx2_(0), 77 have_avx512f_(0), 78 have_avx512cd_(0), 79 have_avx512er_(0), 80 have_avx512pf_(0), 81 have_avx512vl_(0), 82 have_avx512bw_(0), 83 have_avx512dq_(0), 84 have_avx512vbmi_(0), 85 have_avx512ifma_(0), 86 have_avx512_4vnniw_(0), 87 have_avx512_4fmaps_(0), 88 have_bmi1_(0), 89 have_bmi2_(0), 90 have_cmov_(0), 91 have_cmpxchg16b_(0), 92 have_cmpxchg8b_(0), 93 have_f16c_(0), 94 have_fma_(0), 95 have_mmx_(0), 96 have_pclmulqdq_(0), 97 have_popcnt_(0), 98 have_prefetchw_(0), 99 have_prefetchwt1_(0), 100 have_rdrand_(0), 101 have_rdseed_(0), 102 have_smap_(0), 103 have_sse_(0), 104 have_sse2_(0), 105 have_sse3_(0), 106 have_sse4_1_(0), 107 have_sse4_2_(0), 108 have_ssse3_(0), 109 have_hypervisor_(0) {} 110 111 static void Initialize() { 112 // Initialize cpuid struct 113 CHECK(cpuid == nullptr) << __func__ << " ran more than once"; 114 cpuid = new CPUIDInfo; 115 116 uint32 eax, ebx, ecx, edx; 117 118 // Get vendor string (issue CPUID with eax = 0) 119 GETCPUID(eax, ebx, ecx, edx, 0, 0); 120 cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4); 121 cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4); 122 cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4); 123 124 // To get general information and extended features we send eax = 1 and 125 // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx. 126 // (See Intel 64 and IA-32 Architectures Software Developer's Manual 127 // Volume 2A: Instruction Set Reference, A-M CPUID). 128 GETCPUID(eax, ebx, ecx, edx, 1, 0); 129 130 cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf); 131 cpuid->family_ = static_cast<int>((eax >> 8) & 0xf); 132 133 cpuid->have_aes_ = (ecx >> 25) & 0x1; 134 cpuid->have_cmov_ = (edx >> 15) & 0x1; 135 cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1; 136 cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1; 137 cpuid->have_mmx_ = (edx >> 23) & 0x1; 138 cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1; 139 cpuid->have_popcnt_ = (ecx >> 23) & 0x1; 140 cpuid->have_rdrand_ = (ecx >> 30) & 0x1; 141 cpuid->have_sse2_ = (edx >> 26) & 0x1; 142 cpuid->have_sse3_ = ecx & 0x1; 143 cpuid->have_sse4_1_ = (ecx >> 19) & 0x1; 144 cpuid->have_sse4_2_ = (ecx >> 20) & 0x1; 145 cpuid->have_sse_ = (edx >> 25) & 0x1; 146 cpuid->have_ssse3_ = (ecx >> 9) & 0x1; 147 cpuid->have_hypervisor_ = (ecx >> 31) & 1; 148 149 const uint64 xcr0_xmm_mask = 0x2; 150 const uint64 xcr0_ymm_mask = 0x4; 151 const uint64 xcr0_maskreg_mask = 0x20; 152 const uint64 xcr0_zmm0_15_mask = 0x40; 153 const uint64 xcr0_zmm16_31_mask = 0x80; 154 155 const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask; 156 const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask | 157 xcr0_zmm0_15_mask | xcr0_zmm16_31_mask; 158 159 const bool have_avx = 160 // Does the OS support XGETBV instruction use by applications? 161 ((ecx >> 27) & 0x1) && 162 // Does the OS save/restore XMM and YMM state? 163 ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) && 164 // Is AVX supported in hardware? 165 ((ecx >> 28) & 0x1); 166 167 const bool have_avx512 = 168 // Does the OS support XGETBV instruction use by applications? 169 ((ecx >> 27) & 0x1) && 170 // Does the OS save/restore ZMM state? 171 ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask); 172 173 cpuid->have_avx_ = have_avx; 174 cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1); 175 cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1); 176 177 // Get standard level 7 structured extension features (issue CPUID with 178 // eax = 7 and ecx= 0), which is required to check for AVX2 support as 179 // well as other Haswell (and beyond) features. (See Intel 64 and IA-32 180 // Architectures Software Developer's Manual Volume 2A: Instruction Set 181 // Reference, A-M CPUID). 182 GETCPUID(eax, ebx, ecx, edx, 7, 0); 183 184 cpuid->have_adx_ = (ebx >> 19) & 0x1; 185 cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1); 186 cpuid->have_bmi1_ = (ebx >> 3) & 0x1; 187 cpuid->have_bmi2_ = (ebx >> 8) & 0x1; 188 cpuid->have_prefetchwt1_ = ecx & 0x1; 189 cpuid->have_rdseed_ = (ebx >> 18) & 0x1; 190 cpuid->have_smap_ = (ebx >> 20) & 0x1; 191 192 cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1); 193 cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1); 194 cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1); 195 cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1); 196 cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1); 197 cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1); 198 cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1); 199 cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1); 200 cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1); 201 cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1); 202 cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1); 203 } 204 205 static bool TestFeature(CPUFeature feature) { 206 InitCPUIDInfo(); 207 // clang-format off 208 switch (feature) { 209 case ADX: return cpuid->have_adx_; 210 case AES: return cpuid->have_aes_; 211 case AVX2: return cpuid->have_avx2_; 212 case AVX: return cpuid->have_avx_; 213 case AVX512F: return cpuid->have_avx512f_; 214 case AVX512CD: return cpuid->have_avx512cd_; 215 case AVX512PF: return cpuid->have_avx512pf_; 216 case AVX512ER: return cpuid->have_avx512er_; 217 case AVX512VL: return cpuid->have_avx512vl_; 218 case AVX512BW: return cpuid->have_avx512bw_; 219 case AVX512DQ: return cpuid->have_avx512dq_; 220 case AVX512VBMI: return cpuid->have_avx512vbmi_; 221 case AVX512IFMA: return cpuid->have_avx512ifma_; 222 case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_; 223 case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_; 224 case BMI1: return cpuid->have_bmi1_; 225 case BMI2: return cpuid->have_bmi2_; 226 case CMOV: return cpuid->have_cmov_; 227 case CMPXCHG16B: return cpuid->have_cmpxchg16b_; 228 case CMPXCHG8B: return cpuid->have_cmpxchg8b_; 229 case F16C: return cpuid->have_f16c_; 230 case FMA: return cpuid->have_fma_; 231 case MMX: return cpuid->have_mmx_; 232 case PCLMULQDQ: return cpuid->have_pclmulqdq_; 233 case POPCNT: return cpuid->have_popcnt_; 234 case PREFETCHW: return cpuid->have_prefetchw_; 235 case PREFETCHWT1: return cpuid->have_prefetchwt1_; 236 case RDRAND: return cpuid->have_rdrand_; 237 case RDSEED: return cpuid->have_rdseed_; 238 case SMAP: return cpuid->have_smap_; 239 case SSE2: return cpuid->have_sse2_; 240 case SSE3: return cpuid->have_sse3_; 241 case SSE4_1: return cpuid->have_sse4_1_; 242 case SSE4_2: return cpuid->have_sse4_2_; 243 case SSE: return cpuid->have_sse_; 244 case SSSE3: return cpuid->have_ssse3_; 245 case HYPERVISOR: return cpuid->have_hypervisor_; 246 default: 247 break; 248 } 249 // clang-format on 250 return false; 251 } 252 253 string vendor_str() const { return vendor_str_; } 254 int family() const { return family_; } 255 int model_num() { return model_num_; } 256 257 private: 258 int have_adx_ : 1; 259 int have_aes_ : 1; 260 int have_avx_ : 1; 261 int have_avx2_ : 1; 262 int have_avx512f_ : 1; 263 int have_avx512cd_ : 1; 264 int have_avx512er_ : 1; 265 int have_avx512pf_ : 1; 266 int have_avx512vl_ : 1; 267 int have_avx512bw_ : 1; 268 int have_avx512dq_ : 1; 269 int have_avx512vbmi_ : 1; 270 int have_avx512ifma_ : 1; 271 int have_avx512_4vnniw_ : 1; 272 int have_avx512_4fmaps_ : 1; 273 int have_bmi1_ : 1; 274 int have_bmi2_ : 1; 275 int have_cmov_ : 1; 276 int have_cmpxchg16b_ : 1; 277 int have_cmpxchg8b_ : 1; 278 int have_f16c_ : 1; 279 int have_fma_ : 1; 280 int have_mmx_ : 1; 281 int have_pclmulqdq_ : 1; 282 int have_popcnt_ : 1; 283 int have_prefetchw_ : 1; 284 int have_prefetchwt1_ : 1; 285 int have_rdrand_ : 1; 286 int have_rdseed_ : 1; 287 int have_smap_ : 1; 288 int have_sse_ : 1; 289 int have_sse2_ : 1; 290 int have_sse3_ : 1; 291 int have_sse4_1_ : 1; 292 int have_sse4_2_ : 1; 293 int have_ssse3_ : 1; 294 int have_hypervisor_ : 1; 295 string vendor_str_; 296 int family_; 297 int model_num_; 298 }; 299 300 std::once_flag cpuid_once_flag; 301 302 void InitCPUIDInfo() { 303 // This ensures that CPUIDInfo::Initialize() is called exactly 304 // once regardless of how many threads concurrently call us 305 std::call_once(cpuid_once_flag, CPUIDInfo::Initialize); 306 } 307 308 #endif // PLATFORM_IS_X86 309 310 } // namespace 311 312 bool TestCPUFeature(CPUFeature feature) { 313 #ifdef PLATFORM_IS_X86 314 return CPUIDInfo::TestFeature(feature); 315 #else 316 return false; 317 #endif 318 } 319 320 std::string CPUVendorIDString() { 321 #ifdef PLATFORM_IS_X86 322 InitCPUIDInfo(); 323 return cpuid->vendor_str(); 324 #else 325 return ""; 326 #endif 327 } 328 329 int CPUFamily() { 330 #ifdef PLATFORM_IS_X86 331 InitCPUIDInfo(); 332 return cpuid->family(); 333 #else 334 return 0; 335 #endif 336 } 337 338 int CPUModelNum() { 339 #ifdef PLATFORM_IS_X86 340 InitCPUIDInfo(); 341 return cpuid->model_num(); 342 #else 343 return 0; 344 #endif 345 } 346 347 } // namespace port 348 } // namespace tensorflow 349