1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/grappler/clusters/utils.h" 17 18 #include "third_party/eigen3/Eigen/Core" 19 20 #if GOOGLE_CUDA 21 #include "cuda/include/cuda.h" 22 #include "cuda/include/cuda_runtime_api.h" 23 #include "cuda/include/cudnn.h" 24 #endif 25 26 #ifdef EIGEN_USE_LIBXSMM 27 #include "include/libxsmm.h" 28 #endif 29 30 #include "tensorflow/core/lib/strings/numbers.h" 31 #include "tensorflow/core/lib/strings/strcat.h" 32 #include "tensorflow/core/platform/cpu_info.h" 33 #include "tensorflow/core/platform/mem.h" 34 35 namespace tensorflow { 36 namespace grappler { 37 38 DeviceProperties GetLocalCPUInfo() { 39 DeviceProperties device; 40 device.set_type("CPU"); 41 42 device.set_vendor(port::CPUVendorIDString()); 43 // Combine cpu family and model into the model string. 44 device.set_model( 45 strings::StrCat((port::CPUFamily() << 4) + port::CPUModelNum())); 46 device.set_frequency(port::NominalCPUFrequency() * 1e-6); 47 device.set_num_cores(port::NumSchedulableCPUs()); 48 device.set_l1_cache_size(Eigen::l1CacheSize()); 49 device.set_l2_cache_size(Eigen::l2CacheSize()); 50 device.set_l3_cache_size(Eigen::l3CacheSize()); 51 52 int64 free_mem = port::AvailableRam(); 53 if (free_mem < INT64_MAX) { 54 device.set_memory_size(free_mem); 55 } 56 57 (*device.mutable_environment())["cpu_instruction_set"] = 58 Eigen::SimdInstructionSetsInUse(); 59 60 (*device.mutable_environment())["eigen"] = strings::StrCat( 61 EIGEN_WORLD_VERSION, ".", EIGEN_MAJOR_VERSION, ".", EIGEN_MINOR_VERSION); 62 #ifdef EIGEN_USE_LIBXSMM 63 (*device.mutable_environment())["libxsmm"] = LIBXSMM_VERSION; 64 #endif 65 66 return device; 67 } 68 69 DeviceProperties GetLocalGPUInfo(int gpu_id) { 70 DeviceProperties device; 71 device.set_type("GPU"); 72 73 #if GOOGLE_CUDA 74 cudaDeviceProp properties; 75 cudaError_t error = cudaGetDeviceProperties(&properties, gpu_id); 76 if (error == cudaSuccess) { 77 device.set_vendor("NVidia"); 78 device.set_model(properties.name); 79 device.set_frequency(properties.clockRate * 1e-3); 80 device.set_num_cores(properties.multiProcessorCount); 81 device.set_num_registers(properties.regsPerMultiprocessor); 82 // For compute capability less than 5, l1 cache size is configurable to 83 // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For 84 // compute capability larger or equal to 5, l1 cache (unified with texture 85 // cache) size is 24 KB. This number may need to be updated for future 86 // compute capabilities. 87 device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); 88 device.set_l2_cache_size(properties.l2CacheSize); 89 device.set_l3_cache_size(0); 90 device.set_shared_memory_size_per_multiprocessor( 91 properties.sharedMemPerMultiprocessor); 92 device.set_memory_size(properties.totalGlobalMem); 93 // 8 is the number of bits per byte. 2 is accounted for 94 // double data rate (DDR). 95 device.set_bandwidth(properties.memoryBusWidth / 8 * 96 properties.memoryClockRate * 2); 97 } 98 99 (*device.mutable_environment())["architecture"] = 100 strings::StrCat(properties.major, ".", properties.minor); 101 (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION); 102 (*device.mutable_environment())["cudnn"] = strings::StrCat(CUDNN_VERSION); 103 #endif 104 105 return device; 106 } 107 108 DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) { 109 if (device.type == "CPU") { 110 return GetLocalCPUInfo(); 111 } else if (device.type == "GPU") { 112 if (device.has_id) { 113 return GetLocalGPUInfo(device.id); 114 } else { 115 return GetLocalGPUInfo(0); 116 } 117 } 118 DeviceProperties result; 119 result.set_type("UNKNOWN"); 120 return result; 121 } 122 123 } // end namespace grappler 124 } // end namespace tensorflow 125