Home | History | Annotate | Download | only in clusters
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/core/grappler/clusters/utils.h"
     17 
     18 #include "third_party/eigen3/Eigen/Core"
     19 
     20 #if GOOGLE_CUDA
     21 #include "cuda/include/cuda.h"
     22 #include "cuda/include/cuda_runtime_api.h"
     23 #include "cuda/include/cudnn.h"
     24 #endif
     25 
     26 #ifdef EIGEN_USE_LIBXSMM
     27 #include "include/libxsmm.h"
     28 #endif
     29 
     30 #include "tensorflow/core/lib/strings/numbers.h"
     31 #include "tensorflow/core/lib/strings/strcat.h"
     32 #include "tensorflow/core/platform/cpu_info.h"
     33 #include "tensorflow/core/platform/mem.h"
     34 
     35 namespace tensorflow {
     36 namespace grappler {
     37 
     38 DeviceProperties GetLocalCPUInfo() {
     39   DeviceProperties device;
     40   device.set_type("CPU");
     41 
     42   device.set_vendor(port::CPUVendorIDString());
     43   // Combine cpu family and model into the model string.
     44   device.set_model(
     45       strings::StrCat((port::CPUFamily() << 4) + port::CPUModelNum()));
     46   device.set_frequency(port::NominalCPUFrequency() * 1e-6);
     47   device.set_num_cores(port::NumSchedulableCPUs());
     48   device.set_l1_cache_size(Eigen::l1CacheSize());
     49   device.set_l2_cache_size(Eigen::l2CacheSize());
     50   device.set_l3_cache_size(Eigen::l3CacheSize());
     51 
     52   int64 free_mem = port::AvailableRam();
     53   if (free_mem < INT64_MAX) {
     54     device.set_memory_size(free_mem);
     55   }
     56 
     57   (*device.mutable_environment())["cpu_instruction_set"] =
     58       Eigen::SimdInstructionSetsInUse();
     59 
     60   (*device.mutable_environment())["eigen"] = strings::StrCat(
     61       EIGEN_WORLD_VERSION, ".", EIGEN_MAJOR_VERSION, ".", EIGEN_MINOR_VERSION);
     62 #ifdef EIGEN_USE_LIBXSMM
     63   (*device.mutable_environment())["libxsmm"] = LIBXSMM_VERSION;
     64 #endif
     65 
     66   return device;
     67 }
     68 
     69 DeviceProperties GetLocalGPUInfo(int gpu_id) {
     70   DeviceProperties device;
     71   device.set_type("GPU");
     72 
     73 #if GOOGLE_CUDA
     74   cudaDeviceProp properties;
     75   cudaError_t error = cudaGetDeviceProperties(&properties, gpu_id);
     76   if (error == cudaSuccess) {
     77     device.set_vendor("NVidia");
     78     device.set_model(properties.name);
     79     device.set_frequency(properties.clockRate * 1e-3);
     80     device.set_num_cores(properties.multiProcessorCount);
     81     device.set_num_registers(properties.regsPerMultiprocessor);
     82     // For compute capability less than 5, l1 cache size is configurable to
     83     // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For
     84     // compute capability larger or equal to 5, l1 cache (unified with texture
     85     // cache) size is 24 KB. This number may need to be updated for future
     86     // compute capabilities.
     87     device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024);
     88     device.set_l2_cache_size(properties.l2CacheSize);
     89     device.set_l3_cache_size(0);
     90     device.set_shared_memory_size_per_multiprocessor(
     91         properties.sharedMemPerMultiprocessor);
     92     device.set_memory_size(properties.totalGlobalMem);
     93     // 8 is the number of bits per byte. 2 is accounted for
     94     // double data rate (DDR).
     95     device.set_bandwidth(properties.memoryBusWidth / 8 *
     96                          properties.memoryClockRate * 2);
     97   }
     98 
     99   (*device.mutable_environment())["architecture"] =
    100       strings::StrCat(properties.major, ".", properties.minor);
    101   (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION);
    102   (*device.mutable_environment())["cudnn"] = strings::StrCat(CUDNN_VERSION);
    103 #endif
    104 
    105   return device;
    106 }
    107 
    108 DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) {
    109   if (device.type == "CPU") {
    110     return GetLocalCPUInfo();
    111   } else if (device.type == "GPU") {
    112     if (device.has_id) {
    113       return GetLocalGPUInfo(device.id);
    114     } else {
    115       return GetLocalGPUInfo(0);
    116     }
    117   }
    118   DeviceProperties result;
    119   result.set_type("UNKNOWN");
    120   return result;
    121 }
    122 
    123 }  // end namespace grappler
    124 }  // end namespace tensorflow
    125