Home | History | Annotate | Download | only in kernels
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
     17 #define TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
     18 
     19 #if GOOGLE_CUDA
     20 
#include <algorithm>
#include <cstdlib>
#include <limits>
#include <unordered_map>
#include <utility>

#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/stream_executor.h"
     28 
     29 namespace tensorflow {
     30 
     31 template <typename T>
     32 inline perftools::gputools::DeviceMemory<T> AsDeviceMemory(const T* cuda_memory,
     33                                                            uint64 size) {
     34   perftools::gputools::DeviceMemoryBase wrapped(const_cast<T*>(cuda_memory),
     35                                                 size * sizeof(T));
     36   perftools::gputools::DeviceMemory<T> typed(wrapped);
     37   return typed;
     38 }
     39 
     40 // A helper class that looks up the best autotuned config from parameters.
     41 // Due to the noisy nature of autotune, especially with multiple devices, it
     42 // only accepts a config if its margin exceeds a threshold.
     43 // For the same shape configs, if a new best config matches the previous best,
     44 // they get promoted; otherwise, the winner gets demoted. This process stops
     45 // when the winner's score exceeds the threshold.
     46 // In a bad case when two configs are very close to each other and flips
     47 // back and forth randomly, the expected number of experiments before autotune
     48 // settles is O(threshold ^ 2). So we recommend that number of warmup runs
     49 // for any benchmarks.
     50 template <typename Parameters, typename Config>
     51 class AutoTuneMap {
     52  public:
     53   bool Find(const Parameters& params, Config* config) const {
     54     mutex_lock lock(mu_);
     55     auto iter = params_config_map_.find(params);
     56     if (iter == params_config_map_.end() ||
     57         (iter->second.score < min_score_threshold_ &&
     58          iter->second.count <= max_autotune_count_)) {
     59       return false;
     60     }
     61     *config = iter->second.config;
     62     return true;
     63   }
     64   void Insert(const Parameters& params, const Config& config) {
     65     mutex_lock lock(mu_);
     66     auto iter = params_config_map_.find(params);
     67     int new_score = 0;
     68     if (iter == params_config_map_.end()) {
     69       // Create a new entry if params is new.
     70       VLOG(1) << GetActionSummary("creates", params, config);
     71       params_config_map_.insert(
     72           std::make_pair(params, ValueType{config, 1, 1}));
     73       new_score = 1;
     74     } else if (iter->second.score < min_score_threshold_ &&
     75                iter->second.count <= max_autotune_count_) {
     76       DCHECK_GT(iter->second.score, 0);
     77       if (iter->second.config != config) {
     78         // If it is different from the current winner, demotes the winner.
     79         VLOG(1) << GetActionSummary("demotes", params, config);
     80         new_score = --iter->second.score;
     81         ++iter->second.count;
     82         if (new_score <= 0) {
     83           VLOG(1) << GetActionSummary("erases", params, config);
     84           params_config_map_.erase(iter);
     85         }
     86       } else {
     87         // If it is the same as the current winner, promotes the winner.
     88         VLOG(1) << GetActionSummary("promotes", params, config);
     89         new_score = ++iter->second.score;
     90         ++iter->second.count;
     91       }
     92     }
     93     if (new_score >= min_score_threshold_) {
     94       VLOG(1) << GetActionSummary("accepts", params, config);
     95     }
     96   }
     97 
     98  private:
     99   AutoTuneMap(const string& name) : name_(name) {
    100     min_score_threshold_ = 1;
    101     int min_warmup_iterations = 10;
    102     const char* threshold_str = getenv("TF_AUTOTUNE_THRESHOLD");
    103     if (threshold_str != nullptr) {
    104       strings::safe_strto32(threshold_str, &min_score_threshold_);
    105     }
    106     const char* min_warmup_iteration_str =
    107         getenv("TF_AUTOTUNE_MIN_WARMUP_ITERATIONS");
    108     if (min_warmup_iteration_str != nullptr) {
    109       strings::safe_strto32(min_warmup_iteration_str, &min_warmup_iterations);
    110     }
    111     min_score_threshold_ = std::max(min_score_threshold_, 1);
    112     max_autotune_count_ = std::max(
    113         5 * min_score_threshold_ * min_score_threshold_, min_warmup_iterations);
    114   }
    115 
    116   template <class Group, class Params, class Cfg>
    117   friend class AutoTuneSingleton;
    118 
    119   struct Hasher {
    120     std::size_t operator()(const Parameters& parameter) const {
    121       return parameter.hash();
    122     }
    123   };
    124 
    125   string GetActionSummary(StringPiece action, const Parameters& params,
    126                           const Config& config) {
    127     return strings::Printf("autotune_map %s %s: %s -> (%s)", name_.c_str(),
    128                            action.ToString().c_str(), params.ToString().c_str(),
    129                            config.ToString().c_str());
    130   }
    131 
    132   mutable mutex mu_;
    133   struct ValueType {
    134     Config config;
    135     int32 score;
    136     int32 count;
    137   };
    138   std::unordered_map<Parameters, ValueType, Hasher> params_config_map_
    139       GUARDED_BY(mu_);
    140   string name_;
    141   int32 min_score_threshold_;
    142   int32 max_autotune_count_;
    143 
    144   TF_DISALLOW_COPY_AND_ASSIGN(AutoTuneMap);
    145 };
    146 
    147 // A Singleton helper that manages the global autotune results by groups.
    148 // The caller specified arbitrary Group type that can distinguish between
    149 // different autotune results, even if their Parameters and Configs are the
    150 // same.
    151 template <class Group, typename Parameters, typename Config>
    152 class AutoTuneSingleton {
    153  public:
    154   typedef AutoTuneMap<Parameters, Config> AutoTuneType;
    155   static AutoTuneType* GetInstance() {
    156     static AutoTuneType* instance = new AutoTuneType(Group::name());
    157     return instance;
    158   }
    159 };
    160 
    161 }  // namespace tensorflow
    162 
    163 #endif  // GOOGLE_CUDA
    164 
    165 #endif  // TENSORFLOW_CORE_KERNELS_GPU_UTILS_H_
    166