Home | History | Annotate | Download | only in stream_executor
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #include "tensorflow/stream_executor/dnn.h"
     17 
     18 #include "tensorflow/stream_executor/lib/strcat.h"
     19 #include "tensorflow/stream_executor/lib/stringprintf.h"
     20 
     21 namespace perftools {
     22 namespace gputools {
     23 namespace dnn {
     24 
// Base-class default: reports that no forward-convolution algorithm list is
// available. Backends that can enumerate algorithms (e.g. cuDNN) override
// this; `out_algorithms` is left untouched here.
bool DnnSupport::GetConvolveAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}
     30 
// Base-class default: reports that no backward-data convolution algorithm
// list is available; overriding backends fill `out_algorithms`.
bool DnnSupport::GetConvolveBackwardDataAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}
     36 
// Base-class default: reports that no backward-filter convolution algorithm
// list is available; overriding backends fill `out_algorithms`.
bool DnnSupport::GetConvolveBackwardFilterAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}
     42 
     43 string QuantizedActivationModeString(QuantizedActivationMode mode) {
     44   switch (mode) {
     45     case dnn::QuantizedActivationMode::k8Bit:
     46       return "uint8";
     47     case dnn::QuantizedActivationMode::k16Bit:
     48       return "uint16";
     49     case dnn::QuantizedActivationMode::k32Bit:
     50       return "int32";
     51     default:
     52       LOG(FATAL) << "Unknown quantized_activation_mode "
     53                  << static_cast<int32>(mode);
     54   }
     55   return "unknown quantized_activation_mode";
     56 }
     57 
     58 string ActivationModeString(ActivationMode mode) {
     59   switch (mode) {
     60     case ActivationMode::kSigmoid:
     61       return "sigmoid";
     62     case ActivationMode::kRelu:
     63       return "relu";
     64     case ActivationMode::kRelu6:
     65       return "relu6";
     66     case ActivationMode::kReluX:
     67       return "reluX";
     68     case ActivationMode::kTanh:
     69       return "tanh";
     70     case ActivationMode::kBandPass:
     71       return "bandpass";
     72     default:
     73       LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
     74   }
     75   return "unknown activation_mode";
     76 }
     77 
     78 string ElementwiseOperationString(ElementwiseOperation op) {
     79   switch (op) {
     80     case ElementwiseOperation::kAdd:
     81       return "add";
     82     case ElementwiseOperation::kMultiply:
     83       return "multiply";
     84     default:
     85       LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
     86   }
     87   return "unknown element wise op";
     88 }
     89 
     90 string DataLayoutString(DataLayout layout) {
     91   switch (layout) {
     92     case DataLayout::kYXDepthBatch:
     93       return "YXDepthBatch";
     94     case DataLayout::kYXBatchDepth:
     95       return "YXBatchDepth";
     96     case DataLayout::kBatchYXDepth:
     97       return "BatchYXDepth";
     98     case DataLayout::kBatchDepthYX:
     99       return "BatchDepthYX";
    100     case DataLayout::kBatchDepthYX4:
    101       return "BatchDepthYX4";
    102     default:
    103       LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
    104   }
    105   return "unknown data layout";
    106 }
    107 
    108 string FilterLayoutString(FilterLayout layout) {
    109   switch (layout) {
    110     case FilterLayout::kOutputInputYX:
    111       return "OutputInputYX";
    112     case FilterLayout::kOutputInputYX4:
    113       return "OutputInputYX4";
    114     case FilterLayout::kInputYXOutput:
    115       return "InputYXOutput";
    116     case FilterLayout::kYXInputOutput:
    117       return "YXInputOutput";
    118     default:
    119       LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
    120   }
    121   return "unknown filter layout";
    122 }
    123 
    124 string PadAlignmentString(PadAlignment alignment) {
    125   switch (alignment) {
    126     case PadAlignment::kDefault:
    127       return "default";
    128     case PadAlignment::kCudnnPadding:
    129       return "cuDNN padding";
    130     case PadAlignment::kTensorFlowPadding:
    131       return "TensorFlow padding";
    132   }
    133   return "unknown pad alignment";
    134 }
    135 
    136 string ShortPoolingModeString(PoolingMode mode) {
    137   switch (mode) {
    138     case PoolingMode::kMaximum:
    139       return "Max";
    140     case PoolingMode::kAverage:
    141       return "Avg";
    142     default:
    143       LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode);
    144   }
    145   return "unknown filter layout";
    146 }
    147 
    148 std::tuple<int, int, int> GetDimIndices(const DataLayout& layout,
    149                                         const int data_dims) {
    150   int depth_idx, batch_idx, spatial_idx;
    151   switch (layout) {
    152     case DataLayout::kYXBatchDepth:
    153       depth_idx = data_dims - 1;
    154       batch_idx = data_dims - 2;
    155       spatial_idx = 0;
    156       break;
    157 
    158     case DataLayout::kYXDepthBatch:
    159       depth_idx = data_dims - 2;
    160       batch_idx = data_dims - 1;
    161       spatial_idx = 0;
    162       break;
    163 
    164     case DataLayout::kBatchYXDepth:
    165       depth_idx = data_dims - 1;
    166       batch_idx = 0;
    167       spatial_idx = 1;
    168       break;
    169 
    170     case DataLayout::kBatchDepthYX:
    171     case DataLayout::kBatchDepthYX4:
    172       depth_idx = 1;
    173       batch_idx = 0;
    174       spatial_idx = 2;
    175       break;
    176   }
    177 
    178   return std::make_tuple(depth_idx, batch_idx, spatial_idx);
    179 }
    180 
    181 std::vector<int64> ReorderDims(const std::vector<int64>& input,
    182                                const DataLayout& from, const DataLayout& to) {
    183   if (from == to) return input;
    184 
    185   int d_idx_from, b_idx_from, spatial_idx_from;
    186   int d_idx_to, b_idx_to, spatial_idx_to;
    187 
    188   std::tie(d_idx_from, b_idx_from, spatial_idx_from) =
    189       GetDimIndices(from, input.size());
    190   std::tie(d_idx_to, b_idx_to, spatial_idx_to) =
    191       GetDimIndices(to, input.size());
    192 
    193   std::vector<int64> reordered(input.size());
    194   reordered[b_idx_to] = input[b_idx_from];
    195   reordered[d_idx_to] = input[d_idx_from];
    196 
    197   for (size_t i = 0; i < input.size() - 2;
    198        i++, spatial_idx_from++, spatial_idx_to++) {
    199     reordered[spatial_idx_to] = input[spatial_idx_from];
    200   }
    201 
    202   return reordered;
    203 }
    204 
    205 // -- AlgorithmConfig
    206 
// Debug string of the form "<algo id>, <no-scratch algo id>".
string AlgorithmConfig::ToString() const {
  return port::StrCat(algorithm_.algo_id(), ", ",
                      algorithm_no_scratch_.algo_id());
}
    211 
    212 // -- BatchDescriptor
    213 
// Constructs a batch descriptor with `ndims` spatial dimensions. Counts and
// spatial sizes start at zero; value range defaults to [0, 0], layout to
// kYXDepthBatch, and quantized activations to 8-bit.
BatchDescriptor::BatchDescriptor(int ndims)
    : count_(0),
      feature_map_count_(0),
      spatial_size_(ndims, 0),
      value_max_(0.0),
      value_min_(0.0),
      layout_(DataLayout::kYXDepthBatch),
      ndims_(ndims),
      quantized_activation_mode_(QuantizedActivationMode::k8Bit) {}
    223 
    224 BatchDescriptor::BatchDescriptor() : BatchDescriptor(/*ndims=*/2) {}
    225 
    226 std::vector<int64> BatchDescriptor::full_dims(const DataLayout& layout) const {
    227   std::vector<int64> bdyx_dims(ndims_ + 2);
    228   bdyx_dims[0] = count();
    229   bdyx_dims[1] = feature_map_count();
    230   std::copy(spatial_size_.begin(), spatial_size_.end(), bdyx_dims.begin() + 2);
    231   return ReorderDims(bdyx_dims, DataLayout::kBatchDepthYX, layout);
    232 }
    233 
// Returns the dense physical strides of this batch, permuted into `layout`.
// Strides are computed over the physical dims (layout_) with the innermost
// dimension contiguous, then reordered. kBatchDepthYX4 (vectorized layout)
// is unsupported and LOG(FATAL)s.
std::vector<int64> BatchDescriptor::full_strides(
    const DataLayout& layout) const {
  if (layout_ == DataLayout::kBatchDepthYX4) {
    LOG(FATAL)
        << "Cannot compute full strides for batch descriptor " << ToString()
        << ", because its layout is kBatchDepthYX4. In fact, "
           "cudnnSetTensorNdDescriptor doesn't work for kBatchDepthYX4 at all. "
           "Use cudnnSetTensor4DDescriptor to set cudnnTensorDescriptor_t "
           "instead.";
  }
  std::vector<int64> phys_dims = full_dims(layout_);
  std::vector<int64> phys_strides(phys_dims.size());
  // Innermost physical dimension has stride 1; each outer stride is the
  // product of all inner dimension sizes.
  phys_strides[ndims_ + 1] = 1;
  for (int i = ndims_; i >= 0; i--) {
    phys_strides[i] = phys_strides[i + 1] * phys_dims[i + 1];
  }
  return ReorderDims(phys_strides, layout_, layout);
}
    252 
// Copies every field of `other` into this descriptor (member-wise clone).
void BatchDescriptor::CloneFrom(const BatchDescriptor& other) {
  count_ = other.count_;
  feature_map_count_ = other.feature_map_count_;
  spatial_size_ = other.spatial_size_;
  value_max_ = other.value_max_;
  value_min_ = other.value_min_;
  layout_ = other.layout_;
  ndims_ = other.ndims_;
  quantized_activation_mode_ = other.quantized_activation_mode_;
}
    263 
// Verbose debug string listing count, feature maps, spatial sizes, value
// range, and layout name.
string BatchDescriptor::ToString() const {
  string spatial;
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&spatial, "%lld ", spatial_size_[i]);
  }
  return port::Printf(
      "{count: %lld feature_map_count: %lld spatial: %s "
      "value_min: %f value_max: %f layout: %s}",
      count_, feature_map_count_, spatial.c_str(), value_min_, value_max_,
      DataLayoutString(layout_).c_str());
}
    275 
// Compact debug string: per-field tokens ("d<depth>", "b<batch>",
// "s<spatial...>") concatenated in the order dictated by layout(), with an
// optional value-range / 16-bit suffix.
string BatchDescriptor::ToShortString() const {
  // All the constituent strings are less than 15 characters, so the
  // small string optimization ensures that there will be at most one
  // heap memory allocation.
  string depth = port::StrCat("d", feature_map_count());
  string batch = port::StrCat("b", count());

  string spatial = "s";
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&spatial, "%lld ", spatial_size_[i]);
  }

  string suffix;
  if (value_min() != value_max()) {
    // Only emit the value range when it is non-degenerate.
    port::StrAppend(&suffix, "[", value_min(), ";", value_max(), "]");
  }
  if (quantized_activation_mode() == QuantizedActivationMode::k16Bit) {
    suffix += "_16bit";
  }

  // Token order mirrors the memory layout.
  switch (layout()) {
    case DataLayout::kYXDepthBatch:
      return port::StrCat(spatial, depth, batch, suffix);
    case DataLayout::kYXBatchDepth:
      return port::StrCat(spatial, batch, depth, suffix);
    case DataLayout::kBatchYXDepth:
      return port::StrCat(batch, spatial, depth, suffix);
    case DataLayout::kBatchDepthYX:
      return port::StrCat(batch, depth, spatial, suffix);
    case DataLayout::kBatchDepthYX4:
      return port::StrCat(batch, depth, spatial, suffix, "(VECT_C)");
    default:
      LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout());
      return "";  // Avoid return warning (unreachable)
  }
}
    312 
    313 int64 BatchDescriptor::NodesPerFeatureMap() const {
    314   int64 ret = 1;
    315   for (int i = 0; i < ndims_; i++) {
    316     ret *= spatial_size_[i];
    317   }
    318   return ret;
    319 }
    320 
// Number of elements in one batch item: spatial nodes times feature maps.
int64 BatchDescriptor::NodesAcrossFeatureMaps() const {
  return NodesPerFeatureMap() * feature_map_count_;
}
    324 
// Total number of elements across the whole batch:
// count * feature_maps * spatial product.
int64 BatchDescriptor::ElementCount() const {
  return count_ * feature_map_count_ * NodesPerFeatureMap();
}
    328 
// Weight count for a dense layer mapping `input` to `output`: one weight per
// (input node, output node) pair.
int64 BatchDescriptor::FullyConnectedWeightCount(
    const BatchDescriptor& input, const BatchDescriptor& output) {
  return input.NodesAcrossFeatureMaps() * output.NodesAcrossFeatureMaps();
}
    333 
// Bias count for a dense layer: one bias per output node.
int64 BatchDescriptor::FullyConnectedBiasCount(const BatchDescriptor& output) {
  return output.NodesAcrossFeatureMaps();
}
    337 
    338 BatchDescriptor BatchDescriptor::DepthConcatenateOutputDescriptor(
    339     port::ArraySlice<dnn::BatchDescriptor> inputs) {
    340   if (inputs.empty()) {
    341     return BatchDescriptor();
    342   }
    343   int feature_map_count = 0;
    344   for (const auto& dimensions : inputs) {
    345     feature_map_count += dimensions.feature_map_count();
    346   }
    347   BatchDescriptor output = inputs[0];
    348   output.set_feature_map_count(feature_map_count);
    349   return output;
    350 }
    351 
    352 // -- FilterDescriptor
    353 
// Constructs a filter descriptor with `ndims` spatial dimensions, zeroed
// feature-map counts and filter sizes, and the kOutputInputYX layout.
FilterDescriptor::FilterDescriptor(int ndims)
    : output_feature_map_count_(0),
      input_feature_map_count_(0),
      input_filter_dims_(ndims, 0),
      ndims_(ndims),
      layout_(FilterLayout::kOutputInputYX) {}
    360 
    361 FilterDescriptor::FilterDescriptor() : FilterDescriptor(/*ndims=*/2) {}
    362 
    363 FilterDescriptor::~FilterDescriptor() {}
    364 
    365 void FilterDescriptor::CloneFrom(const FilterDescriptor& other) {
    366   set_output_feature_map_count(other.output_feature_map_count())
    367       .set_input_feature_map_count(other.input_feature_map_count())
    368       .set_layout(other.layout());
    369   input_filter_dims_ = other.input_filter_dims_;
    370   ndims_ = other.ndims_;
    371 }
    372 
    373 string FilterDescriptor::ToString() const {
    374   string desc = port::Printf(
    375       "{output_feature_map_count: %lld input_feature_map_count: %lld "
    376       "layout: %s shape: ",
    377       output_feature_map_count_, input_feature_map_count_,
    378       FilterLayoutString(layout_).c_str());
    379   for (int i = 0; i < ndims_; i++) {
    380     port::Appendf(&desc, "%lld ", input_filter_dims_[i]);
    381   }
    382   port::StrAppend(&desc, "}");
    383 
    384   return desc;
    385 }
    386 
// Compact debug string: "od<out>", "id<in>", and "s<dims...>" tokens ordered
// according to layout_.
string FilterDescriptor::ToShortString() const {
  // All the constituent strings are less than 15 characters, so the
  // small string optimization ensures that there will be at most one
  // heap memory allocation.
  string od = port::StrCat("od", output_feature_map_count_);
  string id = port::StrCat("id", input_feature_map_count_);

  string spatial = "s";
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&spatial, "%lld ", input_filter_dims_[i]);
  }

  // Token order mirrors the filter layout.
  switch (layout_) {
    case FilterLayout::kOutputInputYX:
      return port::StrCat(od, id, spatial);
    case FilterLayout::kOutputInputYX4:
      return port::StrCat(od, id, spatial, "(VECT_C)");
    case FilterLayout::kInputYXOutput:
      return port::StrCat(id, spatial, od);
    case FilterLayout::kYXInputOutput:
      return port::StrCat(spatial, id, od);
    default:
      LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout_);
      return "";  // Avoid return warning (unreachable)
  }
}
    413 
    414 int64 FilterDescriptor::ComputeWeightCount() const {
    415   int64 ret = output_feature_map_count_ * input_feature_map_count_;
    416   for (int i = 0; i < ndims_; i++) {
    417     ret *= input_filter_dims_[i];
    418   }
    419   return ret;
    420 }
    421 
    422 // -- ConvolutionDescriptor
    423 
// Constructs a convolution descriptor with `ndims` spatial dimensions:
// zero padding, unit strides, unit dilation, and default pad alignment.
ConvolutionDescriptor::ConvolutionDescriptor(int ndims)
    : zero_padding_(ndims, 0),
      filter_strides_(ndims, 1),
      dilation_rates_(ndims, 1),
      pad_alignment_(PadAlignment::kDefault),
      ndims_(ndims) {}
    430 
// Default: the common 2-D (height x width) convolution case.
ConvolutionDescriptor::ConvolutionDescriptor()
    : ConvolutionDescriptor(/*ndims=*/2) {}
    433 
    434 ConvolutionDescriptor::~ConvolutionDescriptor() {}
    435 
// Verbose debug string listing per-dimension padding, strides, and dilation
// rates, plus the pad alignment name.
string ConvolutionDescriptor::ToString() const {
  string padding;
  string strides;
  string dilations;
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&padding, "%lld ", zero_padding_[i]);
    port::Appendf(&strides, "%lld ", filter_strides_[i]);
    port::Appendf(&dilations, "%lld ", dilation_rates_[i]);
  }

  return port::Printf(
      "{zero_padding: %s pad_alignment: %s filter_strides: %s dilation_rates: "
      "%s}",
      padding.c_str(), PadAlignmentString(pad_alignment_).c_str(),
      strides.c_str(), dilations.c_str());
}
    452 
// Compact debug string: underscore-separated "p<i>:<pad>", "s<i>:<stride>",
// and "d<i>:<dilation>" tokens for each spatial dimension.
string ConvolutionDescriptor::ToShortString() const {
  string desc;
  for (int i = 0; i < ndims_; i++) {
    if (i > 0) port::Appendf(&desc, "_");
    port::Appendf(&desc, "p%d:%lld", i, zero_padding_[i]);
  }
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&desc, "_s%d:%lld", i, filter_strides_[i]);
  }
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&desc, "_d%d:%lld", i, dilation_rates_[i]);
  }
  return desc;
}
    467 
    468 // -- PoolingDescriptor
    469 
// Constructs a pooling descriptor with `ndims` spatial dimensions: max
// pooling, zero-sized window, zero padding, unit strides, NaN propagation
// off.
PoolingDescriptor::PoolingDescriptor(int ndims)
    : mode_(dnn::PoolingMode::kMaximum),
      ndims_(ndims),
      propagate_nans_(false),
      window_(ndims, 0),
      padding_(ndims, 0),
      strides_(ndims, 1) {}
    477 
    478 PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {}
    479 
// Copies every field of `other` into this descriptor (member-wise clone).
void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) {
  mode_ = other.mode_;
  ndims_ = other.ndims_;
  window_ = other.window_;
  padding_ = other.padding_;
  strides_ = other.strides_;
  propagate_nans_ = other.propagate_nans_;
}
    488 
    489 string PoolingDescriptor::ToString() const {
    490   const char* mode_string =
    491       mode_ == dnn::PoolingMode::kMaximum ? "kMaximum" : "kAverage";
    492 
    493   string window, strides, padding;
    494   for (int i = 0; i < ndims_; i++) {
    495     port::Appendf(&window, "%lld ", window_[i]);
    496     port::Appendf(&strides, "%lld ", strides_[i]);
    497     port::Appendf(&padding, "%lld", padding_[i]);
    498   }
    499 
    500   const char* propagate_string = propagate_nans_ ? "Yes" : "No";
    501 
    502   return port::Printf(
    503       "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}",
    504       mode_string, window.c_str(), strides.c_str(), padding.c_str(),
    505       propagate_string);
    506 }
    507 
    508 string PoolingDescriptor::ToShortString() const {
    509   string window, strides, padding;
    510   for (int i = 0; i < ndims_; i++) {
    511     port::Appendf(&window, "_w%d:%lld", i, window_[i]);
    512     port::Appendf(&strides, "_s%d:%lld", i, strides_[i]);
    513     port::Appendf(&padding, "_p%d:%lld", i, padding_[i]);
    514   }
    515   return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg",
    516                       window, strides, padding,
    517                       propagate_nans_ ? "propagate_nans" : "ignore_nans");
    518 }
    519 
    520 // -- NormalizeDescriptor
    521 
// Constructs a normalization descriptor with all parameters zeroed and
// wrap-around disabled.
NormalizeDescriptor::NormalizeDescriptor()
    : bias_(0.0),
      range_(0),
      alpha_(0.0),
      beta_(0.0),
      wrap_around_(false),
      segment_size_(0) {}
    529 
// Copies every field of `other` into this descriptor (member-wise clone).
void NormalizeDescriptor::CloneFrom(const NormalizeDescriptor& other) {
  bias_ = other.bias_;
  range_ = other.range_;
  alpha_ = other.alpha_;
  beta_ = other.beta_;
  wrap_around_ = other.wrap_around_;
  segment_size_ = other.segment_size_;
}
    538 
// Verbose debug string listing all normalization parameters.
string NormalizeDescriptor::ToString() const {
  return port::Printf(
      "{bias: %f range: %d alpha: %f beta: %f wrap_around: %d "
      "segment_size: %d}",
      bias_, range_, alpha_, beta_, wrap_around_, segment_size_);
}
    545 
// Compact debug string: underscore-separated "name:value" pairs for each
// normalization parameter.
string NormalizeDescriptor::ToShortString() const {
  return port::StrCat("bias:", bias_, "_range:", range_, "_alpha:", alpha_,
                      "_beta:", beta_, "_wrap:", wrap_around_, "_size:",
                      segment_size_);
}
    551 
    552 }  // namespace dnn
    553 }  // namespace gputools
    554 }  // namespace perftools
    555