/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/stream_executor/dnn.h"

#include "absl/strings/str_cat.h"
#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/stream_executor/lib/stringprintf.h"

namespace stream_executor {
namespace dnn {

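// Combines the opaque algorithm id with the tensor-ops flag, so that
// otherwise-identical algorithms with and without tensor ops hash to
// different values.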
uint64 AlgorithmDesc::hash() const {
  return ::tensorflow::Hash64Combine(algo_id(), tensor_ops_enabled());
}

bool DnnSupport::GetConvolveAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}

bool DnnSupport::GetRnnAlgorithms(std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}

bool DnnSupport::GetConvolveBackwardDataAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}

bool DnnSupport::GetConvolveBackwardFilterAlgorithms(
    bool with_winograd_nonfused, int cc_major, int cc_minor,
    std::vector<AlgorithmDesc>* out_algorithms) {
  return false;
}

string QuantizedActivationModeString(QuantizedActivationMode mode) {
  switch (mode) {
    case dnn::QuantizedActivationMode::k8Bit:
      return "uint8";
    case dnn::QuantizedActivationMode::k16Bit:
      return "uint16";
    case dnn::QuantizedActivationMode::k32Bit:
      return "int32";
    default:
      LOG(FATAL) << "Unknown quantized_activation_mode "
                 << static_cast<int32>(mode);
  }
  return "unknown quantized_activation_mode";
}

string ActivationModeString(ActivationMode mode) {
  switch (mode) {
    case ActivationMode::kSigmoid:
      return "sigmoid";
    case ActivationMode::kRelu:
      return "relu";
    case ActivationMode::kRelu6:
      return "relu6";
    case ActivationMode::kReluX:
      return "reluX";
    case ActivationMode::kTanh:
      return "tanh";
    case ActivationMode::kBandPass:
      return "bandpass";
    default:
      LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode);
  }
  return "unknown activation_mode";
}

string ElementwiseOperationString(ElementwiseOperation op) {
  switch (op) {
    case ElementwiseOperation::kAdd:
      return "add";
    case ElementwiseOperation::kMultiply:
      return "multiply";
    default:
      LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op);
  }
  return "unknown elementwise op";
}

string DataLayoutString(DataLayout layout) {
  switch (layout) {
    case DataLayout::kYXDepthBatch:
      return "YXDepthBatch";
    case DataLayout::kYXBatchDepth:
      return "YXBatchDepth";
    case DataLayout::kBatchYXDepth:
      return "BatchYXDepth";
    case DataLayout::kBatchDepthYX:
      return "BatchDepthYX";
    case DataLayout::kBatchDepthYX4:
      return "BatchDepthYX4";
    default:
      LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout);
  }
  return "unknown data layout";
}

string FilterLayoutString(FilterLayout layout) {
  switch (layout) {
    case FilterLayout::kOutputInputYX:
      return "OutputInputYX";
    case FilterLayout::kOutputYXInput:
      return "OutputYXInput";
    case FilterLayout::kOutputInputYX4:
      return "OutputInputYX4";
    case FilterLayout::kInputYXOutput:
      return "InputYXOutput";
    case FilterLayout::kYXInputOutput:
      return "YXInputOutput";
    default:
      LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout);
  }
  return "unknown filter layout";
}

string PadAlignmentString(PadAlignment alignment) {
  switch (alignment) {
    case PadAlignment::kDefault:
      return "default";
    case PadAlignment::kCudnnPadding:
      return "cuDNN padding";
    case PadAlignment::kTensorFlowPadding:
      return "TensorFlow padding";
  }
  return "unknown pad alignment";
}

std::ostream& operator<<(std::ostream& str, dnn::PadAlignment alignment) {
  return str << PadAlignmentString(alignment);
}

string ShortPoolingModeString(PoolingMode mode) {
  switch (mode) {
    case PoolingMode::kMaximum:
      return "Max";
    case PoolingMode::kAverage:
      return "Avg";
    default:
      LOG(FATAL) << "Unknown pooling mode " << static_cast<int32>(mode);
  }
  return "unknown pooling mode";
}

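// Returns the indices of the (depth, batch, first spatial) dimensions for a
// tensor of rank `data_dims` in the given layout. For example, for
// kBatchDepthYX with data_dims == 4 this returns (1, 0, 2).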
std::tuple<int, int, int> GetDimIndices(const DataLayout& layout,
                                        const int data_dims) {
  int depth_idx, batch_idx, spatial_idx;
  switch (layout) {
    case DataLayout::kYXBatchDepth:
      depth_idx = data_dims - 1;
      batch_idx = data_dims - 2;
      spatial_idx = 0;
      break;

    case DataLayout::kYXDepthBatch:
      depth_idx = data_dims - 2;
      batch_idx = data_dims - 1;
      spatial_idx = 0;
      break;

    case DataLayout::kBatchYXDepth:
      depth_idx = data_dims - 1;
      batch_idx = 0;
      spatial_idx = 1;
      break;

    case DataLayout::kBatchDepthYX:
    case DataLayout::kBatchDepthYX4:
      depth_idx = 1;
      batch_idx = 0;
      spatial_idx = 2;
      break;

    default:
      LOG(FATAL) << "Unknown layout " << layout;
  }

  return std::make_tuple(depth_idx, batch_idx, spatial_idx);
}

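// Permutes `input` from layout `from` to layout `to`. As an illustrative
// example, dims {10, 3, 32, 64} in kBatchDepthYX (NCHW) reordered to
// kBatchYXDepth (NHWC) yield {10, 32, 64, 3}.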
std::vector<int64> ReorderDims(const std::vector<int64>& input,
                               const DataLayout& from, const DataLayout& to) {
  if (from == to) return input;

  int d_idx_from, b_idx_from, spatial_idx_from;
  int d_idx_to, b_idx_to, spatial_idx_to;

  std::tie(d_idx_from, b_idx_from, spatial_idx_from) =
      GetDimIndices(from, input.size());
  std::tie(d_idx_to, b_idx_to, spatial_idx_to) =
      GetDimIndices(to, input.size());

  std::vector<int64> reordered(input.size());
  reordered[b_idx_to] = input[b_idx_from];
  reordered[d_idx_to] = input[d_idx_from];

  for (size_t i = 0; i < input.size() - 2;
       i++, spatial_idx_from++, spatial_idx_to++) {
    reordered[spatial_idx_to] = input[spatial_idx_from];
  }

  return reordered;
}

// -- AlgorithmConfig

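// Renders as "<algo_id>, <algo_id_no_scratch>", with -1 standing in for an
// unset algorithm, so an empty config stringifies as "-1, -1".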
string AlgorithmConfig::ToString() const {
  AlgorithmDesc::Index algo_id = -1;
  if (algorithm().has_value()) {
    algo_id = algorithm()->algo_id();
  }
  AlgorithmDesc::Index algo_id_no_scratch = -1;
  if (algorithm_no_scratch().has_value()) {
    algo_id_no_scratch = algorithm_no_scratch()->algo_id();
  }
  return absl::StrCat(algo_id, ", ", algo_id_no_scratch);
}

// -- BatchDescriptor

BatchDescriptor::BatchDescriptor(int ndims)
    : value_max_(0.0),
      value_min_(0.0),
      quantized_activation_mode_(QuantizedActivationMode::k8Bit) {
  tensor_.mutable_dimensions()->Resize(ndims + 2, 0);
  set_layout(DataLayout::kYXDepthBatch);
}

BatchDescriptor::BatchDescriptor() : BatchDescriptor(/*ndims=*/2) {}

std::vector<int64> BatchDescriptor::full_dims(const DataLayout& layout) const {
  std::vector<int64> bdyx_dims(ndims() + 2);
  bdyx_dims[0] = count();
  bdyx_dims[1] = feature_map_count();
  std::copy(spatial_size().begin(), spatial_size().end(),
            bdyx_dims.begin() + 2);
  return ReorderDims(bdyx_dims, DataLayout::kBatchDepthYX, layout);
}

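// Computes the physical (row-major) strides of this descriptor's own layout,
// then reorders them into `layout`. As an illustrative example, dims
// {10, 3, 32, 64} in kBatchDepthYX give strides {6144, 2048, 64, 1}; the
// innermost dimension is contiguous.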
std::vector<int64> BatchDescriptor::full_strides(
    const DataLayout& layout) const {
  if (this->layout() == DataLayout::kBatchDepthYX4) {
    LOG(FATAL)
        << "Cannot compute full strides for batch descriptor " << ToString()
        << ", because its layout is kBatchDepthYX4. In fact, "
           "cudnnSetTensorNdDescriptor doesn't work for kBatchDepthYX4 at all. "
           "Use cudnnSetTensor4DDescriptor to set cudnnTensorDescriptor_t "
           "instead.";
  }
  std::vector<int64> phys_dims = full_dims(this->layout());
  std::vector<int64> phys_strides(phys_dims.size());
  phys_strides[ndims() + 1] = 1;
  for (int i = ndims(); i >= 0; i--) {
    phys_strides[i] = phys_strides[i + 1] * phys_dims[i + 1];
  }
  return ReorderDims(phys_strides, this->layout(), layout);
}

void BatchDescriptor::CloneFrom(const BatchDescriptor& other) {
  tensor_ = other.tensor_;
  value_max_ = other.value_max_;
  value_min_ = other.value_min_;
  quantized_activation_mode_ = other.quantized_activation_mode_;
}

string BatchDescriptor::ToString() const {
  string spatial;
  for (int i = 0; i < ndims(); i++) {
    port::Appendf(&spatial, "%lld ", spatial_size()[i]);
  }
  return port::Printf(
      "{count: %lld feature_map_count: %lld spatial: %s "
      "value_min: %f value_max: %f layout: %s}",
      count(), feature_map_count(), spatial.c_str(), value_min_, value_max_,
      DataLayoutString(layout()).c_str());
}

string BatchDescriptor::ToShortString() const {
  // All the constituent strings are less than 15 characters, so the
  // small string optimization ensures that there will be at most one
  // heap memory allocation.
  string depth = absl::StrCat("d", feature_map_count());
  string batch = absl::StrCat("b", count());

  string spatial = "s";
  for (int i = 0; i < ndims(); i++) {
    port::Appendf(&spatial, "%lld ", spatial_size()[i]);
  }

  string suffix;
  if (value_min() != value_max()) {
    absl::StrAppend(&suffix, "[", value_min(), ";", value_max(), "]");
  }
  if (quantized_activation_mode() == QuantizedActivationMode::k16Bit) {
    suffix += "_16bit";
  }

  switch (layout()) {
    case DataLayout::kYXDepthBatch:
      return absl::StrCat(spatial, depth, batch, suffix);
    case DataLayout::kYXBatchDepth:
      return absl::StrCat(spatial, batch, depth, suffix);
    case DataLayout::kBatchYXDepth:
      return absl::StrCat(batch, spatial, depth, suffix);
    case DataLayout::kBatchDepthYX:
      return absl::StrCat(batch, depth, spatial, suffix);
    case DataLayout::kBatchDepthYX4:
      return absl::StrCat(batch, depth, spatial, suffix, "(VECT_C)");
    default:
      LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout());
      return "";  // Avoid return warning (unreachable)
  }
}

int64 BatchDescriptor::NodesPerFeatureMap() const {
  int64 ret = 1;
  for (int i = 0; i < ndims(); i++) {
    ret *= spatial_size()[i];
  }
  return ret;
}

int64 BatchDescriptor::NodesAcrossFeatureMaps() const {
  return NodesPerFeatureMap() * feature_map_count();
}

int64 BatchDescriptor::ElementCount() const {
  return count() * feature_map_count() * NodesPerFeatureMap();
}

int64 BatchDescriptor::FullyConnectedWeightCount(
    const BatchDescriptor& input, const BatchDescriptor& output) {
  return input.NodesAcrossFeatureMaps() * output.NodesAcrossFeatureMaps();
}

int64 BatchDescriptor::FullyConnectedBiasCount(const BatchDescriptor& output) {
  return output.NodesAcrossFeatureMaps();
}

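// As an illustrative example, depth-concatenating inputs with 3 and 5 feature
// maps yields a descriptor with 8 feature maps; all other dimensions are
// copied from the first input.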
BatchDescriptor BatchDescriptor::DepthConcatenateOutputDescriptor(
    port::ArraySlice<dnn::BatchDescriptor> inputs) {
  if (inputs.empty()) {
    return BatchDescriptor();
  }
  int feature_map_count = 0;
  for (const auto& dimensions : inputs) {
    feature_map_count += dimensions.feature_map_count();
  }
  BatchDescriptor output = inputs[0];
  output.set_feature_map_count(feature_map_count);
  return output;
}

TensorDescriptorProto BatchDescriptor::ToProto(DataType data_type) const {
  CHECK_EQ(0.0, value_max_);
  CHECK_EQ(0.0, value_min_);
  CHECK(quantized_activation_mode_ == QuantizedActivationMode::k8Bit);

  TensorDescriptorProto ret = tensor_;
  ret.set_data_type(data_type);
  return ret;
}

// -- FilterDescriptor

FilterDescriptor::FilterDescriptor(int ndims) {
  tensor_.mutable_dimensions()->Resize(ndims + 2, 0);
  set_layout(FilterLayout::kOutputInputYX);
}

FilterDescriptor::FilterDescriptor() : FilterDescriptor(/*ndims=*/2) {}

FilterDescriptor::~FilterDescriptor() {}

void FilterDescriptor::CloneFrom(const FilterDescriptor& other) {
  tensor_ = other.tensor_;
}

string FilterDescriptor::ToString() const {
  string desc = port::Printf(
      "{output_feature_map_count: %lld input_feature_map_count: %lld "
      "layout: %s shape: ",
      output_feature_map_count(), input_feature_map_count(),
      FilterLayoutString(layout()).c_str());
  for (int i = 0; i < ndims(); i++) {
    port::Appendf(&desc, "%lld ", input_filter_dims()[i]);
  }
  absl::StrAppend(&desc, "}");

  return desc;
}

string FilterDescriptor::ToShortString() const {
  // All the constituent strings are less than 15 characters, so the
  // small string optimization ensures that there will be at most one
  // heap memory allocation.
  string od = absl::StrCat("od", output_feature_map_count());
  string id = absl::StrCat("id", input_feature_map_count());

  string spatial = "s";
  for (int i = 0; i < ndims(); i++) {
    port::Appendf(&spatial, "%lld ", input_filter_dims()[i]);
  }

  switch (layout()) {
    case FilterLayout::kOutputInputYX:
      return absl::StrCat(od, id, spatial);
    case FilterLayout::kOutputYXInput:
      return absl::StrCat(od, spatial, id);
    case FilterLayout::kOutputInputYX4:
      return absl::StrCat(od, id, spatial, "(VECT_C)");
    case FilterLayout::kInputYXOutput:
      return absl::StrCat(id, spatial, od);
    case FilterLayout::kYXInputOutput:
      return absl::StrCat(spatial, id, od);
    default:
      LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout());
      return "";  // Avoid return warning (unreachable)
  }
}

int64 FilterDescriptor::ComputeWeightCount() const {
  int64 ret = output_feature_map_count() * input_feature_map_count();
  for (int i = 0; i < ndims(); i++) {
    ret *= input_filter_dims()[i];
  }
  return ret;
}

TensorDescriptorProto FilterDescriptor::ToProto(DataType data_type) const {
  TensorDescriptorProto ret = tensor_;
  ret.set_data_type(data_type);
  return ret;
}

// -- ConvolutionDescriptor

ConvolutionDescriptor::ConvolutionDescriptor(int ndims) {
  proto_.mutable_paddings()->Resize(ndims, 0);
  proto_.mutable_strides()->Resize(ndims, 1);
  proto_.mutable_dilations()->Resize(ndims, 1);
  proto_.set_group_count(1);
  proto_.set_convolution_mode(ConvolutionMode::CROSS_CORRELATION);
}

ConvolutionDescriptor::ConvolutionDescriptor()
    : ConvolutionDescriptor(/*ndims=*/2) {}

ConvolutionDescriptor::~ConvolutionDescriptor() {}

string ConvolutionDescriptor::ToString() const {
  string padding;
  string strides;
  string dilations;
  for (int i = 0; i < ndims(); i++) {
    port::Appendf(&padding, "%lld ", this->padding()[i]);
    port::Appendf(&strides, "%lld ", this->strides()[i]);
    port::Appendf(&dilations, "%lld ", this->dilations()[i]);
  }

  return port::Printf(
      "{zero_padding: %s pad_alignment: %s filter_strides: %s dilation_rates: "
      "%s}",
      padding.c_str(), PadAlignmentString(pad_alignment()).c_str(),
      strides.c_str(), dilations.c_str());
}

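// Renders, for example, a 2D convolution with padding 1, stride 2, and
// dilation 1 in both dimensions as "p0:1_p1:1_s0:2_s1:2_d0:1_d1:1".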
string ConvolutionDescriptor::ToShortString() const {
  string desc;
  for (int i = 0; i < ndims(); i++) {
    if (i > 0) port::Appendf(&desc, "_");
    port::Appendf(&desc, "p%d:%lld", i, padding()[i]);
  }
  for (int i = 0; i < ndims(); i++) {
    port::Appendf(&desc, "_s%d:%lld", i, strides()[i]);
  }
  for (int i = 0; i < ndims(); i++) {
    port::Appendf(&desc, "_d%d:%lld", i, dilations()[i]);
  }
  return desc;
}

// -- PoolingDescriptor

PoolingDescriptor::PoolingDescriptor(int ndims)
    : mode_(dnn::PoolingMode::kMaximum),
      ndims_(ndims),
      propagate_nans_(false),
      window_(ndims, 0),
      padding_(ndims, 0),
      strides_(ndims, 1) {}

PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {}

void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) {
  mode_ = other.mode_;
  ndims_ = other.ndims_;
  window_ = other.window_;
  padding_ = other.padding_;
  strides_ = other.strides_;
  propagate_nans_ = other.propagate_nans_;
}

string PoolingDescriptor::ToString() const {
  const char* mode_string =
      mode_ == dnn::PoolingMode::kMaximum ? "kMaximum" : "kAverage";

  string window, strides, padding;
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&window, "%lld ", window_[i]);
    port::Appendf(&strides, "%lld ", strides_[i]);
    port::Appendf(&padding, "%lld", padding_[i]);
  }

  const char* propagate_string = propagate_nans_ ? "Yes" : "No";

  return port::Printf(
      "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}",
      mode_string, window.c_str(), strides.c_str(), padding.c_str(),
      propagate_string);
}

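// Renders, for example, 2x2 max pooling with stride 2 and no padding as
// "max_w0:2_w1:2_s0:2_s1:2_p0:0_p1:0ignore_nans" (the NaN-propagation suffix
// is appended without a separator).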
string PoolingDescriptor::ToShortString() const {
  string window, strides, padding;
  for (int i = 0; i < ndims_; i++) {
    port::Appendf(&window, "_w%d:%lld", i, window_[i]);
    port::Appendf(&strides, "_s%d:%lld", i, strides_[i]);
    port::Appendf(&padding, "_p%d:%lld", i, padding_[i]);
  }
  return absl::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg",
                      window, strides, padding,
                      propagate_nans_ ? "propagate_nans" : "ignore_nans");
}

// -- NormalizeDescriptor

NormalizeDescriptor::NormalizeDescriptor()
    : bias_(0.0),
      range_(0),
      alpha_(0.0),
      beta_(0.0),
      wrap_around_(false),
      segment_size_(0) {}

void NormalizeDescriptor::CloneFrom(const NormalizeDescriptor& other) {
  bias_ = other.bias_;
  range_ = other.range_;
  alpha_ = other.alpha_;
  beta_ = other.beta_;
  wrap_around_ = other.wrap_around_;
  segment_size_ = other.segment_size_;
}

string NormalizeDescriptor::ToString() const {
  return port::Printf(
      "{bias: %f range: %d alpha: %f beta: %f wrap_around: %d "
      "segment_size: %d}",
      bias_, range_, alpha_, beta_, wrap_around_, segment_size_);
}

string NormalizeDescriptor::ToShortString() const {
  return absl::StrCat("bias:", bias_, "_range:", range_, "_alpha:", alpha_,
                      "_beta:", beta_, "_wrap:", wrap_around_,
                      "_size:", segment_size_);
}

bool DnnSupport::IsStatusOk(const port::Status& status, bool report_error) {
  if (status.ok()) {
    return true;
  }
  if (report_error) {
    LOG(ERROR) << status.error_message();
  }
  return false;
}

}  // namespace dnn
}  // namespace stream_executor