1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/stream_executor/dnn.h" 17 18 #include "absl/strings/str_cat.h" 19 #include "tensorflow/core/lib/hash/hash.h" 20 #include "tensorflow/stream_executor/lib/stringprintf.h" 21 22 namespace stream_executor { 23 namespace dnn { 24 25 uint64 AlgorithmDesc::hash() const { 26 return ::tensorflow::Hash64Combine(algo_id(), tensor_ops_enabled()); 27 } 28 29 bool DnnSupport::GetConvolveAlgorithms( 30 bool with_winograd_nonfused, int cc_major, int cc_minor, 31 std::vector<AlgorithmDesc>* out_algorithms) { 32 return false; 33 } 34 35 bool DnnSupport::GetRnnAlgorithms(std::vector<AlgorithmDesc>* out_algorithms) { 36 return false; 37 } 38 39 bool DnnSupport::GetConvolveBackwardDataAlgorithms( 40 bool with_winograd_nonfused, int cc_major, int cc_minor, 41 std::vector<AlgorithmDesc>* out_algorithms) { 42 return false; 43 } 44 45 bool DnnSupport::GetConvolveBackwardFilterAlgorithms( 46 bool with_winograd_nonfused, int cc_major, int cc_minor, 47 std::vector<AlgorithmDesc>* out_algorithms) { 48 return false; 49 } 50 51 string QuantizedActivationModeString(QuantizedActivationMode mode) { 52 switch (mode) { 53 case dnn::QuantizedActivationMode::k8Bit: 54 return "uint8"; 55 case dnn::QuantizedActivationMode::k16Bit: 56 return "uint16"; 57 case dnn::QuantizedActivationMode::k32Bit: 58 return "int32"; 59 default: 60 LOG(FATAL) << "Unknown quantized_activation_mode " 61 << static_cast<int32>(mode); 62 } 63 return "unknown quantized_activation_mode"; 64 } 65 66 string ActivationModeString(ActivationMode mode) { 67 switch (mode) { 68 case ActivationMode::kSigmoid: 69 return "sigmoid"; 70 case ActivationMode::kRelu: 71 return "relu"; 72 case ActivationMode::kRelu6: 73 return "relu6"; 74 case ActivationMode::kReluX: 75 return "reluX"; 76 case ActivationMode::kTanh: 77 return "tanh"; 78 case ActivationMode::kBandPass: 79 return "bandpass"; 80 default: 81 LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode); 82 } 83 return "unknown activation_mode"; 84 } 85 86 string ElementwiseOperationString(ElementwiseOperation op) { 87 switch (op) { 88 case ElementwiseOperation::kAdd: 89 return "add"; 90 case ElementwiseOperation::kMultiply: 91 return "multiply"; 92 default: 93 LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op); 94 } 95 return "unknown element wise op"; 96 } 97 98 string DataLayoutString(DataLayout layout) { 99 switch (layout) { 100 case DataLayout::kYXDepthBatch: 101 return "YXDepthBatch"; 102 case DataLayout::kYXBatchDepth: 103 return "YXBatchDepth"; 104 case DataLayout::kBatchYXDepth: 105 return "BatchYXDepth"; 106 case DataLayout::kBatchDepthYX: 107 return "BatchDepthYX"; 108 case DataLayout::kBatchDepthYX4: 109 return "BatchDepthYX4"; 110 default: 111 LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout); 112 } 113 return "unknown data layout"; 114 } 115 116 string FilterLayoutString(FilterLayout layout) { 117 switch (layout) { 118 case FilterLayout::kOutputInputYX: 119 return "OutputInputYX"; 120 case FilterLayout::kOutputYXInput: 121 return "OutputYXInput"; 122 case FilterLayout::kOutputInputYX4: 123 return "OutputInputYX4"; 124 case FilterLayout::kInputYXOutput: 125 return "InputYXOutput"; 126 case FilterLayout::kYXInputOutput: 127 return "YXInputOutput"; 128 default: 129 LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout); 130 } 131 return "unknown filter layout"; 132 } 133 134 string PadAlignmentString(PadAlignment alignment) { 135 switch (alignment) { 136 case PadAlignment::kDefault: 137 return "default"; 138 case PadAlignment::kCudnnPadding: 139 return "cuDNN padding"; 140 case PadAlignment::kTensorFlowPadding: 141 return "TensorFlow padding"; 142 } 143 return "unknown pad alignment"; 144 } 145 146 std::ostream& operator<<(std::ostream& str, dnn::PadAlignment alignment) { 147 return str << PadAlignmentString(alignment); 148 } 149 150 string ShortPoolingModeString(PoolingMode mode) { 151 switch (mode) { 152 case PoolingMode::kMaximum: 153 return "Max"; 154 case PoolingMode::kAverage: 155 return "Avg"; 156 default: 157 LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode); 158 } 159 return "unknown filter layout"; 160 } 161 162 std::tuple<int, int, int> GetDimIndices(const DataLayout& layout, 163 const int data_dims) { 164 int depth_idx, batch_idx, spatial_idx; 165 switch (layout) { 166 case DataLayout::kYXBatchDepth: 167 depth_idx = data_dims - 1; 168 batch_idx = data_dims - 2; 169 spatial_idx = 0; 170 break; 171 172 case DataLayout::kYXDepthBatch: 173 depth_idx = data_dims - 2; 174 batch_idx = data_dims - 1; 175 spatial_idx = 0; 176 break; 177 178 case DataLayout::kBatchYXDepth: 179 depth_idx = data_dims - 1; 180 batch_idx = 0; 181 spatial_idx = 1; 182 break; 183 184 case DataLayout::kBatchDepthYX: 185 case DataLayout::kBatchDepthYX4: 186 depth_idx = 1; 187 batch_idx = 0; 188 spatial_idx = 2; 189 break; 190 191 default: 192 LOG(FATAL) << "Unknown layout " << layout; 193 } 194 195 return std::make_tuple(depth_idx, batch_idx, spatial_idx); 196 } 197 198 std::vector<int64> ReorderDims(const std::vector<int64>& input, 199 const DataLayout& from, const DataLayout& to) { 200 if (from == to) return input; 201 202 int d_idx_from, b_idx_from, spatial_idx_from; 203 int d_idx_to, b_idx_to, spatial_idx_to; 204 205 std::tie(d_idx_from, b_idx_from, spatial_idx_from) = 206 GetDimIndices(from, input.size()); 207 std::tie(d_idx_to, b_idx_to, spatial_idx_to) = 208 GetDimIndices(to, input.size()); 209 210 std::vector<int64> reordered(input.size()); 211 reordered[b_idx_to] = input[b_idx_from]; 212 reordered[d_idx_to] = input[d_idx_from]; 213 214 for (size_t i = 0; i < input.size() - 2; 215 i++, spatial_idx_from++, spatial_idx_to++) { 216 reordered[spatial_idx_to] = input[spatial_idx_from]; 217 } 218 219 return reordered; 220 } 221 222 // -- AlgorithmConfig 223 224 string AlgorithmConfig::ToString() const { 225 AlgorithmDesc::Index algo_id = -1; 226 if (algorithm().has_value()) { 227 algo_id = algorithm()->algo_id(); 228 } 229 AlgorithmDesc::Index algo_id_no_scratch = -1; 230 if (algorithm_no_scratch().has_value()) { 231 algo_id_no_scratch = algorithm_no_scratch()->algo_id(); 232 } 233 return absl::StrCat(algo_id, ", ", algo_id_no_scratch); 234 } 235 236 // -- BatchDescriptor 237 238 BatchDescriptor::BatchDescriptor(int ndims) 239 : value_max_(0.0), 240 value_min_(0.0), 241 quantized_activation_mode_(QuantizedActivationMode::k8Bit) { 242 tensor_.mutable_dimensions()->Resize(ndims + 2, 0); 243 set_layout(DataLayout::kYXDepthBatch); 244 } 245 246 BatchDescriptor::BatchDescriptor() : BatchDescriptor(/*ndims=*/2) {} 247 248 std::vector<int64> BatchDescriptor::full_dims(const DataLayout& layout) const { 249 std::vector<int64> bdyx_dims(ndims() + 2); 250 bdyx_dims[0] = count(); 251 bdyx_dims[1] = feature_map_count(); 252 std::copy(spatial_size().begin(), spatial_size().end(), 253 bdyx_dims.begin() + 2); 254 return ReorderDims(bdyx_dims, DataLayout::kBatchDepthYX, layout); 255 } 256 257 std::vector<int64> BatchDescriptor::full_strides( 258 const DataLayout& layout) const { 259 if (this->layout() == DataLayout::kBatchDepthYX4) { 260 LOG(FATAL) 261 << "Cannot compute full strides for batch descriptor " << ToString() 262 << ", because its layout is kBatchDepthYX4. In fact, " 263 "cudnnSetTensorNdDescriptor doesn't work for kBatchDepthYX4 at all. " 264 "Use cudnnSetTensor4DDescriptor to set cudnnTensorDescriptor_t " 265 "instead."; 266 } 267 std::vector<int64> phys_dims = full_dims(this->layout()); 268 std::vector<int64> phys_strides(phys_dims.size()); 269 phys_strides[ndims() + 1] = 1; 270 for (int i = ndims(); i >= 0; i--) { 271 phys_strides[i] = phys_strides[i + 1] * phys_dims[i + 1]; 272 } 273 return ReorderDims(phys_strides, this->layout(), layout); 274 } 275 276 void BatchDescriptor::CloneFrom(const BatchDescriptor& other) { 277 tensor_ = other.tensor_; 278 value_max_ = other.value_max_; 279 value_min_ = other.value_min_; 280 quantized_activation_mode_ = other.quantized_activation_mode_; 281 } 282 283 string BatchDescriptor::ToString() const { 284 string spatial; 285 for (int i = 0; i < ndims(); i++) { 286 port::Appendf(&spatial, "%lld ", spatial_size()[i]); 287 } 288 return port::Printf( 289 "{count: %lld feature_map_count: %lld spatial: %s " 290 "value_min: %f value_max: %f layout: %s}", 291 count(), feature_map_count(), spatial.c_str(), value_min_, value_max_, 292 DataLayoutString(layout()).c_str()); 293 } 294 295 string BatchDescriptor::ToShortString() const { 296 // All the constituent strings are less than 15 characters, so the 297 // small string optimization ensures that there will be at most one 298 // heap memory allocation. 299 string depth = absl::StrCat("d", feature_map_count()); 300 string batch = absl::StrCat("b", count()); 301 302 string spatial = "s"; 303 for (int i = 0; i < ndims(); i++) { 304 port::Appendf(&spatial, "%lld ", spatial_size()[i]); 305 } 306 307 string suffix; 308 if (value_min() != value_max()) { 309 absl::StrAppend(&suffix, "[", value_min(), ";", value_max(), "]"); 310 } 311 if (quantized_activation_mode() == QuantizedActivationMode::k16Bit) { 312 suffix += "_16bit"; 313 } 314 315 switch (layout()) { 316 case DataLayout::kYXDepthBatch: 317 return absl::StrCat(spatial, depth, batch, suffix); 318 case DataLayout::kYXBatchDepth: 319 return absl::StrCat(spatial, batch, depth, suffix); 320 case DataLayout::kBatchYXDepth: 321 return absl::StrCat(batch, spatial, depth, suffix); 322 case DataLayout::kBatchDepthYX: 323 return absl::StrCat(batch, depth, spatial, suffix); 324 case DataLayout::kBatchDepthYX4: 325 return absl::StrCat(batch, depth, spatial, suffix, "(VECT_C)"); 326 default: 327 LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout()); 328 return ""; // Avoid return warning (unreachable) 329 } 330 } 331 332 int64 BatchDescriptor::NodesPerFeatureMap() const { 333 int64 ret = 1; 334 for (int i = 0; i < ndims(); i++) { 335 ret *= spatial_size()[i]; 336 } 337 return ret; 338 } 339 340 int64 BatchDescriptor::NodesAcrossFeatureMaps() const { 341 return NodesPerFeatureMap() * feature_map_count(); 342 } 343 344 int64 BatchDescriptor::ElementCount() const { 345 return count() * feature_map_count() * NodesPerFeatureMap(); 346 } 347 348 int64 BatchDescriptor::FullyConnectedWeightCount( 349 const BatchDescriptor& input, const BatchDescriptor& output) { 350 return input.NodesAcrossFeatureMaps() * output.NodesAcrossFeatureMaps(); 351 } 352 353 int64 BatchDescriptor::FullyConnectedBiasCount(const BatchDescriptor& output) { 354 return output.NodesAcrossFeatureMaps(); 355 } 356 357 BatchDescriptor BatchDescriptor::DepthConcatenateOutputDescriptor( 358 port::ArraySlice<dnn::BatchDescriptor> inputs) { 359 if (inputs.empty()) { 360 return BatchDescriptor(); 361 } 362 int feature_map_count = 0; 363 for (const auto& dimensions : inputs) { 364 feature_map_count += dimensions.feature_map_count(); 365 } 366 BatchDescriptor output = inputs[0]; 367 output.set_feature_map_count(feature_map_count); 368 return output; 369 } 370 371 TensorDescriptorProto BatchDescriptor::ToProto(DataType data_type) const { 372 CHECK_EQ(0.0, value_max_); 373 CHECK_EQ(0.0, value_min_); 374 CHECK(quantized_activation_mode_ == QuantizedActivationMode::k8Bit); 375 376 TensorDescriptorProto ret = tensor_; 377 ret.set_data_type(data_type); 378 return ret; 379 } 380 381 // -- FilterDescriptor 382 383 FilterDescriptor::FilterDescriptor(int ndims) { 384 tensor_.mutable_dimensions()->Resize(ndims + 2, 0); 385 set_layout(FilterLayout::kOutputInputYX); 386 } 387 388 FilterDescriptor::FilterDescriptor() : FilterDescriptor(/*ndims=*/2) {} 389 390 FilterDescriptor::~FilterDescriptor() {} 391 392 void FilterDescriptor::CloneFrom(const FilterDescriptor& other) { 393 tensor_ = other.tensor_; 394 } 395 396 string FilterDescriptor::ToString() const { 397 string desc = port::Printf( 398 "{output_feature_map_count: %lld input_feature_map_count: %lld " 399 "layout: %s shape: ", 400 output_feature_map_count(), input_feature_map_count(), 401 FilterLayoutString(layout()).c_str()); 402 for (int i = 0; i < ndims(); i++) { 403 port::Appendf(&desc, "%lld ", input_filter_dims()[i]); 404 } 405 absl::StrAppend(&desc, "}"); 406 407 return desc; 408 } 409 410 string FilterDescriptor::ToShortString() const { 411 // All the constituent strings are less than 15 characters, so the 412 // small string optimization ensures that there will be at most one 413 // heap memory allocation. 414 string od = absl::StrCat("od", output_feature_map_count()); 415 string id = absl::StrCat("id", input_feature_map_count()); 416 417 string spatial = "s"; 418 for (int i = 0; i < ndims(); i++) { 419 port::Appendf(&spatial, "%lld ", input_filter_dims()[i]); 420 } 421 422 switch (layout()) { 423 case FilterLayout::kOutputInputYX: 424 return absl::StrCat(od, id, spatial); 425 case FilterLayout::kOutputYXInput: 426 return absl::StrCat(od, spatial, id); 427 case FilterLayout::kOutputInputYX4: 428 return absl::StrCat(od, id, spatial, "(VECT_C)"); 429 case FilterLayout::kInputYXOutput: 430 return absl::StrCat(id, spatial, od); 431 case FilterLayout::kYXInputOutput: 432 return absl::StrCat(spatial, id, od); 433 default: 434 LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout()); 435 return ""; // Avoid return warning (unreachable) 436 } 437 } 438 439 int64 FilterDescriptor::ComputeWeightCount() const { 440 int64 ret = output_feature_map_count() * input_feature_map_count(); 441 for (int i = 0; i < ndims(); i++) { 442 ret *= input_filter_dims()[i]; 443 } 444 return ret; 445 } 446 447 TensorDescriptorProto FilterDescriptor::ToProto(DataType data_type) const { 448 TensorDescriptorProto ret = tensor_; 449 ret.set_data_type(data_type); 450 return ret; 451 } 452 453 // -- ConvolutionDescriptor 454 455 ConvolutionDescriptor::ConvolutionDescriptor(int ndims) { 456 proto_.mutable_paddings()->Resize(ndims, 0); 457 proto_.mutable_strides()->Resize(ndims, 1); 458 proto_.mutable_dilations()->Resize(ndims, 1); 459 proto_.set_group_count(1); 460 proto_.set_convolution_mode(ConvolutionMode::CROSS_CORRELATION); 461 } 462 463 ConvolutionDescriptor::ConvolutionDescriptor() 464 : ConvolutionDescriptor(/*ndims=*/2) {} 465 466 ConvolutionDescriptor::~ConvolutionDescriptor() {} 467 468 string ConvolutionDescriptor::ToString() const { 469 string padding; 470 string strides; 471 string dilations; 472 for (int i = 0; i < ndims(); i++) { 473 port::Appendf(&padding, "%lld ", this->padding()[i]); 474 port::Appendf(&strides, "%lld ", this->strides()[i]); 475 port::Appendf(&dilations, "%lld ", this->dilations()[i]); 476 } 477 478 return port::Printf( 479 "{zero_padding: %s pad_alignment: %s filter_strides: %s dilation_rates: " 480 "%s}", 481 padding.c_str(), PadAlignmentString(pad_alignment()).c_str(), 482 strides.c_str(), dilations.c_str()); 483 } 484 485 string ConvolutionDescriptor::ToShortString() const { 486 string desc; 487 for (int i = 0; i < ndims(); i++) { 488 if (i > 0) port::Appendf(&desc, "_"); 489 port::Appendf(&desc, "p%d:%lld", i, padding()[i]); 490 } 491 for (int i = 0; i < ndims(); i++) { 492 port::Appendf(&desc, "_s%d:%lld", i, strides()[i]); 493 } 494 for (int i = 0; i < ndims(); i++) { 495 port::Appendf(&desc, "_d%d:%lld", i, dilations()[i]); 496 } 497 return desc; 498 } 499 500 // -- PoolingDescriptor 501 502 PoolingDescriptor::PoolingDescriptor(int ndims) 503 : mode_(dnn::PoolingMode::kMaximum), 504 ndims_(ndims), 505 propagate_nans_(false), 506 window_(ndims, 0), 507 padding_(ndims, 0), 508 strides_(ndims, 1) {} 509 510 PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {} 511 512 void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) { 513 mode_ = other.mode_; 514 ndims_ = other.ndims_; 515 window_ = other.window_; 516 padding_ = other.padding_; 517 strides_ = other.strides_; 518 propagate_nans_ = other.propagate_nans_; 519 } 520 521 string PoolingDescriptor::ToString() const { 522 const char* mode_string = 523 mode_ == dnn::PoolingMode::kMaximum ? "kMaximum" : "kAverage"; 524 525 string window, strides, padding; 526 for (int i = 0; i < ndims_; i++) { 527 port::Appendf(&window, "%lld ", window_[i]); 528 port::Appendf(&strides, "%lld ", strides_[i]); 529 port::Appendf(&padding, "%lld", padding_[i]); 530 } 531 532 const char* propagate_string = propagate_nans_ ? "Yes" : "No"; 533 534 return port::Printf( 535 "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}", 536 mode_string, window.c_str(), strides.c_str(), padding.c_str(), 537 propagate_string); 538 } 539 540 string PoolingDescriptor::ToShortString() const { 541 string window, strides, padding; 542 for (int i = 0; i < ndims_; i++) { 543 port::Appendf(&window, "_w%d:%lld", i, window_[i]); 544 port::Appendf(&strides, "_s%d:%lld", i, strides_[i]); 545 port::Appendf(&padding, "_p%d:%lld", i, padding_[i]); 546 } 547 return absl::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg", 548 window, strides, padding, 549 propagate_nans_ ? "propagate_nans" : "ignore_nans"); 550 } 551 552 // -- NormalizeDescriptor 553 554 NormalizeDescriptor::NormalizeDescriptor() 555 : bias_(0.0), 556 range_(0), 557 alpha_(0.0), 558 beta_(0.0), 559 wrap_around_(false), 560 segment_size_(0) {} 561 562 void NormalizeDescriptor::CloneFrom(const NormalizeDescriptor& other) { 563 bias_ = other.bias_; 564 range_ = other.range_; 565 alpha_ = other.alpha_; 566 beta_ = other.beta_; 567 wrap_around_ = other.wrap_around_; 568 segment_size_ = other.segment_size_; 569 } 570 571 string NormalizeDescriptor::ToString() const { 572 return port::Printf( 573 "{bias: %f range: %d alpha: %f beta: %f wrap_around: %d " 574 "segment_size: %d}", 575 bias_, range_, alpha_, beta_, wrap_around_, segment_size_); 576 } 577 578 string NormalizeDescriptor::ToShortString() const { 579 return absl::StrCat("bias:", bias_, "_range:", range_, "_alpha:", alpha_, 580 "_beta:", beta_, "_wrap:", wrap_around_, 581 "_size:", segment_size_); 582 } 583 584 bool DnnSupport::IsStatusOk(const port::Status& status, bool report_error) { 585 if (status.ok()) { 586 return true; 587 } 588 if (report_error) { 589 LOG(ERROR) << status.error_message(); 590 } 591 return false; 592 } 593 594 } // namespace dnn 595 } // namespace stream_executor 596