1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/stream_executor/dnn.h" 17 18 #include "tensorflow/stream_executor/lib/strcat.h" 19 #include "tensorflow/stream_executor/lib/stringprintf.h" 20 21 namespace perftools { 22 namespace gputools { 23 namespace dnn { 24 25 bool DnnSupport::GetConvolveAlgorithms( 26 bool with_winograd_nonfused, int cc_major, int cc_minor, 27 std::vector<AlgorithmDesc>* out_algorithms) { 28 return false; 29 } 30 31 bool DnnSupport::GetConvolveBackwardDataAlgorithms( 32 bool with_winograd_nonfused, int cc_major, int cc_minor, 33 std::vector<AlgorithmDesc>* out_algorithms) { 34 return false; 35 } 36 37 bool DnnSupport::GetConvolveBackwardFilterAlgorithms( 38 bool with_winograd_nonfused, int cc_major, int cc_minor, 39 std::vector<AlgorithmDesc>* out_algorithms) { 40 return false; 41 } 42 43 string QuantizedActivationModeString(QuantizedActivationMode mode) { 44 switch (mode) { 45 case dnn::QuantizedActivationMode::k8Bit: 46 return "uint8"; 47 case dnn::QuantizedActivationMode::k16Bit: 48 return "uint16"; 49 case dnn::QuantizedActivationMode::k32Bit: 50 return "int32"; 51 default: 52 LOG(FATAL) << "Unknown quantized_activation_mode " 53 << static_cast<int32>(mode); 54 } 55 return "unknown quantized_activation_mode"; 56 } 57 58 string ActivationModeString(ActivationMode mode) { 59 switch (mode) { 60 case ActivationMode::kSigmoid: 61 return "sigmoid"; 62 case ActivationMode::kRelu: 63 return "relu"; 64 case ActivationMode::kRelu6: 65 return "relu6"; 66 case ActivationMode::kReluX: 67 return "reluX"; 68 case ActivationMode::kTanh: 69 return "tanh"; 70 case ActivationMode::kBandPass: 71 return "bandpass"; 72 default: 73 LOG(FATAL) << "Unknown activation_mode " << static_cast<int32>(mode); 74 } 75 return "unknown activation_mode"; 76 } 77 78 string ElementwiseOperationString(ElementwiseOperation op) { 79 switch (op) { 80 case ElementwiseOperation::kAdd: 81 return "add"; 82 case ElementwiseOperation::kMultiply: 83 return "multiply"; 84 default: 85 LOG(FATAL) << "Unknown elementwise op " << static_cast<int32>(op); 86 } 87 return "unknown element wise op"; 88 } 89 90 string DataLayoutString(DataLayout layout) { 91 switch (layout) { 92 case DataLayout::kYXDepthBatch: 93 return "YXDepthBatch"; 94 case DataLayout::kYXBatchDepth: 95 return "YXBatchDepth"; 96 case DataLayout::kBatchYXDepth: 97 return "BatchYXDepth"; 98 case DataLayout::kBatchDepthYX: 99 return "BatchDepthYX"; 100 case DataLayout::kBatchDepthYX4: 101 return "BatchDepthYX4"; 102 default: 103 LOG(FATAL) << "Unknown data layout " << static_cast<int32>(layout); 104 } 105 return "unknown data layout"; 106 } 107 108 string FilterLayoutString(FilterLayout layout) { 109 switch (layout) { 110 case FilterLayout::kOutputInputYX: 111 return "OutputInputYX"; 112 case FilterLayout::kOutputInputYX4: 113 return "OutputInputYX4"; 114 case FilterLayout::kInputYXOutput: 115 return "InputYXOutput"; 116 case FilterLayout::kYXInputOutput: 117 return "YXInputOutput"; 118 default: 119 LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(layout); 120 } 121 return "unknown filter layout"; 122 } 123 124 string PadAlignmentString(PadAlignment alignment) { 125 switch (alignment) { 126 case PadAlignment::kDefault: 127 return "default"; 128 case PadAlignment::kCudnnPadding: 129 return "cuDNN padding"; 130 case PadAlignment::kTensorFlowPadding: 131 return "TensorFlow padding"; 132 } 133 return "unknown pad alignment"; 134 } 135 136 string ShortPoolingModeString(PoolingMode mode) { 137 switch (mode) { 138 case PoolingMode::kMaximum: 139 return "Max"; 140 case PoolingMode::kAverage: 141 return "Avg"; 142 default: 143 LOG(FATAL) << "Unknown filter layout " << static_cast<int32>(mode); 144 } 145 return "unknown filter layout"; 146 } 147 148 std::tuple<int, int, int> GetDimIndices(const DataLayout& layout, 149 const int data_dims) { 150 int depth_idx, batch_idx, spatial_idx; 151 switch (layout) { 152 case DataLayout::kYXBatchDepth: 153 depth_idx = data_dims - 1; 154 batch_idx = data_dims - 2; 155 spatial_idx = 0; 156 break; 157 158 case DataLayout::kYXDepthBatch: 159 depth_idx = data_dims - 2; 160 batch_idx = data_dims - 1; 161 spatial_idx = 0; 162 break; 163 164 case DataLayout::kBatchYXDepth: 165 depth_idx = data_dims - 1; 166 batch_idx = 0; 167 spatial_idx = 1; 168 break; 169 170 case DataLayout::kBatchDepthYX: 171 case DataLayout::kBatchDepthYX4: 172 depth_idx = 1; 173 batch_idx = 0; 174 spatial_idx = 2; 175 break; 176 } 177 178 return std::make_tuple(depth_idx, batch_idx, spatial_idx); 179 } 180 181 std::vector<int64> ReorderDims(const std::vector<int64>& input, 182 const DataLayout& from, const DataLayout& to) { 183 if (from == to) return input; 184 185 int d_idx_from, b_idx_from, spatial_idx_from; 186 int d_idx_to, b_idx_to, spatial_idx_to; 187 188 std::tie(d_idx_from, b_idx_from, spatial_idx_from) = 189 GetDimIndices(from, input.size()); 190 std::tie(d_idx_to, b_idx_to, spatial_idx_to) = 191 GetDimIndices(to, input.size()); 192 193 std::vector<int64> reordered(input.size()); 194 reordered[b_idx_to] = input[b_idx_from]; 195 reordered[d_idx_to] = input[d_idx_from]; 196 197 for (size_t i = 0; i < input.size() - 2; 198 i++, spatial_idx_from++, spatial_idx_to++) { 199 reordered[spatial_idx_to] = input[spatial_idx_from]; 200 } 201 202 return reordered; 203 } 204 205 // -- AlgorithmConfig 206 207 string AlgorithmConfig::ToString() const { 208 return port::StrCat(algorithm_.algo_id(), ", ", 209 algorithm_no_scratch_.algo_id()); 210 } 211 212 // -- BatchDescriptor 213 214 BatchDescriptor::BatchDescriptor(int ndims) 215 : count_(0), 216 feature_map_count_(0), 217 spatial_size_(ndims, 0), 218 value_max_(0.0), 219 value_min_(0.0), 220 layout_(DataLayout::kYXDepthBatch), 221 ndims_(ndims), 222 quantized_activation_mode_(QuantizedActivationMode::k8Bit) {} 223 224 BatchDescriptor::BatchDescriptor() : BatchDescriptor(/*ndims=*/2) {} 225 226 std::vector<int64> BatchDescriptor::full_dims(const DataLayout& layout) const { 227 std::vector<int64> bdyx_dims(ndims_ + 2); 228 bdyx_dims[0] = count(); 229 bdyx_dims[1] = feature_map_count(); 230 std::copy(spatial_size_.begin(), spatial_size_.end(), bdyx_dims.begin() + 2); 231 return ReorderDims(bdyx_dims, DataLayout::kBatchDepthYX, layout); 232 } 233 234 std::vector<int64> BatchDescriptor::full_strides( 235 const DataLayout& layout) const { 236 if (layout_ == DataLayout::kBatchDepthYX4) { 237 LOG(FATAL) 238 << "Cannot compute full strides for batch descriptor " << ToString() 239 << ", because its layout is kBatchDepthYX4. In fact, " 240 "cudnnSetTensorNdDescriptor doesn't work for kBatchDepthYX4 at all. " 241 "Use cudnnSetTensor4DDescriptor to set cudnnTensorDescriptor_t " 242 "instead."; 243 } 244 std::vector<int64> phys_dims = full_dims(layout_); 245 std::vector<int64> phys_strides(phys_dims.size()); 246 phys_strides[ndims_ + 1] = 1; 247 for (int i = ndims_; i >= 0; i--) { 248 phys_strides[i] = phys_strides[i + 1] * phys_dims[i + 1]; 249 } 250 return ReorderDims(phys_strides, layout_, layout); 251 } 252 253 void BatchDescriptor::CloneFrom(const BatchDescriptor& other) { 254 count_ = other.count_; 255 feature_map_count_ = other.feature_map_count_; 256 spatial_size_ = other.spatial_size_; 257 value_max_ = other.value_max_; 258 value_min_ = other.value_min_; 259 layout_ = other.layout_; 260 ndims_ = other.ndims_; 261 quantized_activation_mode_ = other.quantized_activation_mode_; 262 } 263 264 string BatchDescriptor::ToString() const { 265 string spatial; 266 for (int i = 0; i < ndims_; i++) { 267 port::Appendf(&spatial, "%lld ", spatial_size_[i]); 268 } 269 return port::Printf( 270 "{count: %lld feature_map_count: %lld spatial: %s " 271 "value_min: %f value_max: %f layout: %s}", 272 count_, feature_map_count_, spatial.c_str(), value_min_, value_max_, 273 DataLayoutString(layout_).c_str()); 274 } 275 276 string BatchDescriptor::ToShortString() const { 277 // All the constituent strings are less than 15 characters, so the 278 // small string optimization ensures that there will be at most one 279 // heap memory allocation. 280 string depth = port::StrCat("d", feature_map_count()); 281 string batch = port::StrCat("b", count()); 282 283 string spatial = "s"; 284 for (int i = 0; i < ndims_; i++) { 285 port::Appendf(&spatial, "%lld ", spatial_size_[i]); 286 } 287 288 string suffix; 289 if (value_min() != value_max()) { 290 port::StrAppend(&suffix, "[", value_min(), ";", value_max(), "]"); 291 } 292 if (quantized_activation_mode() == QuantizedActivationMode::k16Bit) { 293 suffix += "_16bit"; 294 } 295 296 switch (layout()) { 297 case DataLayout::kYXDepthBatch: 298 return port::StrCat(spatial, depth, batch, suffix); 299 case DataLayout::kYXBatchDepth: 300 return port::StrCat(spatial, batch, depth, suffix); 301 case DataLayout::kBatchYXDepth: 302 return port::StrCat(batch, spatial, depth, suffix); 303 case DataLayout::kBatchDepthYX: 304 return port::StrCat(batch, depth, spatial, suffix); 305 case DataLayout::kBatchDepthYX4: 306 return port::StrCat(batch, depth, spatial, suffix, "(VECT_C)"); 307 default: 308 LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout()); 309 return ""; // Avoid return warning (unreachable) 310 } 311 } 312 313 int64 BatchDescriptor::NodesPerFeatureMap() const { 314 int64 ret = 1; 315 for (int i = 0; i < ndims_; i++) { 316 ret *= spatial_size_[i]; 317 } 318 return ret; 319 } 320 321 int64 BatchDescriptor::NodesAcrossFeatureMaps() const { 322 return NodesPerFeatureMap() * feature_map_count_; 323 } 324 325 int64 BatchDescriptor::ElementCount() const { 326 return count_ * feature_map_count_ * NodesPerFeatureMap(); 327 } 328 329 int64 BatchDescriptor::FullyConnectedWeightCount( 330 const BatchDescriptor& input, const BatchDescriptor& output) { 331 return input.NodesAcrossFeatureMaps() * output.NodesAcrossFeatureMaps(); 332 } 333 334 int64 BatchDescriptor::FullyConnectedBiasCount(const BatchDescriptor& output) { 335 return output.NodesAcrossFeatureMaps(); 336 } 337 338 BatchDescriptor BatchDescriptor::DepthConcatenateOutputDescriptor( 339 port::ArraySlice<dnn::BatchDescriptor> inputs) { 340 if (inputs.empty()) { 341 return BatchDescriptor(); 342 } 343 int feature_map_count = 0; 344 for (const auto& dimensions : inputs) { 345 feature_map_count += dimensions.feature_map_count(); 346 } 347 BatchDescriptor output = inputs[0]; 348 output.set_feature_map_count(feature_map_count); 349 return output; 350 } 351 352 // -- FilterDescriptor 353 354 FilterDescriptor::FilterDescriptor(int ndims) 355 : output_feature_map_count_(0), 356 input_feature_map_count_(0), 357 input_filter_dims_(ndims, 0), 358 ndims_(ndims), 359 layout_(FilterLayout::kOutputInputYX) {} 360 361 FilterDescriptor::FilterDescriptor() : FilterDescriptor(/*ndims=*/2) {} 362 363 FilterDescriptor::~FilterDescriptor() {} 364 365 void FilterDescriptor::CloneFrom(const FilterDescriptor& other) { 366 set_output_feature_map_count(other.output_feature_map_count()) 367 .set_input_feature_map_count(other.input_feature_map_count()) 368 .set_layout(other.layout()); 369 input_filter_dims_ = other.input_filter_dims_; 370 ndims_ = other.ndims_; 371 } 372 373 string FilterDescriptor::ToString() const { 374 string desc = port::Printf( 375 "{output_feature_map_count: %lld input_feature_map_count: %lld " 376 "layout: %s shape: ", 377 output_feature_map_count_, input_feature_map_count_, 378 FilterLayoutString(layout_).c_str()); 379 for (int i = 0; i < ndims_; i++) { 380 port::Appendf(&desc, "%lld ", input_filter_dims_[i]); 381 } 382 port::StrAppend(&desc, "}"); 383 384 return desc; 385 } 386 387 string FilterDescriptor::ToShortString() const { 388 // All the constituent strings are less than 15 characters, so the 389 // small string optimization ensures that there will be at most one 390 // heap memory allocation. 391 string od = port::StrCat("od", output_feature_map_count_); 392 string id = port::StrCat("id", input_feature_map_count_); 393 394 string spatial = "s"; 395 for (int i = 0; i < ndims_; i++) { 396 port::Appendf(&spatial, "%lld ", input_filter_dims_[i]); 397 } 398 399 switch (layout_) { 400 case FilterLayout::kOutputInputYX: 401 return port::StrCat(od, id, spatial); 402 case FilterLayout::kOutputInputYX4: 403 return port::StrCat(od, id, spatial, "(VECT_C)"); 404 case FilterLayout::kInputYXOutput: 405 return port::StrCat(id, spatial, od); 406 case FilterLayout::kYXInputOutput: 407 return port::StrCat(spatial, id, od); 408 default: 409 LOG(FATAL) << "Unknown layout " << static_cast<int32>(layout_); 410 return ""; // Avoid return warning (unreachable) 411 } 412 } 413 414 int64 FilterDescriptor::ComputeWeightCount() const { 415 int64 ret = output_feature_map_count_ * input_feature_map_count_; 416 for (int i = 0; i < ndims_; i++) { 417 ret *= input_filter_dims_[i]; 418 } 419 return ret; 420 } 421 422 // -- ConvolutionDescriptor 423 424 ConvolutionDescriptor::ConvolutionDescriptor(int ndims) 425 : zero_padding_(ndims, 0), 426 filter_strides_(ndims, 1), 427 dilation_rates_(ndims, 1), 428 pad_alignment_(PadAlignment::kDefault), 429 ndims_(ndims) {} 430 431 ConvolutionDescriptor::ConvolutionDescriptor() 432 : ConvolutionDescriptor(/*ndims=*/2) {} 433 434 ConvolutionDescriptor::~ConvolutionDescriptor() {} 435 436 string ConvolutionDescriptor::ToString() const { 437 string padding; 438 string strides; 439 string dilations; 440 for (int i = 0; i < ndims_; i++) { 441 port::Appendf(&padding, "%lld ", zero_padding_[i]); 442 port::Appendf(&strides, "%lld ", filter_strides_[i]); 443 port::Appendf(&dilations, "%lld ", dilation_rates_[i]); 444 } 445 446 return port::Printf( 447 "{zero_padding: %s pad_alignment: %s filter_strides: %s dilation_rates: " 448 "%s}", 449 padding.c_str(), PadAlignmentString(pad_alignment_).c_str(), 450 strides.c_str(), dilations.c_str()); 451 } 452 453 string ConvolutionDescriptor::ToShortString() const { 454 string desc; 455 for (int i = 0; i < ndims_; i++) { 456 if (i > 0) port::Appendf(&desc, "_"); 457 port::Appendf(&desc, "p%d:%lld", i, zero_padding_[i]); 458 } 459 for (int i = 0; i < ndims_; i++) { 460 port::Appendf(&desc, "_s%d:%lld", i, filter_strides_[i]); 461 } 462 for (int i = 0; i < ndims_; i++) { 463 port::Appendf(&desc, "_d%d:%lld", i, dilation_rates_[i]); 464 } 465 return desc; 466 } 467 468 // -- PoolingDescriptor 469 470 PoolingDescriptor::PoolingDescriptor(int ndims) 471 : mode_(dnn::PoolingMode::kMaximum), 472 ndims_(ndims), 473 propagate_nans_(false), 474 window_(ndims, 0), 475 padding_(ndims, 0), 476 strides_(ndims, 1) {} 477 478 PoolingDescriptor::PoolingDescriptor() : PoolingDescriptor(/*ndims=*/2) {} 479 480 void PoolingDescriptor::CloneFrom(const PoolingDescriptor& other) { 481 mode_ = other.mode_; 482 ndims_ = other.ndims_; 483 window_ = other.window_; 484 padding_ = other.padding_; 485 strides_ = other.strides_; 486 propagate_nans_ = other.propagate_nans_; 487 } 488 489 string PoolingDescriptor::ToString() const { 490 const char* mode_string = 491 mode_ == dnn::PoolingMode::kMaximum ? "kMaximum" : "kAverage"; 492 493 string window, strides, padding; 494 for (int i = 0; i < ndims_; i++) { 495 port::Appendf(&window, "%lld ", window_[i]); 496 port::Appendf(&strides, "%lld ", strides_[i]); 497 port::Appendf(&padding, "%lld", padding_[i]); 498 } 499 500 const char* propagate_string = propagate_nans_ ? "Yes" : "No"; 501 502 return port::Printf( 503 "{mode: %s window: %s strides: %s padding: %s propagate NaNs: %s}", 504 mode_string, window.c_str(), strides.c_str(), padding.c_str(), 505 propagate_string); 506 } 507 508 string PoolingDescriptor::ToShortString() const { 509 string window, strides, padding; 510 for (int i = 0; i < ndims_; i++) { 511 port::Appendf(&window, "_w%d:%lld", i, window_[i]); 512 port::Appendf(&strides, "_s%d:%lld", i, strides_[i]); 513 port::Appendf(&padding, "_p%d:%lld", i, padding_[i]); 514 } 515 return port::StrCat(mode_ == dnn::PoolingMode::kMaximum ? "max" : "avg", 516 window, strides, padding, 517 propagate_nans_ ? "propagate_nans" : "ignore_nans"); 518 } 519 520 // -- NormalizeDescriptor 521 522 NormalizeDescriptor::NormalizeDescriptor() 523 : bias_(0.0), 524 range_(0), 525 alpha_(0.0), 526 beta_(0.0), 527 wrap_around_(false), 528 segment_size_(0) {} 529 530 void NormalizeDescriptor::CloneFrom(const NormalizeDescriptor& other) { 531 bias_ = other.bias_; 532 range_ = other.range_; 533 alpha_ = other.alpha_; 534 beta_ = other.beta_; 535 wrap_around_ = other.wrap_around_; 536 segment_size_ = other.segment_size_; 537 } 538 539 string NormalizeDescriptor::ToString() const { 540 return port::Printf( 541 "{bias: %f range: %d alpha: %f beta: %f wrap_around: %d " 542 "segment_size: %d}", 543 bias_, range_, alpha_, beta_, wrap_around_, segment_size_); 544 } 545 546 string NormalizeDescriptor::ToShortString() const { 547 return port::StrCat("bias:", bias_, "_range:", range_, "_alpha:", alpha_, 548 "_beta:", beta_, "_wrap:", wrap_around_, "_size:", 549 segment_size_); 550 } 551 552 } // namespace dnn 553 } // namespace gputools 554 } // namespace perftools 555