Home | History | Annotate | Download | only in stream_executor
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // Types to express dimensionality of a kernel launch. Blocks and threads
     17 // are (up to) 3-dimensional.
     18 //
// A thread is conceptually like a SIMD lane. Some number of SIMD lanes --
// typically 32, though that fact should not be relied on -- are tied together
// with a single program counter in a unit called a warp. There is a maximum number of threads
     22 // that can execute in a shared-context entity called a block. Presently, that
     23 // number is 1024 -- again, something that should not be relied on from this
     24 // comment, but checked via perftools::gputools::DeviceDescription.
     25 //
     26 // For additional information, see
     27 // http://docs.nvidia.com/cuda/kepler-tuning-guide/#device-utilization-and-occupancy
     28 //
     29 // Because of that modest thread-per-block limit, a kernel can be launched with
     30 // multiple blocks. Each block is indivisibly scheduled onto a single core.
// Blocks can also be used in a multi-dimensional configuration, and the block
// count has much higher limits -- typically on the order of the maximum
// amount of addressable memory.
     34 
     35 #ifndef TENSORFLOW_STREAM_EXECUTOR_LAUNCH_DIM_H_
     36 #define TENSORFLOW_STREAM_EXECUTOR_LAUNCH_DIM_H_
     37 
     38 #include "tensorflow/stream_executor/platform/port.h"
     39 
     40 #include "tensorflow/stream_executor/lib/strcat.h"
     41 #include "tensorflow/stream_executor/platform/port.h"
     42 
     43 namespace perftools {
     44 namespace gputools {
     45 
     46 // Basic type that represents a 3-dimensional index space.
     47 struct Dim3D {
     48   uint64 x, y, z;
     49 
     50   Dim3D(uint64 x, uint64 y, uint64 z) : x(x), y(y), z(z) {}
     51 };
     52 
     53 // Thread dimensionality for use in a kernel launch. See file comment for
     54 // details.
     55 struct ThreadDim : public Dim3D {
     56   explicit ThreadDim(uint64 x = 1, uint64 y = 1, uint64 z = 1)
     57       : Dim3D(x, y, z) {}
     58 
     59   // Returns a string representation of the thread dimensionality.
     60   string ToString() const {
     61     return port::StrCat("ThreadDim{", x, ", ", y, ", ", z, "}");
     62   }
     63 };
     64 
     65 // Block dimensionality for use in a kernel launch. See file comment for
     66 // details.
     67 struct BlockDim : public Dim3D {
     68   explicit BlockDim(uint64 x = 1, uint64 y = 1, uint64 z = 1)
     69       : Dim3D(x, y, z) {}
     70 
     71   // Returns a string representation of the block dimensionality.
     72   string ToString() const {
     73     return port::StrCat("BlockDim{", x, ", ", y, ", ", z, "}");
     74   }
     75 };
     76 
     77 }  // namespace gputools
     78 }  // namespace perftools
     79 
     80 #endif  // TENSORFLOW_STREAM_EXECUTOR_LAUNCH_DIM_H_
     81