Home | History | Annotate | Download | only in framework
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_FRAMEWORK_ALLOCATOR_H_
     17 #define TENSORFLOW_FRAMEWORK_ALLOCATOR_H_
     18 
     19 #include <stdlib.h>
     20 
     21 #include <limits>
     22 
     23 #include "tensorflow/core/framework/numeric_types.h"
     24 #include "tensorflow/core/framework/resource_handle.h"
     25 #include "tensorflow/core/framework/type_traits.h"
     26 #include "tensorflow/core/framework/variant.h"
     27 #include "tensorflow/core/platform/logging.h"
     28 #include "tensorflow/core/platform/types.h"
     29 
     30 namespace tensorflow {
     31 
     32 // Attributes for a single allocation call. Different calls to the same
     33 // allocator could potentially have different allocation attributes.
     34 struct AllocationAttributes {
     35   // If the first attempt to allocate the memory fails, the allocation
     36   // should return immediately without retrying.
     37   // An example use case is optional scratch spaces where a failure
     38   // has only performance impact.
     39   bool no_retry_on_failure = false;
     40   // If a Tensor is allocated without the following set to true, then
     41   // it is logged as an unknown allocation. During execution Tensors
     42   // should be allocated through the OpKernelContext which records
     43   // which Op is performing the allocation, and sets this flag to
     44   // true.
     45   bool allocation_will_be_logged = false;
     46 };
     47 
     48 // Runtime statistics collected by an allocator.
     49 struct AllocatorStats {
     50   int64 num_allocs;        // Number of allocations.
     51   int64 bytes_in_use;      // Number of bytes in use.
     52   int64 max_bytes_in_use;  // The maximum bytes in use.
     53   int64 max_alloc_size;    // The max single allocation seen.
     54 
     55   // The upper limit what the allocator can allocate, if such a limit
     56   // is known. Certain allocator may return 0 to indicate the limit is
     57   // unknown.
     58   int64 bytes_limit;
     59 
     60   AllocatorStats() { Clear(); }
     61 
     62   void Clear();
     63   string DebugString() const;
     64 };
     65 
     66 // Allocator is an abstract interface for allocating and deallocating
     67 // device memory.
     68 class Allocator {
     69  public:
     70 #ifdef EIGEN_VECTORIZE_AVX512
     71   // Align to 64 byte boundary.
     72   static constexpr size_t kAllocatorAlignment = 64;
     73 #else
     74   // Align to 32 byte boundary.
     75   static constexpr size_t kAllocatorAlignment = 32;
     76 #endif
     77 
     78   virtual ~Allocator();
     79 
     80   // Return a string identifying this allocator
     81   virtual string Name() = 0;
     82 
     83   // Return an uninitialized block of memory that is "num_bytes" bytes
     84   // in size.  The returned pointer is guaranteed to be aligned to a
     85   // multiple of "alignment" bytes.
     86   // REQUIRES: "alignment" is a power of 2.
     87   virtual void* AllocateRaw(size_t alignment, size_t num_bytes) = 0;
     88 
     89   // Return an uninitialized block of memory that is "num_bytes" bytes
     90   // in size with specified allocation attributes.  The returned pointer is
     91   // guaranteed to be aligned to a multiple of "alignment" bytes.
     92   // REQUIRES: "alignment" is a power of 2.
     93   virtual void* AllocateRaw(size_t alignment, size_t num_bytes,
     94                             const AllocationAttributes& allocation_attr) {
     95     // The default behavior is to use the implementation without any allocation
     96     // attributes.
     97     return AllocateRaw(alignment, num_bytes);
     98   }
     99 
    100   // Deallocate a block of memory pointer to by "ptr"
    101   // REQUIRES: "ptr" was previously returned by a call to AllocateRaw
    102   virtual void DeallocateRaw(void* ptr) = 0;
    103 
    104   // Convenience functions to do typed allocation.  C++ constructors
    105   // and destructors are invoked for complex types if necessary,
    106   // depending on the concrete Allocator implementation. May return
    107   // NULL if the tensor has too many elements to represent in a single
    108   // allocation.
    109   template <typename T>
    110   T* Allocate(size_t num_elements) {
    111     return Allocate<T>(num_elements, AllocationAttributes());
    112   }
    113 
    114   template <typename T>
    115   T* Allocate(size_t num_elements,
    116               const AllocationAttributes& allocation_attr) {
    117     // TODO(jeff): Do we need to allow clients to pass in alignment
    118     // requirements?
    119 
    120     if (num_elements > (std::numeric_limits<size_t>::max() / sizeof(T))) {
    121       return NULL;
    122     }
    123 
    124     void* p = AllocateRaw(kAllocatorAlignment, sizeof(T) * num_elements,
    125                           allocation_attr);
    126     T* typed_p = reinterpret_cast<T*>(p);
    127     if (typed_p) RunCtor<T>(typed_p, num_elements);
    128     return typed_p;
    129   }
    130 
    131   template <typename T>
    132   void Deallocate(T* ptr, size_t num_elements) {
    133     if (ptr) {
    134       RunDtor<T>(ptr, num_elements);
    135       DeallocateRaw(ptr);
    136     }
    137   }
    138 
    139   // Returns true if this allocator tracks the sizes of allocations.
    140   // RequestedSize and AllocatedSize must be overridden if
    141   // TracksAllocationSizes is overridden to return true.
    142   virtual bool TracksAllocationSizes() { return false; }
    143 
    144   // Returns true if this allocator requires tensors with 0 elements
    145   // to allocate buffers. This is false for most allocators, but may
    146   // be used by special-case allocators that want to track tensor
    147   // usage.
    148   virtual bool ShouldAllocateEmptyTensors() { return false; }
    149 
    150   // Returns the user-requested size of the data allocated at
    151   // 'ptr'.  Note that the actual buffer allocated might be larger
    152   // than requested, but this function returns the size requested by
    153   // the user.
    154   //
    155   // REQUIRES: TracksAllocationSizes() is true.
    156   //
    157   // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
    158   // allocated by this allocator.
    159   virtual size_t RequestedSize(const void* ptr) {
    160     CHECK(false) << "allocator doesn't track sizes";
    161     return size_t(0);
    162   }
    163 
    164   // Returns the allocated size of the buffer at 'ptr' if known,
    165   // otherwise returns RequestedSize(ptr). AllocatedSize(ptr) is
    166   // guaranteed to be >= RequestedSize(ptr).
    167   //
    168   // REQUIRES: TracksAllocationSizes() is true.
    169   //
    170   // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
    171   // allocated by this allocator.
    172   virtual size_t AllocatedSize(const void* ptr) { return RequestedSize(ptr); }
    173 
    174   // Returns either 0 or an identifier assigned to the buffer at 'ptr'
    175   // when the buffer was returned by AllocateRaw. If non-zero, the
    176   // identifier differs from every other ID assigned by this
    177   // allocator.
    178   //
    179   // REQUIRES: TracksAllocationSizes() is true.
    180   //
    181   // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
    182   // allocated by this allocator.
    183   virtual int64 AllocationId(const void* ptr) { return 0; }
    184 
    185   // Returns the allocated size of the buffer at 'ptr' if known,
    186   // otherwise returns 0. This method can be called when
    187   // TracksAllocationSizes() is false, but can be extremely slow.
    188   //
    189   // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
    190   // allocated by this allocator.
    191   virtual size_t AllocatedSizeSlow(const void* ptr) {
    192     if (TracksAllocationSizes()) {
    193       return AllocatedSize(ptr);
    194     }
    195     return 0;
    196   }
    197 
    198   // Fills in 'stats' with statistics collected by this allocator.
    199   virtual void GetStats(AllocatorStats* stats) { stats->Clear(); }
    200 
    201   // Clears the internal stats except for the `in_use` field.
    202   virtual void ClearStats() {}
    203 
    204  private:
    205   // No constructors or destructors are run for simple types
    206   template <typename T>
    207   void RunCtor(T* p, size_t n) {
    208     static_assert(is_simple_type<T>::value, "T is not a simple type.");
    209   }
    210 
    211   template <typename T>
    212   void RunDtor(T* p, size_t n) {}
    213 
    214   // custom constructors and destructors that can be overridden for
    215   // non-standard allocators
    216 
    217   // Runs string's default constructor for  p[0], p[1], ..., p[n-1].
    218   virtual void RunStringCtor(string* p, size_t n) {
    219     for (size_t i = 0; i < n; ++p, ++i) new (p) string();
    220   }
    221 
    222   // Runs string's default destructor for  p[0], p[1], ..., p[n-1].
    223   virtual void RunStringDtor(string* p, size_t n) {
    224     for (size_t i = 0; i < n; ++p, ++i) p->~string();
    225   }
    226 
    227   virtual void RunResourceCtor(ResourceHandle* p, size_t n) {
    228     for (size_t i = 0; i < n; ++p, ++i) new (p) ResourceHandle();
    229   }
    230 
    231   // Runs string's default destructor for  p[0], p[1], ..., p[n-1].
    232   virtual void RunResourceDtor(ResourceHandle* p, size_t n) {
    233     for (size_t i = 0; i < n; ++p, ++i) p->~ResourceHandle();
    234   }
    235 
    236   virtual void RunVariantCtor(Variant* p, size_t n) {
    237     for (size_t i = 0; i < n; ++p, ++i) new (p) Variant();
    238   }
    239 
    240   virtual void RunVariantDtor(Variant* p, size_t n) {
    241     for (size_t i = 0; i < n; ++p, ++i) p->~Variant();
    242   }
    243 
    244   // TODO(jeff): Maybe provide some interface to give info about
    245   // current allocation state (total number of bytes available for
    246   // allocation, number of bytes free on device, etc.)
    247 };
    248 
    249 // Allocator-specific constructors and destructors are used for
    250 // strings
    251 template <>
    252 inline void Allocator::RunCtor(string* p, size_t n) {
    253   RunStringCtor(p, n);
    254 }
    255 
    256 template <>
    257 inline void Allocator::RunDtor(string* p, size_t n) {
    258   RunStringDtor(p, n);
    259 }
    260 
    261 template <>
    262 inline void Allocator::RunCtor(ResourceHandle* p, size_t n) {
    263   RunResourceCtor(p, n);
    264 }
    265 
    266 template <>
    267 inline void Allocator::RunDtor(ResourceHandle* p, size_t n) {
    268   RunResourceDtor(p, n);
    269 }
    270 
    271 template <>
    272 inline void Allocator::RunCtor(Variant* p, size_t n) {
    273   RunVariantCtor(p, n);
    274 }
    275 
    276 template <>
    277 inline void Allocator::RunDtor(Variant* p, size_t n) {
    278   RunVariantDtor(p, n);
    279 }
    280 
    281 // An implementation of Allocator that delegates all calls to another Allocator.
    282 //
    283 // Useful to clients who want to override part of the functionality of another
    284 // allocator.
    285 class AllocatorWrapper : public Allocator {
    286  public:
    287   explicit AllocatorWrapper(Allocator* wrapped) : wrapped_(wrapped) {}
    288 
    289   ~AllocatorWrapper() override {}
    290 
    291   // Returns the wrapped allocator to which all calls are delegated.
    292   Allocator* wrapped() const { return wrapped_; }
    293 
    294   string Name() override { return wrapped_->Name(); }
    295 
    296   void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    297     return wrapped_->AllocateRaw(alignment, num_bytes);
    298   }
    299 
    300   void* AllocateRaw(size_t alignment, size_t num_bytes,
    301                     const AllocationAttributes& allocation_attr) override {
    302     return wrapped_->AllocateRaw(alignment, num_bytes, allocation_attr);
    303   }
    304 
    305   void DeallocateRaw(void* ptr) override { wrapped_->DeallocateRaw(ptr); }
    306 
    307   bool TracksAllocationSizes() override {
    308     return wrapped_->TracksAllocationSizes();
    309   }
    310 
    311   bool ShouldAllocateEmptyTensors() override {
    312     return wrapped_->TracksAllocationSizes();
    313   }
    314 
    315   size_t RequestedSize(const void* ptr) override {
    316     return wrapped_->RequestedSize(ptr);
    317   }
    318 
    319   size_t AllocatedSize(const void* ptr) override {
    320     return wrapped_->AllocatedSize(ptr);
    321   }
    322 
    323   int64 AllocationId(const void* ptr) override {
    324     return wrapped_->AllocationId(ptr);
    325   }
    326 
    327   size_t AllocatedSizeSlow(const void* ptr) override {
    328     return wrapped_->AllocatedSizeSlow(ptr);
    329   }
    330 
    331  private:
    332   Allocator* const wrapped_;
    333 };
    334 
    335 // A tensorflow Op may need access to different kinds of memory that
    336 // are not simply a function of the device to which the Op has been
    337 // assigned.  For example, an Op executing on a GPU may still need
    338 // to allocate CPU RAM for some purpose.  Internal to the tensorflow
    339 // runtime we may choose to allocate CPU ram from special regions
    340 // that have been prepared for higher performance in some use
    341 // contexts, e.g. doing DMA with particular devices.  For these
    342 // reasons, the Device interface does not expose just one memory
    343 // Allocator, but instead provides an accessor that takes a
    344 // specification of the desired memory attributes in order to select
    345 // an Allocator.
    346 //
    347 // Example use:
    348 //  // Allocator for ordinary device memory:
    349 //  Allocator* a = allocator(AllocatorAttributes());
    350 // ...
    351 //  // Allocator for CPU RAM, regardless of where Op is executing:
    352 //  AllocatorAttributes attr;
    353 //  attr.set_on_host(true);
    354 //  Allocator* a = allocator(attr);
    355 struct AllocatorAttributes {
    356   void set_on_host(bool v) { value |= (static_cast<int>(v)); }
    357   bool on_host() const { return value & 0x1; }
    358   void set_nic_compatible(bool v) { value |= (static_cast<int>(v) << 1); }
    359   bool nic_compatible() const { return value & (0x1 << 1); }
    360   void set_gpu_compatible(bool v) { value |= (static_cast<int>(v) << 2); }
    361   bool gpu_compatible() const { return value & (0x1 << 2); }
    362   void Merge(AllocatorAttributes other) { value |= other.value; }
    363   // Returns true if the fields set in *this is a subset of or equal to
    364   // those set in other.
    365   bool IsEqualOrLessRestrictiveThan(const AllocatorAttributes& other) const {
    366     return (value | other.value) == other.value;
    367   }
    368 
    369   // NOTE: The upper 8 bits of the value are reserved for
    370   // device-specific uses.  Implementors of a device can interpret these
    371   // upper 8 bits in device-specific ways, and ops implemented for those
    372   // devices are responsible for setting those 8 bits appropriately.
    373   uint32 value = 0;
    374 };
    375 
    376 // Returns a trivial implementation of Allocator which uses the system
    377 // default malloc. The returned allocator is a process singleton.
    378 Allocator* cpu_allocator();
    379 
    380 // If 'enable' is true, the process-wide cpu allocator collects
    381 // AllocatorStats. By default, it's disabled.
    382 void EnableCPUAllocatorStats(bool enable);
    383 
    384 // If 'enable' is true, the process-wide cpu allocator collects full
    385 // statistics. By default, it's disabled.
    386 void EnableCPUAllocatorFullStats(bool enable);
    387 
    388 // Abstract interface of an object that does the underlying suballoc/free of
    389 // memory for a higher-level allocator.
    390 class SubAllocator {
    391  public:
    392   virtual ~SubAllocator() {}
    393   virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
    394   virtual void Free(void* ptr, size_t num_bytes) = 0;
    395 };
    396 
    397 }  // namespace tensorflow
    398 
    399 #endif  // TENSORFLOW_FRAMEWORK_ALLOCATOR_H_
    400