Home | History | Annotate | Download | only in cpu
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // This header declares functions which may be called by the generated code on
     17 // the CPU. Calls to these functions must be resolved explicitly in the JIT in
     18 // xla::cpu::SimpleResolver.  It also defines a per-CpuExecutable context
     19 // which is used to cache expensive state and resources utilized by the
     20 // aforementioned functions.
     21 //
     22 // Other functions are declared in individual libraries as well, such as
     23 // runtime_conv2d and runtime_matmul. As individual libraries, callers for
     24 // ahead-of-time compilation can link only the required subset.
     25 
     26 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_H_
     27 #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_H_
     28 
     29 #include "tensorflow/compiler/xla/service/cpu/xfeed_manager.h"
     30 #include "tensorflow/compiler/xla/types.h"
     31 
     32 namespace xla {
     33 namespace cpu {
     34 namespace runtime {
     35 
     36 // Names of runtime functions (declared here, defined elsewhere). These get
     37 // resolved from the generated code to the right symbol at link time in one of
     38 // two ways:
     39 // 1. When using the JIT, the symbol resolver (SimpleResolver in
     40 //    tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc) maps this symbol
     41 //    name to the actual symbol.
     42 // 2. When using ahead-of-time compilation, the linker can resolve the name
     43 //    because it is a symbol in the cpu_runtime library.
     44 extern const char* const kEigenMatMulF32SymbolName;
     45 extern const char* const kEigenMatMulF64SymbolName;
     46 extern const char* const kEigenConvF16SymbolName;
     47 extern const char* const kEigenConvF32SymbolName;
     48 extern const char* const kEigenFftSymbolName;
     49 extern const char* const kEigenSingleThreadedMatMulF32SymbolName;
     50 extern const char* const kEigenSingleThreadedMatMulF64SymbolName;
     51 extern const char* const kEigenSingleThreadedConvF16SymbolName;
     52 extern const char* const kEigenSingleThreadedConvF32SymbolName;
     53 extern const char* const kAcquireInfeedBufferForDequeueSymbolName;
     54 extern const char* const kReleaseInfeedBufferAfterDequeueSymbolName;
     55 extern const char* const kAcquireOutfeedBufferForPopulationSymbolName;
     56 extern const char* const kReleaseOutfeedBufferAfterPopulationSymbolName;
     57 extern const char* const kParallelForkJoinSymbolName;
     58 
     59 // All symbol names for XLA CPU runtime functions need to start with this
     60 // prefix (defined elsewhere; presumably the "__xla_cpu_runtime_" seen below).
     61 extern const char* const kXlaCpuRuntimeSymbolNamePrefix;
     62 
     63 // Returns the XfeedManager used by the CPU runtime (see xfeed_manager.h).
     64 XfeedManager* GetXfeedManager();
     65 
     66 }  // namespace runtime
     67 }  // namespace cpu
     68 }  // namespace xla
     69 
     70 extern "C" {
     71 
     72 // Note: in the runtime entry points below, the shape pointer and shape_length
     73 // reflect values that can be deserialized via
     74 // llvm_ir::DecodeSelfDescribingShapeConstant. This is the way we pass reified
     75 // type information from the generated program to the runtime, which helps check
     76 // the type safety and contract for the emitted-code/runtime communication.
     77 
     78 // Blocks until the next infeed buffer is ready to be dequeued, then
     79 // returns it. Fails catastrophically if the next enqueued buffer is
     80 // not of the correct length in bytes. Checking the shape rather than
     81 // the length would be more exact, but the length check is chosen as a
     82 // tradeoff between error checking and speed/simplicity. `shape` is the shape pointer described in the note above.
     83 extern void* __xla_cpu_runtime_AcquireInfeedBufferForDequeue(
     84     xla::int32 buffer_length, const void* shape, xla::int32 shape_length);
     85 
     86 // Relinquishes the infeed buffer that was returned by
     87 // __xla_cpu_runtime_AcquireInfeedBufferForDequeue. Once this call completes,
     88 // the data at buffer_ptr may no longer be accessed. buffer_length must match
     89 // the length passed to the call to
     90 // __xla_cpu_runtime_AcquireInfeedBufferForDequeue that returned buffer_ptr.
     91 // This function must be called before the next buffer is acquired, i.e.,
     92 // there may only be one outstanding infeed buffer in use by the runtime.
     93 //
     94 // TODO(b/31340454) investigate whether or not it is worth supporting
     95 // zero-copy infeed where the buffer is retained by the compiled code until it
     96 // has been used. If zero-copy infeed is implemented we will add support for
     97 // multiple outstanding buffers that can be returned out of order.
     98 extern void __xla_cpu_runtime_ReleaseInfeedBufferAfterDequeue(
     99     xla::int32 buffer_length, void* buffer_ptr, const void* shape_ptr,
    100     xla::int32 shape_length);
    101 
    102 // Blocks until the next outfeed buffer is available to be populated, then
    103 // returns it. shape_ptr/shape_length are the shape arguments described above.
    104 extern void* __xla_cpu_runtime_AcquireOutfeedBufferForPopulation(
    105     xla::int32 buffer_length, const void* shape_ptr, xla::int32 shape_length);
    106 
    107 // Relinquishes the outfeed buffer after it has been populated.
    108 // buffer_ptr must have been previously returned by
    109 // __xla_cpu_runtime_AcquireOutfeedBufferForPopulation.
    110 // Once this call completes, buffer_ptr may no longer be accessed.
    111 // buffer_length must match the length passed to the call to
    112 // __xla_cpu_runtime_AcquireOutfeedBufferForPopulation that returned
    113 // buffer_ptr. This function must be called before the next buffer is
    114 // acquired, i.e., there may only be one outstanding outfeed buffer in
    115 // use by the runtime.
    116 extern void __xla_cpu_runtime_ReleaseOutfeedBufferAfterPopulation(
    117     xla::int32 buffer_length, void* buffer_ptr, const void* shape_ptr,
    118     xla::int32 shape_length);
    119 
    120 }  // extern "C"
    121 
    122 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_CPU_RUNTIME_H_
    123