Home | History | Annotate | Download | only in llvm_ir
      1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_
     17 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_
     18 
#include <memory>
#include <string>
#include <vector>

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"
#include "tensorflow/compiler/xla/service/llvm_ir/ir_array.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"
     33 
     34 namespace xla {
     35 namespace llvm_ir {
     36 
     37 // A class for constructing a for-loop in LLVM IR.
     38 class ForLoop {
     39  public:
     40   // Emit a for-loop at the current insert point of the given IRBuilder.
     41   //
     42   // start_index and end_index are the loop bounds (end_index is not inclusive).
     43   // `step` is the increment of the loop index after each iteration.
     44   //
     45   // The current insert basic block of the builder is the preheader to the loop
     46   // (see below for definition of basic block names). All instructions (if any)
     47   // at or after the insert point in the insert basic block are moved to a newly
     48   // created exit basic block. Instructions before the insert point remain in
     49   // the insert BB:
     50   //
     51   //                   +--------------+         +----------------+
     52   //                   |  insert BB   |         |   insert BB    |
     53   //                   |     ...      |         | (preheader BB) |
     54   //                   | %foo = ...   |         |      ...       |
     55   //    insert point ->| %bar = ...   |  ===>   | %foo = ...     |
     56   //                   |     ...      |         +----------------+
     57   //                   +--------------+                 |
     58   //                                                    V
     59   //                                              [[ LOOP BBs ]]
     60   //                                                    |
     61   //                                                    V
     62   //                                             +--------------+
     63   //                                             |   exit BB    |
     64   //                                             | %bar = ...   |
     65   //                                             |     ...      |
     66   //                                             +--------------+
     67   //
     68   // `prefix` is used to disambiguate variable and basic block names emitted in
     69   // LLVM IR. If non-empty, it is prepended to the name of the induction
     70   // variable value and each basic block created for the loop.
     71   //
     72   // If `prevent_unrolling` is true then emit metadata that directs LLVM to not
     73   // unroll the generated loop.
     74   static std::unique_ptr<ForLoop> EmitForLoop(
     75       tensorflow::StringPiece prefix, llvm::Value* start_index,
     76       llvm::Value* end_index, llvm::Value* step, llvm::IRBuilder<>* ir_builder,
     77       bool prevent_unrolling = false, bool prevent_vectorization = false);
     78 
     79   // The names of the blocks follow LLVM's conventions. Control flow amongst the
     80   // blocks for the example C code looks like:
     81   //
     82   //   for (int i = 0; i < n; ++i) {
     83   //     do_stuff(i);
     84   //   }
     85   //
     86   //      +--------------+
     87   //      | preheader BB |
     88   //      |     i = 0    |
     89   //      +--------------+
     90   //              |
     91   //              V
     92   //      +-------------+
     93   //      |  header BB  |<-+
     94   //      | if i < n:   |  |
     95   //      |   goto body |  |
     96   //      | else:       |  |
     97   //      |   goto exit |  |
     98   //      +-------------+  |
     99   //            | |        |
    100   //   +--------+ |        |
    101   //   |          V        |
    102   //   |  +-------------+  |
    103   //   |  |   body BB   |  |
    104   //   |  | dostuff(i)  |--+
    105   //   |  | ++i         |
    106   //   |  +-------------+
    107   //   |
    108   //   |  +-------------+
    109   //   +->|   exit BB   |
    110   //      +-------------+
    111   //
    112   // Caller-emitted code to execute within the loop should be placed within the
    113   // "body" basic block.
    114   //
    115   // Return pointers to various blocks in the loop.
    116   llvm::BasicBlock* GetPreheaderBasicBlock() const { return preheader_bb_; }
    117   llvm::BasicBlock* GetHeaderBasicBlock() const { return header_bb_; }
    118   llvm::BasicBlock* GetBodyBasicBlock() const { return body_bb_; }
    119   llvm::BasicBlock* GetExitBasicBlock() const { return exit_bb_; }
    120 
    121   // Return the Value representing the induction variable in the body basic
    122   // block of the loop.
    123   llvm::Value* GetIndVarValue() const { return indvar_; }
    124 
    125  private:
    126   // Allow ForLoopNest to call this private constructor.
    127   friend class ForLoopNest;
    128 
    129   ForLoop(tensorflow::StringPiece prefix, tensorflow::StringPiece suffix,
    130           llvm::Value* start_index, llvm::Value* end_index, llvm::Value* step,
    131           bool prevent_unrolling, bool prevent_vectorization);
    132 
    133   // Emit the loop at the insert point of the builder.
    134   void Emit(llvm::IRBuilder<>* ir_builder);
    135 
    136   llvm::BasicBlock* CreateLoopBB(tensorflow::StringPiece name,
    137                                  llvm::IRBuilder<>* ir_builder);
    138 
    139   // Creates a name for an LLVM construct, appending prefix_ and suffix_, if
    140   // they are set.
    141   string GetQualifiedName(tensorflow::StringPiece name);
    142 
    143   // Return a list of metadata nodes that should be associated with the
    144   // llvm::Loop for this `ForLoop`.
    145   std::vector<llvm::Metadata*> GetLoopMetadata(llvm::IRBuilder<>* ir_builder);
    146 
    147   string prefix_;
    148   string suffix_;
    149   llvm::Value* start_index_;
    150   llvm::Value* end_index_;
    151   llvm::Value* step_;
    152 
    153   // To improve readability of the IR, we want the basic blocks to appear
    154   // consecutively in the following order: preheader, header, body, loop,
    155   // exit. The member insert_before_bb_ points to where the next basic block
    156   // should be created to ensure this ordering.
    157   llvm::BasicBlock* insert_before_bb_;
    158 
    159   llvm::BasicBlock* preheader_bb_;
    160   llvm::BasicBlock* header_bb_;
    161   llvm::BasicBlock* body_bb_;
    162   llvm::BasicBlock* exit_bb_;
    163   llvm::Value* indvar_;
    164   bool prevent_unrolling_;
    165   bool prevent_vectorization_;
    166 
    167   TF_DISALLOW_COPY_AND_ASSIGN(ForLoop);
    168 };
    169 
    170 // A simple class for constructing nested for-loops.
    171 class ForLoopNest {
    172  public:
    173   explicit ForLoopNest(llvm::IRBuilder<>* ir_builder)
    174       : ForLoopNest(/*name=*/"", ir_builder) {}
    175 
    176   ForLoopNest(tensorflow::StringPiece name, llvm::IRBuilder<>* ir_builder)
    177       : name_(name.ToString()),
    178         outer_loop_preheader_bb_(nullptr),
    179         outer_loop_exit_bb_(nullptr),
    180         inner_loop_body_bb_(nullptr),
    181         ir_builder_(ir_builder) {}
    182 
    183   // Adds a loop to the nest. If no loop has been added yet then emit a loop at
    184   // the current insert point of the given builder. If one or more loops have
    185   // been added then emit loop inside the body of the last added loop.  If
    186   // prevent_unrolling is true, then metadata is emitting directing LLVM to not
    187   // unroll this loop.
    188   std::unique_ptr<ForLoop> AddLoop(tensorflow::StringPiece suffix,
    189                                    llvm::Value* start_index,
    190                                    llvm::Value* end_index, llvm::Value* stride,
    191                                    bool prevent_unrolling = false,
    192                                    bool prevent_vectorization = false);
    193 
    194   // Like the above, except that it defaults to a stride of one.
    195   std::unique_ptr<ForLoop> AddLoop(tensorflow::StringPiece suffix,
    196                                    llvm::Value* start_index,
    197                                    llvm::Value* end_index,
    198                                    bool prevent_unrolling = false,
    199                                    bool prevent_vectorization = false);
    200 
    201   // A convenient wrapper of the other flavor of AddLoop. The given start and
    202   // end index are constant.
    203   std::unique_ptr<ForLoop> AddLoop(int64 start_index, int64 end_index,
    204                                    int64 stride, tensorflow::StringPiece suffix,
    205                                    bool prevent_unrolling = false,
    206                                    bool prevent_vectorization = false);
    207 
    208   // Like the above, except that it defaults to a stride of one.
    209   std::unique_ptr<ForLoop> AddLoop(int64 start_index, int64 end_index,
    210                                    tensorflow::StringPiece suffix,
    211                                    bool prevent_unrolling = false,
    212                                    bool prevent_vectorization = false);
    213 
    214   // Add loops to iterate through the indices within the specified
    215   // shape. The returned index collects the induction variables of the
    216   // loops so that it will iterate through all coordinates within the
    217   // specified shape.
    218   //
    219   // E.g. if you pass in a 2x3 shape, you will get back an index with
    220   // two entries that are induction variables of the two loops that
    221   // will be added. That index will iterate through the 6 coordinates
    222   // within the shape. One possible order for that sequence would be:
    223   //
    224   //   (0,0), (0,1), (0,2), (1,0), (1,1), (1,2)
    225   IrArray::Index AddLoopsForShape(const Shape& shape,
    226                                   tensorflow::StringPiece suffix);
    227 
    228   // Add a loop for each dimension in "dimensions". "suffix" is the
    229   // name suffix of the indvar and basic blocks in this new loop nest.
    230   //
    231   // The return value is an index with the induction variables. The
    232   // size equals the rank of shape and there is a null for each
    233   // dimension that is not in "dimensions".
    234   IrArray::Index AddLoopsForShapeOnDimensions(
    235       const Shape& shape, tensorflow::gtl::ArraySlice<int64> dimensions,
    236       tensorflow::StringPiece suffix);
    237 
    238   // Convenience methods which return particular basic blocks of the outermost
    239   // or innermost loops. These methods return nullptr if no loops have been
    240   // added yet.
    241   llvm::BasicBlock* GetOuterLoopPreheaderBasicBlock() {
    242     return outer_loop_preheader_bb_;
    243   }
    244   llvm::BasicBlock* GetOuterLoopExitBasicBlock() { return outer_loop_exit_bb_; }
    245   llvm::BasicBlock* GetInnerLoopBodyBasicBlock() { return inner_loop_body_bb_; }
    246 
    247  private:
    248   // Human-friendly name of the loop nest.
    249   string name_;
    250 
    251   // The preheader and exit basic block of the outermost loop, or nullptr if no
    252   // loop has been added yet.
    253   llvm::BasicBlock* outer_loop_preheader_bb_;
    254   llvm::BasicBlock* outer_loop_exit_bb_;
    255 
    256   // The body basic block of the most-recently added loop, or nullptr if no loop
    257   // has been added yet.
    258   llvm::BasicBlock* inner_loop_body_bb_;
    259 
    260   llvm::IRBuilder<>* ir_builder_;
    261 
    262   TF_DISALLOW_COPY_AND_ASSIGN(ForLoopNest);
    263 };
    264 
    265 }  // namespace llvm_ir
    266 }  // namespace xla
    267 
    268 #endif  // TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_LLVM_LOOP_H_
    269