//===- subzero/src/IceTargetLowering.h - Lowering interface -----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLowering, LoweringContext, and TargetDataLowering
/// classes.
///
/// TargetLowering is an abstract class used to drive the translation/lowering
/// process. LoweringContext maintains a context for lowering each instruction,
/// offering conveniences such as iterating over non-deleted instructions.
/// TargetDataLowering is an abstract class used to drive the lowering/emission
/// of global initializers, external global declarations, and internal constant
/// pools.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERING_H
#define SUBZERO_SRC_ICETARGETLOWERING_H

#include "IceBitVector.h"
#include "IceCfgNode.h"
#include "IceDefs.h"
#include "IceInst.h" // for the names of the Inst subtypes
#include "IceOperand.h"
#include "IceRegAlloc.h"
#include "IceTypes.h"

#include <utility>

namespace Ice {

// UnimplementedError is defined as a macro so that we can get actual line
// numbers.
#define UnimplementedError(Flags)                                              \
  do {                                                                         \
    if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) {         \
      /* Use llvm_unreachable instead of report_fatal_error, which gives       \
         better stack traces. */                                               \
      llvm_unreachable("Not yet implemented");                                 \
      abort();                                                                 \
    }                                                                          \
  } while (0)

// UnimplementedLoweringError is similar in style to UnimplementedError. Given
// a TargetLowering object pointer and an Inst pointer, it adds appropriate
// FakeDef and FakeUse instructions to try to maintain liveness consistency.
#define UnimplementedLoweringError(Target, Instr)                              \
  do {                                                                         \
    if (getFlags().getSkipUnimplemented()) {                                   \
      (Target)->addFakeDefUses(Instr);                                         \
    } else {                                                                   \
      /* Use llvm_unreachable instead of report_fatal_error, which gives       \
         better stack traces. */                                               \
      llvm_unreachable(                                                        \
          (std::string("Not yet implemented: ") + Instr->getInstName())        \
              .c_str());                                                       \
      abort();                                                                 \
    }                                                                          \
  } while (0)
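// Illustrative use from a hypothetical target's lowering method (a sketch, not
// code from this file). When unimplemented lowerings are being skipped, the
// macro calls (Target)->addFakeDefUses(Instr) to keep liveness bookkeeping
// consistent; otherwise it aborts:
//
//   void TargetFoo::lowerShuffleVector(const InstShuffleVector *Instr) {
//     UnimplementedLoweringError(this, Instr);
//   }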

/// LoweringContext makes it easy to iterate through non-deleted instructions in
/// a node, and insert new (lowered) instructions at the current point. Along
/// with the instruction list container and associated iterators, it holds the
/// current node, which is needed when inserting new instructions in order to
/// track whether variables are used as single-block or multi-block.
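///
/// A minimal usage sketch (hypothetical; not code from this file):
///
///  Context.init(Node);
///  while (!Context.atEnd()) {
///    Inst *Instr = iteratorToInst(Context.getCur());
///    // Lower Instr here; lowered instructions are inserted at the current
///    // point (before Next) via Context.insert<...>(...).
///    Context.advanceCur();
///    Context.advanceNext();
///  }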
class LoweringContext {
  LoweringContext(const LoweringContext &) = delete;
  LoweringContext &operator=(const LoweringContext &) = delete;

public:
  LoweringContext() = default;
  ~LoweringContext() = default;
  void init(CfgNode *Node);
  Inst *getNextInst() const {
    if (Next == End)
      return nullptr;
    return iteratorToInst(Next);
  }
  Inst *getNextInst(InstList::iterator &Iter) const {
    advanceForward(Iter);
    if (Iter == End)
      return nullptr;
    return iteratorToInst(Iter);
  }
  CfgNode *getNode() const { return Node; }
  bool atEnd() const { return Cur == End; }
  InstList::iterator getCur() const { return Cur; }
  InstList::iterator getNext() const { return Next; }
  InstList::iterator getEnd() const { return End; }
  void insert(Inst *Instr);
  template <typename Inst, typename... Args> Inst *insert(Args &&... A) {
    auto *New = Inst::create(Node->getCfg(), std::forward<Args>(A)...);
    insert(New);
    return New;
  }
  Inst *getLastInserted() const;
  void advanceCur() { Cur = Next; }
  void advanceNext() { advanceForward(Next); }
  void setCur(InstList::iterator C) { Cur = C; }
  void setNext(InstList::iterator N) { Next = N; }
  void rewind();
  void setInsertPoint(const InstList::iterator &Position) { Next = Position; }
  void availabilityReset();
  void availabilityUpdate();
  Variable *availabilityGet(Operand *Src) const;

private:
  /// Node is the argument to Inst::updateVars().
  CfgNode *Node = nullptr;
  Inst *LastInserted = nullptr;
  /// Cur points to the current instruction being considered. It is guaranteed
  /// to point to a non-deleted instruction, or to be End.
  InstList::iterator Cur;
  /// Next doubles as a pointer to the next valid instruction (if any), and the
  /// new-instruction insertion point. It is also updated for the caller in case
  /// the lowering consumes more than one high-level instruction. It is
  /// guaranteed to point to a non-deleted instruction after Cur, or to be End.
  // TODO: Consider separating the notion of "next valid instruction" and "new
  // instruction insertion point", to avoid confusion when previously-deleted
  // instructions come between the two points.
  InstList::iterator Next;
  /// Begin is a copy of Insts.begin(), used if iterators are moved backward.
  InstList::iterator Begin;
  /// End is a copy of Insts.end(), used if Next needs to be advanced.
  InstList::iterator End;
  /// LastDest and LastSrc capture the parameters of the last "Dest=Src" simple
  /// assignment inserted (provided Src is a variable). This is used for simple
  /// availability analysis.
  Variable *LastDest = nullptr;
  Variable *LastSrc = nullptr;

  void skipDeleted(InstList::iterator &I) const;
  void advanceForward(InstList::iterator &I) const;
};

/// A helper class to advance the LoweringContext at each loop iteration.
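/// For example (a sketch), the explicit advanceCur()/advanceNext() calls in a
/// per-instruction loop can be replaced with a scoped helper, which advances
/// the context even when the loop body exits early:
///
///  while (!Context.atEnd()) {
///    PostIncrLoweringContext PostIncrement(Context);
///    Inst *Instr = iteratorToInst(Context.getCur());
///    if (Instr->isDeleted())
///      continue; // Context still advances when PostIncrement is destroyed.
///    // ... process Instr ...
///  }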
class PostIncrLoweringContext {
  PostIncrLoweringContext() = delete;
  PostIncrLoweringContext(const PostIncrLoweringContext &) = delete;
  PostIncrLoweringContext &operator=(const PostIncrLoweringContext &) = delete;

public:
  explicit PostIncrLoweringContext(LoweringContext &Context)
      : Context(Context) {}
  ~PostIncrLoweringContext() {
    Context.advanceCur();
    Context.advanceNext();
  }

private:
  LoweringContext &Context;
};

/// TargetLowering is the base class for all backends in Subzero. In addition to
/// implementing the abstract methods in this class, each concrete target must
/// also implement a named constructor in its own namespace. For instance, for
/// X8632 we have:
///
///  namespace X8632 {
///    void createTargetLowering(Cfg *Func);
///  }
class TargetLowering {
  TargetLowering() = delete;
  TargetLowering(const TargetLowering &) = delete;
  TargetLowering &operator=(const TargetLowering &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);
  // Each target must define a public static method:
  //   static void staticInit(GlobalContext *Ctx);
  static bool shouldBePooled(const class Constant *C);
  static Type getPointerType();

  static std::unique_ptr<TargetLowering> createLowering(TargetArch Target,
                                                        Cfg *Func);

  virtual std::unique_ptr<Assembler> createAssembler() const = 0;

  void translate() {
    switch (Func->getOptLevel()) {
    case Opt_m1:
      translateOm1();
      break;
    case Opt_0:
      translateO0();
      break;
    case Opt_1:
      translateO1();
      break;
    case Opt_2:
      translateO2();
      break;
    }
  }
  virtual void translateOm1() {
    Func->setError("Target doesn't specify Om1 lowering steps.");
  }
  virtual void translateO0() {
    Func->setError("Target doesn't specify O0 lowering steps.");
  }
  virtual void translateO1() {
    Func->setError("Target doesn't specify O1 lowering steps.");
  }
  virtual void translateO2() {
    Func->setError("Target doesn't specify O2 lowering steps.");
  }

  /// Generates calls to intrinsics for operations the Target can't handle.
  void genTargetHelperCalls();
  /// Tries to do address mode optimization on a single instruction.
  void doAddressOpt();
  /// Randomly insert NOPs.
  void doNopInsertion(RandomNumberGenerator &RNG);
  /// Lowers a single non-Phi instruction.
  void lower();
  /// Inserts and lowers a single high-level instruction at a specific insertion
  /// point.
  void lowerInst(CfgNode *Node, InstList::iterator Next, InstHighLevel *Instr);
  /// Does preliminary lowering of the set of Phi instructions in the current
  /// node. The main intention is to do what's needed to keep the unlowered Phi
  /// instructions consistent with the lowered non-Phi instructions, e.g. to
  /// lower 64-bit operands on a 32-bit target.
  virtual void prelowerPhis() {}
  /// Tries to do branch optimization on a single instruction. Returns true if
  /// some optimization was done.
  virtual bool doBranchOpt(Inst * /*I*/, const CfgNode * /*NextNode*/) {
    return false;
  }

  virtual SizeT getNumRegisters() const = 0;
  /// Returns a variable pre-colored to the specified physical register. This is
  /// generally used to get very direct access to the register such as in the
  /// prolog or epilog or for marking scratch registers as killed by a call. If
  /// a Type is not provided, a target-specific default type is used.
  virtual Variable *getPhysicalRegister(RegNumT RegNum,
                                        Type Ty = IceType_void) = 0;
  /// Returns a printable name for the register.
  virtual const char *getRegName(RegNumT RegNum, Type Ty) const = 0;

  virtual bool hasFramePointer() const { return false; }
  virtual void setHasFramePointer() = 0;
  virtual RegNumT getStackReg() const = 0;
  virtual RegNumT getFrameReg() const = 0;
  virtual RegNumT getFrameOrStackReg() const = 0;
  virtual size_t typeWidthInBytesOnStack(Type Ty) const = 0;
  virtual uint32_t getStackAlignment() const = 0;
  virtual bool needsStackPointerAlignment() const { return false; }
  virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
  virtual int32_t getFrameFixedAllocaOffset() const = 0;
  virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
  // Addressing relative to frame pointer differs in MIPS compared to X86/ARM
  // since MIPS decrements its stack pointer prior to saving it in the frame
  // pointer register.
  virtual uint32_t getFramePointerOffset(uint32_t CurrentOffset,
                                         uint32_t Size) const {
    return -(CurrentOffset + Size);
  }
  /// Return whether a 64-bit Variable should be split into a Variable64On32.
  virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;

  /// Return whether a Vector Variable should be split into a VariableVecOn32.
  virtual bool shouldSplitToVariableVecOn32(Type Ty) const {
    (void)Ty;
    return false;
  }

  bool hasComputedFrame() const { return HasComputedFrame; }
  /// Returns true if this function calls a function that has the "returns
  /// twice" attribute.
  bool callsReturnsTwice() const { return CallsReturnsTwice; }
  void setCallsReturnsTwice(bool RetTwice) { CallsReturnsTwice = RetTwice; }
  SizeT makeNextLabelNumber() { return NextLabelNumber++; }
  SizeT makeNextJumpTableNumber() { return NextJumpTableNumber++; }
  LoweringContext &getContext() { return Context; }
  Cfg *getFunc() const { return Func; }
  GlobalContext *getGlobalContext() const { return Ctx; }

  enum RegSet {
    RegSet_None = 0,
    RegSet_CallerSave = 1 << 0,
    RegSet_CalleeSave = 1 << 1,
    RegSet_StackPointer = 1 << 2,
    RegSet_FramePointer = 1 << 3,
    RegSet_All = ~RegSet_None
  };
  using RegSetMask = uint32_t;

  virtual SmallBitVector getRegisterSet(RegSetMask Include,
                                        RegSetMask Exclude) const = 0;
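  // For instance (illustrative only), a query for the full caller-save set
  // could look like:
  //   SmallBitVector CallerSave =
  //       getRegisterSet(RegSet_CallerSave, RegSet_None);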
  /// Get the set of physical registers available for the specified Variable's
  /// register class, applying register restrictions from the command line.
  virtual const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const = 0;
  /// Get the set of *all* physical registers available for the specified
  /// Variable's register class, *not* applying register restrictions from the
  /// command line.
  virtual const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const = 0;
  virtual const SmallBitVector &getAliasesForRegister(RegNumT) const = 0;

  void regAlloc(RegAllocKind Kind);
  void postRegallocSplitting(const SmallBitVector &RegMask);

  virtual void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const = 0;

  /// Get the minimum number of clusters required for a jump table to be
  /// considered.
  virtual SizeT getMinJumpTableSize() const = 0;
  virtual void emitJumpTable(const Cfg *Func,
                             const InstJumpTable *JumpTable) const = 0;

  virtual void emitVariable(const Variable *Var) const = 0;

  void emitWithoutPrefix(const ConstantRelocatable *CR,
                         const char *Suffix = "") const;

  virtual void emit(const ConstantInteger32 *C) const = 0;
  virtual void emit(const ConstantInteger64 *C) const = 0;
  virtual void emit(const ConstantFloat *C) const = 0;
  virtual void emit(const ConstantDouble *C) const = 0;
  virtual void emit(const ConstantUndef *C) const = 0;
  virtual void emit(const ConstantRelocatable *CR) const = 0;

  /// Performs target-specific argument lowering.
  virtual void lowerArguments() = 0;

  virtual void initNodeForLowering(CfgNode *) {}
  virtual void addProlog(CfgNode *Node) = 0;
  virtual void addEpilog(CfgNode *Node) = 0;

  /// Create a properly-typed "mov" instruction. This is primarily for local
  /// variable splitting.
  virtual Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) {
    // TODO(stichnot): make pure virtual by implementing for all targets
    (void)Dest;
    (void)SrcVar;
    llvm::report_fatal_error("createLoweredMove() unimplemented");
    return nullptr;
  }

  virtual ~TargetLowering() = default;

private:
  // This control variable is used by AutoBundle (RAII-style bundle
  // locking/unlocking) to prevent nested bundles.
  bool AutoBundling = false;

  /// This indicates whether we are in the genTargetHelperCalls phase, and
  /// therefore can do things like scalarization.
  bool GeneratingTargetHelpers = false;

  // _bundle_lock() and _bundle_unlock() were made private to force subtargets
  // to use the AutoBundle helper.
  void
  _bundle_lock(InstBundleLock::Option BundleOption = InstBundleLock::Opt_None) {
    Context.insert<InstBundleLock>(BundleOption);
  }
  void _bundle_unlock() { Context.insert<InstBundleUnlock>(); }

protected:
  /// AutoBundle provides RAII-style bundling. Sub-targets are expected to use
  /// it when emitting NaCl bundles, to ensure proper bundle unlocking and to
  /// prevent nested bundles.
  ///
  /// AutoBundle objects will emit a _bundle_lock during construction (but only
  /// if sandboxed code generation was requested), and a bundle_unlock() during
  /// destruction. By carefully scoping objects of this type, Subtargets can
  /// ensure proper bundle emission.
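  ///
  /// A sketch of the intended scoping (illustrative; emitCallSequence() is a
  /// hypothetical helper):
  ///
  ///  {
  ///    AutoBundle Bundler(this, InstBundleLock::Opt_None);
  ///    emitCallSequence();
  ///  } // the matching bundle_unlock is inserted here when sandboxing is on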
  class AutoBundle {
    AutoBundle() = delete;
    AutoBundle(const AutoBundle &) = delete;
    AutoBundle &operator=(const AutoBundle &) = delete;

  public:
    explicit AutoBundle(TargetLowering *Target, InstBundleLock::Option Option =
                                                    InstBundleLock::Opt_None);
    ~AutoBundle();

  private:
    TargetLowering *const Target;
    const bool NeedSandboxing;
  };

  explicit TargetLowering(Cfg *Func);
  // Applies command line filters to TypeToRegisterSet array.
  static void filterTypeToRegisterSet(
      GlobalContext *Ctx, int32_t NumRegs, SmallBitVector TypeToRegisterSet[],
      size_t TypeToRegisterSetSize,
      std::function<std::string(RegNumT)> getRegName,
      std::function<const char *(RegClass)> getRegClassName);
  virtual void lowerAlloca(const InstAlloca *Instr) = 0;
  virtual void lowerArithmetic(const InstArithmetic *Instr) = 0;
  virtual void lowerAssign(const InstAssign *Instr) = 0;
  virtual void lowerBr(const InstBr *Instr) = 0;
  virtual void lowerBreakpoint(const InstBreakpoint *Instr) = 0;
  virtual void lowerCall(const InstCall *Instr) = 0;
  virtual void lowerCast(const InstCast *Instr) = 0;
  virtual void lowerFcmp(const InstFcmp *Instr) = 0;
  virtual void lowerExtractElement(const InstExtractElement *Instr) = 0;
  virtual void lowerIcmp(const InstIcmp *Instr) = 0;
  virtual void lowerInsertElement(const InstInsertElement *Instr) = 0;
  virtual void lowerIntrinsicCall(const InstIntrinsicCall *Instr) = 0;
  virtual void lowerLoad(const InstLoad *Instr) = 0;
  virtual void lowerPhi(const InstPhi *Instr) = 0;
  virtual void lowerRet(const InstRet *Instr) = 0;
  virtual void lowerSelect(const InstSelect *Instr) = 0;
  virtual void lowerShuffleVector(const InstShuffleVector *Instr) = 0;
  virtual void lowerStore(const InstStore *Instr) = 0;
  virtual void lowerSwitch(const InstSwitch *Instr) = 0;
  virtual void lowerUnreachable(const InstUnreachable *Instr) = 0;
  virtual void lowerOther(const Inst *Instr);

  virtual void genTargetHelperCallFor(Inst *Instr) = 0;
  virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;

  /// Opportunity to modify other instructions to help Address Optimization
  virtual void doAddressOptOther() {}
  virtual void doAddressOptLoad() {}
  virtual void doAddressOptStore() {}
  virtual void doAddressOptLoadSubVector() {}
  virtual void doAddressOptStoreSubVector() {}
  virtual void doMockBoundsCheck(Operand *) {}
  virtual void randomlyInsertNop(float Probability,
                                 RandomNumberGenerator &RNG) = 0;
  /// This gives the target an opportunity to post-process the lowered expansion
  /// before returning.
  virtual void postLower() {}

  /// When the SkipUnimplemented flag is set, addFakeDefUses() gets invoked by
  /// the UnimplementedLoweringError macro to insert fake uses of all the
  /// instruction variables and a fake def of the instruction dest, in order to
  /// preserve integrity of liveness analysis.
  void addFakeDefUses(const Inst *Instr);

  /// Find (non-SSA) instructions where the Dest variable appears in some source
  /// operand, and set the IsDestRedefined flag. This keeps liveness analysis
  /// consistent.
  void markRedefinitions();

  /// Make a pass over the Cfg to determine which variables need stack slots and
  /// place them in a sorted list (SortedSpilledVariables). Among those vars,
  /// classify the spill variables as local to the basic block vs global
  /// (multi-block) in order to compute the parameters GlobalsSize and
  /// SpillAreaSizeBytes (represents locals or general vars if the coalescing of
  /// locals is disallowed) along with alignments required for variables in each
  /// area. We rely on accurate VMetadata in order to classify a variable as
  /// global vs local (otherwise the variable is conservatively global). The
  /// in-args should be initialized to 0.
  ///
  /// This is only a pre-pass and the actual stack slot assignment is handled
  /// separately.
  ///
  /// There may be target-specific Variable types, which will be handled by
  /// TargetVarHook. If the TargetVarHook returns true, then the variable is
  /// skipped and not considered with the rest of the spilled variables.
  void getVarStackSlotParams(VarList &SortedSpilledVariables,
                             SmallBitVector &RegsUsed, size_t *GlobalsSize,
                             size_t *SpillAreaSizeBytes,
                             uint32_t *SpillAreaAlignmentBytes,
                             uint32_t *LocalsSlotsAlignmentBytes,
                             std::function<bool(Variable *)> TargetVarHook);
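  // A hypothetical call with a trivial TargetVarHook (a sketch; real targets
  // use the hook to skip variables they handle specially):
  //   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
  //                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
  //                         &LocalsSlotsAlignmentBytes,
  //                         [](Variable *) { return false; });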

  /// Calculate the amount of padding needed to align the local and global areas
  /// to the required alignment. This assumes the globals/locals layout used by
  /// getVarStackSlotParams and assignVarStackSlots.
  void alignStackSpillAreas(uint32_t SpillAreaStartOffset,
                            uint32_t SpillAreaAlignmentBytes,
                            size_t GlobalsSize,
                            uint32_t LocalsSlotsAlignmentBytes,
                            uint32_t *SpillAreaPaddingBytes,
                            uint32_t *LocalsSlotsPaddingBytes);

  /// Make a pass through the SortedSpilledVariables and actually assign stack
  /// slots. SpillAreaPaddingBytes takes into account stack alignment padding.
  /// The SpillArea starts after that amount of padding. This matches the scheme
  /// in getVarStackSlotParams, where there may be a separate multi-block global
  /// var spill area and a local var spill area.
  void assignVarStackSlots(VarList &SortedSpilledVariables,
                           size_t SpillAreaPaddingBytes,
                           size_t SpillAreaSizeBytes,
                           size_t GlobalsAndSubsequentPaddingSize,
                           bool UsesFramePointer);

  /// Sort the variables in Source based on required alignment. The variables
  /// with the largest alignment need are placed in the front of the Dest list.
  void sortVarsByAlignment(VarList &Dest, const VarList &Source) const;

  InstCall *makeHelperCall(RuntimeHelper FuncID, Variable *Dest, SizeT MaxSrcs);

  void _set_dest_redefined() { Context.getLastInserted()->setDestRedefined(); }

  bool shouldOptimizeMemIntrins();

  void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
                           Operand *Src0, Operand *Src1);

  /// Generalizes scalarizeArithmetic to support other instruction types.
  ///
  /// insertScalarInstruction is a function-like object with signature
  /// (Variable *Dest, Variable *Src0, Variable *Src1) -> Inst *.
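  ///
  /// For example (a sketch, not code from this file), a target could scalarize
  /// a vector add roughly as follows:
  ///
  ///  scalarizeInstruction(
  ///      Dest,
  ///      [this](Variable *D, Variable *S0, Variable *S1) {
  ///        return Context.insert<InstArithmetic>(InstArithmetic::Add, D, S0,
  ///                                              S1);
  ///      },
  ///      Src0, Src1);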
  template <typename... Operands,
            typename F = std::function<Inst *(Variable *, Operands *...)>>
  void scalarizeInstruction(Variable *Dest, F insertScalarInstruction,
                            Operands *... Srcs) {
    assert(GeneratingTargetHelpers &&
           "scalarizeInstruction called during incorrect phase");
    const Type DestTy = Dest->getType();
    assert(isVectorType(DestTy));
    const Type DestElementTy = typeElementType(DestTy);
    const SizeT NumElements = typeNumElements(DestTy);

    Variable *T = Func->makeVariable(DestTy);
    if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T)) {
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
    } else {
      Context.insert<InstFakeDef>(T);
    }

    for (SizeT I = 0; I < NumElements; ++I) {
      auto *Index = Ctx->getConstantInt32(I);

      auto makeExtractThunk = [this, Index, NumElements](Operand *Src) {
        return [this, Index, NumElements, Src]() {
          assert(typeNumElements(Src->getType()) == NumElements);

          const auto ElementTy = typeElementType(Src->getType());
          auto *Op = Func->makeVariable(ElementTy);
          Context.insert<InstExtractElement>(Op, Src, Index);
          return Op;
        };
      };

      // Perform the operation as a scalar operation.
      auto *Res = Func->makeVariable(DestElementTy);
      auto *Arith = applyToThunkedArgs(insertScalarInstruction, Res,
                                       makeExtractThunk(Srcs)...);
      genTargetHelperCallFor(Arith);

      Variable *DestT = Func->makeVariable(DestTy);
      Context.insert<InstInsertElement>(DestT, T, Res, Index);
      T = DestT;
    }
    Context.insert<InstAssign>(Dest, T);
  }

  // applyToThunkedArgs is used by scalarizeInstruction. Ideally, we would just
  // call insertScalarInstruction(Res, Srcs...), but C++ does not specify the
  // evaluation order of function arguments, which would make the final output
  // unpredictable. Instead, we wrap each of the Srcs in a thunk, and these
  // applyToThunkedArgs overloads invoke the thunks in a well-defined order so
  // we still get well-defined output.
  Inst *applyToThunkedArgs(
      std::function<Inst *(Variable *, Variable *)> insertScalarInstruction,
      Variable *Res, std::function<Variable *()> thunk0) {
    auto *Src0 = thunk0();
    return insertScalarInstruction(Res, Src0);
  }

  Inst *
  applyToThunkedArgs(std::function<Inst *(Variable *, Variable *, Variable *)>
                         insertScalarInstruction,
                     Variable *Res, std::function<Variable *()> thunk0,
                     std::function<Variable *()> thunk1) {
    auto *Src0 = thunk0();
    auto *Src1 = thunk1();
    return insertScalarInstruction(Res, Src0, Src1);
  }

  Inst *applyToThunkedArgs(
      std::function<Inst *(Variable *, Variable *, Variable *, Variable *)>
          insertScalarInstruction,
      Variable *Res, std::function<Variable *()> thunk0,
      std::function<Variable *()> thunk1, std::function<Variable *()> thunk2) {
    auto *Src0 = thunk0();
    auto *Src1 = thunk1();
    auto *Src2 = thunk2();
    return insertScalarInstruction(Res, Src0, Src1, Src2);
  }

  /// SandboxType enumerates all possible sandboxing strategies that Subzero
  /// supports.
  enum SandboxType {
    ST_None,
    ST_NaCl,
    ST_Nonsfi,
  };

  static SandboxType determineSandboxTypeFromFlags(const ClFlags &Flags);

  Cfg *Func;
  GlobalContext *Ctx;
  bool HasComputedFrame = false;
  bool CallsReturnsTwice = false;
  SizeT NextLabelNumber = 0;
  SizeT NextJumpTableNumber = 0;
  LoweringContext Context;
  const SandboxType SandboxingType = ST_None;

  const static constexpr char *H_getIP_prefix = "__Sz_getIP_";
};

/// TargetDataLowering is used for "lowering" data including initializers for
/// global variables, and the internal constant pools. It is separated out from
/// TargetLowering because it does not require a Cfg.
class TargetDataLowering {
  TargetDataLowering() = delete;
  TargetDataLowering(const TargetDataLowering &) = delete;
  TargetDataLowering &operator=(const TargetDataLowering &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> createLowering(GlobalContext *Ctx);
  virtual ~TargetDataLowering();

  virtual void lowerGlobals(const VariableDeclarationList &Vars,
                            const std::string &SectionSuffix) = 0;
  virtual void lowerConstants() = 0;
  virtual void lowerJumpTables() = 0;
  virtual void emitTargetRODataSections() {}

protected:
  void emitGlobal(const VariableDeclaration &Var,
                  const std::string &SectionSuffix);

  /// For now, we assume .long is the right directive for emitting 4 byte global
  /// relocations. However, LLVM MIPS usually uses .4byte instead. Perhaps there
  /// is some difference when the location is unaligned.
  static const char *getEmit32Directive() { return ".long"; }

  explicit TargetDataLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
  GlobalContext *Ctx;
};

/// TargetHeaderLowering is used to "lower" the header of an output file. It
/// writes out the target-specific header attributes. E.g., for ARM this writes
/// out the build attributes (float ABI, etc.).
class TargetHeaderLowering {
  TargetHeaderLowering() = delete;
  TargetHeaderLowering(const TargetHeaderLowering &) = delete;
  TargetHeaderLowering &operator=(const TargetHeaderLowering &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering>
  createLowering(GlobalContext *Ctx);
  virtual ~TargetHeaderLowering();

  virtual void lower() {}

protected:
  explicit TargetHeaderLowering(GlobalContext *Ctx) : Ctx(Ctx) {}
  GlobalContext *Ctx;
};

} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERING_H