//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM cpu features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
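
  // Note that the enum above is ordered, so hasFeature(I) holds for every
  // feature I at or below the target's instruction set. A hypothetical
  // illustration:
  //
  //   TargetARM32Features Features(Flags); // suppose Flags selects HWDivArm
  //   Features.hasFeature(ARM32InstructionSet::Neon);     // true
  //   Features.hasFeature(ARM32InstructionSet::HWDivArm); // true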

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
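    // Policy note: all float constants are pooled, and all double constants
    // except +0.0, which is assumed to be cheap to materialize inline.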
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }

  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }
  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
    // are rounded up to 4 bytes.
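    // For example, i16 is 2 bytes wide and (2 + 3) & ~3 == 4, while i64 is
    // already a multiple of 4 and stays at 8 bytes.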
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
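
  // A typical use in lowering code (a hypothetical sketch): an ARM
  // data-processing instruction accepts a register or a flexible operand2 as
  // its second source operand, so that operand is commonly legalized as
  //
  //   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);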

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
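  /// Most lowerings produce a single condition (WhenTrue1 == kNone); some
  /// fcmp lowerings need two conditions, either of which makes the comparison
  /// true, hence the second slot.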
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation returns the value that should be strex'd back to Addr; it must
  /// not change the flags if Cond is not AL, and it must not emit any
  /// instructions that could end up writing to memory. Operation is also
  /// responsible for the fake-defs needed for i64 handling.
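  ///
  /// A minimal caller sketch (hypothetical; Addr and Val stand for
  /// already-legalized operands of an i32 atomic add):
  ///
  ///   lowerLoadLinkedStoreExclusive(
  ///       IceType_i32, Addr,
  ///       [this, Val](Variable *Tmp) {
  ///         Variable *Result = makeReg(IceType_i32);
  ///         _add(Result, Tmp, Val);
  ///         return Result;
  ///       });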
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void randomlyInsertNop(float Probability,
                         RandomNumberGenerator &RNG) override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const override;

  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
  // .LSKIP: <continuation>. If no check is needed, nothing is inserted.
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
                    ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close to
  // assembly as practical.
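  //
  // For instance, a lowering routine can read much like assembly (a
  // hypothetical sketch; Src0R and Src1RF stand for already-legalized
  // operands, and Dest is the instruction's destination):
  //
  //   Variable *T = makeReg(IceType_i32);
  //   _mov(T, Src0R);
  //   _add(T, T, Src1RF);
  //   _mov(Dest, T);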
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
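    // The FakeUse of Dest keeps the flag-setting instruction alive through
    // liveness analysis even when Dest itself is otherwise unused.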
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
  /// type (e.g., no immediates for vector loads, and no index registers for fp
  /// loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
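    // For a 64-bit ldrex (ldrexd), fake-def the two 32-bit halves so that
    // liveness analysis sees them as defined by this instruction.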
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves between
  /// Core and VFP registers. This is not a panacea: you must obey the (weird,
  /// confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where Dest
    // is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the element at the given index
  // from a vector register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block), we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single-use, to
  // avoid code duplication. These expressions are not short-circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  // mov T, <false value>
  // CC <- eval(Boolean Expression)
  // movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   bne %T
  //   tst C
  //   bne %T
  //   b %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   beq %F
  //   tst C
  //   beq %F
  //   b %T
  //
  // Arbitrarily long chains are short-circuited, e.g.,
  //
  //   A = or  i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which type of arithmetic is allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting could only be applied to a
  /// single high-level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %True
  ///   b %False
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
  /// might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then
  /// lowerInt1ForBranch created an internal (i.e., short-circuit) label that
  /// is used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to BrTarget.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
  /// an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
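    // For a 64-bit strex (strexd), fake-use the two 32-bit halves so that
    // their live ranges extend through this instruction.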
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping liveness ranges for both variables. If
    // either one of them is live, then they are both going to be live, and thus
    // assigned to different registers; if they are both dead, then DCE will
    // kick in and delete the following three instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _vdup(Variable *Dest, Variable *Src, int Idx) {
    Context.insert<InstARM32Vdup>(Dest, Src, Idx);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
  }
  void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
  }
  void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
  }
  void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
  }
  void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
                bool Saturating) {
    Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
        ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
                                             : InstARM32::FS_Signed)
                                 : InstARM32::FS_None);
  }
  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
  }
  void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }
  void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum size, in bytes, of the
  // outgoing stack arguments. This information is later used during
  // addProlog() to pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a Target-specific
  // method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
  /// @{
  void createGotPtr();
  void insertGotPtrInitPlaceholder();
  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
  void materializeGotAddr(CfgNode *Node);
  Variable *GotPtr = nullptr;
  // TODO(jpp): use CfgLocalAllocator.
  /// @}

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  /// Loads the constant relocatable Name into Register, then invokes Finish to
  /// complete the relocatable lowering. Finish **must** use PC in its first
  /// emitted instruction, or the relocatable in Register will contain the
  /// wrong value.
  //
  // Lowered sequence:
  //
  // Movw:
  //     movw Register, #:lower16:Name - (End - Movw) - 8 .
  // Movt:
  //     movt Register, #:upper16:Name - (End - Movt) - 8 .
  //     PC = fake-def
  // End:
  //     Finish(PC)
  //
  // The -8 in movw/movt above is to account for the PC value that the first
  // instruction emitted by Finish(PC) will read.
  void
  loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
                                  std::function<void(Variable *PC)> Finish);

  /// Sandboxer defines methods for ensuring that "dangerous" operations are
  /// masked during sandboxed code emission. For regular, non-sandboxed code
  /// emission, its methods are simple pass-through methods.
  ///
  /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
  /// in the constructor/destructor during sandboxed code emission. Therefore,
  /// it is a bad idea to create an object of this type and "keep it around."
  /// The recommended usage is:
  ///
  /// Sandboxer(this).<<operation>>(...);
  ///
  /// This usage ensures that no other instructions are inadvertently added to
  /// the bundle.
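  ///
  /// For instance, a hypothetical call site inside a TargetARM32 lowering
  /// method (with SubAmount standing for an already-legalized Operand) would
  /// read:
  ///
  ///   Sandboxer(this).sub_sp(SubAmount);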
  class Sandboxer {
    Sandboxer() = delete;
    Sandboxer(const Sandboxer &) = delete;
    Sandboxer &operator=(const Sandboxer &) = delete;

  public:
    explicit Sandboxer(
        TargetARM32 *Target,
        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
    ~Sandboxer();

    /// Increments sp:
    ///
    ///   add sp, sp, AddAmount
    ///   bic sp, sp, 0xc0000000
    ///
    /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
    void add_sp(Operand *AddAmount);

    /// Emits code to align sp to the specified alignment:
    ///
    ///   bic/and sp, sp, Alignment
    ///   bic sp, sp, 0xc0000000
    void align_sp(size_t Alignment);

    /// Emits a call instruction. If CallTarget is a Variable, it emits
    ///
    ///   bic CallTarget, CallTarget, 0xc000000f
    ///   bl CallTarget
    ///
    /// Otherwise, it emits
    ///
    ///   bl CallTarget
    ///
    /// Note: in sandboxed code, calls are always emitted at addresses that
    /// are 12 mod 16.
    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);

    /// Emits a load:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a load exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldrex rDest, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldrex rDest, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Resets sp to Src:
    ///
    ///   mov sp, Src
    ///   bic sp, sp, 0xc0000000
    void reset_sp(Variable *Src);

    /// Emits code to return from a function:
    ///
    ///   bic lr, lr, 0xc000000f
    ///   bx lr
    void ret(Variable *RetAddr, Variable *RetValue);

    /// Emits a store:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   str rSrc, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   str rSrc, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a store exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   strex rDest, rSrc, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   strex rDest, rSrc, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
               CondARM32::Cond Pred);

    /// Decrements sp:
    ///
    ///   sub sp, sp, SubAmount
    ///   bic sp, sp, 0xc0000000
    void sub_sp(Operand *SubAmount);

  private:
    TargetARM32 *const Target;
    const InstBundleLock::Option BundleOption;
    std::unique_ptr<AutoBundle> Bundler;

    void createAutoBundle();
  };

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Asserts that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset
    // is fixed up.
   1192     OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
   1193                                         bool AllowOffsets = true);
   1194 
   1195     /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
   1196     /// if its Source is a Rematerializable variable (this form is used in lieu
   1197     /// of lea, which is not available in ARM.)
   1198     ///
   1199     /// Moves to memory become store instructions, and moves from memory, loads.
   1200     void legalizeMov(InstARM32Mov *Mov);
   1201 
   1202   private:
   1203     /// Creates a new Base register centered around [Base, +/- Offset].
   1204     Variable *newBaseRegister(Variable *Base, int32_t Offset,
   1205                               RegNumT ScratchRegNum);
   1206 
   1207     /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
   1208     /// The returned mem operand is a legal operand for accessing memory that is
   1209     /// of type Ty.
   1210     ///
   1211     /// If [Base, #Offset] is encodable, then the method returns a Mem operand
   1212     /// expressing it. Otherwise,
   1213     ///
   1214     /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
   1215     /// method will return that. Otherwise,
   1216     ///
   1217     /// a new base register ip=Base+Offset is created, and the method returns a
   1218     /// memory operand expressing [ip, #0].
   1219     OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
   1220                                       bool AllowOffsets = true);
   1221     TargetARM32 *const Target;
   1222     Variable *const StackOrFrameReg;
   1223     Variable *TempBaseReg = nullptr;
   1224     int32_t TempBaseOffset = 0;
   1225   };
   1226 
   1227   const bool NeedSandboxing;
   1228   TargetARM32Features CPUFeatures;
   1229   bool UsesFramePointer = false;
   1230   bool NeedsStackAlignment = false;
   1231   bool MaybeLeafFunc = true;
   1232   size_t SpillAreaSizeBytes = 0;
   1233   size_t FixedAllocaSizeBytes = 0;
   1234   size_t FixedAllocaAlignBytes = 0;
   1235   bool PrologEmitsFixedAllocas = false;
   1236   uint32_t MaxOutArgsSizeBytes = 0;
   1237   // TODO(jpp): std::array instead of array.
   1238   static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
   1239   static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
   1240   static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
   1241   SmallBitVector RegsUsed;
   1242   VarList PhysicalRegisters[IceType_NUM];
   1243   VarList PreservedGPRs;
   1244   VarList PreservedSRegs;
   1245 
   1246   /// Helper class that understands the Calling Convention and register
   1247   /// assignments. The first few integer type parameters can use r0-r3,
   1248   /// regardless of their position relative to the floating-point/vector
   1249   /// arguments in the argument list. Floating-point and vector arguments
   1250   /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
   1251     /// see the Procedure Call Standard for the ARM Architecture (AAPCS).
   1252   ///
   1253     /// Technically, an argument that starts in registers but extends beyond
   1254     /// the available registers can be split between registers and the stack.
   1255     /// However, this typically only arises when passing structs by value in
   1256     /// GPRs, and the PNaCl transforms expand such arguments out.
   1257   ///
   1258   /// At (public) function entry, the stack must be 8-byte aligned.
   1259   class CallingConv {
   1260     CallingConv(const CallingConv &) = delete;
   1261     CallingConv &operator=(const CallingConv &) = delete;
   1262 
   1263   public:
   1264     CallingConv();
   1265     ~CallingConv() = default;
   1266 
   1267     /// argInGPR returns true if there is a GPR available for the requested
   1268     /// type, and false otherwise. If it returns true, Reg is set to the
   1269     /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
   1270     /// be an I64 register pair.
   1271     bool argInGPR(Type Ty, RegNumT *Reg);
   1272 
   1273     /// argInVFP is to floating-point/vector types what argInGPR is to integer
   1274     /// types.
   1275     bool argInVFP(Type Ty, RegNumT *Reg);
   1276 
   1277   private:
   1278     void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
   1279     SmallBitVector GPRegsUsed;
   1280     CfgVector<RegNumT> GPRArgs;
   1281     CfgVector<RegNumT> I64Args;
   1282 
   1283     void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
   1284     SmallBitVector VFPRegsUsed;
   1285     CfgVector<RegNumT> FP32Args;
   1286     CfgVector<RegNumT> FP64Args;
   1287     CfgVector<RegNumT> Vec128Args;
   1288   };
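          // A minimal usage sketch (hypothetical; the actual argument-lowering
          // code differs):
          //
          //   CallingConv CC;
          //   RegNumT Reg;
          //   for (Variable *Arg : Args) {
          //     const Type Ty = Arg->getType();
          //     const bool InReg = (isScalarFloatingType(Ty) || isVectorType(Ty))
          //                            ? CC.argInVFP(Ty, &Reg)
          //                            : CC.argInGPR(Ty, &Reg);
          //     if (!InReg) {
          //       // No register left for Ty: the argument is passed on the stack.
          //     }
          //   }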
   1289 
   1290 private:
   1291   ENABLE_MAKE_UNIQUE;
   1292 
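          // Attempts to fold the address computation feeding the load/store LdSt
          // into a single legal OperandARM32Mem rooted at Base (summary inferred
          // from usage; see the implementation for the exact matching rules).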
   1293   OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
   1294                                       Operand *Base);
   1295 
   1296   void postambleCtpop64(const InstCall *Instr);
   1297   void preambleDivRem(const InstCall *Instr);
   1298   CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
   1299       ARM32HelpersPreamble;
   1300   CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
   1301       ARM32HelpersPostamble;
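          // These maps associate runtime-helper call targets with hooks run before
          // (preamble) and after (postamble) the lowered call, e.g.,
          // preambleDivRem for the division helpers and postambleCtpop64 for the
          // 64-bit ctpop helper.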
   1302 
   1303   class ComputationTracker {
   1304   public:
   1305     ComputationTracker() = default;
   1306     ~ComputationTracker() = default;
   1307 
   1308     void forgetProducers() { KnownComputations.clear(); }
   1309     void recordProducers(CfgNode *Node);
   1310 
   1311     const Inst *getProducerOf(const Operand *Opnd) const {
   1312       auto *Var = llvm::dyn_cast<Variable>(Opnd);
   1313       if (Var == nullptr) {
   1314         return nullptr;
   1315       }
   1316 
   1317       auto Iter = KnownComputations.find(Var->getIndex());
   1318       if (Iter == KnownComputations.end()) {
   1319         return nullptr;
   1320       }
   1321 
   1322       return Iter->second.Instr;
   1323     }
   1324 
   1325     void dump(const Cfg *Func) const {
   1326       if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
   1327         return;
   1328       OstreamLocker L(Func->getContext());
   1329       Ostream &Str = Func->getContext()->getStrDump();
   1330       Str << "foldable producer:\n";
   1331       for (const auto &Computation : KnownComputations) {
   1332         Str << "    ";
   1333         Computation.second.Instr->dump(Func);
   1334         Str << "\n";
   1335       }
   1336       Str << "\n";
   1337     }
   1338 
   1339   private:
   1340     class ComputationEntry {
   1341     public:
   1342       ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
   1343       Inst *const Instr;
   1344       // Boolean folding is disabled for variables whose live range spans
   1345       // multiple blocks. We conservatively initialize IsLiveOut to true, and
   1346       // set it to false once we find the end of the live range for the
   1347       // variable defined by this instruction. If liveness analysis is not
   1348       // performed (e.g., in Om1 mode), IsLiveOut is never set to false, and
   1349       // folding stays disabled.
   1350       bool IsLiveOut = true;
   1351       int32_t NumUses = 0;
   1352       Type ComputationType;
   1353     };
   1354 
   1355     // ComputationMap maps a Variable number to a payload identifying which
   1356     // instruction defined it.
   1357     using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
   1358     ComputationMap KnownComputations;
   1359   };
   1360 
   1361   ComputationTracker Computations;
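  // Illustrative sketch of the folding this tracker enables (hypothetical IR):
  // a single-block sequence such as
  //
  //   %cond = icmp slt i32 %a, %b
  //   br i1 %cond, label %T, label %F
  //
  // can be lowered to one cmp plus a conditional branch, instead of first
  // materializing %cond in a register.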
   1362 
   1363   // AllowTemporaryWithNoReg indicates whether TargetARM32::makeReg() can be
   1364   // invoked without specifying a physical register. This is needed for
   1365   // creating unbound temporaries during Ice -> ARM lowering (i.e., before
   1366   // register allocation). Clearing it is a safeguard ensuring that no
   1367   // unbound temporaries are created during the legalization post-passes.
   1368   bool AllowTemporaryWithNoReg = true;
   1369   // ForbidTemporaryWithoutReg is a RAII class that manages
   1370   // AllowTemporaryWithNoReg.
   1371   class ForbidTemporaryWithoutReg {
   1372     ForbidTemporaryWithoutReg() = delete;
   1373     ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
   1374     ForbidTemporaryWithoutReg &
   1375     operator=(const ForbidTemporaryWithoutReg &) = delete;
   1376 
   1377   public:
   1378     explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
   1379       Target->AllowTemporaryWithNoReg = false;
   1380     }
   1381     ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
   1382 
   1383   private:
   1384     TargetARM32 *const Target;
   1385   };
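  // Hypothetical usage of the RAII guard (the function name is invented for
  // illustration):
  //
  //   void TargetARM32::somePostLoweringPass() {
  //     ForbidTemporaryWithoutReg NoUnboundTemps(this);
  //     // makeReg() without an explicit register number now asserts.
  //   } // AllowTemporaryWithNoReg is restored on scope exit.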
   1386 };
   1387 
   1388 class TargetDataARM32 final : public TargetDataLowering {
   1389   TargetDataARM32() = delete;
   1390   TargetDataARM32(const TargetDataARM32 &) = delete;
   1391   TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;
   1392 
   1393 public:
   1394   static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
   1395     return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
   1396   }
   1397 
   1398   void lowerGlobals(const VariableDeclarationList &Vars,
   1399                     const std::string &SectionSuffix) override;
   1400   void lowerConstants() override;
   1401   void lowerJumpTables() override;
   1402 
   1403 protected:
   1404   explicit TargetDataARM32(GlobalContext *Ctx);
   1405 
   1406 private:
   1407   ~TargetDataARM32() override = default;
   1408 };
   1409 
   1410 class TargetHeaderARM32 final : public TargetHeaderLowering {
   1411   TargetHeaderARM32() = delete;
   1412   TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
   1413   TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;
   1414 
   1415 public:
   1416   static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
   1417     return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
   1418   }
   1419 
   1420   void lower() override;
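  // Hedged note: lower() emits the ARM build-attribute preamble of the output
  // (e.g., .eabi_attribute directives reflecting CPUFeatures); see the
  // implementation for the exact directives.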
   1421 
   1422 protected:
   1423   explicit TargetHeaderARM32(GlobalContext *Ctx);
   1424 
   1425 private:
   1426   ~TargetHeaderARM32() = default;
   1427 
   1428   TargetARM32Features CPUFeatures;
   1429 };
   1430 
   1431 } // end of namespace ARM32
   1432 } // end of namespace Ice
   1433 
   1434 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H
   1435