//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM CPU features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }
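  // Since instruction sets are ordered, hasFeature(I) asks whether the
  // selected instruction set is at least I. For example (a sketch; assumes
  // Flags selects HWDivArm):
  //
  //   TargetARM32Features Features(Flags);
  //   Features.hasFeature(ARM32InstructionSet::Neon);     // true
  //   Features.hasFeature(ARM32InstructionSet::HWDivArm); // true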

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }

  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }
  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
    // are rounded up to 4 bytes.
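    // For example, typeWidthInBytes(IceType_i16) == 2, and (2 + 3) & ~3 == 4.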
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };
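  // To illustrate Legal_Flex: 0xFF00 can be an Operand2 because it is an
  // 8-bit value (0xFF) rotated right by 24, so "add r0, r1, #0xff00" encodes
  // directly, whereas 0x101 has no rotated-immediate encoding and must first
  // be legalized into a register. (Illustration only; constants arbitrary.)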

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation needs to return the value to strex at Addr; it must not change
  /// the flags if Cond is not AL, and must not emit any instructions that
  /// could end up writing to memory. Operation also needs to handle the
  /// fake-defing required when handling i64.
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
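  // A sketch of a typical Operation argument, lowering an i32 atomic
  // fetch-add of Val (the lambda and the names are illustrative, not actual
  // call-site code):
  //
  //   lowerLoadLinkedStoreExclusive(
  //       IceType_i32, Addr,
  //       [this, Val](Variable *Tmp) {
  //         Variable *Result = makeReg(IceType_i32);
  //         _add(Result, Tmp, Val);
  //         return Result;
  //       });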
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void randomlyInsertNop(float Probability,
                         RandomNumberGenerator &RNG) override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const override;

  // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
  // .LSKIP: <continuation>. If no check is needed, nothing is inserted.
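  // For an i32 divisor the inserted sequence is roughly (a sketch; the label
  // name is illustrative):
  //
  //   tst SrcLo, SrcLo
  //   bne .LSKIP
  //   trap
  // .LSKIP: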
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
                    ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close to
  // assembly as practical.
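  // For example, an i64 add can then be written almost as assembly (a sketch;
  // T_Lo and T_Hi are illustrative temporaries):
  //
  //   _adds(T_Lo, Src0Lo, Src1Lo); // adds: sets the carry flag
  //   _adc(T_Hi, Src0Hi, Src1Hi);  // adc: consumes the carry flag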
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
  /// type (e.g., no immediates for vector loads, and no index registers for fp
  /// loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves between
  /// Core and VFP registers. This is not a panacea: you must obey the (weird,
  /// confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where Dest
    // is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the element at the given index
  // from a vector register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single use to
  // avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  // mov T, <false value>
  // CC <- eval(Boolean Expression)
  // movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jne %T
  //   tst C
  //   jne %T
  //   j %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jeq %F
  //   tst C
  //   jeq %F
  //   j %T
  //
  // Arbitrarily long chains are short circuited, e.g.,
  //
  //   A = or  i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or
  /// an InstARM32Label (but never both) so that, during br i1 lowering, we can
  /// create auxiliary labels for short circuiting the condition evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch for
  /// determining which types of arithmetic may be short circuited. This
  /// is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting would only be able to short
  /// circuit a single high-level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %False
  ///   b %True
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
  /// might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitLabel is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit) label
  /// used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to BrTarget.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
  /// an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
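  // A _movw/_movt pair materializes a full 32-bit constant or symbol address
  // in two instructions, e.g. (a sketch):
  //
  //   _movw(Reg, Src0); // movw reg, #:lower16:sym
  //   _movt(Reg, Src0); // movt reg, #:upper16:sym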
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping liveness ranges for both variables. If
    // either one of them is live, then they are both going to be live, and thus
    // assigned to different registers; if they are both dead, then DCE will
    // kick in and delete the following three instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum size in bytes of the
  // outgoing stack arguments. This information is later used during
  // addProlog() to pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a Target-specific
  // method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
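  /// The legal range depends on Ty. For example (per the A32 encodings), an
  /// i32 ldr/str immediate offset lies in [-4095, 4095], an i16 access
  /// (ldrh/strh) in [-255, 255], and vldr/vstr offsets must be multiples of 4
  /// in [-1020, 1020].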
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
  /// @{
  void createGotPtr();
  void insertGotPtrInitPlaceholder();
  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
  void materializeGotAddr(CfgNode *Node);
  Variable *GotPtr = nullptr;
  // TODO(jpp): use CfgLocalAllocator.
  /// @}

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  /// Loads the constant relocatable Name into Register, then invokes Finish to
  /// complete the relocatable lowering. Finish **must** use PC in its first
  /// emitted instruction, or the relocatable in Register will contain the wrong
  /// value.
  //
  // Lowered sequence:
  //
  // Movw:
  //     movw Register, #:lower16:Name - (End - Movw) - 8 .
  // Movt:
  //     movt Register, #:upper16:Name - (End - Movt) - 8 .
  //     PC = fake-def
  // End:
  //     Finish(PC)
  //
  // The -8 in movw/movt above is to account for the PC value that the first
  // instruction emitted by Finish(PC) will read.
  void
  loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
                                  std::function<void(Variable *PC)> Finish);
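  //
  // A plausible Finish callback (illustrative only, not an actual call site)
  // completes the address computation by adding the materialized offset to PC:
  //
  //   loadNamedConstantRelocatablePIC(
  //       Name, Reg, [this, Reg](Variable *PC) { _add(Reg, PC, Reg); });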

  /// Sandboxer defines methods for ensuring that "dangerous" operations are
  /// masked during sandboxed code emission. For regular, non-sandboxed code
  /// emission, its methods are simple pass-through methods.
  ///
  /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
  /// in the constructor/destructor during sandboxed code emission. Therefore,
  /// it is a bad idea to create an object of this type and "keep it around."
  /// The recommended usage is:
  ///
  /// Sandboxer(this).<<operation>>(...);
  ///
  /// This usage ensures that no other instructions are inadvertently added to
  /// the bundle.
  class Sandboxer {
    Sandboxer() = delete;
    Sandboxer(const Sandboxer &) = delete;
    Sandboxer &operator=(const Sandboxer &) = delete;

  public:
    explicit Sandboxer(
        TargetARM32 *Target,
        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
    ~Sandboxer();

    /// Increments sp:
    ///
    ///   add sp, sp, AddAmount
    ///   bic sp, sp, 0xc0000000
    ///
    /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
    void add_sp(Operand *AddAmount);

    /// Emits code to align sp to the specified alignment:
    ///
    ///   bic/and sp, sp, Alignment
    ///   bic sp, sp, 0xc0000000
    void align_sp(size_t Alignment);

    /// Emits a call instruction. If CallTarget is a Variable, it emits
    ///
    ///   bic CallTarget, CallTarget, 0xc000000f
    ///   bl CallTarget
    ///
    /// Otherwise, it emits
    ///
    ///   bl CallTarget
    ///
    /// Note: in sandboxed code, calls are always emitted at addresses that are
    /// 12 mod 16.
    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);

    /// Emits a load:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a load exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldrex rDest, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldrex rDest, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Resets sp to Src:
    ///
    ///   mov sp, Src
    ///   bic sp, sp, 0xc0000000
    void reset_sp(Variable *Src);

    /// Emits code to return from a function:
    ///
    ///   bic lr, lr, 0xc000000f
    ///   bx lr
    void ret(Variable *RetAddr, Variable *RetValue);

    /// Emits a store:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   str rSrc, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   str rSrc, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a store exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   strex rDest, rSrc, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   strex rDest, rSrc, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
               CondARM32::Cond Pred);

    /// Decrements sp:
    ///
    ///   sub sp, sp, SubAmount
    ///   bic sp, sp, 0xc0000000
    void sub_sp(Operand *SubAmount);

  private:
    TargetARM32 *const Target;
    const InstBundleLock::Option BundleOption;
    std::unique_ptr<AutoBundle> Bundler;

    void createAutoBundle();
  };

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Asserts that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a rematerializable variable, Mem.Offset is
    // fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in lieu
    /// of lea, which is not available in ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory, loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that is
    /// of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem operand
    /// expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
    /// method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns a
    /// memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };

  const bool NeedSandboxing;
  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the ARM Architecture Procedure Calling Standards (AAPCS).
  ///
  /// Technically, arguments that can start with registers but extend beyond the
  /// available registers can be split between the registers and the stack.
  /// However, this is typically for passing GPR structs by value, and PNaCl
  /// transforms expand this out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
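  ///
  /// For example, under these rules a signature like (i32, float, i64, double)
  /// would plausibly be assigned r0, s0, the pair r2:r3, and d1 (leaving s1
  /// free to back-fill with a later float argument); anything that does not
  /// fit in registers goes to the stack.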
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;

    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
    /// be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is for integer
    /// types.
    bool argInVFP(Type Ty, RegNumT *Reg);

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << "    ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range is multi
      // block. We conservatively initialize IsLiveOut to true, and set it to
      // false once we find the end of the live range for the variable defined
      // by this instruction. If liveness analysis is not performed (e.g., in
      // Om1 mode) IsLiveOut will never be set to false, and folding will be
      // disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };

    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;

  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
  // without specifying a physical register. This is needed for creating unbound
  // temporaries during Ice -> ARM lowering, but before register allocation.
  // It is a safeguard ensuring that no unbound temporaries are created during
  // the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
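
  // A sketch of the intended use: guard a post-lowering pass so that any
  // makeReg() call without an explicit physical register can be caught:
  //
  //   {
  //     ForbidTemporaryWithoutReg NoTmpGuard(this);
  //     // ... run a legalization post-pass ...
  //   }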
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H