Home | History | Annotate | Download | only in src
      1 //===- subzero/src/IceAssemblerARM32.cpp - Assembler for ARM32 --*- C++ -*-===//
      2 //
      3 // Copyright (c) 2013, the Dart project authors.  Please see the AUTHORS file
      4 // for details. All rights reserved. Use of this source code is governed by a
      5 // BSD-style license that can be found in the LICENSE file.
      6 //
      7 // Modified by the Subzero authors.
      8 //
      9 //===----------------------------------------------------------------------===//
     10 //
     11 //                        The Subzero Code Generator
     12 //
     13 // This file is distributed under the University of Illinois Open Source
     14 // License. See LICENSE.TXT for details.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 ///
     18 /// \file
     19 /// \brief Implements the Assembler class for ARM32.
     20 ///
     21 //===----------------------------------------------------------------------===//
     22 
     23 #include "IceAssemblerARM32.h"
     24 #include "IceCfgNode.h"
     25 #include "IceUtils.h"
     26 
     27 namespace {
     28 
     29 using namespace Ice;
     30 using namespace Ice::ARM32;
     31 
// Word type used to define kWordSize below.
using WordType = uint32_t;
// Size, in bytes, of WordType.
static constexpr IValueT kWordSize = sizeof(WordType);

// The following define individual bits: Bn is a mask with only bit n set
// (bit 0 being the least significant bit).
static constexpr IValueT B0 = 1;
static constexpr IValueT B1 = 1 << 1;
static constexpr IValueT B2 = 1 << 2;
static constexpr IValueT B3 = 1 << 3;
static constexpr IValueT B4 = 1 << 4;
static constexpr IValueT B5 = 1 << 5;
static constexpr IValueT B6 = 1 << 6;
static constexpr IValueT B7 = 1 << 7;
static constexpr IValueT B8 = 1 << 8;
static constexpr IValueT B9 = 1 << 9;
static constexpr IValueT B10 = 1 << 10;
static constexpr IValueT B11 = 1 << 11;
static constexpr IValueT B12 = 1 << 12;
static constexpr IValueT B13 = 1 << 13;
static constexpr IValueT B14 = 1 << 14;
static constexpr IValueT B15 = 1 << 15;
static constexpr IValueT B16 = 1 << 16;
static constexpr IValueT B17 = 1 << 17;
static constexpr IValueT B18 = 1 << 18;
static constexpr IValueT B19 = 1 << 19;
static constexpr IValueT B20 = 1 << 20;
static constexpr IValueT B21 = 1 << 21;
static constexpr IValueT B22 = 1 << 22;
static constexpr IValueT B23 = 1 << 23;
static constexpr IValueT B24 = 1 << 24;
static constexpr IValueT B25 = 1 << 25;
static constexpr IValueT B26 = 1 << 26;
static constexpr IValueT B27 = 1 << 27;

// Constants used for the decoding or encoding of the individual fields of
// instructions. Based on ARM section A5.1. These are single-bit masks with
// the same values as B20 through B24 above, named after the instruction
// field they represent.
static constexpr IValueT L = 1 << 20; // load (or store)
static constexpr IValueT W = 1 << 21; // writeback base register
                                      // (or leave unchanged)
static constexpr IValueT B = 1 << 22; // unsigned byte (or word)
static constexpr IValueT U = 1 << 23; // positive (or negative)
                                      // offset/index
static constexpr IValueT P = 1 << 24; // offset/pre-indexed
                                      // addressing (or
                                      // post-indexed addressing)

// Bit positions (left-shift amounts) of instruction fields within a 32-bit
// instruction word.
static constexpr IValueT kConditionShift = 28;
static constexpr IValueT kLinkShift = 24;
static constexpr IValueT kOpcodeShift = 21;
static constexpr IValueT kRdShift = 12;
static constexpr IValueT kRmShift = 0;
static constexpr IValueT kRnShift = 16;
static constexpr IValueT kRsShift = 8;
static constexpr IValueT kSShift = 20;
static constexpr IValueT kTypeShift = 25;

// Immediate instruction fields encoding: width (k*Bits) and position
// (k*Shift) of the 8-bit immediate and its 4-bit rotation.
static constexpr IValueT kImmed8Bits = 8;
static constexpr IValueT kImmed8Shift = 0;
static constexpr IValueT kRotateBits = 4;
static constexpr IValueT kRotateShift = 8;

// Shift instruction register fields encodings: position and width of the
// 5-bit shift amount, position of the shift kind, and width/position of a
// 12-bit immediate.
static constexpr IValueT kShiftImmShift = 7;
static constexpr IValueT kShiftImmBits = 5;
static constexpr IValueT kShiftShift = 5;
static constexpr IValueT kImmed12Bits = 12;
static constexpr IValueT kImm12Shift = 0;

// Rotation instructions (uxtb etc.).
static constexpr IValueT kRotationShift = 10;

// MemEx instructions.
static constexpr IValueT kMemExOpcodeShift = 20;

// Div instruction register field encodings. Note that these differ from the
// kRdShift/kRmShift/kRnShift positions defined above.
static constexpr IValueT kDivRdShift = 16;
static constexpr IValueT kDivRmShift = 8;
static constexpr IValueT kDivRnShift = 0;

// Type of instruction encoding (bits 25-27). See ARM section A5.1
// kInstTypeDataRegister and kInstTypeDataRegShift share the value 0.
static constexpr IValueT kInstTypeDataRegister = 0;  // i.e. 000
static constexpr IValueT kInstTypeDataRegShift = 0;  // i.e. 000
static constexpr IValueT kInstTypeDataImmediate = 1; // i.e. 001
static constexpr IValueT kInstTypeMemImmediate = 2;  // i.e. 010
static constexpr IValueT kInstTypeRegisterShift = 3; // i.e. 011

// Limit on number of registers in a vpush/vpop.
static constexpr SizeT VpushVpopMaxConsecRegs = 16;

// Offset modifier to current PC for next instruction.  The offset is off by 8
// due to the way the ARM CPUs read PC.
static constexpr IOffsetT kPCReadOffset = 8;

// Mask to pull out PC offset from branch (b) instruction: the offset occupies
// the low kBranchOffsetBits (24) bits of the instruction word.
static constexpr int kBranchOffsetBits = 24;
static constexpr IOffsetT kBranchOffsetMask = 0x00ffffff;
    128 
    129 IValueT encodeBool(bool B) { return B ? 1 : 0; }
    130 
    131 IValueT encodeRotation(ARM32::AssemblerARM32::RotationValue Value) {
    132   return static_cast<IValueT>(Value);
    133 }
    134 
    135 IValueT encodeGPRRegister(RegARM32::GPRRegister Rn) {
    136   return static_cast<IValueT>(Rn);
    137 }
    138 
    139 RegARM32::GPRRegister decodeGPRRegister(IValueT R) {
    140   return static_cast<RegARM32::GPRRegister>(R);
    141 }
    142 
    143 IValueT encodeCondition(CondARM32::Cond Cond) {
    144   return static_cast<IValueT>(Cond);
    145 }
    146 
    147 IValueT encodeShift(OperandARM32::ShiftKind Shift) {
    148   // Follows encoding in ARM section A8.4.1 "Constant shifts".
    149   switch (Shift) {
    150   case OperandARM32::kNoShift:
    151   case OperandARM32::LSL:
    152     return 0; // 0b00
    153   case OperandARM32::LSR:
    154     return 1; // 0b01
    155   case OperandARM32::ASR:
    156     return 2; // 0b10
    157   case OperandARM32::ROR:
    158   case OperandARM32::RRX:
    159     return 3; // 0b11
    160   }
    161   llvm::report_fatal_error("Unknown Shift value");
    162   return 0;
    163 }
    164 
    165 // Returns the bits in the corresponding masked value.
    166 IValueT mask(IValueT Value, IValueT Shift, IValueT Bits) {
    167   return (Value >> Shift) & ((1 << Bits) - 1);
    168 }
    169 
    170 // Extract out a Bit in Value.
    171 bool isBitSet(IValueT Bit, IValueT Value) { return (Value & Bit) == Bit; }
    172 
    173 // Returns the GPR register at given Shift in Value.
    174 RegARM32::GPRRegister getGPRReg(IValueT Shift, IValueT Value) {
    175   return decodeGPRRegister((Value >> Shift) & 0xF);
    176 }
    177 
    178 IValueT getEncodedGPRegNum(const Variable *Var) {
    179   assert(Var->hasReg());
    180   const auto Reg = Var->getRegNum();
    181   return llvm::isa<Variable64On32>(Var) ? RegARM32::getI64PairFirstGPRNum(Reg)
    182                                         : RegARM32::getEncodedGPR(Reg);
    183 }
    184 
    185 IValueT getEncodedSRegNum(const Variable *Var) {
    186   assert(Var->hasReg());
    187   return RegARM32::getEncodedSReg(Var->getRegNum());
    188 }
    189 
    190 IValueT getEncodedDRegNum(const Variable *Var) {
    191   return RegARM32::getEncodedDReg(Var->getRegNum());
    192 }
    193 
    194 IValueT getEncodedQRegNum(const Variable *Var) {
    195   return RegARM32::getEncodedQReg(Var->getRegNum());
    196 }
    197 
    198 IValueT mapQRegToDReg(IValueT EncodedQReg) {
    199   IValueT DReg = EncodedQReg << 1;
    200   assert(DReg < RegARM32::getNumDRegs());
    201   return DReg;
    202 }
    203 
    204 IValueT mapQRegToSReg(IValueT EncodedQReg) {
    205   IValueT SReg = EncodedQReg << 2;
    206   assert(SReg < RegARM32::getNumSRegs());
    207   return SReg;
    208 }
    209 
    210 IValueT getYInRegXXXXY(IValueT RegXXXXY) { return RegXXXXY & 0x1; }
    211 
    212 IValueT getXXXXInRegXXXXY(IValueT RegXXXXY) { return RegXXXXY >> 1; }
    213 
    214 IValueT getYInRegYXXXX(IValueT RegYXXXX) { return RegYXXXX >> 4; }
    215 
    216 IValueT getXXXXInRegYXXXX(IValueT RegYXXXX) { return RegYXXXX & 0x0f; }
    217 
    218 // Figures out Op/Cmode values for given Value. Returns true if able to encode.
    219 bool encodeAdvSIMDExpandImm(IValueT Value, Type ElmtTy, IValueT &Op,
    220                             IValueT &Cmode, IValueT &Imm8) {
    221   // TODO(kschimpf): Handle other shifted 8-bit values.
    222   constexpr IValueT Imm8Mask = 0xFF;
    223   if ((Value & IValueT(~Imm8Mask)) != 0)
    224     return false;
    225   Imm8 = Value;
    226   switch (ElmtTy) {
    227   case IceType_i8:
    228     Op = 0;
    229     Cmode = 14; // 0b1110
    230     return true;
    231   case IceType_i16:
    232     Op = 0;
    233     Cmode = 8; // 0b1000
    234     return true;
    235   case IceType_i32:
    236     Op = 0;
    237     Cmode = 0; // 0b0000
    238     return true;
    239   default:
    240     return false;
    241   }
    242 }
    243 
// Defines layouts of an operand representing a (register) memory address,
// possibly modified by an immediate value. The enumerator passed to
// encodeImmRegOffset/encodeAddress selects how the immediate offset is packed
// into the encoded value.
enum EncodedImmAddress {
  // Address modified by a rotated immediate 8-bit value.
  RotatedImm8Address,

  // Alternate encoding for RotatedImm8Address, where the offset is divided by 4
  // before encoding.
  RotatedImm8Div4Address,

  // Address modified by an immediate 12-bit value.
  Imm12Address,

  // Alternate encoding 3, for an address modified by a rotated immediate 8-bit
  // value.
  RotatedImm8Enc3Address,

  // Encoding where no immediate offset is used.
  NoImmOffsetAddress
};
    264 
// The way an operand is encoded into a sequence of bits in functions
// encodeOperand and encodeAddress below.
//
// NOTE(review): several of the bit strings below are 33 characters wide rather
// than 32, and the NoImmOffsetAddress string shows bit 23 set; confirm each
// layout against the corresponding encoding function before relying on it.
enum EncodedOperand {
  // Unable to encode, value left undefined.
  CantEncode = 0,

  // Value is register found.
  EncodedAsRegister,

  // Value=rrrriiiiiiii where rrrr is the rotation, and iiiiiiii is the imm8
  // value.
  EncodedAsRotatedImm8,

  // EncodedAsImmRegOffset is a memory operand that can take several forms,
  // based on type EncodedImmAddress:
  //
  // ***** RotatedImm8Address *****
  //
  // Value=0000000pu0w0nnnn0000iiiiiiiiiiii where nnnn is the base register Rn,
  // p=1 if pre-indexed addressing, u=1 if offset positive, w=1 if writeback to
  // Rn should be used, and iiiiiiiiiiii defines the rotated Imm8 value.
  //
  // ***** RotatedImm8Div4Address *****
  //
  // Value=00000000pu0w0nnnn0000iiii0000jjjj where nnnn=Rn, iiiijjjj=Imm8, p=1
  // if pre-indexed addressing, u=1 if offset positive, and w=1 if writeback to
  // Rn.
  //
  // ***** Imm12Address *****
  //
  // Value=0000000pu0w0nnnn0000iiiiiiiiiiii where nnnn is the base register Rn,
  // p=1 if pre-indexed addressing, u=1 if offset positive, w=1 if writeback to
  // Rn should be used, and iiiiiiiiiiii defines the immediate 12-bit value.
  //
  // ***** NoImmOffsetAddress *****
  //
  // Value=000000001000nnnn0000000000000000 where nnnn=Rn.
  EncodedAsImmRegOffset,

  // Value=0000000pu0w00nnnnttttiiiiiss0mmmm where nnnn is the base register Rn,
  // mmmm is the index register Rm, iiiii is the shift amount, ss is the shift
  // kind, p=1 if pre-indexed addressing, u=1 if offset positive, and w=1 if
  // writeback to Rn.
  EncodedAsShiftRotateImm5,

  // Value=000000000000000000000iiiii0000000 where iiiii defines the Imm5 value
  // to shift.
  EncodedAsShiftImm5,

  // Value=iiiiiss0mmmm where mmmm is the register to rotate, ss is the shift
  // kind, and iiiii is the shift amount.
  EncodedAsShiftedRegister,

  // Value=ssss0tt1mmmm where mmmm=Rm, tt is an encoded ShiftKind, and ssss=Rms.
  EncodedAsRegShiftReg,

  // Value is 32bit integer constant.
  EncodedAsConstI32
};
    324 
    325 // Sets Encoding to a rotated Imm8 encoding of Value, if possible.
    326 IValueT encodeRotatedImm8(IValueT RotateAmt, IValueT Immed8) {
    327   assert(RotateAmt < (1 << kRotateBits));
    328   assert(Immed8 < (1 << kImmed8Bits));
    329   return (RotateAmt << kRotateShift) | (Immed8 << kImmed8Shift);
    330 }
    331 
    332 // Encodes iiiiitt0mmmm for data-processing (2nd) operands where iiiii=Imm5,
    333 // tt=Shift, and mmmm=Rm.
    334 IValueT encodeShiftRotateImm5(IValueT Rm, OperandARM32::ShiftKind Shift,
    335                               IOffsetT imm5) {
    336   (void)kShiftImmBits;
    337   assert(imm5 < (1 << kShiftImmBits));
    338   return (imm5 << kShiftImmShift) | (encodeShift(Shift) << kShiftShift) | Rm;
    339 }
    340 
    341 // Encodes mmmmtt01ssss for data-processing operands where mmmm=Rm, ssss=Rs, and
    342 // tt=Shift.
    343 IValueT encodeShiftRotateReg(IValueT Rm, OperandARM32::ShiftKind Shift,
    344                              IValueT Rs) {
    345   return (Rs << kRsShift) | (encodeShift(Shift) << kShiftShift) | B4 |
    346          (Rm << kRmShift);
    347 }
    348 
// Defines the set of registers expected in an operand. encodeOperand uses
// this to choose between the GPR, S, D and Q register-number encoders.
enum RegSetWanted { WantGPRegs, WantSRegs, WantDRegs, WantQRegs };
    351 
    352 EncodedOperand encodeOperand(const Operand *Opnd, IValueT &Value,
    353                              RegSetWanted WantedRegSet) {
    354   Value = 0; // Make sure initialized.
    355   if (const auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
    356     if (Var->hasReg()) {
    357       switch (WantedRegSet) {
    358       case WantGPRegs:
    359         Value = getEncodedGPRegNum(Var);
    360         break;
    361       case WantSRegs:
    362         Value = getEncodedSRegNum(Var);
    363         break;
    364       case WantDRegs:
    365         Value = getEncodedDRegNum(Var);
    366         break;
    367       case WantQRegs:
    368         Value = getEncodedQRegNum(Var);
    369         break;
    370       }
    371       return EncodedAsRegister;
    372     }
    373     return CantEncode;
    374   }
    375   if (const auto *FlexImm = llvm::dyn_cast<OperandARM32FlexImm>(Opnd)) {
    376     const IValueT Immed8 = FlexImm->getImm();
    377     const IValueT Rotate = FlexImm->getRotateAmt();
    378     if (!((Rotate < (1 << kRotateBits)) && (Immed8 < (1 << kImmed8Bits))))
    379       return CantEncode;
    380     Value = (Rotate << kRotateShift) | (Immed8 << kImmed8Shift);
    381     return EncodedAsRotatedImm8;
    382   }
    383   if (const auto *Const = llvm::dyn_cast<ConstantInteger32>(Opnd)) {
    384     Value = Const->getValue();
    385     return EncodedAsConstI32;
    386   }
    387   if (const auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Opnd)) {
    388     Operand *Amt = FlexReg->getShiftAmt();
    389     IValueT Rm;
    390     if (encodeOperand(FlexReg->getReg(), Rm, WantGPRegs) != EncodedAsRegister)
    391       return CantEncode;
    392     if (const auto *Var = llvm::dyn_cast<Variable>(Amt)) {
    393       IValueT Rs;
    394       if (encodeOperand(Var, Rs, WantGPRegs) != EncodedAsRegister)
    395         return CantEncode;
    396       Value = encodeShiftRotateReg(Rm, FlexReg->getShiftOp(), Rs);
    397       return EncodedAsRegShiftReg;
    398     }
    399     // If reached, the amount is a shifted amount by some 5-bit immediate.
    400     uint32_t Imm5;
    401     if (const auto *ShAmt = llvm::dyn_cast<OperandARM32ShAmtImm>(Amt)) {
    402       Imm5 = ShAmt->getShAmtImm();
    403     } else if (const auto *IntConst = llvm::dyn_cast<ConstantInteger32>(Amt)) {
    404       int32_t Val = IntConst->getValue();
    405       if (Val < 0)
    406         return CantEncode;
    407       Imm5 = static_cast<uint32_t>(Val);
    408     } else
    409       return CantEncode;
    410     Value = encodeShiftRotateImm5(Rm, FlexReg->getShiftOp(), Imm5);
    411     return EncodedAsShiftedRegister;
    412   }
    413   if (const auto *ShImm = llvm::dyn_cast<OperandARM32ShAmtImm>(Opnd)) {
    414     const IValueT Immed5 = ShImm->getShAmtImm();
    415     assert(Immed5 < (1 << kShiftImmBits));
    416     Value = (Immed5 << kShiftImmShift);
    417     return EncodedAsShiftImm5;
    418   }
    419   return CantEncode;
    420 }
    421 
    422 IValueT encodeImmRegOffset(IValueT Reg, IOffsetT Offset,
    423                            OperandARM32Mem::AddrMode Mode, IOffsetT MaxOffset,
    424                            IValueT OffsetShift) {
    425   IValueT Value = Mode | (Reg << kRnShift);
    426   if (Offset < 0) {
    427     Offset = -Offset;
    428     Value ^= U; // Flip U to adjust sign.
    429   }
    430   assert(Offset <= MaxOffset);
    431   (void)MaxOffset;
    432   return Value | (Offset >> OffsetShift);
    433 }
    434 
    435 // Encodes immediate register offset using encoding 3.
    436 IValueT encodeImmRegOffsetEnc3(IValueT Rn, IOffsetT Imm8,
    437                                OperandARM32Mem::AddrMode Mode) {
    438   IValueT Value = Mode | (Rn << kRnShift);
    439   if (Imm8 < 0) {
    440     Imm8 = -Imm8;
    441     Value = (Value ^ U);
    442   }
    443   assert(Imm8 < (1 << 8));
    444   Value = Value | B22 | ((Imm8 & 0xf0) << 4) | (Imm8 & 0x0f);
    445   return Value;
    446 }
    447 
    448 IValueT encodeImmRegOffset(EncodedImmAddress ImmEncoding, IValueT Reg,
    449                            IOffsetT Offset, OperandARM32Mem::AddrMode Mode) {
    450   switch (ImmEncoding) {
    451   case RotatedImm8Address: {
    452     constexpr IOffsetT MaxOffset = (1 << 8) - 1;
    453     constexpr IValueT NoRightShift = 0;
    454     return encodeImmRegOffset(Reg, Offset, Mode, MaxOffset, NoRightShift);
    455   }
    456   case RotatedImm8Div4Address: {
    457     assert((Offset & 0x3) == 0);
    458     constexpr IOffsetT MaxOffset = (1 << 8) - 1;
    459     constexpr IValueT RightShift2 = 2;
    460     return encodeImmRegOffset(Reg, Offset, Mode, MaxOffset, RightShift2);
    461   }
    462   case Imm12Address: {
    463     constexpr IOffsetT MaxOffset = (1 << 12) - 1;
    464     constexpr IValueT NoRightShift = 0;
    465     return encodeImmRegOffset(Reg, Offset, Mode, MaxOffset, NoRightShift);
    466   }
    467   case RotatedImm8Enc3Address:
    468     return encodeImmRegOffsetEnc3(Reg, Offset, Mode);
    469   case NoImmOffsetAddress: {
    470     assert(Offset == 0);
    471     assert(Mode == OperandARM32Mem::Offset);
    472     return Reg << kRnShift;
    473   }
    474   }
    475   llvm_unreachable("(silence g++ warning)");
    476 }
    477 
    478 // Encodes memory address Opnd, and encodes that information into Value, based
    479 // on how ARM represents the address. Returns how the value was encoded.
    480 EncodedOperand encodeAddress(const Operand *Opnd, IValueT &Value,
    481                              const AssemblerARM32::TargetInfo &TInfo,
    482                              EncodedImmAddress ImmEncoding) {
    483   Value = 0; // Make sure initialized.
    484   if (const auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
    485     // Should be a stack variable, with an offset.
    486     if (Var->hasReg())
    487       return CantEncode;
    488     IOffsetT Offset = Var->getStackOffset();
    489     if (!Utils::IsAbsoluteUint(12, Offset))
    490       return CantEncode;
    491     const auto BaseRegNum =
    492         Var->hasReg() ? Var->getBaseRegNum() : TInfo.FrameOrStackReg;
    493     Value = encodeImmRegOffset(ImmEncoding, BaseRegNum, Offset,
    494                                OperandARM32Mem::Offset);
    495     return EncodedAsImmRegOffset;
    496   }
    497   if (const auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Opnd)) {
    498     Variable *Var = Mem->getBase();
    499     if (!Var->hasReg())
    500       return CantEncode;
    501     IValueT Rn = getEncodedGPRegNum(Var);
    502     if (Mem->isRegReg()) {
    503       const Variable *Index = Mem->getIndex();
    504       if (Var == nullptr)
    505         return CantEncode;
    506       Value = (Rn << kRnShift) | Mem->getAddrMode() |
    507               encodeShiftRotateImm5(getEncodedGPRegNum(Index),
    508                                     Mem->getShiftOp(), Mem->getShiftAmt());
    509       return EncodedAsShiftRotateImm5;
    510     }
    511     // Encoded as immediate register offset.
    512     ConstantInteger32 *Offset = Mem->getOffset();
    513     Value = encodeImmRegOffset(ImmEncoding, Rn, Offset->getValue(),
    514                                Mem->getAddrMode());
    515     return EncodedAsImmRegOffset;
    516   }
    517   return CantEncode;
    518 }
    519 
    520 // Checks that Offset can fit in imm24 constant of branch (b) instruction.
    521 void assertCanEncodeBranchOffset(IOffsetT Offset) {
    522   (void)Offset;
    523   (void)kBranchOffsetBits;
    524   assert(Utils::IsAligned(Offset, 4) &&
    525          Utils::IsInt(kBranchOffsetBits, Offset >> 2));
    526 }
    527 
    528 IValueT encodeBranchOffset(IOffsetT Offset, IValueT Inst) {
    529   // Adjust offset to the way ARM CPUs read PC.
    530   Offset -= kPCReadOffset;
    531 
    532   assertCanEncodeBranchOffset(Offset);
    533 
    534   // Properly preserve only the bits supported in the instruction.
    535   Offset >>= 2;
    536   Offset &= kBranchOffsetMask;
    537   return (Inst & ~kBranchOffsetMask) | Offset;
    538 }
    539 
    540 IValueT encodeRegister(const Operand *OpReg, RegSetWanted WantedRegSet,
    541                        const char *RegName, const char *InstName) {
    542   IValueT Reg = 0;
    543   if (encodeOperand(OpReg, Reg, WantedRegSet) != EncodedAsRegister)
    544     llvm::report_fatal_error(std::string(InstName) + ": Can't find register " +
    545                              RegName);
    546   return Reg;
    547 }
    548 
    549 IValueT encodeGPRegister(const Operand *OpReg, const char *RegName,
    550                          const char *InstName) {
    551   return encodeRegister(OpReg, WantGPRegs, RegName, InstName);
    552 }
    553 
    554 IValueT encodeSRegister(const Operand *OpReg, const char *RegName,
    555                         const char *InstName) {
    556   return encodeRegister(OpReg, WantSRegs, RegName, InstName);
    557 }
    558 
    559 IValueT encodeDRegister(const Operand *OpReg, const char *RegName,
    560                         const char *InstName) {
    561   return encodeRegister(OpReg, WantDRegs, RegName, InstName);
    562 }
    563 
    564 IValueT encodeQRegister(const Operand *OpReg, const char *RegName,
    565                         const char *InstName) {
    566   return encodeRegister(OpReg, WantQRegs, RegName, InstName);
    567 }
    568 
    569 void verifyPOrNotW(IValueT Address, const char *InstName) {
    570   if (BuildDefs::minimal())
    571     return;
    572   if (!isBitSet(P, Address) && isBitSet(W, Address))
    573     llvm::report_fatal_error(std::string(InstName) +
    574                              ": P=0 when W=1 not allowed");
    575 }
    576 
    577 void verifyRegsNotEq(IValueT Reg1, const char *Reg1Name, IValueT Reg2,
    578                      const char *Reg2Name, const char *InstName) {
    579   if (BuildDefs::minimal())
    580     return;
    581   if (Reg1 == Reg2)
    582     llvm::report_fatal_error(std::string(InstName) + ": " + Reg1Name + "=" +
    583                              Reg2Name + " not allowed");
    584 }
    585 
    586 void verifyRegNotPc(IValueT Reg, const char *RegName, const char *InstName) {
    587   verifyRegsNotEq(Reg, RegName, RegARM32::Encoded_Reg_pc, "pc", InstName);
    588 }
    589 
    590 void verifyAddrRegNotPc(IValueT RegShift, IValueT Address, const char *RegName,
    591                         const char *InstName) {
    592   if (BuildDefs::minimal())
    593     return;
    594   if (getGPRReg(RegShift, Address) == RegARM32::Encoded_Reg_pc)
    595     llvm::report_fatal_error(std::string(InstName) + ": " + RegName +
    596                              "=pc not allowed");
    597 }
    598 
    599 void verifyRegNotPcWhenSetFlags(IValueT Reg, bool SetFlags,
    600                                 const char *InstName) {
    601   if (BuildDefs::minimal())
    602     return;
    603   if (SetFlags && (Reg == RegARM32::Encoded_Reg_pc))
    604     llvm::report_fatal_error(std::string(InstName) + ": " +
    605                              RegARM32::getRegName(RegARM32::Reg_pc) +
    606                              "=pc not allowed when CC=1");
    607 }
    608 
// Selects which SIMD shift-by-immediate form encodeSIMDShiftImm6 encodes the
// shift amount for.
enum SIMDShiftType { ST_Vshl, ST_Vshr };
    610 
    611 IValueT encodeSIMDShiftImm6(SIMDShiftType Shift, Type ElmtTy,
    612                             const IValueT Imm) {
    613   assert(Imm > 0);
    614   const SizeT MaxShift = getScalarIntBitWidth(ElmtTy);
    615   assert(Imm < 2 * MaxShift);
    616   assert(ElmtTy == IceType_i8 || ElmtTy == IceType_i16 ||
    617          ElmtTy == IceType_i32);
    618   const IValueT VshlImm = Imm - MaxShift;
    619   const IValueT VshrImm = 2 * MaxShift - Imm;
    620   return ((Shift == ST_Vshl) ? VshlImm : VshrImm) & (2 * MaxShift - 1);
    621 }
    622 
    623 IValueT encodeSIMDShiftImm6(SIMDShiftType Shift, Type ElmtTy,
    624                             const ConstantInteger32 *Imm6) {
    625   const IValueT Imm = Imm6->getValue();
    626   return encodeSIMDShiftImm6(Shift, ElmtTy, Imm);
    627 }
    628 } // end of anonymous namespace
    629 
    630 namespace Ice {
    631 namespace ARM32 {
    632 
    633 size_t MoveRelocatableFixup::emit(GlobalContext *Ctx,
    634                                   const Assembler &Asm) const {
    635   if (!BuildDefs::dump())
    636     return InstARM32::InstSize;
    637   Ostream &Str = Ctx->getStrEmit();
    638   IValueT Inst = Asm.load<IValueT>(position());
    639   const bool IsMovw = kind() == llvm::ELF::R_ARM_MOVW_ABS_NC ||
    640                       kind() == llvm::ELF::R_ARM_MOVW_PREL_NC;
    641   const auto Symbol = symbol().toString();
    642   const bool NeedsPCRelSuffix =
    643       (Asm.fixupIsPCRel(kind()) || Symbol == GlobalOffsetTable);
    644   Str << "\t"
    645          "mov" << (IsMovw ? "w" : "t") << "\t"
    646       << RegARM32::getRegName(RegNumT::fixme((Inst >> kRdShift) & 0xF))
    647       << ", #:" << (IsMovw ? "lower" : "upper") << "16:" << Symbol
    648       << (NeedsPCRelSuffix ? " - ." : "") << "\t@ .word "
    649       // TODO(jpp): This is broken, it also needs to add a magic constant.
    650       << llvm::format_hex_no_prefix(Inst, 8) << "\n";
    651   return InstARM32::InstSize;
    652 }
    653 
    654 IValueT AssemblerARM32::encodeElmtType(Type ElmtTy) {
    655   switch (ElmtTy) {
    656   case IceType_i8:
    657     return 0;
    658   case IceType_i16:
    659     return 1;
    660   case IceType_i32:
    661   case IceType_f32:
    662     return 2;
    663   case IceType_i64:
    664     return 3;
    665   default:
    666     llvm::report_fatal_error("SIMD op: Don't understand element type " +
    667                              typeStdString(ElmtTy));
    668   }
    669 }
    670 
    671 // This fixup points to an ARM32 instruction with the following format:
    672 void MoveRelocatableFixup::emitOffset(Assembler *Asm) const {
    673   // cccc00110T00iiiiddddiiiiiiiiiiii where cccc=Cond, dddd=Rd,
    674   // iiiiiiiiiiiiiiii = Imm16, and T=1 for movt.
    675 
    676   const IValueT Inst = Asm->load<IValueT>(position());
    677   constexpr IValueT Imm16Mask = 0x000F0FFF;
    678   const IValueT Imm16 = offset() & 0xffff;
    679   Asm->store(position(),
    680              (Inst & ~Imm16Mask) | ((Imm16 >> 12) << 16) | (Imm16 & 0xfff));
    681 }
    682 
    683 MoveRelocatableFixup *AssemblerARM32::createMoveFixup(bool IsMovW,
    684                                                       const Constant *Value) {
    685   MoveRelocatableFixup *F =
    686       new (allocate<MoveRelocatableFixup>()) MoveRelocatableFixup();
    687   F->set_kind(IsMovW ? (IsNonsfi ? llvm::ELF::R_ARM_MOVW_PREL_NC
    688                                  : llvm::ELF::R_ARM_MOVW_ABS_NC)
    689                      : (IsNonsfi ? llvm::ELF::R_ARM_MOVT_PREL
    690                                  : llvm::ELF::R_ARM_MOVT_ABS));
    691   F->set_value(Value);
    692   Buffer.installFixup(F);
    693   return F;
    694 }
    695 
    696 size_t BlRelocatableFixup::emit(GlobalContext *Ctx,
    697                                 const Assembler &Asm) const {
    698   if (!BuildDefs::dump())
    699     return InstARM32::InstSize;
    700   Ostream &Str = Ctx->getStrEmit();
    701   IValueT Inst = Asm.load<IValueT>(position());
    702   Str << "\t"
    703          "bl\t" << symbol() << "\t@ .word "
    704       << llvm::format_hex_no_prefix(Inst, 8) << "\n";
    705   return InstARM32::InstSize;
    706 }
    707 
    708 void BlRelocatableFixup::emitOffset(Assembler *Asm) const {
    709   // cccc101liiiiiiiiiiiiiiiiiiiiiiii where cccc=Cond, l=Link, and
    710   // iiiiiiiiiiiiiiiiiiiiiiii=
    711   // EncodedBranchOffset(cccc101l000000000000000000000000, Offset);
    712   const IValueT Inst = Asm->load<IValueT>(position());
    713   constexpr IValueT OffsetMask = 0x00FFFFFF;
    714   Asm->store(position(), encodeBranchOffset(offset(), Inst & ~OffsetMask));
    715 }
    716 
    717 void AssemblerARM32::padWithNop(intptr_t Padding) {
    718   constexpr intptr_t InstWidth = sizeof(IValueT);
    719   assert(Padding % InstWidth == 0 &&
    720          "Padding not multiple of instruction size");
    721   for (intptr_t i = 0; i < Padding; i += InstWidth)
    722     nop();
    723 }
    724 
    725 BlRelocatableFixup *
    726 AssemblerARM32::createBlFixup(const ConstantRelocatable *BlTarget) {
    727   BlRelocatableFixup *F =
    728       new (allocate<BlRelocatableFixup>()) BlRelocatableFixup();
    729   F->set_kind(llvm::ELF::R_ARM_CALL);
    730   F->set_value(BlTarget);
    731   Buffer.installFixup(F);
    732   return F;
    733 }
    734 
    735 void AssemblerARM32::bindCfgNodeLabel(const CfgNode *Node) {
    736   if (BuildDefs::dump() && !getFlags().getDisableHybridAssembly()) {
    737     // Generate label name so that branches can find it.
    738     constexpr SizeT InstSize = 0;
    739     emitTextInst(Node->getAsmName() + ":", InstSize);
    740   }
    741   SizeT NodeNumber = Node->getIndex();
    742   assert(!getPreliminary());
    743   Label *L = getOrCreateCfgNodeLabel(NodeNumber);
    744   this->bind(L);
    745 }
    746 
    747 Label *AssemblerARM32::getOrCreateLabel(SizeT Number, LabelVector &Labels) {
    748   Label *L = nullptr;
    749   if (Number == Labels.size()) {
    750     L = new (this->allocate<Label>()) Label();
    751     Labels.push_back(L);
    752     return L;
    753   }
    754   if (Number > Labels.size()) {
    755     Labels.resize(Number + 1);
    756   }
    757   L = Labels[Number];
    758   if (!L) {
    759     L = new (this->allocate<Label>()) Label();
    760     Labels[Number] = L;
    761   }
    762   return L;
    763 }
    764 
    765 // Pull out offset from branch Inst.
    766 IOffsetT AssemblerARM32::decodeBranchOffset(IValueT Inst) {
    767   // Sign-extend, left-shift by 2, and adjust to the way ARM CPUs read PC.
    768   const IOffsetT Offset = (Inst & kBranchOffsetMask) << 8;
    769   return (Offset >> 6) + kPCReadOffset;
    770 }
    771 
    772 void AssemblerARM32::bind(Label *L) {
    773   IOffsetT BoundPc = Buffer.size();
    774   assert(!L->isBound()); // Labels can only be bound once.
    775   while (L->isLinked()) {
    776     IOffsetT Position = L->getLinkPosition();
    777     IOffsetT Dest = BoundPc - Position;
    778     IValueT Inst = Buffer.load<IValueT>(Position);
    779     Buffer.store<IValueT>(Position, encodeBranchOffset(Dest, Inst));
    780     L->setPosition(decodeBranchOffset(Inst));
    781   }
    782   L->bindTo(BoundPc);
    783 }
    784 
    785 void AssemblerARM32::emitTextInst(const std::string &Text, SizeT InstSize) {
    786   AssemblerFixup *F = createTextFixup(Text, InstSize);
    787   emitFixup(F);
    788   for (SizeT I = 0; I < InstSize; ++I) {
    789     AssemblerBuffer::EnsureCapacity ensured(&Buffer);
    790     Buffer.emit<char>(0);
    791   }
    792 }
    793 
    794 void AssemblerARM32::emitType01(CondARM32::Cond Cond, IValueT InstType,
    795                                 IValueT Opcode, bool SetFlags, IValueT Rn,
    796                                 IValueT Rd, IValueT Imm12,
    797                                 EmitChecks RuleChecks, const char *InstName) {
    798   switch (RuleChecks) {
    799   case NoChecks:
    800     break;
    801   case RdIsPcAndSetFlags:
    802     verifyRegNotPcWhenSetFlags(Rd, SetFlags, InstName);
    803     break;
    804   }
    805   assert(Rd < RegARM32::getNumGPRegs());
    806   assert(CondARM32::isDefined(Cond));
    807   const IValueT Encoding = (encodeCondition(Cond) << kConditionShift) |
    808                            (InstType << kTypeShift) | (Opcode << kOpcodeShift) |
    809                            (encodeBool(SetFlags) << kSShift) |
    810                            (Rn << kRnShift) | (Rd << kRdShift) | Imm12;
    811   emitInst(Encoding);
    812 }
    813 
    814 void AssemblerARM32::emitType01(CondARM32::Cond Cond, IValueT Opcode,
    815                                 const Operand *OpRd, const Operand *OpRn,
    816                                 const Operand *OpSrc1, bool SetFlags,
    817                                 EmitChecks RuleChecks, const char *InstName) {
    818   IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
    819   IValueT Rn = encodeGPRegister(OpRn, "Rn", InstName);
    820   emitType01(Cond, Opcode, Rd, Rn, OpSrc1, SetFlags, RuleChecks, InstName);
    821 }
    822 
    823 void AssemblerARM32::emitType01(CondARM32::Cond Cond, IValueT Opcode,
    824                                 IValueT Rd, IValueT Rn, const Operand *OpSrc1,
    825                                 bool SetFlags, EmitChecks RuleChecks,
    826                                 const char *InstName) {
    827   IValueT Src1Value;
    828   // TODO(kschimpf) Other possible decodings of data operations.
    829   switch (encodeOperand(OpSrc1, Src1Value, WantGPRegs)) {
    830   default:
    831     llvm::report_fatal_error(std::string(InstName) +
    832                              ": Can't encode instruction");
    833     return;
    834   case EncodedAsRegister: {
    835     // XXX (register)
    836     //   xxx{s}<c> <Rd>, <Rn>, <Rm>{, <shiff>}
    837     //
    838     // cccc000xxxxsnnnnddddiiiiitt0mmmm where cccc=Cond, xxxx=Opcode, dddd=Rd,
    839     // nnnn=Rn, mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags.
    840     constexpr IValueT Imm5 = 0;
    841     Src1Value = encodeShiftRotateImm5(Src1Value, OperandARM32::kNoShift, Imm5);
    842     emitType01(Cond, kInstTypeDataRegister, Opcode, SetFlags, Rn, Rd, Src1Value,
    843                RuleChecks, InstName);
    844     return;
    845   }
    846   case EncodedAsShiftedRegister: {
    847     // Form is defined in case EncodedAsRegister. (i.e. XXX (register)).
    848     emitType01(Cond, kInstTypeDataRegister, Opcode, SetFlags, Rn, Rd, Src1Value,
    849                RuleChecks, InstName);
    850     return;
    851   }
    852   case EncodedAsConstI32: {
    853     // See if we can convert this to an XXX (immediate).
    854     IValueT RotateAmt;
    855     IValueT Imm8;
    856     if (!OperandARM32FlexImm::canHoldImm(Src1Value, &RotateAmt, &Imm8))
    857       llvm::report_fatal_error(std::string(InstName) +
    858                                ": Immediate rotated constant not valid");
    859     Src1Value = encodeRotatedImm8(RotateAmt, Imm8);
    860     // Intentionally fall to next case!
    861   }
    862   case EncodedAsRotatedImm8: {
    863     // XXX (Immediate)
    864     //   xxx{s}<c> <Rd>, <Rn>, #<RotatedImm8>
    865     //
    866     // cccc001xxxxsnnnnddddiiiiiiiiiiii where cccc=Cond, xxxx=Opcode, dddd=Rd,
    867     // nnnn=Rn, s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
    868     emitType01(Cond, kInstTypeDataImmediate, Opcode, SetFlags, Rn, Rd,
    869                Src1Value, RuleChecks, InstName);
    870     return;
    871   }
    872   case EncodedAsRegShiftReg: {
    873     // XXX (register-shifted reg)
    874     //   xxx{s}<c> <Rd>, <Rn>, <Rm>, <type> <Rs>
    875     //
    876     // cccc000xxxxfnnnnddddssss0tt1mmmm where cccc=Cond, xxxx=Opcode, dddd=Rd,
    877     // nnnn=Rn, ssss=Rs, f=SetFlags, tt is encoding of type, and
    878     // Src1Value=ssss01tt1mmmm.
    879     emitType01(Cond, kInstTypeDataRegShift, Opcode, SetFlags, Rn, Rd, Src1Value,
    880                RuleChecks, InstName);
    881     return;
    882   }
    883   }
    884 }
    885 
    886 void AssemblerARM32::emitType05(CondARM32::Cond Cond, IOffsetT Offset,
    887                                 bool Link) {
    888   // cccc101liiiiiiiiiiiiiiiiiiiiiiii where cccc=Cond, l=Link, and
    889   // iiiiiiiiiiiiiiiiiiiiiiii=
    890   // EncodedBranchOffset(cccc101l000000000000000000000000, Offset);
    891   assert(CondARM32::isDefined(Cond));
    892   IValueT Encoding = static_cast<int32_t>(Cond) << kConditionShift |
    893                      5 << kTypeShift | (Link ? 1 : 0) << kLinkShift;
    894   Encoding = encodeBranchOffset(Offset, Encoding);
    895   emitInst(Encoding);
    896 }
    897 
    898 void AssemblerARM32::emitBranch(Label *L, CondARM32::Cond Cond, bool Link) {
    899   // TODO(kschimpf): Handle far jumps.
    900   if (L->isBound()) {
    901     const int32_t Dest = L->getPosition() - Buffer.size();
    902     emitType05(Cond, Dest, Link);
    903     return;
    904   }
    905   const IOffsetT Position = Buffer.size();
    906   // Use the offset field of the branch instruction for linking the sites.
    907   emitType05(Cond, L->getEncodedPosition(), Link);
    908   L->linkTo(*this, Position);
    909 }
    910 
    911 void AssemblerARM32::emitCompareOp(CondARM32::Cond Cond, IValueT Opcode,
    912                                    const Operand *OpRn, const Operand *OpSrc1,
    913                                    const char *InstName) {
    914   // XXX (register)
    915   //   XXX<c> <Rn>, <Rm>{, <shift>}
    916   //
    917   // ccccyyyxxxx1nnnn0000iiiiitt0mmmm where cccc=Cond, nnnn=Rn, mmmm=Rm, iiiii
    918   // defines shift constant, tt=ShiftKind, yyy=kInstTypeDataRegister, and
    919   // xxxx=Opcode.
    920   //
    921   // XXX (immediate)
    922   //  XXX<c> <Rn>, #<RotatedImm8>
    923   //
    924   // ccccyyyxxxx1nnnn0000iiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
    925   // yyy=kInstTypeDataImmdiate, xxxx=Opcode, and iiiiiiiiiiii=Src1Value
    926   // defining RotatedImm8.
    927   constexpr bool SetFlags = true;
    928   constexpr IValueT Rd = RegARM32::Encoded_Reg_r0;
    929   IValueT Rn = encodeGPRegister(OpRn, "Rn", InstName);
    930   emitType01(Cond, Opcode, Rd, Rn, OpSrc1, SetFlags, NoChecks, InstName);
    931 }
    932 
    933 void AssemblerARM32::emitMemOp(CondARM32::Cond Cond, IValueT InstType,
    934                                bool IsLoad, bool IsByte, IValueT Rt,
    935                                IValueT Address) {
    936   assert(Rt < RegARM32::getNumGPRegs());
    937   assert(CondARM32::isDefined(Cond));
    938   const IValueT Encoding = (encodeCondition(Cond) << kConditionShift) |
    939                            (InstType << kTypeShift) | (IsLoad ? L : 0) |
    940                            (IsByte ? B : 0) | (Rt << kRdShift) | Address;
    941   emitInst(Encoding);
    942 }
    943 
    944 void AssemblerARM32::emitMemOp(CondARM32::Cond Cond, bool IsLoad, bool IsByte,
    945                                IValueT Rt, const Operand *OpAddress,
    946                                const TargetInfo &TInfo, const char *InstName) {
    947   IValueT Address;
    948   switch (encodeAddress(OpAddress, Address, TInfo, Imm12Address)) {
    949   default:
    950     llvm::report_fatal_error(std::string(InstName) +
    951                              ": Memory address not understood");
    952   case EncodedAsImmRegOffset: {
    953     // XXX{B} (immediate):
    954     //   xxx{b}<c> <Rt>, [<Rn>{, #+/-<imm12>}]      ; p=1, w=0
    955     //   xxx{b}<c> <Rt>, [<Rn>], #+/-<imm12>        ; p=1, w=1
    956     //   xxx{b}<c> <Rt>, [<Rn>, #+/-<imm12>]!       ; p=0, w=1
    957     //
    958     // cccc010pubwlnnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
    959     // iiiiiiiiiiii=imm12, b=IsByte, pu0w<<21 is a BlockAddr, l=IsLoad, and
    960     // pu0w0nnnn0000iiiiiiiiiiii=Address.
    961     RegARM32::GPRRegister Rn = getGPRReg(kRnShift, Address);
    962 
    963     // Check if conditions of rules violated.
    964     verifyRegNotPc(Rn, "Rn", InstName);
    965     verifyPOrNotW(Address, InstName);
    966     if (!IsByte && (Rn == RegARM32::Encoded_Reg_sp) && !isBitSet(P, Address) &&
    967         isBitSet(U, Address) && !isBitSet(W, Address) &&
    968         (mask(Address, kImm12Shift, kImmed12Bits) == 0x8 /* 000000000100 */))
    969       llvm::report_fatal_error(std::string(InstName) +
    970                                ": Use push/pop instead");
    971 
    972     emitMemOp(Cond, kInstTypeMemImmediate, IsLoad, IsByte, Rt, Address);
    973     return;
    974   }
    975   case EncodedAsShiftRotateImm5: {
    976     // XXX{B} (register)
    977     //   xxx{b}<c> <Rt>, [<Rn>, +/-<Rm>{, <shift>}]{!}
    978     //   xxx{b}<c> <Rt>, [<Rn>], +/-<Rm>{, <shift>}
    979     //
    980     // cccc011pubwlnnnnttttiiiiiss0mmmm where cccc=Cond, tttt=Rt,
    981     // b=IsByte, U=1 if +, pu0b is a BlockAddr, l=IsLoad, and
    982     // pu0w0nnnn0000iiiiiss0mmmm=Address.
    983     RegARM32::GPRRegister Rn = getGPRReg(kRnShift, Address);
    984     RegARM32::GPRRegister Rm = getGPRReg(kRmShift, Address);
    985 
    986     // Check if conditions of rules violated.
    987     verifyPOrNotW(Address, InstName);
    988     verifyRegNotPc(Rm, "Rm", InstName);
    989     if (IsByte)
    990       verifyRegNotPc(Rt, "Rt", InstName);
    991     if (isBitSet(W, Address)) {
    992       verifyRegNotPc(Rn, "Rn", InstName);
    993       verifyRegsNotEq(Rn, "Rn", Rt, "Rt", InstName);
    994     }
    995     emitMemOp(Cond, kInstTypeRegisterShift, IsLoad, IsByte, Rt, Address);
    996     return;
    997   }
    998   }
    999 }
   1000 
   1001 void AssemblerARM32::emitMemOpEnc3(CondARM32::Cond Cond, IValueT Opcode,
   1002                                    IValueT Rt, const Operand *OpAddress,
   1003                                    const TargetInfo &TInfo,
   1004                                    const char *InstName) {
   1005   IValueT Address;
   1006   switch (encodeAddress(OpAddress, Address, TInfo, RotatedImm8Enc3Address)) {
   1007   default:
   1008     llvm::report_fatal_error(std::string(InstName) +
   1009                              ": Memory address not understood");
   1010   case EncodedAsImmRegOffset: {
   1011     // XXXH (immediate)
   1012     //   xxxh<c> <Rt>, [<Rn>{, #+-<Imm8>}]
   1013     //   xxxh<c> <Rt>, [<Rn>, #+/-<Imm8>]
   1014     //   xxxh<c> <Rt>, [<Rn>, #+/-<Imm8>]!
   1015     //
   1016     // cccc000pu0wxnnnnttttiiiiyyyyjjjj where cccc=Cond, nnnn=Rn, tttt=Rt,
   1017     // iiiijjjj=Imm8, pu0w<<21 is a BlockAddr, x000000000000yyyy0000=Opcode,
   1018     // and pu0w0nnnn0000iiii0000jjjj=Address.
   1019     assert(Rt < RegARM32::getNumGPRegs());
   1020     assert(CondARM32::isDefined(Cond));
   1021     verifyPOrNotW(Address, InstName);
   1022     verifyRegNotPc(Rt, "Rt", InstName);
   1023     if (isBitSet(W, Address))
   1024       verifyRegsNotEq(getGPRReg(kRnShift, Address), "Rn", Rt, "Rt", InstName);
   1025     const IValueT Encoding = (encodeCondition(Cond) << kConditionShift) |
   1026                              Opcode | (Rt << kRdShift) | Address;
   1027     emitInst(Encoding);
   1028     return;
   1029   }
   1030   case EncodedAsShiftRotateImm5: {
   1031     // XXXH (register)
   1032     //   xxxh<c> <Rt>, [<Rn>, +/-<Rm>]{!}
   1033     //   xxxh<c> <Rt>, [<Rn>], +/-<Rm>
   1034     //
   1035     // cccc000pu0wxnnnntttt00001011mmmm where cccc=Cond, tttt=Rt, nnnn=Rn,
   1036     // mmmm=Rm, pu0w<<21 is a BlockAddr, x000000000000yyyy0000=Opcode, and
   1037     // pu0w0nnnn000000000000mmmm=Address.
   1038     assert(Rt < RegARM32::getNumGPRegs());
   1039     assert(CondARM32::isDefined(Cond));
   1040     verifyPOrNotW(Address, InstName);
   1041     verifyRegNotPc(Rt, "Rt", InstName);
   1042     verifyAddrRegNotPc(kRmShift, Address, "Rm", InstName);
   1043     const RegARM32::GPRRegister Rn = getGPRReg(kRnShift, Address);
   1044     if (isBitSet(W, Address)) {
   1045       verifyRegNotPc(Rn, "Rn", InstName);
   1046       verifyRegsNotEq(Rn, "Rn", Rt, "Rt", InstName);
   1047     }
   1048     if (mask(Address, kShiftImmShift, 5) != 0)
   1049       // For encoding 3, no shift is allowed.
   1050       llvm::report_fatal_error(std::string(InstName) +
   1051                                ": Shift constant not allowed");
   1052     const IValueT Encoding = (encodeCondition(Cond) << kConditionShift) |
   1053                              Opcode | (Rt << kRdShift) | Address;
   1054     emitInst(Encoding);
   1055     return;
   1056   }
   1057   }
   1058 }
   1059 
   1060 void AssemblerARM32::emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd,
   1061                                IValueT Rn, IValueT Rm) {
   1062   assert(Rd < RegARM32::getNumGPRegs());
   1063   assert(Rn < RegARM32::getNumGPRegs());
   1064   assert(Rm < RegARM32::getNumGPRegs());
   1065   assert(CondARM32::isDefined(Cond));
   1066   const IValueT Encoding = Opcode | (encodeCondition(Cond) << kConditionShift) |
   1067                            (Rn << kDivRnShift) | (Rd << kDivRdShift) | B26 |
   1068                            B25 | B24 | B20 | B15 | B14 | B13 | B12 | B4 |
   1069                            (Rm << kDivRmShift);
   1070   emitInst(Encoding);
   1071 }
   1072 
   1073 void AssemblerARM32::emitInsertExtractInt(CondARM32::Cond Cond,
   1074                                           const Operand *OpQn, uint32_t Index,
   1075                                           const Operand *OpRt, bool IsExtract,
   1076                                           const char *InstName) {
   1077   const IValueT Rt = encodeGPRegister(OpRt, "Rt", InstName);
   1078   IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", InstName));
   1079   assert(Rt != RegARM32::Encoded_Reg_pc);
   1080   assert(Rt != RegARM32::Encoded_Reg_sp);
   1081   assert(CondARM32::isDefined(Cond));
   1082   const uint32_t BitSize = typeWidthInBytes(OpRt->getType()) * CHAR_BIT;
   1083   IValueT Opcode1 = 0;
   1084   IValueT Opcode2 = 0;
   1085   switch (BitSize) {
   1086   default:
   1087     llvm::report_fatal_error(std::string(InstName) +
   1088                              ": Unable to process type " +
   1089                              typeStdString(OpRt->getType()));
   1090   case 8:
   1091     assert(Index < 16);
   1092     Dn = Dn | mask(Index, 3, 1);
   1093     Opcode1 = B1 | mask(Index, 2, 1);
   1094     Opcode2 = mask(Index, 0, 2);
   1095     break;
   1096   case 16:
   1097     assert(Index < 8);
   1098     Dn = Dn | mask(Index, 2, 1);
   1099     Opcode1 = mask(Index, 1, 1);
   1100     Opcode2 = (mask(Index, 0, 1) << 1) | B0;
   1101     break;
   1102   case 32:
   1103     assert(Index < 4);
   1104     Dn = Dn | mask(Index, 1, 1);
   1105     Opcode1 = mask(Index, 0, 1);
   1106     break;
   1107   }
   1108   const IValueT Encoding = B27 | B26 | B25 | B11 | B9 | B8 | B4 |
   1109                            (encodeCondition(Cond) << kConditionShift) |
   1110                            (Opcode1 << 21) |
   1111                            (getXXXXInRegYXXXX(Dn) << kRnShift) | (Rt << 12) |
   1112                            (encodeBool(IsExtract) << 20) |
   1113                            (getYInRegYXXXX(Dn) << 7) | (Opcode2 << 5);
   1114   emitInst(Encoding);
   1115 }
   1116 
   1117 void AssemblerARM32::emitMoveSS(CondARM32::Cond Cond, IValueT Sd, IValueT Sm) {
   1118   // VMOV (register) - ARM section A8.8.340, encoding A2:
   1119   //   vmov<c>.f32 <Sd>, <Sm>
   1120   //
   1121   // cccc11101D110000dddd101001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
   1122   constexpr IValueT VmovssOpcode = B23 | B21 | B20 | B6;
   1123   constexpr IValueT S0 = 0;
   1124   emitVFPsss(Cond, VmovssOpcode, Sd, S0, Sm);
   1125 }
   1126 
   1127 void AssemblerARM32::emitMulOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd,
   1128                                IValueT Rn, IValueT Rm, IValueT Rs,
   1129                                bool SetFlags) {
   1130   assert(Rd < RegARM32::getNumGPRegs());
   1131   assert(Rn < RegARM32::getNumGPRegs());
   1132   assert(Rm < RegARM32::getNumGPRegs());
   1133   assert(Rs < RegARM32::getNumGPRegs());
   1134   assert(CondARM32::isDefined(Cond));
   1135   IValueT Encoding = Opcode | (encodeCondition(Cond) << kConditionShift) |
   1136                      (encodeBool(SetFlags) << kSShift) | (Rn << kRnShift) |
   1137                      (Rd << kRdShift) | (Rs << kRsShift) | B7 | B4 |
   1138                      (Rm << kRmShift);
   1139   emitInst(Encoding);
   1140 }
   1141 
   1142 void AssemblerARM32::emitMultiMemOp(CondARM32::Cond Cond,
   1143                                     BlockAddressMode AddressMode, bool IsLoad,
   1144                                     IValueT BaseReg, IValueT Registers) {
   1145   assert(CondARM32::isDefined(Cond));
   1146   assert(BaseReg < RegARM32::getNumGPRegs());
   1147   assert(Registers < (1 << RegARM32::getNumGPRegs()));
   1148   IValueT Encoding = (encodeCondition(Cond) << kConditionShift) | B27 |
   1149                      AddressMode | (IsLoad ? L : 0) | (BaseReg << kRnShift) |
   1150                      Registers;
   1151   emitInst(Encoding);
   1152 }
   1153 
   1154 void AssemblerARM32::emitSignExtend(CondARM32::Cond Cond, IValueT Opcode,
   1155                                     const Operand *OpRd, const Operand *OpSrc0,
   1156                                     const char *InstName) {
   1157   IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   1158   IValueT Rm = encodeGPRegister(OpSrc0, "Rm", InstName);
   1159   // Note: For the moment, we assume no rotation is specified.
   1160   RotationValue Rotation = kRotateNone;
   1161   constexpr IValueT Rn = RegARM32::Encoded_Reg_pc;
   1162   const Type Ty = OpSrc0->getType();
   1163   switch (Ty) {
   1164   default:
   1165     llvm::report_fatal_error(std::string(InstName) + ": Type " +
   1166                              typeString(Ty) + " not allowed");
   1167     break;
   1168   case IceType_i1:
   1169   case IceType_i8: {
   1170     // SXTB/UXTB - Arm sections A8.8.233 and A8.8.274, encoding A1:
   1171     //   sxtb<c> <Rd>, <Rm>{, <rotate>}
   1172     //   uxtb<c> <Rd>, <Rm>{, <rotate>}
   1173     //
   1174     // ccccxxxxxxxx1111ddddrr000111mmmm where cccc=Cond, xxxxxxxx<<20=Opcode,
   1175     // dddd=Rd, mmmm=Rm, and rr defined (RotationValue) rotate.
   1176     break;
   1177   }
   1178   case IceType_i16: {
   1179     // SXTH/UXTH - ARM sections A8.8.235 and A8.8.276, encoding A1:
   1180     //   uxth<c> <Rd>< <Rm>{, <rotate>}
   1181     //
   1182     // cccc01101111nnnnddddrr000111mmmm where cccc=Cond, dddd=Rd, mmmm=Rm, and
   1183     // rr defined (RotationValue) rotate.
   1184     Opcode |= B20;
   1185     break;
   1186   }
   1187   }
   1188 
   1189   assert(CondARM32::isDefined(Cond));
   1190   IValueT Rot = encodeRotation(Rotation);
   1191   if (!Utils::IsUint(2, Rot))
   1192     llvm::report_fatal_error(std::string(InstName) +
   1193                              ": Illegal rotation value");
   1194   IValueT Encoding = (encodeCondition(Cond) << kConditionShift) | Opcode |
   1195                      (Rn << kRnShift) | (Rd << kRdShift) |
   1196                      (Rot << kRotationShift) | B6 | B5 | B4 | (Rm << kRmShift);
   1197   emitInst(Encoding);
   1198 }
   1199 
   1200 void AssemblerARM32::emitSIMDBase(IValueT Opcode, IValueT Dd, IValueT Dn,
   1201                                   IValueT Dm, bool UseQRegs, bool IsFloatTy) {
   1202   const IValueT Encoding =
   1203       Opcode | B25 | (encodeCondition(CondARM32::kNone) << kConditionShift) |
   1204       (getYInRegYXXXX(Dd) << 22) | (getXXXXInRegYXXXX(Dn) << 16) |
   1205       (getXXXXInRegYXXXX(Dd) << 12) | (IsFloatTy ? B10 : 0) |
   1206       (getYInRegYXXXX(Dn) << 7) | (encodeBool(UseQRegs) << 6) |
   1207       (getYInRegYXXXX(Dm) << 5) | getXXXXInRegYXXXX(Dm);
   1208   emitInst(Encoding);
   1209 }
   1210 
   1211 void AssemblerARM32::emitSIMD(IValueT Opcode, Type ElmtTy, IValueT Dd,
   1212                               IValueT Dn, IValueT Dm, bool UseQRegs) {
   1213   constexpr IValueT ElmtShift = 20;
   1214   const IValueT ElmtSize = encodeElmtType(ElmtTy);
   1215   assert(Utils::IsUint(2, ElmtSize));
   1216   emitSIMDBase(Opcode | (ElmtSize << ElmtShift), Dd, Dn, Dm, UseQRegs,
   1217                isFloatingType(ElmtTy));
   1218 }
   1219 
   1220 void AssemblerARM32::emitSIMDqqqBase(IValueT Opcode, const Operand *OpQd,
   1221                                      const Operand *OpQn, const Operand *OpQm,
   1222                                      bool IsFloatTy, const char *OpcodeName) {
   1223   const IValueT Qd = encodeQRegister(OpQd, "Qd", OpcodeName);
   1224   const IValueT Qn = encodeQRegister(OpQn, "Qn", OpcodeName);
   1225   const IValueT Qm = encodeQRegister(OpQm, "Qm", OpcodeName);
   1226   constexpr bool UseQRegs = true;
   1227   emitSIMDBase(Opcode, mapQRegToDReg(Qd), mapQRegToDReg(Qn), mapQRegToDReg(Qm),
   1228                UseQRegs, IsFloatTy);
   1229 }
   1230 
   1231 void AssemblerARM32::emitSIMDqqq(IValueT Opcode, Type ElmtTy,
   1232                                  const Operand *OpQd, const Operand *OpQn,
   1233                                  const Operand *OpQm, const char *OpcodeName) {
   1234   constexpr IValueT ElmtShift = 20;
   1235   const IValueT ElmtSize = encodeElmtType(ElmtTy);
   1236   assert(Utils::IsUint(2, ElmtSize));
   1237   emitSIMDqqqBase(Opcode | (ElmtSize << ElmtShift), OpQd, OpQn, OpQm,
   1238                   isFloatingType(ElmtTy), OpcodeName);
   1239 }
   1240 
   1241 void AssemblerARM32::emitSIMDShiftqqc(IValueT Opcode, const Operand *OpQd,
   1242                                       const Operand *OpQm, const IValueT Imm6,
   1243                                       const char *OpcodeName) {
   1244   const IValueT Qd = encodeQRegister(OpQd, "Qd", OpcodeName);
   1245   const IValueT Qn = 0;
   1246   const IValueT Qm = encodeQRegister(OpQm, "Qm", OpcodeName);
   1247   constexpr bool UseQRegs = true;
   1248   constexpr bool IsFloatTy = false;
   1249   constexpr IValueT ElmtShift = 16;
   1250   emitSIMDBase(Opcode | (Imm6 << ElmtShift), mapQRegToDReg(Qd),
   1251                mapQRegToDReg(Qn), mapQRegToDReg(Qm), UseQRegs, IsFloatTy);
   1252 }
   1253 
   1254 void AssemblerARM32::emitSIMDCvtqq(IValueT Opcode, const Operand *OpQd,
   1255                                    const Operand *OpQm,
   1256                                    const char *OpcodeName) {
   1257   const IValueT SIMDOpcode =
   1258       B24 | B23 | B21 | B20 | B19 | B17 | B16 | B10 | B9 | Opcode;
   1259   constexpr bool UseQRegs = true;
   1260   constexpr bool IsFloatTy = false;
   1261   const IValueT Qd = encodeQRegister(OpQd, "Qd", OpcodeName);
   1262   constexpr IValueT Qn = 0;
   1263   const IValueT Qm = encodeQRegister(OpQm, "Qm", OpcodeName);
   1264   emitSIMDBase(SIMDOpcode, mapQRegToDReg(Qd), mapQRegToDReg(Qn),
   1265                mapQRegToDReg(Qm), UseQRegs, IsFloatTy);
   1266 }
   1267 
   1268 void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
   1269                                 IValueT Dd, IValueT Dn, IValueT Dm) {
   1270   assert(Dd < RegARM32::getNumDRegs());
   1271   assert(Dn < RegARM32::getNumDRegs());
   1272   assert(Dm < RegARM32::getNumDRegs());
   1273   assert(CondARM32::isDefined(Cond));
   1274   constexpr IValueT VFPOpcode = B27 | B26 | B25 | B11 | B9 | B8;
   1275   const IValueT Encoding =
   1276       Opcode | VFPOpcode | (encodeCondition(Cond) << kConditionShift) |
   1277       (getYInRegYXXXX(Dd) << 22) | (getXXXXInRegYXXXX(Dn) << 16) |
   1278       (getXXXXInRegYXXXX(Dd) << 12) | (getYInRegYXXXX(Dn) << 7) |
   1279       (getYInRegYXXXX(Dm) << 5) | getXXXXInRegYXXXX(Dm);
   1280   emitInst(Encoding);
   1281 }
   1282 
   1283 void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
   1284                                 const Operand *OpDd, const Operand *OpDn,
   1285                                 const Operand *OpDm, const char *InstName) {
   1286   IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
   1287   IValueT Dn = encodeDRegister(OpDn, "Dn", InstName);
   1288   IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
   1289   emitVFPddd(Cond, Opcode, Dd, Dn, Dm);
   1290 }
   1291 
   1292 void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
   1293                                 IValueT Sd, IValueT Sn, IValueT Sm) {
   1294   assert(Sd < RegARM32::getNumSRegs());
   1295   assert(Sn < RegARM32::getNumSRegs());
   1296   assert(Sm < RegARM32::getNumSRegs());
   1297   assert(CondARM32::isDefined(Cond));
   1298   constexpr IValueT VFPOpcode = B27 | B26 | B25 | B11 | B9;
   1299   const IValueT Encoding =
   1300       Opcode | VFPOpcode | (encodeCondition(Cond) << kConditionShift) |
   1301       (getYInRegXXXXY(Sd) << 22) | (getXXXXInRegXXXXY(Sn) << 16) |
   1302       (getXXXXInRegXXXXY(Sd) << 12) | (getYInRegXXXXY(Sn) << 7) |
   1303       (getYInRegXXXXY(Sm) << 5) | getXXXXInRegXXXXY(Sm);
   1304   emitInst(Encoding);
   1305 }
   1306 
   1307 void AssemblerARM32::emitVFPsss(CondARM32::Cond Cond, IValueT Opcode,
   1308                                 const Operand *OpSd, const Operand *OpSn,
   1309                                 const Operand *OpSm, const char *InstName) {
   1310   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   1311   const IValueT Sn = encodeSRegister(OpSn, "Sn", InstName);
   1312   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   1313   emitVFPsss(Cond, Opcode, Sd, Sn, Sm);
   1314 }
   1315 
   1316 void AssemblerARM32::adc(const Operand *OpRd, const Operand *OpRn,
   1317                          const Operand *OpSrc1, bool SetFlags,
   1318                          CondARM32::Cond Cond) {
   1319   // ADC (register) - ARM section 18.8.2, encoding A1:
   1320   //   adc{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
   1321   //
   1322   // cccc0000101snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
   1323   // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags.
   1324   //
   1325   // ADC (Immediate) - ARM section A8.8.1, encoding A1:
   1326   //   adc{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   1327   //
   1328   // cccc0010101snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1329   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1330   constexpr const char *AdcName = "adc";
   1331   constexpr IValueT AdcOpcode = B2 | B0; // 0101
   1332   emitType01(Cond, AdcOpcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   1333              AdcName);
   1334 }
   1335 
   1336 void AssemblerARM32::add(const Operand *OpRd, const Operand *OpRn,
   1337                          const Operand *OpSrc1, bool SetFlags,
   1338                          CondARM32::Cond Cond) {
   1339   // ADD (register) - ARM section A8.8.7, encoding A1:
   1340   //   add{s}<c> <Rd>, <Rn>, <Rm>{, <shiff>}
   1341   // ADD (Sp plus register) - ARM section A8.8.11, encoding A1:
   1342   //   add{s}<c> sp, <Rn>, <Rm>{, <shiff>}
   1343   //
   1344   // cccc0000100snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
   1345   // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags.
   1346   //
   1347   // ADD (Immediate) - ARM section A8.8.5, encoding A1:
   1348   //   add{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   1349   // ADD (SP plus immediate) - ARM section A8.8.9, encoding A1.
   1350   //   add{s}<c> <Rd>, sp, #<RotatedImm8>
   1351   //
   1352   // cccc0010100snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1353   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1354   constexpr const char *AddName = "add";
   1355   constexpr IValueT Add = B2; // 0100
   1356   emitType01(Cond, Add, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   1357              AddName);
   1358 }
   1359 
   1360 void AssemblerARM32::and_(const Operand *OpRd, const Operand *OpRn,
   1361                           const Operand *OpSrc1, bool SetFlags,
   1362                           CondARM32::Cond Cond) {
   1363   // AND (register) - ARM section A8.8.14, encoding A1:
   1364   //   and{s}<c> <Rd>, <Rn>{, <shift>}
   1365   //
   1366   // cccc0000000snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
   1367   // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags.
   1368   //
   1369   // AND (Immediate) - ARM section A8.8.13, encoding A1:
   1370   //   and{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   1371   //
   1372   // cccc0010100snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1373   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1374   constexpr const char *AndName = "and";
   1375   constexpr IValueT And = 0; // 0000
   1376   emitType01(Cond, And, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   1377              AndName);
   1378 }
   1379 
   1380 void AssemblerARM32::b(Label *L, CondARM32::Cond Cond) {
   1381   emitBranch(L, Cond, false);
   1382 }
   1383 
   1384 void AssemblerARM32::bkpt(uint16_t Imm16) {
   1385   // BKPT - ARM section A*.8.24 - encoding A1:
   1386   //   bkpt #<Imm16>
   1387   //
   1388   // cccc00010010iiiiiiiiiiii0111iiii where cccc=AL and iiiiiiiiiiiiiiii=Imm16
   1389   const IValueT Encoding = (CondARM32::AL << kConditionShift) | B24 | B21 |
   1390                            ((Imm16 >> 4) << 8) | B6 | B5 | B4 | (Imm16 & 0xf);
   1391   emitInst(Encoding);
   1392 }
   1393 
   1394 void AssemblerARM32::bic(const Operand *OpRd, const Operand *OpRn,
   1395                          const Operand *OpSrc1, bool SetFlags,
   1396                          CondARM32::Cond Cond) {
   1397   // BIC (register) - ARM section A8.8.22, encoding A1:
   1398   //   bic{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
   1399   //
   1400   // cccc0001110snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
   1401   // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags.
   1402   //
   1403   // BIC (immediate) - ARM section A8.8.21, encoding A1:
   1404   //   bic{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   1405   //
   1406   // cccc0011110snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rn, nnnn=Rn,
   1407   // s=SetFlags, and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1408   constexpr const char *BicName = "bic";
   1409   constexpr IValueT BicOpcode = B3 | B2 | B1; // i.e. 1110
   1410   emitType01(Cond, BicOpcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   1411              BicName);
   1412 }
   1413 
   1414 void AssemblerARM32::bl(const ConstantRelocatable *Target) {
   1415   // BL (immediate) - ARM section A8.8.25, encoding A1:
   1416   //   bl<c> <label>
   1417   //
   1418   // cccc1011iiiiiiiiiiiiiiiiiiiiiiii where cccc=Cond (not currently allowed)
   1419   // and iiiiiiiiiiiiiiiiiiiiiiii is the (encoded) Target to branch to.
   1420   emitFixup(createBlFixup(Target));
   1421   constexpr CondARM32::Cond Cond = CondARM32::AL;
   1422   constexpr IValueT Immed = 0;
   1423   constexpr bool Link = true;
   1424   emitType05(Cond, Immed, Link);
   1425 }
   1426 
   1427 void AssemblerARM32::blx(const Operand *Target) {
   1428   // BLX (register) - ARM section A8.8.26, encoding A1:
   1429   //   blx<c> <Rm>
   1430   //
   1431   // cccc000100101111111111110011mmmm where cccc=Cond (not currently allowed)
   1432   // and mmmm=Rm.
   1433   constexpr const char *BlxName = "Blx";
   1434   IValueT Rm = encodeGPRegister(Target, "Rm", BlxName);
   1435   verifyRegNotPc(Rm, "Rm", BlxName);
   1436   constexpr CondARM32::Cond Cond = CondARM32::AL;
   1437   int32_t Encoding = (encodeCondition(Cond) << kConditionShift) | B24 | B21 |
   1438                      (0xfff << 8) | B5 | B4 | (Rm << kRmShift);
   1439   emitInst(Encoding);
   1440 }
   1441 
   1442 void AssemblerARM32::bx(RegARM32::GPRRegister Rm, CondARM32::Cond Cond) {
   1443   // BX - ARM section A8.8.27, encoding A1:
   1444   //   bx<c> <Rm>
   1445   //
   1446   // cccc000100101111111111110001mmmm where mmmm=rm and cccc=Cond.
   1447   assert(CondARM32::isDefined(Cond));
   1448   const IValueT Encoding = (encodeCondition(Cond) << kConditionShift) | B24 |
   1449                            B21 | (0xfff << 8) | B4 |
   1450                            (encodeGPRRegister(Rm) << kRmShift);
   1451   emitInst(Encoding);
   1452 }
   1453 
   1454 void AssemblerARM32::clz(const Operand *OpRd, const Operand *OpSrc,
   1455                          CondARM32::Cond Cond) {
   1456   // CLZ - ARM section A8.8.33, encoding A1:
   1457   //   clz<c> <Rd> <Rm>
   1458   //
   1459   // cccc000101101111dddd11110001mmmm where cccc=Cond, dddd=Rd, and mmmm=Rm.
   1460   constexpr const char *ClzName = "clz";
   1461   constexpr const char *RdName = "Rd";
   1462   constexpr const char *RmName = "Rm";
   1463   IValueT Rd = encodeGPRegister(OpRd, RdName, ClzName);
   1464   assert(Rd < RegARM32::getNumGPRegs());
   1465   verifyRegNotPc(Rd, RdName, ClzName);
   1466   IValueT Rm = encodeGPRegister(OpSrc, RmName, ClzName);
   1467   assert(Rm < RegARM32::getNumGPRegs());
   1468   verifyRegNotPc(Rm, RmName, ClzName);
   1469   assert(CondARM32::isDefined(Cond));
   1470   constexpr IValueT PredefinedBits =
   1471       B24 | B22 | B21 | (0xF << 16) | (0xf << 8) | B4;
   1472   const IValueT Encoding = PredefinedBits | (Cond << kConditionShift) |
   1473                            (Rd << kRdShift) | (Rm << kRmShift);
   1474   emitInst(Encoding);
   1475 }
   1476 
   1477 void AssemblerARM32::cmn(const Operand *OpRn, const Operand *OpSrc1,
   1478                          CondARM32::Cond Cond) {
   1479   // CMN (immediate) - ARM section A8.8.34, encoding A1:
   1480   //   cmn<c> <Rn>, #<RotatedImm8>
   1481   //
   1482   // cccc00110111nnnn0000iiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1483   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1484   //
   1485   // CMN (register) - ARM section A8.8.35, encodeing A1:
   1486   //   cmn<c> <Rn>, <Rm>{, <shift>}
   1487   //
   1488   // cccc00010111nnnn0000iiiiitt0mmmm where cccc=Cond, nnnn=Rn, mmmm=Rm,
   1489   // iiiii=Shift, and tt=ShiftKind.
   1490   constexpr const char *CmnName = "cmn";
   1491   constexpr IValueT CmnOpcode = B3 | B1 | B0; // ie. 1011
   1492   emitCompareOp(Cond, CmnOpcode, OpRn, OpSrc1, CmnName);
   1493 }
   1494 
   1495 void AssemblerARM32::cmp(const Operand *OpRn, const Operand *OpSrc1,
   1496                          CondARM32::Cond Cond) {
   1497   // CMP (register) - ARM section A8.8.38, encoding A1:
   1498   //   cmp<c> <Rn>, <Rm>{, <shift>}
   1499   //
   1500   // cccc00010101nnnn0000iiiiitt0mmmm where cccc=Cond, nnnn=Rn, mmmm=Rm,
   1501   // iiiii=Shift, and tt=ShiftKind.
   1502   //
   1503   // CMP (immediate) - ARM section A8.8.37
   1504   //  cmp<c: <Rn>, #<RotatedImm8>
   1505   //
   1506   // cccc00110101nnnn0000iiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1507   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1508   constexpr const char *CmpName = "cmp";
   1509   constexpr IValueT CmpOpcode = B3 | B1; // ie. 1010
   1510   emitCompareOp(Cond, CmpOpcode, OpRn, OpSrc1, CmpName);
   1511 }
   1512 
   1513 void AssemblerARM32::dmb(IValueT Option) {
   1514   // DMB - ARM section A8.8.43, encoding A1:
   1515   //   dmb <option>
   1516   //
   1517   // 1111010101111111111100000101xxxx where xxxx=Option.
   1518   assert(Utils::IsUint(4, Option) && "Bad dmb option");
   1519   const IValueT Encoding =
   1520       (encodeCondition(CondARM32::kNone) << kConditionShift) | B26 | B24 | B22 |
   1521       B21 | B20 | B19 | B18 | B17 | B16 | B15 | B14 | B13 | B12 | B6 | B4 |
   1522       Option;
   1523   emitInst(Encoding);
   1524 }
   1525 
   1526 void AssemblerARM32::eor(const Operand *OpRd, const Operand *OpRn,
   1527                          const Operand *OpSrc1, bool SetFlags,
   1528                          CondARM32::Cond Cond) {
   1529   // EOR (register) - ARM section A*.8.47, encoding A1:
   1530   //   eor{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
   1531   //
   1532   // cccc0000001snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
   1533   // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags.
   1534   //
   1535   // EOR (Immediate) - ARM section A8.*.46, encoding A1:
   1536   //   eor{s}<c> <Rd>, <Rn>, #RotatedImm8
   1537   //
   1538   // cccc0010001snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1539   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1540   constexpr const char *EorName = "eor";
   1541   constexpr IValueT EorOpcode = B0; // 0001
   1542   emitType01(Cond, EorOpcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   1543              EorName);
   1544 }
   1545 
   1546 void AssemblerARM32::ldr(const Operand *OpRt, const Operand *OpAddress,
   1547                          CondARM32::Cond Cond, const TargetInfo &TInfo) {
   1548   constexpr const char *LdrName = "ldr";
   1549   constexpr bool IsLoad = true;
   1550   IValueT Rt = encodeGPRegister(OpRt, "Rt", LdrName);
   1551   const Type Ty = OpRt->getType();
   1552   switch (Ty) {
   1553   case IceType_i64:
   1554     // LDRD is not implemented because target lowering handles i64 and double by
   1555     // using two (32-bit) load instructions. Note: Intentionally drop to default
   1556     // case.
   1557     llvm::report_fatal_error(std::string("ldr : Type ") + typeString(Ty) +
   1558                              " not implemented");
   1559   default:
   1560     llvm::report_fatal_error(std::string("ldr : Type ") + typeString(Ty) +
   1561                              " not allowed");
   1562   case IceType_i1:
   1563   case IceType_i8: {
   1564     // LDRB (immediate) - ARM section A8.8.68, encoding A1:
   1565     //   ldrb<c> <Rt>, [<Rn>{, #+/-<imm12>}]     ; p=1, w=0
   1566     //   ldrb<c> <Rt>, [<Rn>], #+/-<imm12>       ; p=1, w=1
   1567     //   ldrb<c> <Rt>, [<Rn>, #+/-<imm12>]!      ; p=0, w=1
   1568     //
   1569     // cccc010pu1w1nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
   1570     // iiiiiiiiiiii=imm12, u=1 if +, pu0w is a BlockAddr, and
   1571     // pu0w0nnnn0000iiiiiiiiiiii=Address.
   1572     //
   1573     // LDRB (register) - ARM section A8.8.66, encoding A1:
   1574     //   ldrb<c> <Rt>, [<Rn>, +/-<Rm>{, <shift>}]{!}
   1575     //   ldrb<c> <Rt>, [<Rn>], +/-<Rm>{, <shift>}
   1576     //
   1577     // cccc011pu1w1nnnnttttiiiiiss0mmmm where cccc=Cond, tttt=Rt, U=1 if +, pu0b
   1578     // is a BlockAddr, and pu0w0nnnn0000iiiiiss0mmmm=Address.
   1579     constexpr bool IsByte = true;
   1580     emitMemOp(Cond, IsLoad, IsByte, Rt, OpAddress, TInfo, LdrName);
   1581     return;
   1582   }
   1583   case IceType_i16: {
   1584     // LDRH (immediate) - ARM section A8.8.80, encoding A1:
   1585     //   ldrh<c> <Rt>, [<Rn>{, #+/-<Imm8>}]
   1586     //   ldrh<c> <Rt>, [<Rn>], #+/-<Imm8>
   1587     //   ldrh<c> <Rt>, [<Rn>, #+/-<Imm8>]!
   1588     //
   1589     // cccc000pu1w1nnnnttttiiii1011iiii where cccc=Cond, tttt=Rt, nnnn=Rn,
   1590     // iiiiiiii=Imm8, u=1 if +, pu0w is a BlockAddr, and
   1591     // pu0w0nnnn0000iiiiiiiiiiii=Address.
   1592     constexpr const char *Ldrh = "ldrh";
   1593     emitMemOpEnc3(Cond, L | B7 | B5 | B4, Rt, OpAddress, TInfo, Ldrh);
   1594     return;
   1595   }
   1596   case IceType_i32: {
   1597     // LDR (immediate) - ARM section A8.8.63, encoding A1:
   1598     //   ldr<c> <Rt>, [<Rn>{, #+/-<imm12>}]      ; p=1, w=0
   1599     //   ldr<c> <Rt>, [<Rn>], #+/-<imm12>        ; p=1, w=1
   1600     //   ldr<c> <Rt>, [<Rn>, #+/-<imm12>]!       ; p=0, w=1
   1601     //
   1602     // cccc010pu0w1nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
   1603     // iiiiiiiiiiii=imm12, u=1 if +, pu0w is a BlockAddr, and
   1604     //
   1605     // LDR (register) - ARM section A8.8.70, encoding A1:
   1606     //   ldrb<c> <Rt>, [<Rn>, +/-<Rm>{, <shift>}]{!}
   1607     //   ldrb<c> <Rt>, [<Rn>], +-<Rm>{, <shift>}
   1608     //
   1609     // cccc011pu0w1nnnnttttiiiiiss0mmmm where cccc=Cond, tttt=Rt, U=1 if +, pu0b
   1610     // is a BlockAddr, and pu0w0nnnn0000iiiiiss0mmmm=Address.
   1611     constexpr bool IsByte = false;
   1612     emitMemOp(Cond, IsLoad, IsByte, Rt, OpAddress, TInfo, LdrName);
   1613     return;
   1614   }
   1615   }
   1616 }
   1617 
   1618 void AssemblerARM32::emitMemExOp(CondARM32::Cond Cond, Type Ty, bool IsLoad,
   1619                                  const Operand *OpRd, IValueT Rt,
   1620                                  const Operand *OpAddress,
   1621                                  const TargetInfo &TInfo,
   1622                                  const char *InstName) {
   1623   IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   1624   IValueT MemExOpcode = IsLoad ? B0 : 0;
   1625   switch (Ty) {
   1626   default:
   1627     llvm::report_fatal_error(std::string(InstName) + ": Type " +
   1628                              typeString(Ty) + " not allowed");
   1629   case IceType_i1:
   1630   case IceType_i8:
   1631     MemExOpcode |= B2;
   1632     break;
   1633   case IceType_i16:
   1634     MemExOpcode |= B2 | B1;
   1635     break;
   1636   case IceType_i32:
   1637     break;
   1638   case IceType_i64:
   1639     MemExOpcode |= B1;
   1640   }
   1641   IValueT AddressRn;
   1642   if (encodeAddress(OpAddress, AddressRn, TInfo, NoImmOffsetAddress) !=
   1643       EncodedAsImmRegOffset)
   1644     llvm::report_fatal_error(std::string(InstName) +
   1645                              ": Can't extract Rn from address");
   1646   assert(Utils::IsAbsoluteUint(3, MemExOpcode));
   1647   assert(Rd < RegARM32::getNumGPRegs());
   1648   assert(Rt < RegARM32::getNumGPRegs());
   1649   assert(CondARM32::isDefined(Cond));
   1650   IValueT Encoding = (Cond << kConditionShift) | B24 | B23 | B11 | B10 | B9 |
   1651                      B8 | B7 | B4 | (MemExOpcode << kMemExOpcodeShift) |
   1652                      AddressRn | (Rd << kRdShift) | (Rt << kRmShift);
   1653   emitInst(Encoding);
   1654   return;
   1655 }
   1656 
   1657 void AssemblerARM32::ldrex(const Operand *OpRt, const Operand *OpAddress,
   1658                            CondARM32::Cond Cond, const TargetInfo &TInfo) {
   1659   // LDREXB - ARM section A8.8.76, encoding A1:
   1660   //   ldrexb<c> <Rt>, [<Rn>]
   1661   //
   1662   // cccc00011101nnnntttt111110011111 where cccc=Cond, tttt=Rt, and nnnn=Rn.
   1663   //
   1664   // LDREXH - ARM section A8.8.78, encoding A1:
   1665   //   ldrexh<c> <Rt>, [<Rn>]
   1666   //
   1667   // cccc00011111nnnntttt111110011111 where cccc=Cond, tttt=Rt, and nnnn=Rn.
   1668   //
   1669   // LDREX - ARM section A8.8.75, encoding A1:
   1670   //   ldrex<c> <Rt>, [<Rn>]
   1671   //
   1672   // cccc00011001nnnntttt111110011111 where cccc=Cond, tttt=Rt, and nnnn=Rn.
   1673   //
   1674   // LDREXD - ARM section A8.
   1675   //   ldrexd<c> <Rt>, [<Rn>]
   1676   //
   1677   // cccc00011001nnnntttt111110011111 where cccc=Cond, tttt=Rt, and nnnn=Rn.
   1678   constexpr const char *LdrexName = "ldrex";
   1679   const Type Ty = OpRt->getType();
   1680   constexpr bool IsLoad = true;
   1681   constexpr IValueT Rm = RegARM32::Encoded_Reg_pc;
   1682   emitMemExOp(Cond, Ty, IsLoad, OpRt, Rm, OpAddress, TInfo, LdrexName);
   1683 }
   1684 
   1685 void AssemblerARM32::emitShift(const CondARM32::Cond Cond,
   1686                                const OperandARM32::ShiftKind Shift,
   1687                                const Operand *OpRd, const Operand *OpRm,
   1688                                const Operand *OpSrc1, const bool SetFlags,
   1689                                const char *InstName) {
   1690   constexpr IValueT ShiftOpcode = B3 | B2 | B0; // 1101
   1691   IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   1692   IValueT Rm = encodeGPRegister(OpRm, "Rm", InstName);
   1693   IValueT Value;
   1694   switch (encodeOperand(OpSrc1, Value, WantGPRegs)) {
   1695   default:
   1696     llvm::report_fatal_error(std::string(InstName) +
   1697                              ": Last operand not understood");
   1698   case EncodedAsShiftImm5: {
   1699     // XXX (immediate)
   1700     //   xxx{s}<c> <Rd>, <Rm>, #imm5
   1701     //
   1702     // cccc0001101s0000ddddiiiii000mmmm where cccc=Cond, s=SetFlags, dddd=Rd,
   1703     // iiiii=imm5, and mmmm=Rm.
   1704     constexpr IValueT Rn = 0; // Rn field is not used.
   1705     Value = Value | (Rm << kRmShift) | (Shift << kShiftShift);
   1706     emitType01(Cond, kInstTypeDataRegShift, ShiftOpcode, SetFlags, Rn, Rd,
   1707                Value, RdIsPcAndSetFlags, InstName);
   1708     return;
   1709   }
   1710   case EncodedAsRegister: {
   1711     // XXX (register)
   1712     //   xxx{S}<c> <Rd>, <Rm>, <Rs>
   1713     //
   1714     // cccc0001101s0000ddddssss0001mmmm where cccc=Cond, s=SetFlags, dddd=Rd,
   1715     // mmmm=Rm, and ssss=Rs.
   1716     constexpr IValueT Rn = 0; // Rn field is not used.
   1717     IValueT Rs = encodeGPRegister(OpSrc1, "Rs", InstName);
   1718     verifyRegNotPc(Rd, "Rd", InstName);
   1719     verifyRegNotPc(Rm, "Rm", InstName);
   1720     verifyRegNotPc(Rs, "Rs", InstName);
   1721     emitType01(Cond, kInstTypeDataRegShift, ShiftOpcode, SetFlags, Rn, Rd,
   1722                encodeShiftRotateReg(Rm, Shift, Rs), NoChecks, InstName);
   1723     return;
   1724   }
   1725   }
   1726 }
   1727 
   1728 void AssemblerARM32::asr(const Operand *OpRd, const Operand *OpRm,
   1729                          const Operand *OpSrc1, bool SetFlags,
   1730                          CondARM32::Cond Cond) {
   1731   constexpr const char *AsrName = "asr";
   1732   emitShift(Cond, OperandARM32::ASR, OpRd, OpRm, OpSrc1, SetFlags, AsrName);
   1733 }
   1734 
   1735 void AssemblerARM32::lsl(const Operand *OpRd, const Operand *OpRm,
   1736                          const Operand *OpSrc1, bool SetFlags,
   1737                          CondARM32::Cond Cond) {
   1738   constexpr const char *LslName = "lsl";
   1739   emitShift(Cond, OperandARM32::LSL, OpRd, OpRm, OpSrc1, SetFlags, LslName);
   1740 }
   1741 
   1742 void AssemblerARM32::lsr(const Operand *OpRd, const Operand *OpRm,
   1743                          const Operand *OpSrc1, bool SetFlags,
   1744                          CondARM32::Cond Cond) {
   1745   constexpr const char *LsrName = "lsr";
   1746   emitShift(Cond, OperandARM32::LSR, OpRd, OpRm, OpSrc1, SetFlags, LsrName);
   1747 }
   1748 
   1749 void AssemblerARM32::mov(const Operand *OpRd, const Operand *OpSrc,
   1750                          CondARM32::Cond Cond) {
   1751   // MOV (register) - ARM section A8.8.104, encoding A1:
   1752   //   mov{S}<c> <Rd>, <Rn>
   1753   //
   1754   // cccc0001101s0000dddd00000000mmmm where cccc=Cond, s=SetFlags, dddd=Rd,
   1755   // and nnnn=Rn.
   1756   //
   1757   // MOV (immediate) - ARM section A8.8.102, encoding A1:
   1758   //   mov{S}<c> <Rd>, #<RotatedImm8>
   1759   //
   1760   // cccc0011101s0000ddddiiiiiiiiiiii where cccc=Cond, s=SetFlags, dddd=Rd,
   1761   // and iiiiiiiiiiii=RotatedImm8=Src.  Note: We don't use movs in this
   1762   // assembler.
   1763   constexpr const char *MovName = "mov";
   1764   IValueT Rd = encodeGPRegister(OpRd, "Rd", MovName);
   1765   constexpr bool SetFlags = false;
   1766   constexpr IValueT Rn = 0;
   1767   constexpr IValueT MovOpcode = B3 | B2 | B0; // 1101.
   1768   emitType01(Cond, MovOpcode, Rd, Rn, OpSrc, SetFlags, RdIsPcAndSetFlags,
   1769              MovName);
   1770 }
   1771 
   1772 void AssemblerARM32::emitMovwt(CondARM32::Cond Cond, bool IsMovW,
   1773                                const Operand *OpRd, const Operand *OpSrc,
   1774                                const char *MovName) {
   1775   IValueT Opcode = B25 | B24 | (IsMovW ? 0 : B22);
   1776   IValueT Rd = encodeGPRegister(OpRd, "Rd", MovName);
   1777   IValueT Imm16;
   1778   if (const auto *Src = llvm::dyn_cast<ConstantRelocatable>(OpSrc)) {
   1779     emitFixup(createMoveFixup(IsMovW, Src));
   1780     // Use 0 for the lower 16 bits of the relocatable, and add a fixup to
   1781     // install the correct bits.
   1782     Imm16 = 0;
   1783   } else if (encodeOperand(OpSrc, Imm16, WantGPRegs) != EncodedAsConstI32) {
   1784     llvm::report_fatal_error(std::string(MovName) + ": Not i32 constant");
   1785   }
   1786   assert(CondARM32::isDefined(Cond));
   1787   if (!Utils::IsAbsoluteUint(16, Imm16))
   1788     llvm::report_fatal_error(std::string(MovName) + ": Constant not i16");
   1789   const IValueT Encoding = encodeCondition(Cond) << kConditionShift | Opcode |
   1790                            ((Imm16 >> 12) << 16) | Rd << kRdShift |
   1791                            (Imm16 & 0xfff);
   1792   emitInst(Encoding);
   1793 }
   1794 
   1795 void AssemblerARM32::movw(const Operand *OpRd, const Operand *OpSrc,
   1796                           CondARM32::Cond Cond) {
   1797   // MOV (immediate) - ARM section A8.8.102, encoding A2:
   1798   //  movw<c> <Rd>, #<imm16>
   1799   //
   1800   // cccc00110000iiiiddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, and
   1801   // iiiiiiiiiiiiiiii=imm16.
   1802   constexpr const char *MovwName = "movw";
   1803   constexpr bool IsMovW = true;
   1804   emitMovwt(Cond, IsMovW, OpRd, OpSrc, MovwName);
   1805 }
   1806 
   1807 void AssemblerARM32::movt(const Operand *OpRd, const Operand *OpSrc,
   1808                           CondARM32::Cond Cond) {
   1809   // MOVT - ARM section A8.8.106, encoding A1:
   1810   //  movt<c> <Rd>, #<imm16>
   1811   //
   1812   // cccc00110100iiiiddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, and
   1813   // iiiiiiiiiiiiiiii=imm16.
   1814   constexpr const char *MovtName = "movt";
   1815   constexpr bool IsMovW = false;
   1816   emitMovwt(Cond, IsMovW, OpRd, OpSrc, MovtName);
   1817 }
   1818 
   1819 void AssemblerARM32::mvn(const Operand *OpRd, const Operand *OpSrc,
   1820                          CondARM32::Cond Cond) {
   1821   // MVN (immediate) - ARM section A8.8.115, encoding A1:
   1822   //   mvn{s}<c> <Rd>, #<const>
   1823   //
   1824   // cccc0011111s0000ddddiiiiiiiiiiii where cccc=Cond, s=SetFlags=0, dddd=Rd,
   1825   // and iiiiiiiiiiii=const
   1826   //
   1827   // MVN (register) - ARM section A8.8.116, encoding A1:
   1828   //   mvn{s}<c> <Rd>, <Rm>{, <shift>
   1829   //
   1830   // cccc0001111s0000ddddiiiiitt0mmmm where cccc=Cond, s=SetFlags=0, dddd=Rd,
   1831   // mmmm=Rm, iiii defines shift constant, and tt=ShiftKind.
   1832   constexpr const char *MvnName = "mvn";
   1833   IValueT Rd = encodeGPRegister(OpRd, "Rd", MvnName);
   1834   constexpr IValueT MvnOpcode = B3 | B2 | B1 | B0; // i.e. 1111
   1835   constexpr IValueT Rn = 0;
   1836   constexpr bool SetFlags = false;
   1837   emitType01(Cond, MvnOpcode, Rd, Rn, OpSrc, SetFlags, RdIsPcAndSetFlags,
   1838              MvnName);
   1839 }
   1840 
   1841 void AssemblerARM32::nop() {
   1842   // NOP - Section A8.8.119, encoding A1:
   1843   //  nop<c>
   1844   //
   1845   // cccc0011001000001111000000000000 where cccc=Cond.
   1846   constexpr CondARM32::Cond Cond = CondARM32::AL;
   1847   const IValueT Encoding = (encodeCondition(Cond) << kConditionShift) | B25 |
   1848                            B24 | B21 | B15 | B14 | B13 | B12;
   1849   emitInst(Encoding);
   1850 }
   1851 
   1852 void AssemblerARM32::sbc(const Operand *OpRd, const Operand *OpRn,
   1853                          const Operand *OpSrc1, bool SetFlags,
   1854                          CondARM32::Cond Cond) {
   1855   // SBC (register) - ARM section 18.8.162, encoding A1:
   1856   //   sbc{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
   1857   //
   1858   // cccc0000110snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
   1859   // mmmm=Rm, iiiii=Shift, tt=ShiftKind, and s=SetFlags.
   1860   //
   1861   // SBC (Immediate) - ARM section A8.8.161, encoding A1:
   1862   //   sbc{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   1863   //
   1864   // cccc0010110snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1865   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1866   constexpr const char *SbcName = "sbc";
   1867   constexpr IValueT SbcOpcode = B2 | B1; // 0110
   1868   emitType01(Cond, SbcOpcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   1869              SbcName);
   1870 }
   1871 
   1872 void AssemblerARM32::sdiv(const Operand *OpRd, const Operand *OpRn,
   1873                           const Operand *OpSrc1, CondARM32::Cond Cond) {
   1874   // SDIV - ARM section A8.8.165, encoding A1.
   1875   //   sdiv<c> <Rd>, <Rn>, <Rm>
   1876   //
   1877   // cccc01110001dddd1111mmmm0001nnnn where cccc=Cond, dddd=Rd, nnnn=Rn, and
   1878   // mmmm=Rm.
   1879   constexpr const char *SdivName = "sdiv";
   1880   IValueT Rd = encodeGPRegister(OpRd, "Rd", SdivName);
   1881   IValueT Rn = encodeGPRegister(OpRn, "Rn", SdivName);
   1882   IValueT Rm = encodeGPRegister(OpSrc1, "Rm", SdivName);
   1883   verifyRegNotPc(Rd, "Rd", SdivName);
   1884   verifyRegNotPc(Rn, "Rn", SdivName);
   1885   verifyRegNotPc(Rm, "Rm", SdivName);
   1886   // Assembler registers rd, rn, rm are encoded as rn, rm, rs.
   1887   constexpr IValueT SdivOpcode = 0;
   1888   emitDivOp(Cond, SdivOpcode, Rd, Rn, Rm);
   1889 }
   1890 
   1891 void AssemblerARM32::str(const Operand *OpRt, const Operand *OpAddress,
   1892                          CondARM32::Cond Cond, const TargetInfo &TInfo) {
   1893   constexpr const char *StrName = "str";
   1894   constexpr bool IsLoad = false;
   1895   IValueT Rt = encodeGPRegister(OpRt, "Rt", StrName);
   1896   const Type Ty = OpRt->getType();
   1897   switch (Ty) {
   1898   case IceType_i64:
   1899     // STRD is not implemented because target lowering handles i64 and double by
   1900     // using two (32-bit) store instructions.  Note: Intentionally drop to
   1901     // default case.
   1902     llvm::report_fatal_error(std::string(StrName) + ": Type " + typeString(Ty) +
   1903                              " not implemented");
   1904   default:
   1905     llvm::report_fatal_error(std::string(StrName) + ": Type " + typeString(Ty) +
   1906                              " not allowed");
   1907   case IceType_i1:
   1908   case IceType_i8: {
   1909     // STRB (immediate) - ARM section A8.8.207, encoding A1:
   1910     //   strb<c> <Rt>, [<Rn>{, #+/-<imm12>}]     ; p=1, w=0
   1911     //   strb<c> <Rt>, [<Rn>], #+/-<imm12>       ; p=1, w=1
   1912     //   strb<c> <Rt>, [<Rn>, #+/-<imm12>]!      ; p=0, w=1
   1913     //
   1914     // cccc010pu1w0nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
   1915     // iiiiiiiiiiii=imm12, u=1 if +.
   1916     constexpr bool IsByte = true;
   1917     emitMemOp(Cond, IsLoad, IsByte, Rt, OpAddress, TInfo, StrName);
   1918     return;
   1919   }
   1920   case IceType_i16: {
   1921     // STRH (immediate) - ARM section A8.*.217, encoding A1:
   1922     //   strh<c> <Rt>, [<Rn>{, #+/-<Imm8>}]
   1923     //   strh<c> <Rt>, [<Rn>], #+/-<Imm8>
   1924     //   strh<c> <Rt>, [<Rn>, #+/-<Imm8>]!
   1925     //
   1926     // cccc000pu1w0nnnnttttiiii1011iiii where cccc=Cond, tttt=Rt, nnnn=Rn,
   1927     // iiiiiiii=Imm8, u=1 if +, pu0w is a BlockAddr, and
   1928     // pu0w0nnnn0000iiiiiiiiiiii=Address.
   1929     constexpr const char *Strh = "strh";
   1930     emitMemOpEnc3(Cond, B7 | B5 | B4, Rt, OpAddress, TInfo, Strh);
   1931     return;
   1932   }
   1933   case IceType_i32: {
   1934     // Note: Handles i32 and float stores. Target lowering handles i64 and
   1935     // double by using two (32 bit) store instructions.
   1936     //
   1937     // STR (immediate) - ARM section A8.8.207, encoding A1:
   1938     //   str<c> <Rt>, [<Rn>{, #+/-<imm12>}]     ; p=1, w=0
   1939     //   str<c> <Rt>, [<Rn>], #+/-<imm12>       ; p=1, w=1
   1940     //   str<c> <Rt>, [<Rn>, #+/-<imm12>]!      ; p=0, w=1
   1941     //
   1942     // cccc010pu1w0nnnnttttiiiiiiiiiiii where cccc=Cond, tttt=Rt, nnnn=Rn,
   1943     // iiiiiiiiiiii=imm12, u=1 if +.
   1944     constexpr bool IsByte = false;
   1945     emitMemOp(Cond, IsLoad, IsByte, Rt, OpAddress, TInfo, StrName);
   1946     return;
   1947   }
   1948   }
   1949 }
   1950 
   1951 void AssemblerARM32::strex(const Operand *OpRd, const Operand *OpRt,
   1952                            const Operand *OpAddress, CondARM32::Cond Cond,
   1953                            const TargetInfo &TInfo) {
   1954   // STREXB - ARM section A8.8.213, encoding A1:
   1955   //   strexb<c> <Rd>, <Rt>, [<Rn>]
   1956   //
   1957   // cccc00011100nnnndddd11111001tttt where cccc=Cond, dddd=Rd, tttt=Rt, and
   1958   // nnnn=Rn.
   1959   //
   1960   // STREXH - ARM section A8.8.215, encoding A1:
   1961   //   strexh<c> <Rd>, <Rt>, [<Rn>]
   1962   //
   1963   // cccc00011110nnnndddd11111001tttt where cccc=Cond, dddd=Rd, tttt=Rt, and
   1964   // nnnn=Rn.
   1965   //
   1966   // STREX - ARM section A8.8.212, encoding A1:
   1967   //   strex<c> <Rd>, <Rt>, [<Rn>]
   1968   //
   1969   // cccc00011000nnnndddd11111001tttt where cccc=Cond, dddd=Rd, tttt=Rt, and
   1970   // nnnn=Rn.
   1971   //
   1972   // STREXD - ARM section A8.8.214, encoding A1:
   1973   //   strexd<c> <Rd>, <Rt>, [<Rn>]
   1974   //
   1975   // cccc00011010nnnndddd11111001tttt where cccc=Cond, dddd=Rd, tttt=Rt, and
   1976   // nnnn=Rn.
   1977   constexpr const char *StrexName = "strex";
   1978   // Note: Rt uses Rm shift in encoding.
   1979   IValueT Rt = encodeGPRegister(OpRt, "Rt", StrexName);
   1980   const Type Ty = OpRt->getType();
   1981   constexpr bool IsLoad = true;
   1982   emitMemExOp(Cond, Ty, !IsLoad, OpRd, Rt, OpAddress, TInfo, StrexName);
   1983 }
   1984 
   1985 void AssemblerARM32::orr(const Operand *OpRd, const Operand *OpRn,
   1986                          const Operand *OpSrc1, bool SetFlags,
   1987                          CondARM32::Cond Cond) {
   1988   // ORR (register) - ARM Section A8.8.123, encoding A1:
   1989   //   orr{s}<c> <Rd>, <Rn>, <Rm>
   1990   //
   1991   // cccc0001100snnnnddddiiiiitt0mmmm where cccc=Cond, dddd=Rd, nnnn=Rn,
   1992   // mmmm=Rm, iiiii=shift, tt=ShiftKind,, and s=SetFlags.
   1993   //
   1994   // ORR (register) - ARM Section A8.8.123, encoding A1:
   1995   //   orr{s}<c> <Rd>, <Rn>,  #<RotatedImm8>
   1996   //
   1997   // cccc0001100snnnnddddiiiiiiiiiiii where cccc=Cond, dddd=Rd, nnnn=Rn,
   1998   // s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   1999   constexpr const char *OrrName = "orr";
   2000   constexpr IValueT OrrOpcode = B3 | B2; // i.e. 1100
   2001   emitType01(Cond, OrrOpcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   2002              OrrName);
   2003 }
   2004 
   2005 void AssemblerARM32::pop(const Variable *OpRt, CondARM32::Cond Cond) {
   2006   // POP - ARM section A8.8.132, encoding A2:
   2007   //   pop<c> {Rt}
   2008   //
   2009   // cccc010010011101dddd000000000100 where dddd=Rt and cccc=Cond.
   2010   constexpr const char *Pop = "pop";
   2011   IValueT Rt = encodeGPRegister(OpRt, "Rt", Pop);
   2012   verifyRegsNotEq(Rt, "Rt", RegARM32::Encoded_Reg_sp, "sp", Pop);
   2013   // Same as load instruction.
   2014   constexpr bool IsLoad = true;
   2015   constexpr bool IsByte = false;
   2016   constexpr IOffsetT MaxOffset = (1 << 8) - 1;
   2017   constexpr IValueT NoShiftRight = 0;
   2018   IValueT Address =
   2019       encodeImmRegOffset(RegARM32::Encoded_Reg_sp, kWordSize,
   2020                          OperandARM32Mem::PostIndex, MaxOffset, NoShiftRight);
   2021   emitMemOp(Cond, kInstTypeMemImmediate, IsLoad, IsByte, Rt, Address);
   2022 }
   2023 
   2024 void AssemblerARM32::popList(const IValueT Registers, CondARM32::Cond Cond) {
   2025   // POP - ARM section A8.*.131, encoding A1:
   2026   //   pop<c> <registers>
   2027   //
   2028   // cccc100010111101rrrrrrrrrrrrrrrr where cccc=Cond and
   2029   // rrrrrrrrrrrrrrrr=Registers (one bit for each GP register).
   2030   constexpr bool IsLoad = true;
   2031   emitMultiMemOp(Cond, IA_W, IsLoad, RegARM32::Encoded_Reg_sp, Registers);
   2032 }
   2033 
   2034 void AssemblerARM32::push(const Operand *OpRt, CondARM32::Cond Cond) {
   2035   // PUSH - ARM section A8.8.133, encoding A2:
   2036   //   push<c> {Rt}
   2037   //
   2038   // cccc010100101101dddd000000000100 where dddd=Rt and cccc=Cond.
   2039   constexpr const char *Push = "push";
   2040   IValueT Rt = encodeGPRegister(OpRt, "Rt", Push);
   2041   verifyRegsNotEq(Rt, "Rt", RegARM32::Encoded_Reg_sp, "sp", Push);
   2042   // Same as store instruction.
   2043   constexpr bool isLoad = false;
   2044   constexpr bool isByte = false;
   2045   constexpr IOffsetT MaxOffset = (1 << 8) - 1;
   2046   constexpr IValueT NoShiftRight = 0;
   2047   IValueT Address =
   2048       encodeImmRegOffset(RegARM32::Encoded_Reg_sp, -kWordSize,
   2049                          OperandARM32Mem::PreIndex, MaxOffset, NoShiftRight);
   2050   emitMemOp(Cond, kInstTypeMemImmediate, isLoad, isByte, Rt, Address);
   2051 }
   2052 
   2053 void AssemblerARM32::pushList(const IValueT Registers, CondARM32::Cond Cond) {
   2054   // PUSH - ARM section A8.8.133, encoding A1:
   2055   //   push<c> <Registers>
   2056   //
   2057   // Bit layout: cccc100100101101rrrrrrrrrrrrrrrr, with cccc=Cond and
   2058   // rrrrrrrrrrrrrrrr=Registers (a mask holding one bit per GP register).
   2059   constexpr bool DoLoad = false;
   2060   emitMultiMemOp(Cond, DB_W, DoLoad, RegARM32::Encoded_Reg_sp, Registers);
   2061 }
   2062 
   2063 void AssemblerARM32::mla(const Operand *OpRd, const Operand *OpRn,
   2064                          const Operand *OpRm, const Operand *OpRa,
   2065                          CondARM32::Cond Cond) {
   2066   // MLA - ARM section A8.8.114, encoding A1.
   2067   //   mla{s}<c> <Rd>, <Rn>, <Rm>, <Ra>
   2068   //
   2069   // cccc0000001sddddaaaammmm1001nnnn where cccc=Cond, s=SetFlags, dddd=Rd,
   2070   // aaaa=Ra, mmmm=Rm, and nnnn=Rn. SetFlags is always passed as false here.
   2071   constexpr const char *InstName = "mla";
   2072   const IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   2073   const IValueT Rn = encodeGPRegister(OpRn, "Rn", InstName);
   2074   const IValueT Rm = encodeGPRegister(OpRm, "Rm", InstName);
   2075   const IValueT Ra = encodeGPRegister(OpRa, "Ra", InstName);
   2076   verifyRegNotPc(Rd, "Rd", InstName);
   2077   verifyRegNotPc(Rn, "Rn", InstName);
   2078   verifyRegNotPc(Rm, "Rm", InstName);
   2079   verifyRegNotPc(Ra, "Ra", InstName);
   2080   constexpr bool SetFlags = false;
   2081   constexpr IValueT Opcode = B21;
   2082   // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
   2083   emitMulOp(Cond, Opcode, Ra, Rd, Rn, Rm, SetFlags);
   2084 }
   2085 
   2086 void AssemblerARM32::mls(const Operand *OpRd, const Operand *OpRn,
   2087                          const Operand *OpRm, const Operand *OpRa,
   2088                          CondARM32::Cond Cond) {
          // Encodes Rd, Rn, Rm and Ra as GP registers (none may be pc) and emits
          // through emitMulOp with SetFlags=false.
   2089   constexpr const char *InstName = "mls";
   2090   const IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   2091   const IValueT Rn = encodeGPRegister(OpRn, "Rn", InstName);
   2092   const IValueT Rm = encodeGPRegister(OpRm, "Rm", InstName);
   2093   const IValueT Ra = encodeGPRegister(OpRa, "Ra", InstName);
   2094   verifyRegNotPc(Rd, "Rd", InstName);
   2095   verifyRegNotPc(Rn, "Rn", InstName);
   2096   verifyRegNotPc(Rm, "Rm", InstName);
   2097   verifyRegNotPc(Ra, "Ra", InstName);
   2098   constexpr bool SetFlags = false;
   2099   constexpr IValueT Opcode = B22 | B21;
   2100   // Assembler registers rd, rn, rm, ra are encoded as rn, rm, rs, rd.
   2101   emitMulOp(Cond, Opcode, Ra, Rd, Rn, Rm, SetFlags);
   2102 }
   2103 
   2104 void AssemblerARM32::mul(const Operand *OpRd, const Operand *OpRn,
   2105                          const Operand *OpSrc1, bool SetFlags,
   2106                          CondARM32::Cond Cond) {
   2107   // MUL - ARM section A8.8.114, encoding A1.
   2108   //   mul{s}<c> <Rd>, <Rn>, <Rm>
   2109   //
   2110   // Bit layout: cccc0000000sdddd0000mmmm1001nnnn, with cccc=Cond, dddd=Rd,
   2111   // nnnn=Rn, mmmm=Rm, and s=SetFlags.
   2112   constexpr const char *InstName = "mul";
   2113   const IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   2114   const IValueT Rn = encodeGPRegister(OpRn, "Rn", InstName);
   2115   const IValueT Rm = encodeGPRegister(OpSrc1, "Rm", InstName);
   2116   verifyRegNotPc(Rd, "Rd", InstName);
   2117   verifyRegNotPc(Rn, "Rn", InstName);
   2118   verifyRegNotPc(Rm, "Rm", InstName);
   2119   // Assembler registers rd, rn, rm are encoded as rn, rm, rs.
   2120   constexpr IValueT Opcode = 0;
   2121   emitMulOp(Cond, Opcode, RegARM32::Encoded_Reg_r0, Rd, Rn, Rm, SetFlags);
   2122 }
   2123 
   2124 void AssemblerARM32::emitRdRm(CondARM32::Cond Cond, IValueT Opcode,
   2125                               const Operand *OpRd, const Operand *OpRm,
   2126                               const char *InstName) {
   2127   // Combine Cond, Opcode and the two encoded GP registers into one word.
   2128   const IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   2129   const IValueT Rm = encodeGPRegister(OpRm, "Rm", InstName);
   2130   emitInst((Cond << kConditionShift) | Opcode | (Rd << kRdShift) |
   2131            (Rm << kRmShift));
   2132 }
   2133 
   2134 void AssemblerARM32::rbit(const Operand *OpRd, const Operand *OpRm,
   2135                           CondARM32::Cond Cond) {
   2136   // RBIT - ARM section A8.8.144, encoding A1:
   2137   //   rbit<c> <Rd>, <Rm>
   2138   //
   2139   // cccc011011111111dddd11110011mmmm where cccc=Cond, dddd=Rd, and mmmm=Rm.
          // The name is handed to emitRdRm, which uses it when encoding the
          // register operands, so it must identify this instruction (not "rev").
   2140   constexpr const char *RbitName = "rbit";
   2141   constexpr IValueT RbitOpcode = B26 | B25 | B23 | B22 | B21 | B20 | B19 | B18 |
   2142                                  B17 | B16 | B11 | B10 | B9 | B8 | B5 | B4;
   2143   emitRdRm(Cond, RbitOpcode, OpRd, OpRm, RbitName);
   2144 }
   2145 
   2146 void AssemblerARM32::rev(const Operand *OpRd, const Operand *OpRm,
   2147                          CondARM32::Cond Cond) {
   2148   // REV - ARM section A8.8.145, encoding A1:
   2149   //   rev<c> <Rd>, <Rm>
   2150   //
   2151   // cccc011010111111dddd11110011mmmm where cccc=Cond, dddd=Rd, and mmmm=Rm.
   2152   constexpr IValueT Opcode = B26 | B25 | B23 | B21 | B20 | B19 | B18 | B17 |
   2153                              B16 | B11 | B10 | B9 | B8 | B5 | B4;
   2154   constexpr const char *InstName = "rev";
   2155   emitRdRm(Cond, Opcode, OpRd, OpRm, InstName);
   2156 }
   2157 
   2158 void AssemblerARM32::rsb(const Operand *OpRd, const Operand *OpRn,
   2159                          const Operand *OpSrc1, bool SetFlags,
   2160                          CondARM32::Cond Cond) {
   2161   // RSB (immediate) - ARM section A8.8.152, encoding A1.
   2162   //   rsb{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   2163   //
   2164   // Bit layout: cccc0010011snnnnddddiiiiiiiiiiii, with cccc=Cond, dddd=Rd,
   2165   // nnnn=Rn, s=SetFlags and iiiiiiiiiiii defining the RotatedImm8 value.
   2166   //
   2167   // RSB (register) - ARM section A8.8.163, encoding A1.
   2168   //   rsb{s}<c> <Rd>, <Rn>, <Rm>{, <Shift>}
   2169   //
   2170   // Bit layout: cccc0000011snnnnddddiiiiitt0mmmm, with cccc=Cond, dddd=Rd,
   2171   // nnnn=Rn, mmmm=Rm, iiiii=shift, tt=ShiftKind, and s=SetFlags.
   2172   constexpr IValueT Opcode = B1 | B0; // 0011
   2173   constexpr const char *InstName = "rsb";
   2174   emitType01(Cond, Opcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   2175              InstName);
   2176 }
   2177 
   2178 void AssemblerARM32::rsc(const Operand *OpRd, const Operand *OpRn,
   2179                          const Operand *OpSrc1, bool SetFlags,
   2180                          CondARM32::Cond Cond) {
   2181   // RSC (immediate) - ARM section A8.8.155, encoding A1:
   2182   //   rsc{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   2183   //
   2184   // Bit layout: cccc0010111snnnnddddiiiiiiiiiiii, with cccc=Cond, dddd=Rd,
   2185   // nnnn=Rn, s=SetFlags and iiiiiiiiiiii defining the RotatedImm8 value.
   2186   //
   2187   // RSC (register) - ARM section A8.8.156, encoding A1:
   2188   //   rsc{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
   2189   //
   2190   // Bit layout: cccc0000111snnnnddddiiiiitt0mmmm, with cccc=Cond, dddd=Rd,
   2191   // nnnn=Rn, mmmm=Rm, iiiii=shift, tt=ShiftKind, and s=SetFlags.
   2192   //
   2193   // RSC (register-shifted register) - ARM section A8.8.157, encoding A1:
   2194   //   rsc{s}<c> <Rd>, <Rn>, <Rm>, <type> <Rs>
   2195   //
   2196   // Bit layout: cccc0000111fnnnnddddssss0tt1mmmm, with cccc=Cond, dddd=Rd,
   2197   // nnnn=Rn, mmmm=Rm, ssss=Rs, tt defining <type>, and f=SetFlags.
   2198   constexpr IValueT Opcode = B2 | B1 | B0; // i.e. 0111.
   2199   constexpr const char *InstName = "rsc";
   2200   emitType01(Cond, Opcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   2201              InstName);
   2202 }
   2203 
   2204 void AssemblerARM32::sxt(const Operand *OpRd, const Operand *OpSrc0,
   2205                          CondARM32::Cond Cond) {
          // All operand handling is delegated to emitSignExtend; only the fixed
          // opcode bits and the diagnostic name are supplied here.
   2206   constexpr IValueT Opcode = B26 | B25 | B23 | B21;
   2207   constexpr const char *InstName = "sxt";
   2208   emitSignExtend(Cond, Opcode, OpRd, OpSrc0, InstName);
   2209 }
   2210 
   2211 void AssemblerARM32::sub(const Operand *OpRd, const Operand *OpRn,
   2212                          const Operand *OpSrc1, bool SetFlags,
   2213                          CondARM32::Cond Cond) {
   2214   // SUB (register) - ARM section A8.8.223, encoding A1:
   2215   //   sub{s}<c> <Rd>, <Rn>, <Rm>{, <shift>}
   2216   // SUB (SP minus register): See ARM section 8.8.226, encoding A1:
   2217   //   sub{s}<c> <Rd>, sp, <Rm>{, <Shift>}
   2218   //
   2219   // Bit layout: cccc0000010snnnnddddiiiiitt0mmmm, with cccc=Cond, dddd=Rd,
   2220   // nnnn=Rn, mmmm=Rm, iiiii=shift, tt=ShiftKind, and s=SetFlags.
   2221   //
   2222   // SUB (immediate) - ARM section A8.8.222, encoding A1:
   2223   //    sub{s}<c> <Rd>, <Rn>, #<RotatedImm8>
   2224   // SUB (SP minus immediate) - ARM section A8.8.225, encoding A1:
   2225   //    sub{s}<c> sp, <Rn>, #<RotatedImm8>
   2226   //
   2227   // Bit layout: cccc0010010snnnnddddiiiiiiiiiiii, with cccc=Cond, dddd=Rd,
   2228   // nnnn=Rn, s=SetFlags and iiiiiiiiiiii=Src1Value defining RotatedImm8.
   2229   constexpr IValueT Opcode = B1; // 0010
   2230   constexpr const char *InstName = "sub";
   2231   emitType01(Cond, Opcode, OpRd, OpRn, OpSrc1, SetFlags, RdIsPcAndSetFlags,
   2232              InstName);
   2233 }
   2234 
   2235 namespace {
   2236 
   2237 // Trap padding uses one specific UDF encoding -- TRAPNaCl in LLVM: 0xE7FEDEF0
   2238 // http://llvm.org/viewvc/llvm-project?view=revision&revision=173943
        // The bytes are listed most-significant first.
   2239 const uint8_t TrapBytesRaw[] = {0xE7, 0xFE, 0xDE, 0xF0};
   2240 
   2241 const llvm::ArrayRef<uint8_t>
   2242     TrapBytes(TrapBytesRaw, llvm::array_lengthof(TrapBytesRaw));
   2243 
   2244 } // end of anonymous namespace
   2245 
   2246 llvm::ArrayRef<uint8_t> AssemblerARM32::getNonExecBundlePadding() const {
          // Returns a view of the file-local TrapBytes array (the UDF trap
          // encoding); no copy is made.
   2247   return TrapBytes;
   2248 }
   2249 
   2250 void AssemblerARM32::trap() {
          // Write TrapBytes into the buffer last-to-first, one byte per emit call.
   2251   AssemblerBuffer::EnsureCapacity ensured(&Buffer);
   2252   for (const uint8_t TrapByte : reverse_range(TrapBytes))
   2253     Buffer.emit<uint8_t>(TrapByte);
   2254 }
   2255 
   2256 void AssemblerARM32::tst(const Operand *OpRn, const Operand *OpSrc1,
   2257                          CondARM32::Cond Cond) {
   2258   // TST (register) - ARM section A8.8.241, encoding A1:
   2259   //   tst<c> <Rn>, <Rm>{, <shift>}
   2260   //
   2261   // Bit layout: cccc00010001nnnn0000iiiiitt0mmmm, with cccc=Cond, nnnn=Rn,
   2262   // mmmm=Rm, iiiii=Shift, and tt=ShiftKind.
   2263   //
   2264   // TST (immediate) - ARM section A8.8.240, encoding A1:
   2265   //   tst<c> <Rn>, #<RotatedImm8>
   2266   //
   2267   // Bit layout: cccc00110001nnnn0000iiiiiiiiiiii, with cccc=Cond, nnnn=Rn,
   2268   // and iiiiiiiiiiii defining RotatedImm8.
   2269   constexpr IValueT Opcode = B3; // ie. 1000
   2270   constexpr const char *InstName = "tst";
   2271   emitCompareOp(Cond, Opcode, OpRn, OpSrc1, InstName);
   2272 }
   2273 
   2274 void AssemblerARM32::udiv(const Operand *OpRd, const Operand *OpRn,
   2275                           const Operand *OpSrc1, CondARM32::Cond Cond) {
   2276   // UDIV - ARM section A8.8.248, encoding A1.
   2277   //   udiv<c> <Rd>, <Rn>, <Rm>
   2278   //
   2279   // Bit layout: cccc01110011dddd1111mmmm0001nnnn, with cccc=Cond, dddd=Rd,
   2280   // nnnn=Rn, and mmmm=Rm.
   2281   constexpr const char *InstName = "udiv";
   2282   const IValueT Rd = encodeGPRegister(OpRd, "Rd", InstName);
   2283   const IValueT Rn = encodeGPRegister(OpRn, "Rn", InstName);
   2284   const IValueT Rm = encodeGPRegister(OpSrc1, "Rm", InstName);
   2285   verifyRegNotPc(Rd, "Rd", InstName);
   2286   verifyRegNotPc(Rn, "Rn", InstName);
   2287   verifyRegNotPc(Rm, "Rm", InstName);
   2288   // Assembler registers rd, rn, rm are encoded as rn, rm, rs.
   2289   constexpr IValueT Opcode = B21;
   2290   emitDivOp(Cond, Opcode, Rd, Rn, Rm);
   2291 }
   2292 
   2293 void AssemblerARM32::umull(const Operand *OpRdLo, const Operand *OpRdHi,
   2294                            const Operand *OpRn, const Operand *OpRm,
   2295                            CondARM32::Cond Cond) {
   2296   // UMULL - ARM section A8.8.257, encoding A1:
   2297   //   umull<c> <RdLo>, <RdHi>, <Rn>, <Rm>
   2298   //
   2299   // Bit layout: cccc0000100shhhhllllmmmm1001nnnn, with hhhh=RdHi, llll=RdLo,
   2300   // nnnn=Rn, mmmm=Rm, and s=SetFlags (always passed as false here).
   2301   constexpr const char *InstName = "umull";
   2302   const IValueT RdLo = encodeGPRegister(OpRdLo, "RdLo", InstName);
   2303   const IValueT RdHi = encodeGPRegister(OpRdHi, "RdHi", InstName);
   2304   const IValueT Rn = encodeGPRegister(OpRn, "Rn", InstName);
   2305   const IValueT Rm = encodeGPRegister(OpRm, "Rm", InstName);
   2306   verifyRegNotPc(RdLo, "RdLo", InstName);
   2307   verifyRegNotPc(RdHi, "RdHi", InstName);
   2308   verifyRegNotPc(Rn, "Rn", InstName);
   2309   verifyRegNotPc(Rm, "Rm", InstName);
   2310   verifyRegsNotEq(RdHi, "RdHi", RdLo, "RdLo", InstName);
   2311   constexpr bool SetFlags = false;
   2312   constexpr IValueT Opcode = B23;
   2313   emitMulOp(Cond, Opcode, RdLo, RdHi, Rn, Rm, SetFlags);
   2314 }
   2315 
   2316 void AssemblerARM32::uxt(const Operand *OpRd, const Operand *OpSrc0,
   2317                          CondARM32::Cond Cond) {
          // All operand handling is delegated to emitSignExtend; only the fixed
          // opcode bits and the diagnostic name are supplied here.
   2318   constexpr IValueT Opcode = B26 | B25 | B23 | B22 | B21;
   2319   constexpr const char *InstName = "uxt";
   2320   emitSignExtend(Cond, Opcode, OpRd, OpSrc0, InstName);
   2321 }
   2322 
   2323 void AssemblerARM32::vabss(const Operand *OpSd, const Operand *OpSm,
   2324                            CondARM32::Cond Cond) {
   2325   // VABS - ARM section A8.8.280, encoding A2:
   2326   //   vabs<c>.f32 <Sd>, <Sm>
   2327   //
   2328   // cccc11101D110000dddd101011M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
   2329   constexpr const char *InstName = "vabss";
   2330   constexpr IValueT Opcode = B23 | B21 | B20 | B7 | B6;
   2331   constexpr IValueT UnusedSn = 0;
   2332   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2333   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   2334   emitVFPsss(Cond, Opcode, Sd, UnusedSn, Sm);
   2335 }
   2336 
   2337 void AssemblerARM32::vabsd(const Operand *OpDd, const Operand *OpDm,
   2338                            CondARM32::Cond Cond) {
   2339   // VABS - ARM section A8.8.280, encoding A2:
   2340   //   vabs<c>.f64 <Dd>, <Dm>
   2341   //
   2342   // cccc11101D110000dddd101111M0mmmm where cccc=Cond, Ddddd=Dd, and Mmmmm=Dm.
   2343   constexpr const char *InstName = "vabsd";
   2344   constexpr IValueT Opcode = B23 | B21 | B20 | B7 | B6;
   2345   constexpr IValueT UnusedDn = 0;
   2346   const IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
   2347   const IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
   2348   emitVFPddd(Cond, Opcode, Dd, UnusedDn, Dm);
   2349 }
   2350 
   2351 void AssemblerARM32::vabsq(const Operand *OpQd, const Operand *OpQm) {
   2352   // VABS - ARM section A8.8.280, encoding A1:
   2353   //   vabs.<dt> <Qd>, <Qm>
   2354   //
   2355   // 111100111D11ss01ddd0f1101M0mmm0 where Dddd=OpQd, Mmmm=OpQm, and
   2356   // <dt> in {s8, s16, s32, f32} and ss is the encoding of <dt>.
   2357   const Type ElmtTy = typeElementType(OpQd->getType());
   2358   assert(ElmtTy != IceType_i64 && "vabsq doesn't allow i64!");
   2359   constexpr const char *InstName = "vabsq";
   2360   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", InstName));
   2361   const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", InstName));
   2362   constexpr IValueT UnusedDn = 0;
   2363   constexpr bool UseQRegs = true;
   2364   constexpr IValueT FixedBits = B24 | B23 | B21 | B20 | B16 | B9 | B8;
   2365   // The element-size field is placed at bit 18.
   2366   const IValueT Opcode = FixedBits | (encodeElmtType(ElmtTy) << 18);
          emitSIMDBase(Opcode, Dd, UnusedDn, Dm, UseQRegs, isFloatingType(ElmtTy));
   2367 }
   2368 
   2369 void AssemblerARM32::vadds(const Operand *OpSd, const Operand *OpSn,
   2370                            const Operand *OpSm, CondARM32::Cond Cond) {
   2371   // VADD (floating-point) - ARM section A8.8.283, encoding A2:
   2372   //   vadd<c>.f32 <Sd>, <Sn>, <Sm>
   2373   //
   2374   // Bit layout: cccc11100D11nnnndddd101sN0M0mmmm, with cccc=Cond, s=0,
   2375   // ddddD=Rd, nnnnN=Rn, and mmmmM=Rm.
   2376   constexpr IValueT Opcode = B21 | B20;
   2377   constexpr const char *InstName = "vadds";
   2378   emitVFPsss(Cond, Opcode, OpSd, OpSn, OpSm, InstName);
   2379 }
   2380 
   2381 void AssemblerARM32::vaddqi(Type ElmtTy, const Operand *OpQd,
   2382                             const Operand *OpQm, const Operand *OpQn) {
   2383   // VADD (integer) - ARM section A8.8.282, encoding A1:
   2384   //   vadd.<dt> <Qd>, <Qn>, <Qm>
   2385   //
   2386   // 111100100Dssnnn0ddd01000N1M0mmm0 where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQn
   2387   // (TODO confirm N/M order), dt in [i8, i16, i32, i64], ss is the index.
   2388   assert(isScalarIntegerType(ElmtTy) &&
   2389          "vaddqi expects vector with integer element type");
   2390   constexpr IValueT Opcode = B11;
   2391   constexpr const char *InstName = "vaddqi";
   2392   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2393 }
   2394 
   2395 void AssemblerARM32::vaddqf(const Operand *OpQd, const Operand *OpQn,
   2396                             const Operand *OpQm) {
   2397   // VADD (floating-point) - ARM section A8.8.283, Encoding A1:
   2398   //   vadd.f32 <Qd>, <Qn>, <Qm>
   2399   //
   2400   // 111100100D00nnn0ddd01101N1M0mmm0 where Dddd=Qd, Nnnn=Qn, and Mmmm=Qm.
   2401   assert(OpQd->getType() == IceType_v4f32 && "vaddqf expects type <4 x float>");
   2402   constexpr bool IsFloatTy = true;
   2403   constexpr IValueT Opcode = B11 | B8;
   2404   constexpr const char *InstName = "vaddqf";
   2405   emitSIMDqqqBase(Opcode, OpQd, OpQn, OpQm, IsFloatTy, InstName);
   2406 }
   2407 
   2408 void AssemblerARM32::vaddd(const Operand *OpDd, const Operand *OpDn,
   2409                            const Operand *OpDm, CondARM32::Cond Cond) {
   2410   // VADD (floating-point) - ARM section A8.8.283, encoding A2:
   2411   //   vadd<c>.f64 <Dd>, <Dn>, <Dm>
   2412   //
   2413   // Bit layout: cccc11100D11nnnndddd101sN0M0mmmm, with cccc=Cond, s=1,
   2414   // Ddddd=Rd, Nnnnn=Rn, and Mmmmm=Rm.
   2415   constexpr IValueT Opcode = B21 | B20;
   2416   constexpr const char *InstName = "vaddd";
   2417   emitVFPddd(Cond, Opcode, OpDd, OpDn, OpDm, InstName);
   2418 }
   2419 
   2420 void AssemblerARM32::vandq(const Operand *OpQd, const Operand *OpQm,
   2421                            const Operand *OpQn) {
   2422   // VAND (register) - ARM section A8.8.287, encoding A1:
   2423   //   vand <Qd>, <Qn>, <Qm>
   2424   //
   2425   // 111100100D00nnn0ddd00001N1M1mmm0 where Dddd=OpQd, Nnnn=OpQm, and
          // Mmmm=OpQn (TODO confirm N/M order against emitSIMDqqq).
   2426   constexpr Type ElmtTy = IceType_i8;
   2427   constexpr IValueT Opcode = B8 | B4;
   2428   constexpr const char *InstName = "vandq";
   2429   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2430 }
   2431 
   2432 void AssemblerARM32::vbslq(const Operand *OpQd, const Operand *OpQm,
   2433                            const Operand *OpQn) {
   2434   // VBSL (register) - ARM section A8.8.290, encoding A1:
   2435   //   vbsl <Qd>, <Qn>, <Qm>
   2436   //
   2437   // 111100110D01nnn0ddd00001N1M1mmm0 where Dddd=OpQd, Nnnn=OpQm, and
          // Mmmm=OpQn (TODO confirm N/M order against emitSIMDqqq).
   2438   constexpr Type ElmtTy = IceType_i8; // emits sz=0
   2439   constexpr IValueT Opcode = B24 | B20 | B8 | B4;
   2440   constexpr const char *InstName = "vbslq";
   2441   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2442 }
   2443 
   2444 void AssemblerARM32::vceqqi(const Type ElmtTy, const Operand *OpQd,
   2445                             const Operand *OpQm, const Operand *OpQn) {
   2446   // vceq (register) - ARM section A8.8.291, encoding A1:
   2447   //   vceq.<st> <Qd>, <Qn>, <Qm>
   2448   //
   2449   // 111100110Dssnnnndddd1000NQM1mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQn
   2450   // (TODO confirm N/M order), and st in [i8, i16, i32], ss is the index.
   2451   constexpr IValueT Opcode = B24 | B11 | B4;
   2452   constexpr const char *InstName = "vceq";
   2453   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2454 }
   2455 
   2456 void AssemblerARM32::vceqqs(const Operand *OpQd, const Operand *OpQm,
   2457                             const Operand *OpQn) {
   2458   // vceq (register) - ARM section A8.8.291, encoding A2:
   2459   //   vceq.f32 <Qd>, <Qn>, <Qm>
   2460   //
   2461   // 111100100D00nnnndddd1110NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, and
          // Mmmm=OpQn (TODO confirm N/M order against emitSIMDqqq).
   2462   constexpr Type ElmtTy = IceType_i8; // encoded as 0b00
   2463   constexpr IValueT Opcode = B11 | B10 | B9;
   2464   constexpr const char *InstName = "vceq";
   2465   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2466 }
   2467 
   2468 void AssemblerARM32::vcgeqi(const Type ElmtTy, const Operand *OpQd,
   2469                             const Operand *OpQm, const Operand *OpQn) {
   2470   // vcge (register) - ARM section A8.8.293, encoding A1:
   2471   //   vcge.<st> <Qd>, <Qn>, <Qm>
   2472   //
   2473   // 1111001U0Dssnnnndddd0011NQM1mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQn
   2474   // (TODO confirm N/M order), 0=U, st in [s8, s16, s32], ss is the index.
   2475   constexpr IValueT Opcode = B9 | B8 | B4;
   2476   constexpr const char *InstName = "vcge";
   2477   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2478 }
   2479 
   2480 void AssemblerARM32::vcugeqi(const Type ElmtTy, const Operand *OpQd,
   2481                              const Operand *OpQm, const Operand *OpQn) {
   2482   // vcge (register) - ARM section A8.8.293, encoding A1:
   2483   //   vcge.<st> <Qd>, <Qn>, <Qm>
   2484   //
   2485   // 1111001U0Dssnnnndddd0011NQM1mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQn
   2486   // (TODO confirm N/M order), 1=U, st in [u8, u16, u32], ss is the index.
   2487   constexpr IValueT Opcode = B24 | B9 | B8 | B4;
   2488   constexpr const char *InstName = "vcge";
   2489   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2490 }
   2491 
   2492 void AssemblerARM32::vcgeqs(const Operand *OpQd, const Operand *OpQm,
   2493                             const Operand *OpQn) {
   2494   // vcge (register) - ARM section A8.8.293, encoding A2:
   2495   //   vcge.f32 <Qd>, <Qn>, <Qm>
   2496   //
   2497   // 111100110D00nnnndddd1110NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, and
          // Mmmm=OpQn (TODO confirm N/M order against emitSIMDqqq).
   2498   constexpr Type ElmtTy = IceType_i8; // encoded as 0b00.
   2499   constexpr IValueT Opcode = B24 | B11 | B10 | B9;
   2500   constexpr const char *InstName = "vcge";
   2501   emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
   2502 }
   2503 
   2504 void AssemblerARM32::vcgtqi(const Type ElmtTy, const Operand *OpQd,
   2505                             const Operand *OpQm, const Operand *OpQn) {
   2506   // vcgt (register) - ARM section A8.8.295, encoding A1:
   2507   //   vcgt.<st> <Qd>, <Qn>, <Qm>
   2508   //
   2509   // 1111001U0Dssnnnndddd0011NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQn
   2510   // (TODO confirm N/M order), 0=U, st in [s8, s16, s32], ss is the index.
   2511   constexpr IValueT VcgtOpcode = B9 | B8;
   2512   constexpr const char *Vcgt = "vcgt";
   2513   emitSIMDqqq(VcgtOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcgt);
   2514 }
   2515 
   2516 void AssemblerARM32::vcugtqi(const Type ElmtTy, const Operand *OpQd,
   2517                              const Operand *OpQm, const Operand *OpQn) {
   2518   // vcgt (register) - ARM section A8.8.295, encoding A1:
   2519   //   vcgt.<st> <Qd>, <Qn>, <Qm>
   2520   //
   2521   // 111100110Dssnnnndddd0011NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQn
   2522   // (TODO confirm N/M order), 1=U, st in [u8, u16, u32], ss is the index.
   2523   constexpr IValueT VcgtOpcode = B24 | B9 | B8;
   2524   constexpr const char *Vcgt = "vcgt";
   2525   emitSIMDqqq(VcgtOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcgt);
   2526 }
   2527 
   2528 void AssemblerARM32::vcgtqs(const Operand *OpQd, const Operand *OpQm,
   2529                             const Operand *OpQn) {
   2530   // vcgt (register) - ARM section A8.8.295, encoding A2:
   2531   //   vcgt.f32 <Qd>, <Qn>, <Qm>
   2532   //
   2533   // 111100110D10nnnndddd1110NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, and
          // Mmmm=OpQn (TODO confirm N/M order against emitSIMDqqq).
   2534   constexpr Type ElmtTy = IceType_i8; // encoded as 0b00.
   2535   constexpr IValueT VcgtOpcode = B24 | B21 | B11 | B10 | B9;
   2536   constexpr const char *Vcgt = "vcgt";
   2537   emitSIMDqqq(VcgtOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcgt);
   2538 }
   2539 
   2540 void AssemblerARM32::vcmpd(const Operand *OpDd, const Operand *OpDm,
   2541                            CondARM32::Cond Cond) {
          // Encodes both D-register operands and emits through emitVFPddd with
          // the Dn field set to zero.
   2542   constexpr const char *InstName = "vcmpd";
   2543   constexpr IValueT Opcode = B23 | B21 | B20 | B18 | B6;
   2544   constexpr IValueT ZeroDn = 0;
   2545   const IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
   2546   const IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
   2547   emitVFPddd(Cond, Opcode, Dd, ZeroDn, Dm);
   2548 }
   2549 
   2550 void AssemblerARM32::vcmpdz(const Operand *OpDd, CondARM32::Cond Cond) {
          // Only Dd comes from an operand; the Dn and Dm fields are set to zero.
          // The opcode is vcmpd's with B16 added.
   2551   constexpr const char *InstName = "vcmpdz";
   2552   constexpr IValueT Opcode = B23 | B21 | B20 | B18 | B16 | B6;
   2553   constexpr IValueT ZeroDn = 0;
   2554   constexpr IValueT ZeroDm = 0;
   2555   const IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
   2556   emitVFPddd(Cond, Opcode, Dd, ZeroDn, ZeroDm);
   2557 }
   2558 
   2559 void AssemblerARM32::vcmps(const Operand *OpSd, const Operand *OpSm,
   2560                            CondARM32::Cond Cond) {
          // Encodes both S-register operands and emits through emitVFPsss with
          // the Sn field set to zero.
   2561   constexpr const char *InstName = "vcmps";
   2562   constexpr IValueT Opcode = B23 | B21 | B20 | B18 | B6;
   2563   constexpr IValueT ZeroSn = 0;
   2564   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2565   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   2566   emitVFPsss(Cond, Opcode, Sd, ZeroSn, Sm);
   2567 }
   2568 
   2569 void AssemblerARM32::vcmpsz(const Operand *OpSd, CondARM32::Cond Cond) {
          // Only Sd comes from an operand; the Sn and Sm fields are set to zero.
          // The name passed to encodeSRegister identifies this instruction as
          // "vcmpsz" (matching vcmpdz), so diagnostics do not point at vcmps.
   2570   constexpr const char *Vcmpsz = "vcmpsz";
   2571   IValueT Sd = encodeSRegister(OpSd, "Sd", Vcmpsz);
   2572   constexpr IValueT VcmpszOpcode = B23 | B21 | B20 | B18 | B16 | B6;
   2573   constexpr IValueT Sn = 0;
   2574   constexpr IValueT Sm = 0;
   2575   emitVFPsss(Cond, VcmpszOpcode, Sd, Sn, Sm);
   2576 }
   2577 
   2578 void AssemblerARM32::emitVFPsd(CondARM32::Cond Cond, IValueT Opcode, IValueT Sd,
   2579                                IValueT Dm) {
          // Builds one instruction word from Opcode, fixed VFP bits, the
          // condition, an S-register destination and a D-register source.
   2580   assert(Sd < RegARM32::getNumSRegs());
   2581   assert(Dm < RegARM32::getNumDRegs());
   2582   assert(CondARM32::isDefined(Cond));
   2583   constexpr IValueT VFPOpcode = B27 | B26 | B25 | B11 | B9;
   2584   const IValueT CondBits = encodeCondition(Cond) << kConditionShift;
   2585   const IValueT SdBits =
   2586       (getYInRegXXXXY(Sd) << 22) | (getXXXXInRegXXXXY(Sd) << 12);
   2587   const IValueT DmBits = (getYInRegYXXXX(Dm) << 5) | getXXXXInRegYXXXX(Dm);
   2588   emitInst(Opcode | VFPOpcode | CondBits | SdBits | DmBits);
   2589 }
   2590 
   2591 void AssemblerARM32::vcvtdi(const Operand *OpDd, const Operand *OpSm,
   2592                             CondARM32::Cond Cond) {
   2593   // VCVT (between floating-point and integer, Floating-point)
   2594   //      - ARM Section A8.8.306, encoding A1:
   2595   //   vcvt<c>.f64.s32 <Dd>, <Sm>
   2596   //
   2597   // cccc11101D111000dddd10111M0mmmm where cccc=Cond, Ddddd=Dd, and mmmmM=Sm.
   2598   constexpr const char *InstName = "vcvtdi";
   2599   constexpr IValueT Opcode = B23 | B21 | B20 | B19 | B8 | B7 | B6;
   2600   const IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
   2601   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   2602   emitVFPds(Cond, Opcode, Dd, Sm);
   2603 }
   2604 
   2605 void AssemblerARM32::vcvtdu(const Operand *OpDd, const Operand *OpSm,
   2606                             CondARM32::Cond Cond) {
   2607   // VCVT (between floating-point and integer, Floating-point)
   2608   //      - ARM Section A8.8.306, encoding A1:
   2609   //   vcvt<c>.f64.u32 <Dd>, <Sm>
   2610   //
   2611   // cccc11101D111000dddd10101M0mmmm where cccc=Cond, Ddddd=Dd, and mmmmM=Sm.
   2612   constexpr const char *InstName = "vcvtdu";
   2613   constexpr IValueT Opcode = B23 | B21 | B20 | B19 | B8 | B6;
   2614   const IValueT Dd = encodeDRegister(OpDd, "Dd", InstName);
   2615   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   2616   emitVFPds(Cond, Opcode, Dd, Sm);
   2617 }
   2618 
   2619 void AssemblerARM32::vcvtsd(const Operand *OpSd, const Operand *OpDm,
   2620                             CondARM32::Cond Cond) {
          // Encodes an S-register destination and a D-register source, then emits
          // through emitVFPsd.
   2621   constexpr const char *InstName = "vcvtsd";
   2622   constexpr IValueT Opcode =
   2623       B23 | B21 | B20 | B18 | B17 | B16 | B8 | B7 | B6;
   2624   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2625   const IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
   2626   emitVFPsd(Cond, Opcode, Sd, Dm);
   2627 }
   2628 
   2629 void AssemblerARM32::vcvtis(const Operand *OpSd, const Operand *OpSm,
   2630                             CondARM32::Cond Cond) {
   2631   // VCVT (between floating-point and integer, Floating-point)
   2632   //      - ARM Section A8.8.306, encoding A1:
   2633   //   vcvt<c>.s32.f32 <Sd>, <Sm>
   2634   //
   2635   // cccc11101D111101dddd10011M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
   2636   constexpr const char *InstName = "vcvtis";
   2637   constexpr IValueT Opcode = B23 | B21 | B20 | B19 | B18 | B16 | B7 | B6;
   2638   constexpr IValueT ZeroSn = 0;
   2639   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2640   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   2641   emitVFPsss(Cond, Opcode, Sd, ZeroSn, Sm);
   2642 }
   2643 
   2644 void AssemblerARM32::vcvtid(const Operand *OpSd, const Operand *OpDm,
   2645                             CondARM32::Cond Cond) {
   2646   // VCVT (between floating-point and integer, Floating-point)
   2647   //      - ARM Section A8.8.306, encoding A1:
   2648   //   vcvt<c>.s32.f64 <Sd>, <Dm>
   2649   //
   2650   // cccc11101D111101dddd10111M0mmmm where cccc=Cond, ddddD=Sd, and Mmmmm=Dm.
   2651   constexpr const char *InstName = "vcvtid";
   2652   constexpr IValueT Opcode =
   2653       B23 | B21 | B20 | B19 | B18 | B16 | B8 | B7 | B6;
   2654   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2655   const IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
   2656   emitVFPsd(Cond, Opcode, Sd, Dm);
   2657 }
   2658 
   2659 void AssemblerARM32::vcvtsi(const Operand *OpSd, const Operand *OpSm,
   2660                             CondARM32::Cond Cond) {
   2661   // VCVT (between floating-point and integer, Floating-point)
   2662   //      - ARM Section A8.8.306, encoding A1:
   2663   //   vcvt<c>.f32.s32 <Sd>, <Sm>
   2664   //
   2665   // cccc11101D111000dddd10011M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
   2666   constexpr const char *InstName = "vcvtsi";
   2667   constexpr IValueT Opcode = B23 | B21 | B20 | B19 | B7 | B6;
   2668   constexpr IValueT ZeroSn = 0;
   2669   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2670   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   2671   emitVFPsss(Cond, Opcode, Sd, ZeroSn, Sm);
   2672 }
   2673 
   2674 void AssemblerARM32::vcvtsu(const Operand *OpSd, const Operand *OpSm,
   2675                             CondARM32::Cond Cond) {
   2676   // VCVT (between floating-point and integer, Floating-point)
   2677   //      - ARM Section A8.8.306, encoding A1:
   2678   //   vcvt<c>.f32.u32 <Sd>, <Sm>
   2679   //
   2680   // cccc11101D111000dddd10001M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
   2681   constexpr const char *InstName = "vcvtsu";
   2682   constexpr IValueT Opcode = B23 | B21 | B20 | B19 | B6;
   2683   constexpr IValueT ZeroSn = 0;
   2684   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2685   const IValueT Sm = encodeSRegister(OpSm, "Sm", InstName);
   2686   emitVFPsss(Cond, Opcode, Sd, ZeroSn, Sm);
   2687 }
   2688 
   2689 void AssemblerARM32::vcvtud(const Operand *OpSd, const Operand *OpDm,
   2690                             CondARM32::Cond Cond) {
   2691   // VCVT (between floating-point and integer, Floating-point)
   2692   //      - ARM Section A8.8.306, encoding A1:
   2693   //   vcvt<c>.u32.f64 <Sd>, <Dm>
   2694   //
   2695   // cccc11101D111100dddd10111M0mmmm where cccc=Cond, ddddD=Sd, and Mmmmm=Dm.
   2696   constexpr const char *InstName = "vcvtud";
   2697   constexpr IValueT Opcode = B23 | B21 | B20 | B19 | B18 | B8 | B7 | B6;
   2698   const IValueT Sd = encodeSRegister(OpSd, "Sd", InstName);
   2699   const IValueT Dm = encodeDRegister(OpDm, "Dm", InstName);
   2700   emitVFPsd(Cond, Opcode, Sd, Dm);
   2701 }
   2702 
   2703 void AssemblerARM32::vcvtus(const Operand *OpSd, const Operand *OpSm,
   2704                             CondARM32::Cond Cond) {
   2705   // VCVT (between floating-point and integer, Floating-point)
   2706   //      - ARM Section A8.8.306, encoding A1:
   2707   //   vcvt<c>.u32.f32 <Sd>, <Sm>
   2708   //
   2709   // cccc11101D111100dddd10011M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
          // NOTE(review): the bit pattern above is only 31 characters wide, so a
          // bit appears to have been dropped in transcription - confirm against
          // the ARM manual. The opcode constant below sets bits 23, 21, 20, 19,
          // 18, 7 and 6.
   2710   constexpr const char *Vcvtus = "vcvtus";
   2711   IValueT Sd = encodeSRegister(OpSd, "Sd", Vcvtus);
   2712   IValueT Sm = encodeSRegister(OpSm, "Sm", Vcvtus);
          // Named after this instruction (the constant was previously called
          // VcvtsiOpcode, a leftover from vcvtsi); the value is unchanged.
   2713   constexpr IValueT VcvtusOpcode = B23 | B21 | B20 | B19 | B18 | B7 | B6;
          // Only two registers are involved; the middle register argument of
          // emitVFPsss is passed as 0.
   2714   constexpr IValueT S0 = 0;
   2715   emitVFPsss(Cond, VcvtusOpcode, Sd, S0, Sm);
   2716 }
   2717 
   2718 void AssemblerARM32::vcvtqsi(const Operand *OpQd, const Operand *OpQm) {
   2719   // VCVT (between floating-point and integer, Advanced SIMD)
   2720   //      - ARM Section A8.8.305, encoding A1:
   2721   //   vcvt<c>.s32.f32 <Qd>, <Qm>
   2722   //
   2723   // 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 10=op.
          // The mnemonic above matches the instruction name string passed to
          // emitSIMDCvtqq below; op=10 corresponds to B8 set and B7 clear.
   2724   constexpr const char *Vcvtqsi = "vcvt.s32.f32";
   2725   constexpr IValueT VcvtqsiOpcode = B8;
   2726   emitSIMDCvtqq(VcvtqsiOpcode, OpQd, OpQm, Vcvtqsi);
   2727 }
   2728 
   2729 void AssemblerARM32::vcvtqsu(const Operand *OpQd, const Operand *OpQm) {
   2730   // VCVT (between floating-point and integer, Advanced SIMD)
   2731   //      - ARM Section A8.8.305, encoding A1:
   2732   //   vcvt<c>.u32.f32 <Qd>, <Qm>
   2733   //
   2734   // 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 11=op.
          // The mnemonic above matches the instruction name string passed to
          // emitSIMDCvtqq below; op=11 corresponds to both B8 and B7 set.
   2735   constexpr const char *Vcvtqsu = "vcvt.u32.f32";
   2736   constexpr IValueT VcvtqsuOpcode = B8 | B7;
   2737   emitSIMDCvtqq(VcvtqsuOpcode, OpQd, OpQm, Vcvtqsu);
   2738 }
   2739 
   2740 void AssemblerARM32::vcvtqis(const Operand *OpQd, const Operand *OpQm) {
   2741   // VCVT (between floating-point and integer, Advanced SIMD)
   2742   //      - ARM Section A8.8.305, encoding A1:
   2743   //   vcvt<c>.f32.s32 <Qd>, <Qm>
   2744   //
   2745   // 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 00=op.
          // op=00: the opcode constant below is 0, i.e. neither B8 nor B7 is set
          // (compare vcvtqsi/vcvtqsu/vcvtqus, which set B8, B8|B7 and B7).
   2746   constexpr const char *Vcvtqis = "vcvt.f32.s32";
   2747   constexpr IValueT VcvtqisOpcode = 0;
   2748   emitSIMDCvtqq(VcvtqisOpcode, OpQd, OpQm, Vcvtqis);
   2749 }
   2750 
   2751 void AssemblerARM32::vcvtqus(const Operand *OpQd, const Operand *OpQm) {
   2752   // VCVT (between floating-point and integer, Advanced SIMD)
   2753   //      - ARM Section A8.8.305, encoding A1:
   2754   //   vcvt<c>.f32.u32 <Qd>, <Qm>
   2755   //
   2756   // 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 01=op.
          // op=01 corresponds to B7 set and B8 clear in the opcode constant below.
   2757   constexpr const char *Vcvtqus = "vcvt.f32.u32";
   2758   constexpr IValueT VcvtqusOpcode = B7;
   2759   emitSIMDCvtqq(VcvtqusOpcode, OpQd, OpQm, Vcvtqus);
   2760 }
   2761 
   2762 void AssemblerARM32::emitVFPds(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd,
   2763                                IValueT Sm) {
          // Emits a VFP instruction with a doubleword destination (Dd) and a
          // single-precision source (Sm). Opcode supplies the instruction-specific
          // bits; Dd and Sm are already-encoded register numbers.
   2764   assert(Dd < RegARM32::getNumDRegs());
   2765   assert(Sm < RegARM32::getNumSRegs());
   2766   assert(CondARM32::isDefined(Cond));
          // Bits common to the instructions emitted through this helper.
   2767   constexpr IValueT VFPOpcode = B27 | B26 | B25 | B11 | B9;
          // Field layout: condition at kConditionShift; Dd is split into its top
          // bit (bit 22) and low four bits (bits 15..12); Sm is split into its low
          // bit (bit 5) and high four bits (bits 3..0).
   2768   const IValueT Encoding =
   2769       Opcode | VFPOpcode | (encodeCondition(Cond) << kConditionShift) |
   2770       (getYInRegYXXXX(Dd) << 22) | (getXXXXInRegYXXXX(Dd) << 12) |
   2771       (getYInRegXXXXY(Sm) << 5) | getXXXXInRegXXXXY(Sm);
   2772   emitInst(Encoding);
   2773 }
   2774 
   2775 void AssemblerARM32::vcvtds(const Operand *OpDd, const Operand *OpSm,
   2776                             CondARM32::Cond Cond) {
          // Conversion with a doubleword destination (Dd) and a single-precision
          // source (Sm) - presumably vcvt<c>.f64.f32 <Dd>, <Sm>; confirm the
          // mnemonic against the ARM manual.
          //
          // Combined with emitVFPds, the emitted pattern is
          // cccc11101D110111dddd101011M0mmmm where cccc=Cond, Ddddd=Dd, and
          // mmmmM=Sm.
          //
          // The instruction name is used when reporting operand-encoding problems;
          // it was previously misspelled "Vctds".
   2777   constexpr const char *Vcvtds = "vcvtds";
   2778   IValueT Dd = encodeDRegister(OpDd, "Dd", Vcvtds);
   2779   IValueT Sm = encodeSRegister(OpSm, "Sm", Vcvtds);
   2780   constexpr IValueT VcvtdsOpcode = B23 | B21 | B20 | B18 | B17 | B16 | B7 | B6;
   2781   emitVFPds(Cond, VcvtdsOpcode, Dd, Sm);
   2782 }
   2783 
   2784 void AssemblerARM32::vdivs(const Operand *OpSd, const Operand *OpSn,
   2785                            const Operand *OpSm, CondARM32::Cond Cond) {
   2786   // VDIV (floating-point) - ARM section A8.8.283, encoding A2:
   2787   //   vdiv<c>.f32 <Sd>, <Sn>, <Sm>
   2788   //
   2789   // cccc11101D00nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Sd, nnnnN=Sn,
   2790   // and mmmmM=Sm.
          // Only bit 23 is instruction-specific here; operand encoding and the
          // remaining bits are delegated to emitVFPsss.
   2791   constexpr const char *Vdivs = "vdivs";
   2792   constexpr IValueT VdivsOpcode = B23;
   2793   emitVFPsss(Cond, VdivsOpcode, OpSd, OpSn, OpSm, Vdivs);
   2794 }
   2795 
   2796 void AssemblerARM32::vdivd(const Operand *OpDd, const Operand *OpDn,
   2797                            const Operand *OpDm, CondARM32::Cond Cond) {
   2798   // VDIV (floating-point) - ARM section A8.8.283, encoding A2:
   2799   //   vdiv<c>.f64 <Dd>, <Dn>, <Dm>
   2800   //
   2801   // cccc11101D00nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Dd, Nnnnn=Dn,
   2802   // and Mmmmm=Dm.
          // Same opcode constant as vdivs; the s bit is not set here, so it is
          // presumably supplied by emitVFPddd - confirm against that helper.
   2803   constexpr const char *Vdivd = "vdivd";
   2804   constexpr IValueT VdivdOpcode = B23;
   2805   emitVFPddd(Cond, VdivdOpcode, OpDd, OpDn, OpDm, Vdivd);
   2806 }
   2807 
   2808 void AssemblerARM32::veord(const Operand *OpDd, const Operand *OpDn,
   2809                            const Operand *OpDm) {
   2810   // VEOR - ARM section A8.8.315, encoding A1:
   2811   //   veor<c> <Dd>, <Dn>, <Dm>
   2812   //
   2813   // 111100110D00nnnndddd0001N0M1mmmm where Ddddd=Dd, Nnnnn=Dn, and Mmmmm=Dm.
   2814   constexpr const char *Veord = "veord";
   2815   IValueT Dd = encodeDRegister(OpDd, "Dd", Veord);
   2816   IValueT Dn = encodeDRegister(OpDn, "Dn", Veord);
   2817   IValueT Dm = encodeDRegister(OpDm, "Dm", Veord);
          // No condition parameter: the condition field is always encoded from
          // CondARM32::Cond::kNone. Each register is split into its top bit
          // (D at 22, N at 7, M at 5) and low four bits (16, 12 and 0).
   2818   const IValueT Encoding =
   2819       B25 | B24 | B8 | B4 |
   2820       (encodeCondition(CondARM32::Cond::kNone) << kConditionShift) |
   2821       (getYInRegYXXXX(Dd) << 22) | (getXXXXInRegYXXXX(Dn) << 16) |
   2822       (getXXXXInRegYXXXX(Dd) << 12) | (getYInRegYXXXX(Dn) << 7) |
   2823       (getYInRegYXXXX(Dm) << 5) | getXXXXInRegYXXXX(Dm);
   2824   emitInst(Encoding);
   2825 }
   2826 
   2827 void AssemblerARM32::veorq(const Operand *OpQd, const Operand *OpQn,
   2828                            const Operand *OpQm) {
   2829   // VEOR - ARM section A8.8.316, encoding A1:
   2830   //   veor <Qd>, <Qn>, <Qm>
   2831   //
   2832   // 111100110D00nnn0ddd00001N1M1mmm0 where Dddd=Qd, Nnnn=Qn, and Mmmm=Qm.
   2833   constexpr const char *Veorq = "veorq";
   2834   constexpr IValueT VeorqOpcode = B24 | B8 | B4;
          // IceType_i8 is passed as the element type; presumably this keeps the
          // size field at zero since VEOR is a bitwise operation - confirm against
          // emitSIMDqqq.
   2835   emitSIMDqqq(VeorqOpcode, IceType_i8, OpQd, OpQn, OpQm, Veorq);
   2836 }
   2837 
   2838 void AssemblerARM32::vldrd(const Operand *OpDd, const Operand *OpAddress,
   2839                            CondARM32::Cond Cond, const TargetInfo &TInfo) {
   2840   // VLDR - ARM section A8.8.333, encoding A1.
   2841   //   vldr<c> <Dd>, [<Rn>{, #+/-<imm>}]
   2842   //
   2843   // cccc1101UD01nnnndddd1011iiiiiiii where cccc=Cond, nnnn=Rn, Ddddd=Rd,
   2844   // iiiiiiii=abs(Imm >> 2), and U=1 if Opcode>=0.
   2845   constexpr const char *Vldrd = "vldrd";
   2846   IValueT Dd = encodeDRegister(OpDd, "Dd", Vldrd);
   2847   assert(CondARM32::isDefined(Cond));
          // encodeAddress fills Address with the address-related bits, which are
          // OR-ed into the instruction word unchanged below.
   2848   IValueT Address;
   2849   EncodedOperand AddressEncoding =
   2850       encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
          // The (void) cast keeps AddressEncoding "used" when asserts are
          // compiled out.
   2851   (void)AddressEncoding;
   2852   assert(AddressEncoding == EncodedAsImmRegOffset);
   2853   IValueT Encoding = B27 | B26 | B24 | B20 | B11 | B9 | B8 |
   2854                      (encodeCondition(Cond) << kConditionShift) |
   2855                      (getYInRegYXXXX(Dd) << 22) |
   2856                      (getXXXXInRegYXXXX(Dd) << 12) | Address;
   2857   emitInst(Encoding);
   2858 }
   2859 
   2860 void AssemblerARM32::vldrq(const Operand *OpQd, const Operand *OpAddress,
   2861                            CondARM32::Cond Cond, const TargetInfo &TInfo) {
   2862   // This is a pseudo-instruction which loads 64-bit data into a quadword
   2863   // vector register. It is implemented by loading into the lower doubleword.
   2864
   2865   // VLDR - ARM section A8.8.333, encoding A1.
   2866   //   vldr<c> <Dd>, [<Rn>{, #+/-<imm>}]
   2867   //
   2868   // cccc1101UD01nnnndddd1011iiiiiiii where cccc=Cond, nnnn=Rn, Ddddd=Rd,
   2869   // iiiiiiii=abs(Imm >> 2), and U=1 if Opcode>=0.
          // NOTE(review): the instruction name used for this function is "vldrd"
          // rather than "vldrq" - confirm whether that is intentional.
   2870   constexpr const char *Vldrd = "vldrd";
          // The Q register is mapped to a D register number, which is then encoded
          // exactly as in vldrd.
   2871   IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vldrd));
   2872   assert(CondARM32::isDefined(Cond));
   2873   IValueT Address;
   2874   EncodedOperand AddressEncoding =
   2875       encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
          // The (void) cast keeps AddressEncoding "used" when asserts are
          // compiled out.
   2876   (void)AddressEncoding;
   2877   assert(AddressEncoding == EncodedAsImmRegOffset);
   2878   IValueT Encoding = B27 | B26 | B24 | B20 | B11 | B9 | B8 |
   2879                      (encodeCondition(Cond) << kConditionShift) |
   2880                      (getYInRegYXXXX(Dd) << 22) |
   2881                      (getXXXXInRegYXXXX(Dd) << 12) | Address;
   2882   emitInst(Encoding);
   2883 }
   2884 
   2885 void AssemblerARM32::vldrs(const Operand *OpSd, const Operand *OpAddress,
   2886                            CondARM32::Cond Cond, const TargetInfo &TInfo) {
   2887   // VLDR - ARM section A8.8.333, encoding A2.
   2888   //   vldr<c> <Sd>, [<Rn>{, #+/-<imm>}]
   2889   //
   2890   // cccc1101UD01nnnndddd1010iiiiiiii where cccc=Cond, nnnn=Rn, ddddD=Sd,
   2891   // iiiiiiii=abs(Opcode), and U=1 if Opcode >= 0;
   2892   constexpr const char *Vldrs = "vldrs";
   2893   IValueT Sd = encodeSRegister(OpSd, "Sd", Vldrs);
   2894   assert(CondARM32::isDefined(Cond));
          // encodeAddress fills Address with the address-related bits, which are
          // OR-ed into the instruction word unchanged below.
   2895   IValueT Address;
   2896   EncodedOperand AddressEncoding =
   2897       encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
          // The (void) cast keeps AddressEncoding "used" when asserts are
          // compiled out.
   2898   (void)AddressEncoding;
   2899   assert(AddressEncoding == EncodedAsImmRegOffset);
          // Differs from vldrd in that B8 is clear and the S register is split as
          // XXXXY (low bit at 22, high four bits at 15..12).
   2900   IValueT Encoding = B27 | B26 | B24 | B20 | B11 | B9 |
   2901                      (encodeCondition(Cond) << kConditionShift) |
   2902                      (getYInRegXXXXY(Sd) << 22) |
   2903                      (getXXXXInRegXXXXY(Sd) << 12) | Address;
   2904   emitInst(Encoding);
   2905 }
   2906 
   2907 void AssemblerARM32::emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn,
   2908                                  IValueT Rm, DRegListSize NumDRegs,
   2909                                  size_t ElmtSize, IValueT Align,
   2910                                  const char *InstName) {
          // Emits a vector memory instruction that carries a D-register list size.
          // ElmtSize is the element width in bits (8, 16, 32 or 64); any other
          // value is reported as a fatal error naming InstName.
   2911   assert(Utils::IsAbsoluteUint(2, Align));
   2912   IValueT EncodedElmtSize;
   2913   switch (ElmtSize) {
          // The default label comes first; llvm::report_fatal_error presumably
          // does not return, otherwise control would fall through into case 8.
   2914   default: {
   2915     std::string Buffer;
   2916     llvm::raw_string_ostream StrBuf(Buffer);
   2917     StrBuf << InstName << ": found invalid vector element size " << ElmtSize;
   2918     llvm::report_fatal_error(StrBuf.str());
   2919   }
   2920   case 8:
   2921     EncodedElmtSize = 0;
   2922     break;
   2923   case 16:
   2924     EncodedElmtSize = 1;
   2925     break;
   2926   case 32:
   2927     EncodedElmtSize = 2;
   2928     break;
   2929   case 64:
   2930     EncodedElmtSize = 3;
   2931   }
          // Field layout: Dd top bit at 22, Rn at kRnShift, Dd low bits at
          // kRdShift, register-list size at bit 8, element size at bit 6,
          // alignment at bit 4, and Rm in the low bits. The condition field is
          // always encoded from CondARM32::kNone.
   2932   const IValueT Encoding =
   2933       Opcode | (encodeCondition(CondARM32::kNone) << kConditionShift) |
   2934       (getYInRegYXXXX(Dd) << 22) | (Rn << kRnShift) |
   2935       (getXXXXInRegYXXXX(Dd) << kRdShift) | (NumDRegs << 8) |
   2936       (EncodedElmtSize << 6) | (Align << 4) | Rm;
   2937   emitInst(Encoding);
   2938 }
   2939 
   2940 void AssemblerARM32::emitVMem1Op(IValueT Opcode, IValueT Dd, IValueT Rn,
   2941                                  IValueT Rm, size_t ElmtSize, IValueT Align,
   2942                                  const char *InstName) {
          // Variant of emitVMem1Op without a D-register list size; here the
          // encoded element size is placed at bit 10 instead of bit 6. ElmtSize
          // is the element width in bits (8, 16, 32 or 64); any other value is
          // reported as a fatal error naming InstName.
   2943   assert(Utils::IsAbsoluteUint(2, Align));
   2944   IValueT EncodedElmtSize;
   2945   switch (ElmtSize) {
          // The default label comes first; llvm::report_fatal_error presumably
          // does not return, otherwise control would fall through into case 8.
   2946   default: {
   2947     std::string Buffer;
   2948     llvm::raw_string_ostream StrBuf(Buffer);
   2949     StrBuf << InstName << ": found invalid vector element size " << ElmtSize;
   2950     llvm::report_fatal_error(StrBuf.str());
   2951   }
   2952   case 8:
   2953     EncodedElmtSize = 0;
   2954     break;
   2955   case 16:
   2956     EncodedElmtSize = 1;
   2957     break;
   2958   case 32:
   2959     EncodedElmtSize = 2;
   2960     break;
   2961   case 64:
   2962     EncodedElmtSize = 3;
   2963   }
          // Field layout: Dd top bit at 22, Rn at kRnShift, Dd low bits at
          // kRdShift, element size at bit 10, alignment at bit 4, and Rm in the
          // low bits. The condition field is always encoded from
          // CondARM32::kNone.
   2964   const IValueT Encoding =
   2965       Opcode | (encodeCondition(CondARM32::kNone) << kConditionShift) |
   2966       (getYInRegYXXXX(Dd) << 22) | (Rn << kRnShift) |
   2967       (getXXXXInRegYXXXX(Dd) << kRdShift) | (EncodedElmtSize << 10) |
   2968       (Align << 4) | Rm;
   2969   emitInst(Encoding);
   2970 }
   2971 
   2972 void AssemblerARM32::vld1qr(size_t ElmtSize, const Operand *OpQd,
   2973                             const Operand *OpAddress, const TargetInfo &TInfo) {
   2974   // VLD1 (multiple single elements) - ARM section A8.8.320, encoding A1:
   2975   //   vld1.<size> <Qd>, [<Rn>]
   2976   //
   2977   // 111101000D10nnnnddd0ttttssaammmm where tttt=DRegListSize2, Dddd=Qd,
   2978   // nnnn=Rn, aa=0 (use default alignment), size=ElmtSize, and ss is the
   2979   // encoding of ElmtSize.
   2980   constexpr const char *Vld1qr = "vld1qr";
   2981   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vld1qr);
   2982   const IValueT Dd = mapQRegToDReg(Qd);
          // Only addresses that encode as EncodedAsImmRegOffset under
          // NoImmOffsetAddress are accepted; anything else is a fatal error.
   2983   IValueT Address;
   2984   if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
   2985       EncodedAsImmRegOffset)
   2986     llvm::report_fatal_error(std::string(Vld1qr) + ": malform memory address");
          // Pull the 4-bit base register field back out of the encoded address.
   2987   const IValueT Rn = mask(Address, kRnShift, 4);
          // Rm is fixed to pc; presumably this selects the form without
          // writeback/post-index - confirm against the ARM manual.
   2988   constexpr IValueT Rm = RegARM32::Reg_pc;
   2989   constexpr IValueT Opcode = B26 | B21;
   2990   constexpr IValueT Align = 0; // use default alignment.
   2991   emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vld1qr);
   2992 }
   2993 
   2994 void AssemblerARM32::vld1(size_t ElmtSize, const Operand *OpQd,
   2995                           const Operand *OpAddress, const TargetInfo &TInfo) {
   2996   // This is a pseudo-instruction for loading a single element of a quadword
   2997   // vector. For 64-bit the lower doubleword vector is loaded.
   2998
          // 64-bit elements are delegated to vldrq with an unconditional (AL)
          // condition.
   2999   if (ElmtSize == 64) {
   3000     return vldrq(OpQd, OpAddress, Ice::CondARM32::AL, TInfo);
   3001   }
   3002
   3003   // VLD1 (single elements to one lane) - ARMv7-A/R section A8.6.308, encoding
   3004   // A1:
   3005   //   VLD1<c>.<size> <list>, [<Rn>{@<align>}], <Rm>
   3006   //
   3007   // 111101001D10nnnnddddss00aaaammmm where Dddd=Qd, nnnn=Rn, the alignment
   3008   // field is 0 (use default alignment), size=ElmtSize, and ss is the
   3009   // encoding of ElmtSize.
          // NOTE(review): the instruction name used for this function is "vld1qr"
          // rather than "vld1" - confirm whether that is intentional.
   3010   constexpr const char *Vld1qr = "vld1qr";
   3011   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vld1qr);
   3012   const IValueT Dd = mapQRegToDReg(Qd);
          // Only addresses that encode as EncodedAsImmRegOffset under
          // NoImmOffsetAddress are accepted; anything else is a fatal error.
   3013   IValueT Address;
   3014   if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
   3015       EncodedAsImmRegOffset)
   3016     llvm::report_fatal_error(std::string(Vld1qr) + ": malform memory address");
          // Pull the 4-bit base register field back out of the encoded address.
   3017   const IValueT Rn = mask(Address, kRnShift, 4);
   3018   constexpr IValueT Rm = RegARM32::Reg_pc;
   3019   constexpr IValueT Opcode = B26 | B23 | B21;
   3020   constexpr IValueT Align = 0; // use default alignment.
          // Uses the emitVMem1Op overload without a register-list size.
   3021   emitVMem1Op(Opcode, Dd, Rn, Rm, ElmtSize, Align, Vld1qr);
   3022 }
   3023 
   3024 bool AssemblerARM32::vmovqc(const Operand *OpQd, const ConstantInteger32 *Imm) {
   3025   // VMOV (immediate) - ARM section A8.8.320, encoding A1:
   3026   //   VMOV.<dt> <Qd>, #<Imm>
   3027   // 1111001x1D000yyyddddcccc01p1zzzz where Qd=Ddddd, Imm=xyyyzzzz, cmode=cccc,
   3028   // and Op=p.
          //
          // Returns true if the instruction was emitted, and false (emitting
          // nothing) if OpQd is not a vector or the immediate is rejected by one
          // of the checks below.
   3029   constexpr const char *Vmovc = "vmovc";
   3030   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vmovc));
   3031   IValueT Value = Imm->getValue();
   3032   const Type VecTy = OpQd->getType();
   3033   if (!isVectorType(VecTy))
   3034     return false;
   3035
   3036   IValueT Op;
   3037   IValueT Cmode;
   3038   IValueT Imm8;
   3039   if (!encodeAdvSIMDExpandImm(Value, typeElementType(VecTy), Op, Cmode, Imm8))
   3040     return false;
          // Reject Op/Cmode combinations this encoder does not emit: Op == 0 with
          // the low bit of Cmode set, and Op == 1 with any Cmode other than 13.
   3041   if (Op == 0 && mask(Cmode, 0, 1) == 1)
   3042     return false;
   3043   if (Op == 1 && Cmode != 13)
   3044     return false;
          // Imm8 is scattered across the word: bit 7 at 24, bits 6..4 at 18..16,
          // and bits 3..0 in the low nibble. The condition field is all ones.
   3045   const IValueT Encoding =
   3046       (0xF << kConditionShift) | B25 | B23 | B6 | B4 |
   3047       (mask(Imm8, 7, 1) << 24) | (getYInRegYXXXX(Dd) << 22) |
   3048       (mask(Imm8, 4, 3) << 16) | (getXXXXInRegYXXXX(Dd) << 12) | (Cmode << 8) |
   3049       (Op << 5) | mask(Imm8, 0, 4);
   3050   emitInst(Encoding);
   3051   return true;
   3052 }
   3053 
   3054 void AssemblerARM32::vmovd(const Operand *OpDd,
   3055                            const OperandARM32FlexFpImm *OpFpImm,
   3056                            CondARM32::Cond Cond) {
   3057   // VMOV (immediate) - ARM section A8.8.339, encoding A2:
   3058   //   vmov<c>.f64 <Dd>, #<imm>
   3059   //
   3060   // cccc11101D11xxxxdddd10110000yyyy where cccc=Cond, Ddddd=Dd, xxxxyyyy=imm.
   3061   constexpr const char *Vmovd = "vmovd";
          // Dd is a doubleword register that is handed to emitVFPddd, so it is
          // encoded with the D-register encoder (as the other D-register operands
          // in this file are) rather than the S-register one.
   3062   IValueT Dd = encodeDRegister(OpDd, "Dd", Vmovd);
   3063   IValueT Imm8 = OpFpImm->getModifiedImm();
   3064   assert(Imm8 < (1 << 8));
   3065   constexpr IValueT VmovdOpcode = B23 | B21 | B20 | B8;
          // The 8-bit immediate is split: high nibble at bits 19..16, low nibble
          // at bits 3..0.
   3066   IValueT OpcodePlusImm8 = VmovdOpcode | ((Imm8 >> 4) << 16) | (Imm8 & 0xf);
          // No source registers are involved; both remaining register arguments
          // are passed as 0.
   3067   constexpr IValueT D0 = 0;
   3068   emitVFPddd(Cond, OpcodePlusImm8, Dd, D0, D0);
   3069 }
   3070 
   3071 void AssemblerARM32::vmovdd(const Operand *OpDd, const Variable *OpDm,
   3072                             CondARM32::Cond Cond) {
   3073   // VMOV (register) - ARM section A8.8.340, encoding A2:
   3074   //   vmov<c>.f64 <Dd>, <Dm>
   3075   //
   3076   // cccc11101D110000dddd101101M0mmmm where cccc=Cond, Ddddd=Dd, and Mmmmm=Dm.
   3077   constexpr const char *Vmovdd = "Vmovdd";
          // Both operands are doubleword registers that are handed to emitVFPddd,
          // so they are encoded with the D-register encoder rather than the
          // S-register one.
   3078   IValueT Dd = encodeDRegister(OpDd, "Dd", Vmovdd);
   3079   IValueT Dm = encodeDRegister(OpDm, "Dm", Vmovdd);
   3080   constexpr IValueT VmovddOpcode = B23 | B21 | B20 | B6;
          // Only two registers are involved; the middle register argument is
          // passed as 0.
   3081   constexpr IValueT D0 = 0;
   3082   emitVFPddd(Cond, VmovddOpcode, Dd, D0, Dm);
   3083 }
   3084 
   3085 void AssemblerARM32::vmovdrr(const Operand *OpDm, const Operand *OpRt,
   3086                              const Operand *OpRt2, CondARM32::Cond Cond) {
   3087   // VMOV (between two ARM core registers and a doubleword extension register).
   3088   // ARM section A8.8.345, encoding A1:
   3089   //   vmov<c> <Dm>, <Rt>, <Rt2>
   3090   //
   3091   // cccc11000100xxxxyyyy101100M1mmmm where cccc=Cond, xxxx=Rt, yyyy=Rt2, and
   3092   // Mmmmm=Dm.
          // NOTE(review): the code below places Rt2 at bits 19..16 and Rt at bits
          // 15..12, i.e. the opposite of the xxxx/yyyy labels above - confirm the
          // comment against the ARM manual.
   3093   constexpr const char *Vmovdrr = "vmovdrr";
   3094   IValueT Dm = encodeDRegister(OpDm, "Dm", Vmovdrr);
   3095   IValueT Rt = encodeGPRegister(OpRt, "Rt", Vmovdrr);
          // Label the second core register "Rt2" (it was previously labelled "Rt")
          // so that it can be told apart from the first one.
   3096   IValueT Rt2 = encodeGPRegister(OpRt2, "Rt2", Vmovdrr);
          // Neither core register may be sp or pc, and the two must differ.
   3097   assert(Rt != RegARM32::Encoded_Reg_sp);
   3098   assert(Rt != RegARM32::Encoded_Reg_pc);
   3099   assert(Rt2 != RegARM32::Encoded_Reg_sp);
   3100   assert(Rt2 != RegARM32::Encoded_Reg_pc);
   3101   assert(Rt != Rt2);
   3102   assert(CondARM32::isDefined(Cond));
   3103   IValueT Encoding = B27 | B26 | B22 | B11 | B9 | B8 | B4 |
   3104                      (encodeCondition(Cond) << kConditionShift) | (Rt2 << 16) |
   3105                      (Rt << 12) | (getYInRegYXXXX(Dm) << 5) |
   3106                      getXXXXInRegYXXXX(Dm);
   3107   emitInst(Encoding);
   3108 }
   3109 
   3110 void AssemblerARM32::vmovqir(const Operand *OpQn, uint32_t Index,
   3111                              const Operand *OpRt, CondARM32::Cond Cond) {
   3112   // VMOV (ARM core register to scalar) - ARM section A8.8.341, encoding A1:
   3113   //   vmov<c>.<size> <Dn[x]>, <Rt>
   3114   constexpr const char *Vmovdr = "vmovdr";
          // This is the insert direction, so the helper's IsExtract flag is passed
          // as false (written as !IsExtract to name the argument).
   3115   constexpr bool IsExtract = true;
   3116   emitInsertExtractInt(Cond, OpQn, Index, OpRt, !IsExtract, Vmovdr);
   3117 }
   3118 
   3119 void AssemblerARM32::vmovqis(const Operand *OpQd, uint32_t Index,
   3120                              const Operand *OpSm, CondARM32::Cond Cond) {
          // Moves the single-precision register Sm into element Index (0..3) of
          // the quadword register Qd, implemented as an S-to-S move.
   3121   constexpr const char *Vmovqis = "vmovqis";
   3122   assert(Index < 4);
          // The destination S register is the S register the Q register maps to,
          // offset by the element index.
   3123   IValueT Sd = mapQRegToSReg(encodeQRegister(OpQd, "Qd", Vmovqis)) + Index;
   3124   IValueT Sm = encodeSRegister(OpSm, "Sm", Vmovqis);
   3125   emitMoveSS(Cond, Sd, Sm);
   3126 }
   3127 
   3128 void AssemblerARM32::vmovrqi(const Operand *OpRt, const Operand *OpQn,
   3129                              uint32_t Index, CondARM32::Cond Cond) {
   3130   // VMOV (scalar to ARM core register) - ARM section A8.8.342, encoding A1:
   3131   //   vmov<c>.<dt> <Rt>, <Dn[x]>
   3132   constexpr const char *Vmovrd = "vmovrd";
          // This is the extract direction, so the helper's IsExtract flag is
          // passed as true.
   3133   constexpr bool IsExtract = true;
   3134   emitInsertExtractInt(Cond, OpQn, Index, OpRt, IsExtract, Vmovrd);
   3135 }
   3136 
   3137 void AssemblerARM32::vmovrrd(const Operand *OpRt, const Operand *OpRt2,
   3138                              const Operand *OpDm, CondARM32::Cond Cond) {
   3139   // VMOV (between two ARM core registers and a doubleword extension register).
   3140   // ARM section A8.8.345, encoding A1:
   3141   //   vmov<c> <Rt>, <Rt2>, <Dm>
   3142   //
   3143   // cccc11000101xxxxyyyy101100M1mmmm where cccc=Cond, xxxx=Rt, yyyy=Rt2, and
   3144   // Mmmmm=Dm.
          // NOTE(review): the code below places Rt2 at bits 19..16 and Rt at bits
          // 15..12, i.e. the opposite of the xxxx/yyyy labels above - confirm the
          // comment against the ARM manual.
   3145   constexpr const char *Vmovrrd = "vmovrrd";
   3146   IValueT Rt = encodeGPRegister(OpRt, "Rt", Vmovrrd);
          // Label the second core register "Rt2" (it was previously labelled "Rt")
          // so that it can be told apart from the first one.
   3147   IValueT Rt2 = encodeGPRegister(OpRt2, "Rt2", Vmovrrd);
   3148   IValueT Dm = encodeDRegister(OpDm, "Dm", Vmovrrd);
          // Neither core register may be sp or pc, and the two must differ.
   3149   assert(Rt != RegARM32::Encoded_Reg_sp);
   3150   assert(Rt != RegARM32::Encoded_Reg_pc);
   3151   assert(Rt2 != RegARM32::Encoded_Reg_sp);
   3152   assert(Rt2 != RegARM32::Encoded_Reg_pc);
   3153   assert(Rt != Rt2);
   3154   assert(CondARM32::isDefined(Cond));
          // Same layout as vmovdrr with B20 additionally set.
   3155   IValueT Encoding = B27 | B26 | B22 | B20 | B11 | B9 | B8 | B4 |
   3156                      (encodeCondition(Cond) << kConditionShift) | (Rt2 << 16) |
   3157                      (Rt << 12) | (getYInRegYXXXX(Dm) << 5) |
   3158                      getXXXXInRegYXXXX(Dm);
   3159   emitInst(Encoding);
   3160 }
   3161 
   3162 void AssemblerARM32::vmovrs(const Operand *OpRt, const Operand *OpSn,
   3163                             CondARM32::Cond Cond) {
   3164   // VMOV (between ARM core register and single-precision register)
   3165   //   ARM section A8.8.343, encoding A1.
   3166   //
   3167   //   vmov<c> <Rt>, <Sn>
   3168   //
   3169   // cccc11100001nnnntttt1010N0010000 where cccc=Cond, nnnnN = Sn, and tttt=Rt.
   3170   constexpr const char *Vmovrs = "vmovrs";
   3171   IValueT Rt = encodeGPRegister(OpRt, "Rt", Vmovrs);
   3172   IValueT Sn = encodeSRegister(OpSn, "Sn", Vmovrs);
   3173   assert(CondARM32::isDefined(Cond));
          // B20 distinguishes this direction (S register to core register) from
          // vmovsr, which emits the same pattern without it.
   3174   IValueT Encoding = (encodeCondition(Cond) << kConditionShift) | B27 | B26 |
   3175                      B25 | B20 | B11 | B9 | B4 | (getXXXXInRegXXXXY(Sn) << 16) |
   3176                      (Rt << kRdShift) | (getYInRegXXXXY(Sn) << 7);
   3177   emitInst(Encoding);
   3178 }
   3179 
   3180 void AssemblerARM32::vmovs(const Operand *OpSd,
   3181                            const OperandARM32FlexFpImm *OpFpImm,
   3182                            CondARM32::Cond Cond) {
   3183   // VMOV (immediate) - ARM section A8.8.339, encoding A2:
   3184   //   vmov<c>.f32 <Sd>, #<imm>
   3185   //
   3186   // cccc11101D11xxxxdddd10100000yyyy where cccc=Cond, ddddD=Sd, xxxxyyyy=imm.
   3187   constexpr const char *Vmovs = "vmovs";
   3188   IValueT Sd = encodeSRegister(OpSd, "Sd", Vmovs);
   3189   IValueT Imm8 = OpFpImm->getModifiedImm();
   3190   assert(Imm8 < (1 << 8));
   3191   constexpr IValueT VmovsOpcode = B23 | B21 | B20;
          // The 8-bit immediate is split: high nibble at bits 19..16, low nibble
          // at bits 3..0.
   3192   IValueT OpcodePlusImm8 = VmovsOpcode | ((Imm8 >> 4) << 16) | (Imm8 & 0xf);
          // No source registers are involved; both remaining register arguments
          // are passed as 0.
   3193   constexpr IValueT S0 = 0;
   3194   emitVFPsss(Cond, OpcodePlusImm8, Sd, S0, S0);
   3195 }
   3196 
   3197 void AssemblerARM32::vmovss(const Operand *OpSd, const Variable *OpSm,
   3198                             CondARM32::Cond Cond) {
          // Register-to-register move between two single-precision registers:
          // encodes both operands as S registers and delegates to emitMoveSS.
   3199   constexpr const char *Vmovss = "Vmovss";
   3200   IValueT Sd = encodeSRegister(OpSd, "Sd", Vmovss);
   3201   IValueT Sm = encodeSRegister(OpSm, "Sm", Vmovss);
   3202   emitMoveSS(Cond, Sd, Sm);
   3203 }
   3204 
   3205 void AssemblerARM32::vmovsqi(const Operand *OpSd, const Operand *OpQm,
   3206                              uint32_t Index, CondARM32::Cond Cond) {
          // Moves element Index (0..3) of the quadword register Qm into the
          // single-precision register Sd, implemented as an S-to-S move.
   3207   constexpr const char *Vmovsqi = "vmovsqi";
   3208   const IValueT Sd = encodeSRegister(OpSd, "Sd", Vmovsqi);
   3209   assert(Index < 4);
          // The source S register is the S register the Q register maps to,
          // offset by the element index.
   3210   const IValueT Sm =
   3211       mapQRegToSReg(encodeQRegister(OpQm, "Qm", Vmovsqi)) + Index;
   3212   emitMoveSS(Cond, Sd, Sm);
   3213 }
   3214 
   3215 void AssemblerARM32::vmovsr(const Operand *OpSn, const Operand *OpRt,
   3216                             CondARM32::Cond Cond) {
   3217   // VMOV (between ARM core register and single-precision register)
   3218   //   ARM section A8.8.343, encoding A1.
   3219   //
   3220   //   vmov<c> <Sn>, <Rt>
   3221   //
   3222   // cccc11100000nnnntttt1010N0010000 where cccc=Cond, nnnnN = Sn, and tttt=Rt.
   3223   constexpr const char *Vmovsr = "vmovsr";
   3224   IValueT Sn = encodeSRegister(OpSn, "Sn", Vmovsr);
   3225   IValueT Rt = encodeGPRegister(OpRt, "Rt", Vmovsr);
   3226   assert(Sn < RegARM32::getNumSRegs());
   3227   assert(Rt < RegARM32::getNumGPRegs());
   3228   assert(CondARM32::isDefined(Cond));
          // Same pattern as vmovrs but with B20 clear (core register to S
          // register).
   3229   IValueT Encoding = (encodeCondition(Cond) << kConditionShift) | B27 | B26 |
   3230                      B25 | B11 | B9 | B4 | (getXXXXInRegXXXXY(Sn) << 16) |
   3231                      (Rt << kRdShift) | (getYInRegXXXXY(Sn) << 7);
   3232   emitInst(Encoding);
   3233 }
   3234 
   3235 void AssemblerARM32::vmlad(const Operand *OpDd, const Operand *OpDn,
   3236                            const Operand *OpDm, CondARM32::Cond Cond) {
   3237   // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
   3238   //   vmla<c>.f64 <Dd>, <Dn>, <Dm>
   3239   //
   3240   // cccc11100d00nnnndddd1011n0M0mmmm where cccc=Cond, Ddddd=Dd, Nnnnn=Dn, and
   3241   // Mmmmm=Dm
          // No instruction-specific bits are needed (opcode 0); everything else is
          // delegated to emitVFPddd. The subtract form vmlsd differs only in B6.
   3242   constexpr const char *Vmlad = "vmlad";
   3243   constexpr IValueT VmladOpcode = 0;
   3244   emitVFPddd(Cond, VmladOpcode, OpDd, OpDn, OpDm, Vmlad);
   3245 }
   3246 
   3247 void AssemblerARM32::vmlas(const Operand *OpSd, const Operand *OpSn,
   3248                            const Operand *OpSm, CondARM32::Cond Cond) {
   3249   // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
   3250   //   vmla<c>.f32 <Sd>, <Sn>, <Sm>
   3251   //
   3252   // cccc11100d00nnnndddd1010n0M0mmmm where cccc=Cond, ddddD=Sd, nnnnN=Sn, and
   3253   // mmmmM=Sm
          // No instruction-specific bits are needed (opcode 0); everything else is
          // delegated to emitVFPsss. The subtract form vmlss differs only in B6.
   3254   constexpr const char *Vmlas = "vmlas";
   3255   constexpr IValueT VmlasOpcode = 0;
   3256   emitVFPsss(Cond, VmlasOpcode, OpSd, OpSn, OpSm, Vmlas);
   3257 }
   3258 
   3259 void AssemblerARM32::vmlsd(const Operand *OpDd, const Operand *OpDn,
   3260                            const Operand *OpDm, CondARM32::Cond Cond) {
   3261   // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
   3262   //   vmls<c>.f64 <Dd>, <Dn>, <Dm>
   3263   //
   3264   // cccc11100d00nnnndddd1011n1M0mmmm where cccc=Cond, Ddddd=Dd, Nnnnn=Dn, and
   3265   // Mmmmm=Dm
          // The instruction name and constant are named after this instruction
          // (they were previously "vmlad"/VmladOpcode, copied from vmlad). B6
          // selects the subtract form; the opcode value itself is unchanged.
   3266   constexpr const char *Vmlsd = "vmlsd";
   3267   constexpr IValueT VmlsdOpcode = B6;
   3268   emitVFPddd(Cond, VmlsdOpcode, OpDd, OpDn, OpDm, Vmlsd);
   3269 }
   3270 
   3271 void AssemblerARM32::vmlss(const Operand *OpSd, const Operand *OpSn,
   3272                            const Operand *OpSm, CondARM32::Cond Cond) {
   3273   // VMLA, VMLS (floating-point), ARM section A8.8.337, encoding A2:
   3274   //   vmls<c>.f32 <Sd>, <Sn>, <Sm>
   3275   //
   3276   // cccc11100d00nnnndddd1010n1M0mmmm where cccc=Cond, ddddD=Sd, nnnnN=Sn, and
   3277   // mmmmM=Sm
          // The instruction name and constant are named after this instruction
          // (they were previously "vmlas"/VmlasOpcode, copied from vmlas). B6
          // selects the subtract form; the opcode value itself is unchanged.
   3278   constexpr const char *Vmlss = "vmlss";
   3279   constexpr IValueT VmlssOpcode = B6;
   3280   emitVFPsss(Cond, VmlssOpcode, OpSd, OpSn, OpSm, Vmlss);
   3281 }
   3282 
   3283 void AssemblerARM32::vmrsAPSR_nzcv(CondARM32::Cond Cond) {
   3284   // VMRS - ARM section A8.8.348, encoding A1:
   3285   //   vmrs<c> APSR_nzcv, FPSCR
   3286   //
   3287   // cccc111011110001tttt101000010000 where tttt=15 (i.e. when Rt=pc, use
   3288   // APSR_nzcv instead).
   3289   assert(CondARM32::isDefined(Cond));
          // B15 | B14 | B13 | B12 below is the tttt=15 field from the pattern.
   3290   IValueT Encoding = B27 | B26 | B25 | B23 | B22 | B21 | B20 | B16 | B15 | B14 |
   3291                      B13 | B12 | B11 | B9 | B4 |
   3292                      (encodeCondition(Cond) << kConditionShift);
   3293   emitInst(Encoding);
   3294 }
   3295 
   3296 void AssemblerARM32::vmuls(const Operand *OpSd, const Operand *OpSn,
   3297                            const Operand *OpSm, CondARM32::Cond Cond) {
   3298   // VMUL (floating-point) - ARM section A8.8.351, encoding A2:
   3299   //   vmul<c>.f32 <Sd>, <Sn>, <Sm>
   3300   //
   3301   // cccc11100D10nnnndddd101sN0M0mmmm where cccc=Cond, s=0, ddddD=Sd, nnnnN=Sn,
   3302   // and mmmmM=Sm.
          // Only bit 21 is instruction-specific here; operand encoding and the
          // remaining bits are delegated to emitVFPsss.
   3303   constexpr const char *Vmuls = "vmuls";
   3304   constexpr IValueT VmulsOpcode = B21;
   3305   emitVFPsss(Cond, VmulsOpcode, OpSd, OpSn, OpSm, Vmuls);
   3306 }
   3307 
   3308 void AssemblerARM32::vmuld(const Operand *OpDd, const Operand *OpDn,
   3309                            const Operand *OpDm, CondARM32::Cond Cond) {
   3310   // VMUL (floating-point) - ARM section A8.8.351, encoding A2:
   3311   //   vmul<c>.f64 <Dd>, <Dn>, <Dm>
   3312   //
   3313   // cccc11100D10nnnndddd101sN0M0mmmm where cccc=Cond, s=1, Ddddd=Dd, Nnnnn=Dn,
   3314   // and Mmmmm=Dm.
          // Same opcode constant as vmuls; the s bit is not set here, so it is
          // presumably supplied by emitVFPddd - confirm against that helper.
   3315   constexpr const char *Vmuld = "vmuld";
   3316   constexpr IValueT VmuldOpcode = B21;
   3317   emitVFPddd(Cond, VmuldOpcode, OpDd, OpDn, OpDm, Vmuld);
   3318 }
   3319 
   3320 void AssemblerARM32::vmulqi(Type ElmtTy, const Operand *OpQd,
   3321                             const Operand *OpQn, const Operand *OpQm) {
   3322   // VMUL, VMULL (integer and polynomial) - ARM section A8.8.350, encoding A1:
   3323   //   vmul<c>.<dt> <Qd>, <Qn>, <Qm>
   3324   //
   3325   // 111100100Dssnnn0ddd01001NqM1mmm0 where Dddd=Qd, Nnnn=Qn, Mmmm=Qm, and
   3326   // dt in [i8, i16, i32] where ss is the index.
          // ElmtTy must be a scalar integer type other than i64 (checked by the
          // asserts below); it is forwarded to emitSIMDqqq together with the
          // operands.
   3327   assert(isScalarIntegerType(ElmtTy) &&
   3328          "vmulqi expects vector with integer element type");
   3329   assert(ElmtTy != IceType_i64 && "vmulqi on i64 vector not allowed");
   3330   constexpr const char *Vmulqi = "vmulqi";
   3331   constexpr IValueT VmulqiOpcode = B11 | B8 | B4;
   3332   emitSIMDqqq(VmulqiOpcode, ElmtTy, OpQd, OpQn, OpQm, Vmulqi);
   3333 }
   3334 
   3335 void AssemblerARM32::vmulh(Type ElmtTy, const Operand *OpQd,
   3336                            const Operand *OpQn, const Operand *OpQm,
   3337                            bool Unsigned) {
   3338   // Pseudo-instruction for multiplying the corresponding elements in the lower
   3339   // halves of two quadword vectors, and returning the high halves.
          // Two instructions are emitted: a widening multiply followed by a
          // narrowing right shift. Unsigned selects B24 in the multiply.
   3340
   3341   // VMULL (integer and polynomial) - ARMv7-A/R section A8.6.337, encoding A1:
   3342   //   VMULL<c>.<dt> <Qd>, <Dn>, <Dm>
   3343   //
   3344   // 1111001U1Dssnnnndddd11o0N0M0mmmm
   3345   assert(isScalarIntegerType(ElmtTy) &&
   3346          "vmull expects vector with integer element type");
   3347   assert(ElmtTy != IceType_i64 && "vmull on i64 vector not allowed");
   3348   constexpr const char *Vmull = "vmull";
   3349
   3350   constexpr IValueT ElmtShift = 20;
   3351   const IValueT ElmtSize = encodeElmtType(ElmtTy);
   3352   assert(Utils::IsUint(2, ElmtSize));
   3353
          // NOTE(review): B20 is set unconditionally here, and the encoded
          // element size is also OR-ed in at bit 20 (ElmtShift) below - confirm
          // this is intended for every element type the asserts allow.
   3354   const IValueT VmullOpcode =
   3355       B25 | (Unsigned ? B24 : 0) | B23 | (B20) | B11 | B10;
   3356
   3357   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vmull);
   3358   const IValueT Qn = encodeQRegister(OpQn, "Qn", Vmull);
   3359   const IValueT Qm = encodeQRegister(OpQm, "Qm", Vmull);
   3360
          // All three operands are passed to emitSIMDBase as the D registers their
          // Q registers map to, with UseQRegs set to false.
   3361   const IValueT Dd = mapQRegToDReg(Qd);
   3362   const IValueT Dn = mapQRegToDReg(Qn);
   3363   const IValueT Dm = mapQRegToDReg(Qm);
   3364
   3365   constexpr bool UseQRegs = false;
   3366   constexpr bool IsFloatTy = false;
   3367   emitSIMDBase(VmullOpcode | (ElmtSize << ElmtShift), Dd, Dn, Dm, UseQRegs,
   3368                IsFloatTy);
   3369
   3370   // Shift and narrow to obtain high halves.
          // NOTE(review): the shift immediate is always computed for
          // IceType_i16 with a shift of 16, independent of ElmtTy - confirm
          // whether other element types are expected here.
   3371   constexpr IValueT VshrnOpcode = B25 | B23 | B11 | B4;
   3372   const IValueT Imm6 = encodeSIMDShiftImm6(ST_Vshr, IceType_i16, 16);
   3373   constexpr IValueT ImmShift = 16;
   3374
          // Dd is both the source and the destination of the narrowing shift; the
          // middle register argument is passed as 0.
   3375   emitSIMDBase(VshrnOpcode | (Imm6 << ImmShift), Dd, 0, Dd, UseQRegs,
   3376                IsFloatTy);
   3377 }
   3378 
   3379 void AssemblerARM32::vmlap(Type ElmtTy, const Operand *OpQd,
   3380                            const Operand *OpQn, const Operand *OpQm) {
   3381   // Pseudo-instruction for multiplying the corresponding elements in the lower
   3382   // halves of two quadword vectors, and pairwise-adding the results.
   3383 
   3384   // VMULL (integer and polynomial) - ARM section A8.8.350, encoding A1:
   3385   //   vmull<c>.<dt> <Qd>, <Qn>, <Qm>
   3386   //
   3387   // 1111001U1Dssnnnndddd11o0N0M0mmmm
   3388   assert(isScalarIntegerType(ElmtTy) &&
   3389          "vmull expects vector with integer element type");
   3390   assert(ElmtTy != IceType_i64 && "vmull on i64 vector not allowed");
   3391   constexpr const char *Vmull = "vmull";
   3392 
   3393   constexpr IValueT ElmtShift = 20;
   3394   const IValueT ElmtSize = encodeElmtType(ElmtTy);
   3395   assert(Utils::IsUint(2, ElmtSize));
   3396 
   3397   bool Unsigned = false;
   3398   const IValueT VmullOpcode =
   3399       B25 | (Unsigned ? B24 : 0) | B23 | (B20) | B11 | B10;
   3400 
   3401   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vmull));
   3402   const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vmull));
   3403   const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", Vmull));
   3404 
   3405   constexpr bool UseQRegs = false;
   3406   constexpr bool IsFloatTy = false;
   3407   emitSIMDBase(VmullOpcode | (ElmtSize << ElmtShift), Dd, Dn, Dm, UseQRegs,
   3408                IsFloatTy);
   3409 
   3410   // VPADD - ARM section A8.8.280, encoding A1:
   3411   //   vpadd.<dt> <Dd>, <Dm>, <Dn>
   3412   //
   3413   // 111100100Dssnnnndddd1011NQM1mmmm where Ddddd=<Dd>, Mmmmm=<Dm>, and
   3414   // Nnnnn=<Dn> and ss is the encoding of <dt>.
   3415   assert(ElmtTy != IceType_i64 && "vpadd doesn't allow i64!");
   3416   const IValueT VpaddOpcode =
   3417       B25 | B11 | B9 | B8 | B4 | ((encodeElmtType(ElmtTy) + 1) << 20);
   3418   emitSIMDBase(VpaddOpcode, Dd, Dd, Dd + 1, UseQRegs, IsFloatTy);
   3419 }
   3420 
   3421 void AssemblerARM32::vdup(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
   3422                           IValueT Idx) {
   3423   // VDUP (scalar) - ARMv7-A/R section A8.6.302, encoding A1:
   3424   //   VDUP<c>.<size> <Qd>, <Dm[x]>
   3425   //
   3426   // 111100111D11iiiiddd011000QM0mmmm where Dddd=<Qd>, Mmmmm=<Dm>, and
   3427   // iiii=imm4 encodes <size> and [x].
   3428   constexpr const char *Vdup = "vdup";
   3429 
   3430   const IValueT VdupOpcode = B25 | B24 | B23 | B21 | B20 | B11 | B10;
   3431 
   3432   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vdup));
   3433   const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vdup));
   3434 
   3435   constexpr bool UseQRegs = true;
   3436   constexpr bool IsFloatTy = false;
   3437 
   3438   IValueT Imm4 = 0;
   3439   bool Lower = true;
   3440   switch (ElmtTy) {
   3441   case IceType_i8:
   3442     assert(Idx < 16);
   3443     Lower = Idx < 8;
   3444     Imm4 = 1 | ((Idx & 0x7) << 1);
   3445     break;
   3446   case IceType_i16:
   3447     assert(Idx < 8);
   3448     Lower = Idx < 4;
   3449     Imm4 = 2 | ((Idx & 0x3) << 2);
   3450     break;
   3451   case IceType_i32:
   3452   case IceType_f32:
   3453     assert(Idx < 4);
   3454     Lower = Idx < 2;
   3455     Imm4 = 4 | ((Idx & 0x1) << 3);
   3456     break;
   3457   default:
   3458     assert(false && "vdup only supports 8, 16, and 32-bit elements");
   3459     break;
   3460   }
   3461 
   3462   emitSIMDBase(VdupOpcode, Dd, Imm4, Dn + (Lower ? 0 : 1), UseQRegs, IsFloatTy);
   3463 }
   3464 
   3465 void AssemblerARM32::vzip(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
   3466                           const Operand *OpQm) {
   3467   // Pseudo-instruction which interleaves the elements of the lower halves of
   3468   // two quadword registers.
   3469 
   3470   // Vzip - ARMv7-A/R section A8.6.410, encoding A1:
   3471   //   VZIP<c>.<size> <Dd>, <Dm>
   3472   //
   3473   // 111100111D11ss10dddd00011QM0mmmm where Ddddd=<Dd>, Mmmmm=<Dm>, and
   3474   // ss=<size>
   3475   assert(ElmtTy != IceType_i64 && "vzip on i64 vector not allowed");
   3476 
   3477   constexpr const char *Vzip = "vzip";
   3478   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vzip));
   3479   const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vzip));
   3480   const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", Vzip));
   3481 
   3482   constexpr bool UseQRegs = false;
   3483   constexpr bool IsFloatTy = false;
   3484 
   3485   // VMOV Dd, Dm
   3486   // 111100100D10mmmmdddd0001MQM1mmmm
   3487   constexpr IValueT VmovOpcode = B25 | B21 | B8 | B4;
   3488 
   3489   // Copy lower half of second source to upper half of destination.
   3490   emitSIMDBase(VmovOpcode, Dd + 1, Dm, Dm, UseQRegs, IsFloatTy);
   3491 
   3492   // Copy lower half of first source to lower half of destination.
   3493   if (Dd != Dn)
   3494     emitSIMDBase(VmovOpcode, Dd, Dn, Dn, UseQRegs, IsFloatTy);
   3495 
   3496   constexpr IValueT ElmtShift = 18;
   3497   const IValueT ElmtSize = encodeElmtType(ElmtTy);
   3498   assert(Utils::IsUint(2, ElmtSize));
   3499 
   3500   if (ElmtTy != IceType_i32 && ElmtTy != IceType_f32) {
   3501     constexpr IValueT VzipOpcode = B25 | B24 | B23 | B21 | B20 | B17 | B8 | B7;
   3502     // Zip the lower and upper half of destination.
   3503     emitSIMDBase(VzipOpcode | (ElmtSize << ElmtShift), Dd, 0, Dd + 1, UseQRegs,
   3504                  IsFloatTy);
   3505   } else {
   3506     constexpr IValueT VtrnOpcode = B25 | B24 | B23 | B21 | B20 | B17 | B7;
   3507     emitSIMDBase(VtrnOpcode | (ElmtSize << ElmtShift), Dd, 0, Dd + 1, UseQRegs,
   3508                  IsFloatTy);
   3509   }
   3510 }
   3511 
   3512 void AssemblerARM32::vmulqf(const Operand *OpQd, const Operand *OpQn,
   3513                             const Operand *OpQm) {
   3514   // VMUL (floating-point) - ARM section A8.8.351, encoding A1:
   3515   //   vmul.f32 <Qd>, <Qn>, <Qm>
   3516   //
   3517   // 111100110D00nnn0ddd01101MqM1mmm0 where Dddd=Qd, Nnnn=Qn, and Mmmm=Qm.
   3518   assert(OpQd->getType() == IceType_v4f32 && "vmulqf expects type <4 x float>");
   3519   constexpr const char *Vmulqf = "vmulqf";
   3520   constexpr IValueT VmulqfOpcode = B24 | B11 | B8 | B4;
   3521   constexpr bool IsFloatTy = true;
   3522   emitSIMDqqqBase(VmulqfOpcode, OpQd, OpQn, OpQm, IsFloatTy, Vmulqf);
   3523 }
   3524 
   3525 void AssemblerARM32::vmvnq(const Operand *OpQd, const Operand *OpQm) {
   3526   // VMVN (integer) - ARM section A8.8.354, encoding A1:
   3527   //   vmvn <Qd>, <Qm>
   3528   //
   3529   // 111100111D110000dddd01011QM0mmmm where Dddd=Qd, Mmmm=Qm, and 1=Q.
   3530   // TODO(jpp) xxx: unify
   3531   constexpr const char *Vmvn = "vmvn";
   3532   constexpr IValueT VmvnOpcode = B24 | B23 | B21 | B20 | B10 | B8 | B7;
   3533   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vmvn);
   3534   constexpr IValueT Qn = 0;
   3535   const IValueT Qm = encodeQRegister(OpQm, "Qm", Vmvn);
   3536   constexpr bool UseQRegs = true;
   3537   constexpr bool IsFloat = false;
   3538   emitSIMDBase(VmvnOpcode, mapQRegToDReg(Qd), mapQRegToDReg(Qn),
   3539                mapQRegToDReg(Qm), UseQRegs, IsFloat);
   3540 }
   3541 
   3542 void AssemblerARM32::vmovlq(const Operand *OpQd, const Operand *OpQn,
   3543                             const Operand *OpQm) {
   3544   // Pseudo-instruction to copy the first source operand and insert the lower
   3545   // half of the second operand into the lower half of the destination.
   3546 
   3547   // VMOV (register) - ARMv7-A/R section A8.6.327, encoding A1:
   3548   //   VMOV<c> <Dd>, <Dm>
   3549   //
   3550   // 111100111D110000ddd001011QM0mmm0 where Dddd=Qd, Mmmm=Qm, and Q=0.
   3551 
   3552   constexpr const char *Vmov = "vmov";
   3553   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vmov));
   3554   const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vmov));
   3555   const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", Vmov));
   3556 
   3557   constexpr bool UseQRegs = false;
   3558   constexpr bool IsFloat = false;
   3559 
   3560   const IValueT VmovOpcode = B25 | B21 | B8 | B4;
   3561 
   3562   if (Dd != Dm)
   3563     emitSIMDBase(VmovOpcode, Dd, Dm, Dm, UseQRegs, IsFloat);
   3564   if (Dd + 1 != Dn + 1)
   3565     emitSIMDBase(VmovOpcode, Dd + 1, Dn + 1, Dn + 1, UseQRegs, IsFloat);
   3566 }
   3567 
   3568 void AssemblerARM32::vmovhq(const Operand *OpQd, const Operand *OpQn,
   3569                             const Operand *OpQm) {
   3570   // Pseudo-instruction to copy the first source operand and insert the high
   3571   // half of the second operand into the high half of the destination.
   3572 
   3573   // VMOV (register) - ARMv7-A/R section A8.6.327, encoding A1:
   3574   //   VMOV<c> <Dd>, <Dm>
   3575   //
   3576   // 111100111D110000ddd001011QM0mmm0 where Dddd=Qd, Mmmm=Qm, and Q=0.
   3577 
   3578   constexpr const char *Vmov = "vmov";
   3579   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vmov));
   3580   const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vmov));
   3581   const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", Vmov));
   3582 
   3583   constexpr bool UseQRegs = false;
   3584   constexpr bool IsFloat = false;
   3585 
   3586   const IValueT VmovOpcode = B25 | B21 | B8 | B4;
   3587 
   3588   if (Dd != Dn)
   3589     emitSIMDBase(VmovOpcode, Dd, Dn, Dn, UseQRegs, IsFloat);
   3590   if (Dd + 1 != Dm + 1)
   3591     emitSIMDBase(VmovOpcode, Dd + 1, Dm + 1, Dm + 1, UseQRegs, IsFloat);
   3592 }
   3593 
   3594 void AssemblerARM32::vmovhlq(const Operand *OpQd, const Operand *OpQn,
   3595                              const Operand *OpQm) {
   3596   // Pseudo-instruction to copy the first source operand and insert the high
   3597   // half of the second operand into the lower half of the destination.
   3598 
   3599   // VMOV (register) - ARMv7-A/R section A8.6.327, encoding A1:
   3600   //   VMOV<c> <Dd>, <Dm>
   3601   //
   3602   // 111100111D110000ddd001011QM0mmm0 where Dddd=Qd, Mmmm=Qm, and Q=0.
   3603 
   3604   constexpr const char *Vmov = "vmov";
   3605   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vmov));
   3606   const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vmov));
   3607   const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", Vmov));
   3608 
   3609   constexpr bool UseQRegs = false;
   3610   constexpr bool IsFloat = false;
   3611 
   3612   const IValueT VmovOpcode = B25 | B21 | B8 | B4;
   3613 
   3614   if (Dd != Dm + 1)
   3615     emitSIMDBase(VmovOpcode, Dd, Dm + 1, Dm + 1, UseQRegs, IsFloat);
   3616   if (Dd + 1 != Dn + 1)
   3617     emitSIMDBase(VmovOpcode, Dd + 1, Dn + 1, Dn + 1, UseQRegs, IsFloat);
   3618 }
   3619 
   3620 void AssemblerARM32::vmovlhq(const Operand *OpQd, const Operand *OpQn,
   3621                              const Operand *OpQm) {
   3622   // Pseudo-instruction to copy the first source operand and insert the lower
   3623   // half of the second operand into the high half of the destination.
   3624 
   3625   // VMOV (register) - ARMv7-A/R section A8.6.327, encoding A1:
   3626   //   VMOV<c> <Dd>, <Dm>
   3627   //
   3628   // 111100111D110000ddd001011QM0mmm0 where Dddd=Qd, Mmmm=Qm, and Q=0.
   3629 
   3630   constexpr const char *Vmov = "vmov";
   3631   const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", Vmov));
   3632   const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", Vmov));
   3633   const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", Vmov));
   3634 
   3635   constexpr bool UseQRegs = false;
   3636   constexpr bool IsFloat = false;
   3637 
   3638   const IValueT VmovOpcode = B25 | B21 | B8 | B4;
   3639 
   3640   if (Dd + 1 != Dm)
   3641     emitSIMDBase(VmovOpcode, Dd + 1, Dm, Dm, UseQRegs, IsFloat);
   3642   if (Dd != Dn)
   3643     emitSIMDBase(VmovOpcode, Dd, Dn, Dn, UseQRegs, IsFloat);
   3644 }
   3645 
   3646 void AssemblerARM32::vnegqs(Type ElmtTy, const Operand *OpQd,
   3647                             const Operand *OpQm) {
   3648   // VNEG - ARM section A8.8.355, encoding A1:
   3649   //   vneg.<dt> <Qd>, <Qm>
   3650   //
   3651   // 111111111D11ss01dddd0F111QM0mmmm where Dddd=Qd, and Mmmm=Qm, and:
   3652   //     * dt=s8  -> 00=ss, 0=F
   3653   //     * dt=s16 -> 01=ss, 0=F
   3654   //     * dt=s32 -> 10=ss, 0=F
   3655   //     * dt=s32 -> 10=ss, 1=F
   3656   constexpr const char *Vneg = "vneg";
   3657   constexpr IValueT VnegOpcode = B24 | B23 | B21 | B20 | B16 | B9 | B8 | B7;
   3658   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vneg);
   3659   constexpr IValueT Qn = 0;
   3660   const IValueT Qm = encodeQRegister(OpQm, "Qm", Vneg);
   3661   constexpr bool UseQRegs = true;
   3662   constexpr IValueT ElmtShift = 18;
   3663   const IValueT ElmtSize = encodeElmtType(ElmtTy);
   3664   assert(Utils::IsUint(2, ElmtSize));
   3665   emitSIMDBase(VnegOpcode | (ElmtSize << ElmtShift), mapQRegToDReg(Qd),
   3666                mapQRegToDReg(Qn), mapQRegToDReg(Qm), UseQRegs,
   3667                isFloatingType(ElmtTy));
   3668 }
   3669 
   3670 void AssemblerARM32::vorrq(const Operand *OpQd, const Operand *OpQm,
   3671                            const Operand *OpQn) {
   3672   // VORR (register) - ARM section A8.8.360, encoding A1:
   3673   //   vorr <Qd>, <Qn>, <Qm>
   3674   //
   3675   // 111100100D10nnn0ddd00001N1M1mmm0 where Dddd=OpQd, Nnnn=OpQm, and Mmmm=OpQm.
   3676   constexpr const char *Vorrq = "vorrq";
   3677   constexpr IValueT VorrqOpcode = B21 | B8 | B4;
   3678   constexpr Type ElmtTy = IceType_i8;
   3679   emitSIMDqqq(VorrqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vorrq);
   3680 }
   3681 
   3682 void AssemblerARM32::vstrd(const Operand *OpDd, const Operand *OpAddress,
   3683                            CondARM32::Cond Cond, const TargetInfo &TInfo) {
   3684   // VSTR - ARM section A8.8.413, encoding A1:
   3685   //   vstr<c> <Dd>, [<Rn>{, #+/-<Imm>}]
   3686   //
   3687   // cccc1101UD00nnnndddd1011iiiiiiii where cccc=Cond, nnnn=Rn, Ddddd=Rd,
   3688   // iiiiiiii=abs(Imm >> 2), and U=1 if Imm>=0.
   3689   constexpr const char *Vstrd = "vstrd";
   3690   IValueT Dd = encodeDRegister(OpDd, "Dd", Vstrd);
   3691   assert(CondARM32::isDefined(Cond));
   3692   IValueT Address;
   3693   IValueT AddressEncoding =
   3694       encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
   3695   (void)AddressEncoding;
   3696   assert(AddressEncoding == EncodedAsImmRegOffset);
   3697   IValueT Encoding = B27 | B26 | B24 | B11 | B9 | B8 |
   3698                      (encodeCondition(Cond) << kConditionShift) |
   3699                      (getYInRegYXXXX(Dd) << 22) |
   3700                      (getXXXXInRegYXXXX(Dd) << 12) | Address;
   3701   emitInst(Encoding);
   3702 }
   3703 
   3704 void AssemblerARM32::vstrq(const Operand *OpQd, const Operand *OpAddress,
   3705                            CondARM32::Cond Cond, const TargetInfo &TInfo) {
   3706   // This is a pseudo-instruction which stores 64-bit data into a quadword
   3707   // vector register. It is implemented by storing into the lower doubleword.
   3708 
   3709   // VSTR - ARM section A8.8.413, encoding A1:
   3710   //   vstr<c> <Dd>, [<Rn>{, #+/-<Imm>}]
   3711   //
   3712   // cccc1101UD00nnnndddd1011iiiiiiii where cccc=Cond, nnnn=Rn, Ddddd=Rd,
   3713   // iiiiiiii=abs(Imm >> 2), and U=1 if Imm>=0.
   3714   constexpr const char *Vstrd = "vstrd";
   3715   IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Dd", Vstrd));
   3716   assert(CondARM32::isDefined(Cond));
   3717   IValueT Address;
   3718   IValueT AddressEncoding =
   3719       encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
   3720   (void)AddressEncoding;
   3721   assert(AddressEncoding == EncodedAsImmRegOffset);
   3722   IValueT Encoding = B27 | B26 | B24 | B11 | B9 | B8 |
   3723                      (encodeCondition(Cond) << kConditionShift) |
   3724                      (getYInRegYXXXX(Dd) << 22) |
   3725                      (getXXXXInRegYXXXX(Dd) << 12) | Address;
   3726   emitInst(Encoding);
   3727 }
   3728 
   3729 void AssemblerARM32::vstrs(const Operand *OpSd, const Operand *OpAddress,
   3730                            CondARM32::Cond Cond, const TargetInfo &TInfo) {
   3731   // VSTR - ARM section A8.8.413, encoding A2:
   3732   //   vstr<c> <Sd>, [<Rn>{, #+/-<imm>]]
   3733   //
   3734   // cccc1101UD01nnnndddd1010iiiiiiii where cccc=Cond, nnnn=Rn, ddddD=Sd,
   3735   // iiiiiiii=abs(Opcode), and U=1 if Opcode >= 0;
   3736   constexpr const char *Vstrs = "vstrs";
   3737   IValueT Sd = encodeSRegister(OpSd, "Sd", Vstrs);
   3738   assert(CondARM32::isDefined(Cond));
   3739   IValueT Address;
   3740   IValueT AddressEncoding =
   3741       encodeAddress(OpAddress, Address, TInfo, RotatedImm8Div4Address);
   3742   (void)AddressEncoding;
   3743   assert(AddressEncoding == EncodedAsImmRegOffset);
   3744   IValueT Encoding =
   3745       B27 | B26 | B24 | B11 | B9 | (encodeCondition(Cond) << kConditionShift) |
   3746       (getYInRegXXXXY(Sd) << 22) | (getXXXXInRegXXXXY(Sd) << 12) | Address;
   3747   emitInst(Encoding);
   3748 }
   3749 
   3750 void AssemblerARM32::vst1qr(size_t ElmtSize, const Operand *OpQd,
   3751                             const Operand *OpAddress, const TargetInfo &TInfo) {
   3752   // VST1 (multiple single elements) - ARM section A8.8.404, encoding A1:
   3753   //   vst1.<size> <Qd>, [<Rn>]
   3754   //
   3755   // 111101000D00nnnnddd0ttttssaammmm where tttt=DRegListSize2, Dddd=Qd,
   3756   // nnnn=Rn, aa=0 (use default alignment), size=ElmtSize, and ss is the
   3757   // encoding of ElmtSize.
   3758   constexpr const char *Vst1qr = "vst1qr";
   3759   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vst1qr);
   3760   const IValueT Dd = mapQRegToDReg(Qd);
   3761   IValueT Address;
   3762   if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
   3763       EncodedAsImmRegOffset)
   3764     llvm::report_fatal_error(std::string(Vst1qr) + ": malform memory address");
   3765   const IValueT Rn = mask(Address, kRnShift, 4);
   3766   constexpr IValueT Rm = RegARM32::Reg_pc;
   3767   constexpr IValueT Opcode = B26;
   3768   constexpr IValueT Align = 0; // use default alignment.
   3769   emitVMem1Op(Opcode, Dd, Rn, Rm, DRegListSize2, ElmtSize, Align, Vst1qr);
   3770 }
   3771 
   3772 void AssemblerARM32::vst1(size_t ElmtSize, const Operand *OpQd,
   3773                           const Operand *OpAddress, const TargetInfo &TInfo) {
   3774 
   3775   // This is a pseudo-instruction for storing a single element of a quadword
   3776   // vector. For 64-bit the lower doubleword vector is stored.
   3777 
   3778   if (ElmtSize == 64) {
   3779     return vstrq(OpQd, OpAddress, Ice::CondARM32::AL, TInfo);
   3780   }
   3781 
   3782   // VST1 (single element from one lane) - ARMv7-A/R section A8.6.392, encoding
   3783   // A1:
   3784   //   VST1<c>.<size> <list>, [<Rn>{@<align>}], <Rm>
   3785   //
   3786   // 111101001D00nnnnddd0ss00aaaammmm where Dddd=Qd, nnnn=Rn,
   3787   // aaaa=0 (use default alignment), size=ElmtSize, and ss is the
   3788   // encoding of ElmtSize.
   3789   constexpr const char *Vst1qr = "vst1qr";
   3790   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vst1qr);
   3791   const IValueT Dd = mapQRegToDReg(Qd);
   3792   IValueT Address;
   3793   if (encodeAddress(OpAddress, Address, TInfo, NoImmOffsetAddress) !=
   3794       EncodedAsImmRegOffset)
   3795     llvm::report_fatal_error(std::string(Vst1qr) + ": malform memory address");
   3796   const IValueT Rn = mask(Address, kRnShift, 4);
   3797   constexpr IValueT Rm = RegARM32::Reg_pc;
   3798   constexpr IValueT Opcode = B26 | B23;
   3799   constexpr IValueT Align = 0; // use default alignment.
   3800   emitVMem1Op(Opcode, Dd, Rn, Rm, ElmtSize, Align, Vst1qr);
   3801 }
   3802 
   3803 void AssemblerARM32::vsubs(const Operand *OpSd, const Operand *OpSn,
   3804                            const Operand *OpSm, CondARM32::Cond Cond) {
   3805   // VSUB (floating-point) - ARM section A8.8.415, encoding A2:
   3806   //   vsub<c>.f32 <Sd>, <Sn>, <Sm>
   3807   //
   3808   // cccc11100D11nnnndddd101sN1M0mmmm where cccc=Cond, s=0, ddddD=Rd, nnnnN=Rn,
   3809   // and mmmmM=Rm.
   3810   constexpr const char *Vsubs = "vsubs";
   3811   constexpr IValueT VsubsOpcode = B21 | B20 | B6;
   3812   emitVFPsss(Cond, VsubsOpcode, OpSd, OpSn, OpSm, Vsubs);
   3813 }
   3814 
   3815 void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
   3816                            const Operand *OpDm, CondARM32::Cond Cond) {
   3817   // VSUB (floating-point) - ARM section A8.8.415, encoding A2:
   3818   //   vsub<c>.f64 <Dd>, <Dn>, <Dm>
   3819   //
   3820   // cccc11100D11nnnndddd101sN1M0mmmm where cccc=Cond, s=1, Ddddd=Rd, Nnnnn=Rn,
   3821   // and Mmmmm=Rm.
   3822   constexpr const char *Vsubd = "vsubd";
   3823   constexpr IValueT VsubdOpcode = B21 | B20 | B6;
   3824   emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd);
   3825 }
   3826 
   3827 void AssemblerARM32::vqaddqi(Type ElmtTy, const Operand *OpQd,
   3828                              const Operand *OpQm, const Operand *OpQn) {
   3829   // VQADD (integer) - ARM section A8.6.369, encoding A1:
   3830   //   vqadd<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
   3831   //
   3832   // 111100100Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
   3833   // size is 8, 16, 32, or 64.
   3834   assert(isScalarIntegerType(ElmtTy) &&
   3835          "vqaddqi expects vector with integer element type");
   3836   constexpr const char *Vqaddqi = "vqaddqi";
   3837   constexpr IValueT VqaddqiOpcode = B4;
   3838   emitSIMDqqq(VqaddqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqi);
   3839 }
   3840 
   3841 void AssemblerARM32::vqaddqu(Type ElmtTy, const Operand *OpQd,
   3842                              const Operand *OpQm, const Operand *OpQn) {
   3843   // VQADD (integer) - ARM section A8.6.369, encoding A1:
   3844   //   vqadd<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
   3845   //
   3846   // 111100110Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
   3847   // size is 8, 16, 32, or 64.
   3848   assert(isScalarIntegerType(ElmtTy) &&
   3849          "vqaddqu expects vector with integer element type");
   3850   constexpr const char *Vqaddqu = "vqaddqu";
   3851   constexpr IValueT VqaddquOpcode = B24 | B4;
   3852   emitSIMDqqq(VqaddquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqu);
   3853 }
   3854 
   3855 void AssemblerARM32::vqsubqi(Type ElmtTy, const Operand *OpQd,
   3856                              const Operand *OpQm, const Operand *OpQn) {
   3857   // VQSUB (integer) - ARM section A8.6.369, encoding A1:
   3858   //   vqsub<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
   3859   //
   3860   // 111100100Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
   3861   // size is 8, 16, 32, or 64.
   3862   assert(isScalarIntegerType(ElmtTy) &&
   3863          "vqsubqi expects vector with integer element type");
   3864   constexpr const char *Vqsubqi = "vqsubqi";
   3865   constexpr IValueT VqsubqiOpcode = B9 | B4;
   3866   emitSIMDqqq(VqsubqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqi);
   3867 }
   3868 
   3869 void AssemblerARM32::vqsubqu(Type ElmtTy, const Operand *OpQd,
   3870                              const Operand *OpQm, const Operand *OpQn) {
   3871   // VQSUB (integer) - ARM section A8.6.369, encoding A1:
   3872   //   vqsub<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
   3873   //
   3874   // 111100110Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
   3875   // size is 8, 16, 32, or 64.
   3876   assert(isScalarIntegerType(ElmtTy) &&
   3877          "vqsubqu expects vector with integer element type");
   3878   constexpr const char *Vqsubqu = "vqsubqu";
   3879   constexpr IValueT VqsubquOpcode = B24 | B9 | B4;
   3880   emitSIMDqqq(VqsubquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqu);
   3881 }
   3882 
   3883 void AssemblerARM32::vsubqi(Type ElmtTy, const Operand *OpQd,
   3884                             const Operand *OpQm, const Operand *OpQn) {
   3885   // VSUB (integer) - ARM section A8.8.414, encoding A1:
   3886   //   vsub.<dt> <Qd>, <Qn>, <Qm>
   3887   //
   3888   // 111100110Dssnnn0ddd01000N1M0mmm0 where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQm,
   3889   // and dt in [i8, i16, i32, i64] where ss is the index.
   3890   assert(isScalarIntegerType(ElmtTy) &&
   3891          "vsubqi expects vector with integer element type");
   3892   constexpr const char *Vsubqi = "vsubqi";
   3893   constexpr IValueT VsubqiOpcode = B24 | B11;
   3894   emitSIMDqqq(VsubqiOpcode, ElmtTy, OpQd, OpQm, OpQn, Vsubqi);
   3895 }
   3896 
   3897 void AssemblerARM32::vqmovn2(Type DestElmtTy, const Operand *OpQd,
   3898                              const Operand *OpQm, const Operand *OpQn,
   3899                              bool Unsigned, bool Saturating) {
   3900   // Pseudo-instruction for packing two quadword vectors into one quadword
   3901   // vector, narrowing each element using saturation or truncation.
   3902 
   3903   // VQMOVN - ARMv7-A/R section A8.6.361, encoding A1:
   3904   //   V{Q}MOVN{U}N<c>.<type><size> <Dd>, <Qm>
   3905   //
   3906   // 111100111D11ss10dddd0010opM0mmm0 where Ddddd=OpQd, op = 10, Mmmm=OpQm,
   3907   // ss is 00 (16-bit), 01 (32-bit), or 10 (64-bit).
   3908 
   3909   assert(DestElmtTy != IceType_i64 &&
   3910          "vmovn doesn't allow i64 destination vector elements!");
   3911 
   3912   constexpr const char *Vqmovn = "vqmovn";
   3913   constexpr bool UseQRegs = false;
   3914   constexpr bool IsFloatTy = false;
   3915   const IValueT Qd = encodeQRegister(OpQd, "Qd", Vqmovn);
   3916   const IValueT Qm = encodeQRegister(OpQm, "Qm", Vqmovn);
   3917   const IValueT Qn = encodeQRegister(OpQn, "Qn", Vqmovn);
   3918   const IValueT Dd = mapQRegToDReg(Qd);
   3919   const IValueT Dm = mapQRegToDReg(Qm);
   3920   const IValueT Dn = mapQRegToDReg(Qn);
   3921 
   3922   IValueT VqmovnOpcode = B25 | B24 | B23 | B21 | B20 | B17 | B9 |
   3923                          (Saturating ? (Unsigned ? B6 : B7) : 0);
   3924 
   3925   constexpr IValueT ElmtShift = 18;
   3926   VqmovnOpcode |= (encodeElmtType(DestElmtTy) << ElmtShift);
   3927 
   3928   if (Qm != Qd) {
   3929     // Narrow second source operand to upper half of destination.
   3930     emitSIMDBase(VqmovnOpcode, Dd + 1, 0, Dn, UseQRegs, IsFloatTy);
   3931     // Narrow first source operand to lower half of destination.
   3932     emitSIMDBase(VqmovnOpcode, Dd + 0, 0, Dm, UseQRegs, IsFloatTy);
   3933   } else if (Qn != Qd) {
   3934     // Narrow first source operand to lower half of destination.
   3935     emitSIMDBase(VqmovnOpcode, Dd + 0, 0, Dm, UseQRegs, IsFloatTy);
   3936     // Narrow second source operand to upper half of destination.
   3937     emitSIMDBase(VqmovnOpcode, Dd + 1, 0, Dn, UseQRegs, IsFloatTy);
   3938   } else {
   3939     // Narrow first source operand to lower half of destination.
   3940     emitSIMDBase(VqmovnOpcode, Dd, 0, Dm, UseQRegs, IsFloatTy);
   3941 
   3942     // VMOV Dd, Dm
   3943     // 111100100D10mmmmdddd0001MQM1mmmm
   3944     const IValueT VmovOpcode = B25 | B21 | B8 | B4;
   3945 
   3946     emitSIMDBase(VmovOpcode, Dd + 1, Dd, Dd, UseQRegs, IsFloatTy);
   3947   }
   3948 }
   3949 
   3950 void AssemblerARM32::vsubqf(const Operand *OpQd, const Operand *OpQn,
   3951                             const Operand *OpQm) {
   3952   // VSUB (floating-point) - ARM section A8.8.415, Encoding A1:
   3953   //   vsub.f32 <Qd>, <Qn>, <Qm>
   3954   //
   3955   // 111100100D10nnn0ddd01101N1M0mmm0 where Dddd=Qd, Nnnn=Qn, and Mmmm=Qm.
   3956   assert(OpQd->getType() == IceType_v4f32 && "vsubqf expects type <4 x float>");
   3957   constexpr const char *Vsubqf = "vsubqf";
   3958   constexpr IValueT VsubqfOpcode = B21 | B11 | B8;
   3959   emitSIMDqqq(VsubqfOpcode, IceType_f32, OpQd, OpQn, OpQm, Vsubqf);
   3960 }
   3961 
   3962 void AssemblerARM32::emitVStackOp(CondARM32::Cond Cond, IValueT Opcode,
   3963                                   const Variable *OpBaseReg,
   3964                                   SizeT NumConsecRegs) {
   3965   const IValueT BaseReg = getEncodedSRegNum(OpBaseReg);
   3966   const IValueT DLastBit = mask(BaseReg, 0, 1); // Last bit of base register.
   3967   const IValueT Rd = mask(BaseReg, 1, 4);       // Top 4 bits of base register.
   3968   assert(0 < NumConsecRegs);
   3969   (void)VpushVpopMaxConsecRegs;
   3970   assert(NumConsecRegs <= VpushVpopMaxConsecRegs);
   3971   assert((BaseReg + NumConsecRegs) <= RegARM32::getNumSRegs());
   3972   assert(CondARM32::isDefined(Cond));
   3973   const IValueT Encoding = Opcode | (Cond << kConditionShift) | DLastBit |
   3974                            (Rd << kRdShift) | NumConsecRegs;
   3975   emitInst(Encoding);
   3976 }
   3977 
   3978 void AssemblerARM32::vpop(const Variable *OpBaseReg, SizeT NumConsecRegs,
   3979                           CondARM32::Cond Cond) {
   3980   // Note: Current implementation assumes that OpBaseReg is defined using S
   3981   // registers. It doesn't implement the D register form.
   3982   //
   3983   // VPOP - ARM section A8.8.367, encoding A2:
   3984   //  vpop<c> <RegList>
   3985   //
   3986   // cccc11001D111101dddd1010iiiiiiii where cccc=Cond, ddddD=BaseReg, and
   3987   // iiiiiiii=NumConsecRegs.
   3988   constexpr IValueT VpopOpcode =
   3989       B27 | B26 | B23 | B21 | B20 | B19 | B18 | B16 | B11 | B9;
   3990   emitVStackOp(Cond, VpopOpcode, OpBaseReg, NumConsecRegs);
   3991 }
   3992 
   3993 void AssemblerARM32::vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
   3994                            CondARM32::Cond Cond) {
   3995   // Note: Current implementation assumes that OpBaseReg is defined using S
   3996   // registers. It doesn't implement the D register form.
   3997   //
   3998   // VPUSH - ARM section A8.8.368, encoding A2:
   3999   //   vpush<c> <RegList>
   4000   //
   4001   // cccc11010D101101dddd1010iiiiiiii where cccc=Cond, ddddD=BaseReg, and
   4002   // iiiiiiii=NumConsecRegs.
   4003   constexpr IValueT VpushOpcode =
   4004       B27 | B26 | B24 | B21 | B19 | B18 | B16 | B11 | B9;
   4005   emitVStackOp(Cond, VpushOpcode, OpBaseReg, NumConsecRegs);
   4006 }
   4007 
   4008 void AssemblerARM32::vshlqi(Type ElmtTy, const Operand *OpQd,
   4009                             const Operand *OpQm, const Operand *OpQn) {
   4010   // VSHL - ARM section A8.8.396, encoding A1:
   4011   //   vshl Qd, Qm, Qn
   4012   //
   4013   // 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn, 0=U,
   4014   // 1=Q
   4015   assert(isScalarIntegerType(ElmtTy) &&
   4016          "vshl expects vector with integer element type");
   4017   constexpr const char *Vshl = "vshl";
   4018   constexpr IValueT VshlOpcode = B10 | B6;
   4019   emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl);
   4020 }
   4021 
   4022 void AssemblerARM32::vshlqc(Type ElmtTy, const Operand *OpQd,
   4023                             const Operand *OpQm,
   4024                             const ConstantInteger32 *Imm6) {
   4025   // VSHL - ARM section A8.8.395, encoding A1:
   4026   //   vshl Qd, Qm, #Imm
   4027   //
   4028   // 1111001U1Diiiiiidddd0101LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
   4029   // 0=U, 1=Q, 0=L.
   4030   assert(isScalarIntegerType(ElmtTy) &&
   4031          "vshl expects vector with integer element type");
   4032   constexpr const char *Vshl = "vshl";
   4033   constexpr IValueT VshlOpcode = B23 | B10 | B8 | B4;
   4034   emitSIMDShiftqqc(VshlOpcode, OpQd, OpQm,
   4035                    encodeSIMDShiftImm6(ST_Vshl, ElmtTy, Imm6), Vshl);
   4036 }
   4037 
   4038 void AssemblerARM32::vshrqc(Type ElmtTy, const Operand *OpQd,
   4039                             const Operand *OpQm, const ConstantInteger32 *Imm6,
   4040                             InstARM32::FPSign Sign) {
   4041   // VSHR - ARM section A8.8.398, encoding A1:
   4042   //   vshr Qd, Qm, #Imm
   4043   //
   4044   // 1111001U1Diiiiiidddd0101LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
   4045   // U=Unsigned, Q=1, L=0.
   4046   assert(isScalarIntegerType(ElmtTy) &&
   4047          "vshr expects vector with integer element type");
   4048   constexpr const char *Vshr = "vshr";
   4049   const IValueT VshrOpcode =
   4050       (Sign == InstARM32::FS_Unsigned ? B24 : 0) | B23 | B4;
   4051   emitSIMDShiftqqc(VshrOpcode, OpQd, OpQm,
   4052                    encodeSIMDShiftImm6(ST_Vshr, ElmtTy, Imm6), Vshr);
   4053 }
   4054 
   4055 void AssemblerARM32::vshlqu(Type ElmtTy, const Operand *OpQd,
   4056                             const Operand *OpQm, const Operand *OpQn) {
   4057   // VSHL - ARM section A8.8.396, encoding A1:
   4058   //   vshl Qd, Qm, Qn
   4059   //
   4060   // 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn, 1=U,
   4061   // 1=Q
   4062   assert(isScalarIntegerType(ElmtTy) &&
   4063          "vshl expects vector with integer element type");
   4064   constexpr const char *Vshl = "vshl";
   4065   constexpr IValueT VshlOpcode = B24 | B10 | B6;
   4066   emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl);
   4067 }
   4068 
   4069 void AssemblerARM32::vsqrtd(const Operand *OpDd, const Operand *OpDm,
   4070                             CondARM32::Cond Cond) {
   4071   // VSQRT - ARM section A8.8.401, encoding A1:
   4072   //   vsqrt<c>.f64 <Dd>, <Dm>
   4073   //
   4074   // cccc11101D110001dddd101111M0mmmm where cccc=Cond, Ddddd=Sd, and Mmmmm=Sm.
   4075   constexpr const char *Vsqrtd = "vsqrtd";
   4076   IValueT Dd = encodeDRegister(OpDd, "Dd", Vsqrtd);
   4077   IValueT Dm = encodeDRegister(OpDm, "Dm", Vsqrtd);
   4078   constexpr IValueT VsqrtdOpcode = B23 | B21 | B20 | B16 | B7 | B6;
   4079   constexpr IValueT D0 = 0;
   4080   emitVFPddd(Cond, VsqrtdOpcode, Dd, D0, Dm);
   4081 }
   4082 
   4083 void AssemblerARM32::vsqrts(const Operand *OpSd, const Operand *OpSm,
   4084                             CondARM32::Cond Cond) {
   4085   // VSQRT - ARM section A8.8.401, encoding A1:
   4086   //   vsqrt<c>.f32 <Sd>, <Sm>
   4087   //
   4088   // cccc11101D110001dddd101011M0mmmm where cccc=Cond, ddddD=Sd, and mmmmM=Sm.
   4089   constexpr const char *Vsqrts = "vsqrts";
   4090   IValueT Sd = encodeSRegister(OpSd, "Sd", Vsqrts);
   4091   IValueT Sm = encodeSRegister(OpSm, "Sm", Vsqrts);
   4092   constexpr IValueT VsqrtsOpcode = B23 | B21 | B20 | B16 | B7 | B6;
   4093   constexpr IValueT S0 = 0;
   4094   emitVFPsss(Cond, VsqrtsOpcode, Sd, S0, Sm);
   4095 }
   4096 
   4097 } // end of namespace ARM32
   4098 } // end of namespace Ice
   4099