/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
#define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_

#include <climits>
#include <cstring>
#include <deque>
#include <memory>
#include <type_traits>
#include <utility>
#include <vector>
     23 
     24 #include "arch/mips64/instruction_set_features_mips64.h"
     25 #include "base/arena_containers.h"
     26 #include "base/enums.h"
     27 #include "base/macros.h"
     28 #include "base/stl_util_identity.h"
     29 #include "constants_mips64.h"
     30 #include "globals.h"
     31 #include "heap_poisoning.h"
     32 #include "managed_register_mips64.h"
     33 #include "offsets.h"
     34 #include "utils/assembler.h"
     35 #include "utils/jni_macro_assembler.h"
     36 #include "utils/label.h"
     37 
namespace art {
namespace mips64 {

enum LoadConst64Path {
  kLoadConst64PathZero           = 0x0,
  kLoadConst64PathOri            = 0x1,
  kLoadConst64PathDaddiu         = 0x2,
  kLoadConst64PathLui            = 0x4,
  kLoadConst64PathLuiOri         = 0x8,
  kLoadConst64PathOriDahi        = 0x10,
  kLoadConst64PathOriDati        = 0x20,
  kLoadConst64PathLuiDahi        = 0x40,
  kLoadConst64PathLuiDati        = 0x80,
  kLoadConst64PathDaddiuDsrlX    = 0x100,
  kLoadConst64PathOriDsllX       = 0x200,
  kLoadConst64PathDaddiuDsllX    = 0x400,
  kLoadConst64PathLuiOriDsllX    = 0x800,
  kLoadConst64PathOriDsllXOri    = 0x1000,
  kLoadConst64PathDaddiuDsllXOri = 0x2000,
  kLoadConst64PathDaddiuDahi     = 0x4000,
  kLoadConst64PathDaddiuDati     = 0x8000,
  kLoadConst64PathDinsu1         = 0x10000,
  kLoadConst64PathDinsu2         = 0x20000,
  kLoadConst64PathCatchAll       = 0x40000,
  kLoadConst64PathAllPaths       = 0x7ffff,
};

template <typename Asm>
void TemplateLoadConst32(Asm* a, GpuRegister rd, int32_t value) {
  if (IsUint<16>(value)) {
    // Use OR with (unsigned) immediate to encode 16b unsigned int.
    a->Ori(rd, ZERO, value);
  } else if (IsInt<16>(value)) {
    // Use ADD with (signed) immediate to encode 16b signed int.
    a->Addiu(rd, ZERO, value);
  } else {
    // Set 16 most significant bits of value. The "lui" instruction
    // also clears the 16 least significant bits to zero.
    a->Lui(rd, value >> 16);
    if (value & 0xFFFF) {
      // If the 16 least significant bits are non-zero, set them
      // here.
      a->Ori(rd, rd, value);
    }
  }
}
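
// Illustrative sequences chosen by TemplateLoadConst32 (an editor's sketch;
// the constants are arbitrary examples):
//   value = 0x00001234: Ori(rd, ZERO, 0x1234)                   (1 instruction)
//   value = 0xFFFF8000: Addiu(rd, ZERO, -0x8000)                (1 instruction)
//   value = 0x12340000: Lui(rd, 0x1234)                         (1 instruction)
//   value = 0x12345678: Lui(rd, 0x1234); Ori(rd, rd, 0x5678)    (2 instructions)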

static inline int InstrCountForLoadReplicatedConst32(int64_t value) {
  int32_t x = Low32Bits(value);
  int32_t y = High32Bits(value);

  if (x == y) {
    return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0)) ? 2 : 3;
  }

  return INT_MAX;
}
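
// Example (editor's illustration): 0x1234000012340000 has equal halves
// x == y == 0x12340000; since (x & 0xFFFF) == 0, the count is 2 (Lui + Dinsu),
// whereas 0x1234567812345678 counts 3 (Lui + Ori + Dinsu).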

template <typename Asm, typename Rtype, typename Vtype>
void TemplateLoadConst64(Asm* a, Rtype rd, Vtype value) {
  int bit31 = (value & UINT64_C(0x80000000)) != 0;
  int rep32_count = InstrCountForLoadReplicatedConst32(value);

  // Loads with 1 instruction.
  if (IsUint<16>(value)) {
    // 64-bit value can be loaded as an unsigned 16-bit number.
    a->RecordLoadConst64Path(kLoadConst64PathOri);
    a->Ori(rd, ZERO, value);
  } else if (IsInt<16>(value)) {
    // 64-bit value can be loaded as a signed 16-bit number.
    a->RecordLoadConst64Path(kLoadConst64PathDaddiu);
    a->Daddiu(rd, ZERO, value);
  } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) {
    // 64-bit value can be loaded as a signed 32-bit number which has all
    // of its 16 least significant bits set to zero.
    a->RecordLoadConst64Path(kLoadConst64PathLui);
    a->Lui(rd, value >> 16);
  } else if (IsInt<32>(value)) {
    // Loads with 2 instructions.
    // 64-bit value can be loaded as a signed 32-bit number which has some
    // or all of its 16 least significant bits set to one.
    a->RecordLoadConst64Path(kLoadConst64PathLuiOri);
    a->Lui(rd, value >> 16);
    a->Ori(rd, rd, value);
  } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) {
    // 64-bit value which consists of an unsigned 16-bit value in its
    // least significant 32 bits, and a signed 16-bit value in its
    // most significant 32 bits.
    a->RecordLoadConst64Path(kLoadConst64PathOriDahi);
    a->Ori(rd, ZERO, value);
    a->Dahi(rd, value >> 32);
  } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) {
    // 64-bit value which consists of an unsigned 16-bit value in its
    // least significant 48 bits, and a signed 16-bit value in its
    // most significant 16 bits.
    a->RecordLoadConst64Path(kLoadConst64PathOriDati);
    a->Ori(rd, ZERO, value);
    a->Dati(rd, value >> 48);
  } else if ((value & 0xFFFF) == 0 &&
             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
    // 16 LSBs (Least Significant Bits) all set to zero.
    // 48 MSBs (Most Significant Bits) hold a signed 32-bit value.
    a->RecordLoadConst64Path(kLoadConst64PathLuiDahi);
    a->Lui(rd, value >> 16);
    a->Dahi(rd, (value >> 32) + bit31);
  } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
    // 16 LSBs all set to zero.
    // 48 MSBs hold a signed value which can't be represented by a signed
    // 32-bit number, and the middle 16 bits are all zero, or all one.
    a->RecordLoadConst64Path(kLoadConst64PathLuiDati);
    a->Lui(rd, value >> 16);
    a->Dati(rd, (value >> 48) + bit31);
  } else if (IsInt<16>(static_cast<int32_t>(value)) &&
             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
    // 32 LSBs contain an unsigned 16-bit number.
    // 32 MSBs contain a signed 16-bit number.
    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDahi);
    a->Daddiu(rd, ZERO, value);
    a->Dahi(rd, (value >> 32) + bit31);
  } else if (IsInt<16>(static_cast<int32_t>(value)) &&
             ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
    // 48 LSBs contain an unsigned 16-bit number.
    // 16 MSBs contain a signed 16-bit number.
    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDati);
    a->Daddiu(rd, ZERO, value);
    a->Dati(rd, (value >> 48) + bit31);
  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
    // 64-bit values which have their "n" LSBs set to one, and their
    // "64-n" MSBs set to zero. "n" must satisfy 0 < n < 64.
    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsrlX);
    a->Daddiu(rd, ZERO, -1);
    if (shift_cnt < 32) {
      a->Dsrl(rd, rd, shift_cnt);
    } else {
      a->Dsrl32(rd, rd, shift_cnt & 31);
    }
  } else {
    int shift_cnt = CTZ(value);
    int64_t tmp = value >> shift_cnt;
    a->RecordLoadConst64Path(kLoadConst64PathOriDsllX);
    if (IsUint<16>(tmp)) {
      // Value can be computed by loading a 16-bit unsigned value, and
      // then shifting left.
      a->Ori(rd, ZERO, tmp);
      if (shift_cnt < 32) {
        a->Dsll(rd, rd, shift_cnt);
      } else {
        a->Dsll32(rd, rd, shift_cnt & 31);
      }
    } else if (IsInt<16>(tmp)) {
      // Value can be computed by loading a 16-bit signed value, and
      // then shifting left.
      a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllX);
      a->Daddiu(rd, ZERO, tmp);
      if (shift_cnt < 32) {
        a->Dsll(rd, rd, shift_cnt);
      } else {
        a->Dsll32(rd, rd, shift_cnt & 31);
      }
    } else if (rep32_count < 3) {
      // Value being loaded has 32 LSBs equal to the 32 MSBs, and the
      // value loaded into the 32 LSBs can be loaded with a single
      // MIPS instruction.
      a->LoadConst32(rd, value);
      a->Dinsu(rd, rd, 32, 32);
      a->RecordLoadConst64Path(kLoadConst64PathDinsu1);
    } else if (IsInt<32>(tmp)) {
      // Loads with 3 instructions.
      // Value can be computed by loading a 32-bit signed value, and
      // then shifting left.
      a->RecordLoadConst64Path(kLoadConst64PathLuiOriDsllX);
      a->Lui(rd, tmp >> 16);
      a->Ori(rd, rd, tmp);
      if (shift_cnt < 32) {
        a->Dsll(rd, rd, shift_cnt);
      } else {
        a->Dsll32(rd, rd, shift_cnt & 31);
      }
    } else {
      shift_cnt = 16 + CTZ(value >> 16);
      tmp = value >> shift_cnt;
      if (IsUint<16>(tmp)) {
        // Value can be computed by loading a 16-bit unsigned value,
        // shifting left, and "or"ing in another 16-bit unsigned value.
        a->RecordLoadConst64Path(kLoadConst64PathOriDsllXOri);
        a->Ori(rd, ZERO, tmp);
        if (shift_cnt < 32) {
          a->Dsll(rd, rd, shift_cnt);
        } else {
          a->Dsll32(rd, rd, shift_cnt & 31);
        }
        a->Ori(rd, rd, value);
      } else if (IsInt<16>(tmp)) {
        // Value can be computed by loading a 16-bit signed value,
        // shifting left, and "or"ing in a 16-bit unsigned value.
        a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllXOri);
        a->Daddiu(rd, ZERO, tmp);
        if (shift_cnt < 32) {
          a->Dsll(rd, rd, shift_cnt);
        } else {
          a->Dsll32(rd, rd, shift_cnt & 31);
        }
        a->Ori(rd, rd, value);
      } else if (rep32_count < 4) {
        // Value being loaded has 32 LSBs equal to the 32 MSBs, and the
        // value in the 32 LSBs requires 2 MIPS instructions to load.
        a->LoadConst32(rd, value);
        a->Dinsu(rd, rd, 32, 32);
        a->RecordLoadConst64Path(kLoadConst64PathDinsu2);
      } else {
        // Loads with 3-4 instructions.
        // Catch-all case to get any other 64-bit values which aren't
        // handled by special cases above.
        uint64_t tmp2 = value;
        a->RecordLoadConst64Path(kLoadConst64PathCatchAll);
        a->LoadConst32(rd, value);
        if (bit31) {
          tmp2 += UINT64_C(0x100000000);
        }
        if (((tmp2 >> 32) & 0xFFFF) != 0) {
          a->Dahi(rd, tmp2 >> 32);
        }
        if (tmp2 & UINT64_C(0x800000000000)) {
          tmp2 += UINT64_C(0x1000000000000);
        }
        if ((tmp2 >> 48) != 0) {
          a->Dati(rd, tmp2 >> 48);
        }
      }
    }
  }
}
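
// Two illustrative paths through TemplateLoadConst64 (an editor's sketch; the
// constants are arbitrary):
//   0x0000123400005678: Ori(rd, ZERO, 0x5678); Dahi(rd, 0x1234)
//     (bits 16-31 are zero and bits 32-63 form a signed 16-bit value, so the
//     kLoadConst64PathOriDahi case applies).
//   0x00007FFFFFFFFFFF: Daddiu(rd, ZERO, -1); Dsrl(rd, rd, 17)
//     (value + 1 is a power of two, so the kLoadConst64PathDaddiuDsrlX case
//     applies with shift_cnt == 64 - 47 == 17).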

static constexpr size_t kMips64HalfwordSize = 2;
static constexpr size_t kMips64WordSize = 4;
static constexpr size_t kMips64DoublewordSize = 8;

enum LoadOperandType {
  kLoadSignedByte,
  kLoadUnsignedByte,
  kLoadSignedHalfword,
  kLoadUnsignedHalfword,
  kLoadWord,
  kLoadUnsignedWord,
  kLoadDoubleword,
  kLoadQuadword
};

enum StoreOperandType {
  kStoreByte,
  kStoreHalfword,
  kStoreWord,
  kStoreDoubleword,
  kStoreQuadword
};

// Used to test the values returned by ClassS/ClassD.
enum FPClassMaskType {
  kSignalingNaN      = 0x001,
  kQuietNaN          = 0x002,
  kNegativeInfinity  = 0x004,
  kNegativeNormal    = 0x008,
  kNegativeSubnormal = 0x010,
  kNegativeZero      = 0x020,
  kPositiveInfinity  = 0x040,
  kPositiveNormal    = 0x080,
  kPositiveSubnormal = 0x100,
  kPositiveZero      = 0x200,
};
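
// For instance (editor's note), after ClassD the result can be tested against
// a NaN mask such as (kSignalingNaN | kQuietNaN) to detect any NaN input.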

class Mips64Label : public Label {
 public:
  Mips64Label() : prev_branch_id_plus_one_(0) {}

  Mips64Label(Mips64Label&& src)
      : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}

 private:
  uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.

  friend class Mips64Assembler;
  DISALLOW_COPY_AND_ASSIGN(Mips64Label);
};

// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
class Literal {
 public:
  static constexpr size_t kMaxSize = 8;

  Literal(uint32_t size, const uint8_t* data)
      : label_(), size_(size) {
    DCHECK_LE(size, Literal::kMaxSize);
    memcpy(data_, data, size);
  }

  template <typename T>
  T GetValue() const {
    DCHECK_EQ(size_, sizeof(T));
    T value;
    memcpy(&value, data_, sizeof(T));
    return value;
  }

  uint32_t GetSize() const {
    return size_;
  }

  const uint8_t* GetData() const {
    return data_;
  }

  Mips64Label* GetLabel() {
    return &label_;
  }

  const Mips64Label* GetLabel() const {
    return &label_;
  }

 private:
  Mips64Label label_;
  const uint32_t size_;
  uint8_t data_[kMaxSize];

  DISALLOW_COPY_AND_ASSIGN(Literal);
};

// Jump table: table of labels emitted after the code and before the literals. Similar to literals.
class JumpTable {
 public:
  explicit JumpTable(std::vector<Mips64Label*>&& labels)
      : label_(), labels_(std::move(labels)) {
  }

  size_t GetSize() const {
    return labels_.size() * sizeof(uint32_t);
  }

  const std::vector<Mips64Label*>& GetData() const {
    return labels_;
  }

  Mips64Label* GetLabel() {
    return &label_;
  }

  const Mips64Label* GetLabel() const {
    return &label_;
  }

 private:
  Mips64Label label_;
  std::vector<Mips64Label*> labels_;

  DISALLOW_COPY_AND_ASSIGN(JumpTable);
};

// Slowpath entered when Thread::Current()->_exception is non-null.
class Mips64ExceptionSlowPath {
 public:
  explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust)
      : scratch_(scratch), stack_adjust_(stack_adjust) {}

  Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src)
      : scratch_(src.scratch_),
        stack_adjust_(src.stack_adjust_),
        exception_entry_(std::move(src.exception_entry_)) {}

 private:
  Mips64Label* Entry() { return &exception_entry_; }
  const Mips64ManagedRegister scratch_;
  const size_t stack_adjust_;
  Mips64Label exception_entry_;

  friend class Mips64Assembler;
  DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath);
};

class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
 public:
  using JNIBase = JNIMacroAssembler<PointerSize::k64>;

  explicit Mips64Assembler(ArenaAllocator* allocator,
                           const Mips64InstructionSetFeatures* instruction_set_features = nullptr)
      : Assembler(allocator),
        overwriting_(false),
        overwrite_location_(0),
        literals_(allocator->Adapter(kArenaAllocAssembler)),
        long_literals_(allocator->Adapter(kArenaAllocAssembler)),
        jump_tables_(allocator->Adapter(kArenaAllocAssembler)),
        last_position_adjustment_(0),
        last_old_position_(0),
        last_branch_id_(0),
        has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false) {
    cfi().DelayEmittingAdvancePCs();
  }

  virtual ~Mips64Assembler() {
    for (auto& branch : branches_) {
      CHECK(branch.IsResolved());
    }
  }

  size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); }
  DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); }

  // Emit Machine Instructions.
  void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
  void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
  void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64

  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
  void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64

  void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Or(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Ori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Xor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);

  void Bitswap(GpuRegister rd, GpuRegister rt);
  void Dbitswap(GpuRegister rd, GpuRegister rt);  // MIPS64
  void Seb(GpuRegister rd, GpuRegister rt);
  void Seh(GpuRegister rd, GpuRegister rt);
  void Dsbh(GpuRegister rd, GpuRegister rt);  // MIPS64
  void Dshd(GpuRegister rd, GpuRegister rt);  // MIPS64
  void Dext(GpuRegister rs, GpuRegister rt, int pos, int size);  // MIPS64
  void Ins(GpuRegister rt, GpuRegister rs, int pos, int size);
  void Dins(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
  void Dinsm(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
  void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
  void DblIns(GpuRegister rt, GpuRegister rs, int pos, int size);  // MIPS64
  void Lsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne);
  void Dlsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne);  // MIPS64
  void Wsbh(GpuRegister rd, GpuRegister rt);
  void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
  void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);  // MIPS64
  void Ll(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
  void Lld(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);  // MIPS64

  void Sll(GpuRegister rd, GpuRegister rt, int shamt);
  void Srl(GpuRegister rd, GpuRegister rt, int shamt);
  void Rotr(GpuRegister rd, GpuRegister rt, int shamt);
  void Sra(GpuRegister rd, GpuRegister rt, int shamt);
  void Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
  void Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
  void Rotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs);
  void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs);
  void Dsll(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Dsrl(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Drotr(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Dsra(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Dsll32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Drotr32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Dsra32(GpuRegister rd, GpuRegister rt, int shamt);  // MIPS64
  void Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
  void Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
  void Drotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64
  void Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs);  // MIPS64

  void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
  void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
  void Lwpc(GpuRegister rs, uint32_t imm19);
  void Lwupc(GpuRegister rs, uint32_t imm19);  // MIPS64
  void Ldpc(GpuRegister rs, uint32_t imm18);  // MIPS64
  void Lui(GpuRegister rt, uint16_t imm16);
  void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
  void Sync(uint32_t stype);

  void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64

  void Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
  void Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Selnez(GpuRegister rd, GpuRegister rs, GpuRegister rt);
  void Clz(GpuRegister rd, GpuRegister rs);
  void Clo(GpuRegister rd, GpuRegister rs);
  void Dclz(GpuRegister rd, GpuRegister rs);  // MIPS64
  void Dclo(GpuRegister rd, GpuRegister rs);  // MIPS64

  void Jalr(GpuRegister rd, GpuRegister rs);
  void Jalr(GpuRegister rs);
  void Jr(GpuRegister rs);
  void Auipc(GpuRegister rs, uint16_t imm16);
  void Addiupc(GpuRegister rs, uint32_t imm19);
  void Bc(uint32_t imm26);
  void Balc(uint32_t imm26);
  void Jic(GpuRegister rt, uint16_t imm16);
  void Jialc(GpuRegister rt, uint16_t imm16);
  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
  void Bltzc(GpuRegister rt, uint16_t imm16);
  void Bgtzc(GpuRegister rt, uint16_t imm16);
  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
  void Bgezc(GpuRegister rt, uint16_t imm16);
  void Blezc(GpuRegister rt, uint16_t imm16);
  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
  void Beqzc(GpuRegister rs, uint32_t imm21);
  void Bnezc(GpuRegister rs, uint32_t imm21);
  void Bc1eqz(FpuRegister ft, uint16_t imm16);
  void Bc1nez(FpuRegister ft, uint16_t imm16);
  void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R2
  void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R2
  void Beqz(GpuRegister rt, uint16_t imm16);  // R2
  void Bnez(GpuRegister rt, uint16_t imm16);  // R2
  void Bltz(GpuRegister rt, uint16_t imm16);  // R2
  void Bgez(GpuRegister rt, uint16_t imm16);  // R2
  void Blez(GpuRegister rt, uint16_t imm16);  // R2
  void Bgtz(GpuRegister rt, uint16_t imm16);  // R2

  void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void MulS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SqrtS(FpuRegister fd, FpuRegister fs);
  void SqrtD(FpuRegister fd, FpuRegister fs);
  void AbsS(FpuRegister fd, FpuRegister fs);
  void AbsD(FpuRegister fd, FpuRegister fs);
  void MovS(FpuRegister fd, FpuRegister fs);
  void MovD(FpuRegister fd, FpuRegister fs);
  void NegS(FpuRegister fd, FpuRegister fs);
  void NegD(FpuRegister fd, FpuRegister fs);
  void RoundLS(FpuRegister fd, FpuRegister fs);
  void RoundLD(FpuRegister fd, FpuRegister fs);
  void RoundWS(FpuRegister fd, FpuRegister fs);
  void RoundWD(FpuRegister fd, FpuRegister fs);
  void TruncLS(FpuRegister fd, FpuRegister fs);
  void TruncLD(FpuRegister fd, FpuRegister fs);
  void TruncWS(FpuRegister fd, FpuRegister fs);
  void TruncWD(FpuRegister fd, FpuRegister fs);
  void CeilLS(FpuRegister fd, FpuRegister fs);
  void CeilLD(FpuRegister fd, FpuRegister fs);
  void CeilWS(FpuRegister fd, FpuRegister fs);
  void CeilWD(FpuRegister fd, FpuRegister fs);
  void FloorLS(FpuRegister fd, FpuRegister fs);
  void FloorLD(FpuRegister fd, FpuRegister fs);
  void FloorWS(FpuRegister fd, FpuRegister fs);
  void FloorWD(FpuRegister fd, FpuRegister fs);
  void SelS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SelD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SeleqzS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SeleqzD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SelnezS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void SelnezD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void RintS(FpuRegister fd, FpuRegister fs);
  void RintD(FpuRegister fd, FpuRegister fs);
  void ClassS(FpuRegister fd, FpuRegister fs);
  void ClassD(FpuRegister fd, FpuRegister fs);
  void MinS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
  void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft);

  void Cvtsw(FpuRegister fd, FpuRegister fs);
  void Cvtdw(FpuRegister fd, FpuRegister fs);
  void Cvtsd(FpuRegister fd, FpuRegister fs);
  void Cvtds(FpuRegister fd, FpuRegister fs);
  void Cvtsl(FpuRegister fd, FpuRegister fs);
  void Cvtdl(FpuRegister fd, FpuRegister fs);

  void Mfc1(GpuRegister rt, FpuRegister fs);
  void Mfhc1(GpuRegister rt, FpuRegister fs);
  void Mtc1(GpuRegister rt, FpuRegister fs);
  void Mthc1(GpuRegister rt, FpuRegister fs);
  void Dmfc1(GpuRegister rt, FpuRegister fs);  // MIPS64
  void Dmtc1(GpuRegister rt, FpuRegister fs);  // MIPS64
  void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
  void Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
  void Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);
  void Sdc1(FpuRegister ft, GpuRegister rs, uint16_t imm16);

  void Break();
  void Nop();
  void Move(GpuRegister rd, GpuRegister rs);
  void Clear(GpuRegister rd);
  void Not(GpuRegister rd, GpuRegister rs);

  // MSA instructions.
  void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);

  void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Asub_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);

  void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt);

  void Ffint_sW(VectorRegister wd, VectorRegister ws);
  void Ffint_sD(VectorRegister wd, VectorRegister ws);
  void Ftint_sW(VectorRegister wd, VectorRegister ws);
  void Ftint_sD(VectorRegister wd, VectorRegister ws);

  void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);

  // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1).
  void SlliB(VectorRegister wd, VectorRegister ws, int shamt3);
  void SlliH(VectorRegister wd, VectorRegister ws, int shamt4);
  void SlliW(VectorRegister wd, VectorRegister ws, int shamt5);
  void SlliD(VectorRegister wd, VectorRegister ws, int shamt6);
  void SraiB(VectorRegister wd, VectorRegister ws, int shamt3);
  void SraiH(VectorRegister wd, VectorRegister ws, int shamt4);
  void SraiW(VectorRegister wd, VectorRegister ws, int shamt5);
  void SraiD(VectorRegister wd, VectorRegister ws, int shamt6);
  void SrliB(VectorRegister wd, VectorRegister ws, int shamt3);
  void SrliH(VectorRegister wd, VectorRegister ws, int shamt4);
  void SrliW(VectorRegister wd, VectorRegister ws, int shamt5);
  void SrliD(VectorRegister wd, VectorRegister ws, int shamt6);

  void MoveV(VectorRegister wd, VectorRegister ws);
  void SplatiB(VectorRegister wd, VectorRegister ws, int n4);
  void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
  void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
  void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
  void Copy_sB(GpuRegister rd, VectorRegister ws, int n4);
  void Copy_sH(GpuRegister rd, VectorRegister ws, int n3);
  void Copy_sW(GpuRegister rd, VectorRegister ws, int n2);
  void Copy_sD(GpuRegister rd, VectorRegister ws, int n1);
  void Copy_uB(GpuRegister rd, VectorRegister ws, int n4);
  void Copy_uH(GpuRegister rd, VectorRegister ws, int n3);
  void Copy_uW(GpuRegister rd, VectorRegister ws, int n2);
  void InsertB(VectorRegister wd, GpuRegister rs, int n4);
  void InsertH(VectorRegister wd, GpuRegister rs, int n3);
  void InsertW(VectorRegister wd, GpuRegister rs, int n2);
  void InsertD(VectorRegister wd, GpuRegister rs, int n1);
  void FillB(VectorRegister wd, GpuRegister rs);
  void FillH(VectorRegister wd, GpuRegister rs);
  void FillW(VectorRegister wd, GpuRegister rs);
  void FillD(VectorRegister wd, GpuRegister rs);

  void LdiB(VectorRegister wd, int imm8);
  void LdiH(VectorRegister wd, int imm10);
  void LdiW(VectorRegister wd, int imm10);
  void LdiD(VectorRegister wd, int imm10);
  void LdB(VectorRegister wd, GpuRegister rs, int offset);
  void LdH(VectorRegister wd, GpuRegister rs, int offset);
  void LdW(VectorRegister wd, GpuRegister rs, int offset);
  void LdD(VectorRegister wd, GpuRegister rs, int offset);
  void StB(VectorRegister wd, GpuRegister rs, int offset);
  void StH(VectorRegister wd, GpuRegister rs, int offset);
  void StW(VectorRegister wd, GpuRegister rs, int offset);
  void StD(VectorRegister wd, GpuRegister rs, int offset);

  void IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt);

  void MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MaddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MaddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MaddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MsubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MsubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MsubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void MsubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void FmsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);

  void Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
  void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);

  // Helper for replicating a floating-point value into all destination elements.
  void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double);

  // Higher level composite instructions.
  int InstrCountForLoadReplicatedConst32(int64_t);
  void LoadConst32(GpuRegister rd, int32_t value);
  void LoadConst64(GpuRegister rd, int64_t value);  // MIPS64

  // This function is only used for testing purposes.
  void RecordLoadConst64Path(int value);

  void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value);
  void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64

  //
  // Heap poisoning.
  //

  // Poison a heap reference contained in `src` and store it in `dst`.
  void PoisonHeapReference(GpuRegister dst, GpuRegister src) {
    // dst = -src.
    // Negate the 32-bit ref.
    Dsubu(dst, ZERO, src);
    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
    Dext(dst, dst, 0, 32);
  }
  // Poison a heap reference contained in `reg`.
  void PoisonHeapReference(GpuRegister reg) {
    // reg = -reg.
    PoisonHeapReference(reg, reg);
  }
  // Unpoison a heap reference contained in `reg`.
  void UnpoisonHeapReference(GpuRegister reg) {
    // reg = -reg.
    // Negate the 32-bit ref.
    Dsubu(reg, ZERO, reg);
    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
    Dext(reg, reg, 0, 32);
  }
  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
  void MaybePoisonHeapReference(GpuRegister reg) {
    if (kPoisonHeapReferences) {
      PoisonHeapReference(reg);
    }
  }
  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
  void MaybeUnpoisonHeapReference(GpuRegister reg) {
    if (kPoisonHeapReferences) {
      UnpoisonHeapReference(reg);
    }
  }
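
  // Worked example (editor's illustration): for a reference 0x00001000,
  // PoisonHeapReference computes -0x1000 and Dext keeps the low 32 bits,
  // leaving 0x00000000FFFFF000; UnpoisonHeapReference negates and truncates
  // again, restoring 0x0000000000001000.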

  void Bind(Label* label) OVERRIDE {
    Bind(down_cast<Mips64Label*>(label));
  }
  void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
    UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64";
  }

  void Bind(Mips64Label* label);

  // Don't warn about a different virtual Bind/Jump in the base class.
  using JNIBase::Bind;
  using JNIBase::Jump;

  // Create a new label that can be used with Jump/Bind calls.
  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE {
    LOG(FATAL) << "Not implemented on MIPS64";
    UNREACHABLE();
  }
  // Emit an unconditional jump to the label.
  void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE {
    LOG(FATAL) << "Not implemented on MIPS64";
    UNREACHABLE();
  }
  // Emit a conditional jump to the label by applying a unary condition test to the register.
  void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED,
            JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED,
            ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE {
    LOG(FATAL) << "Not implemented on MIPS64";
    UNREACHABLE();
  }

  // Code at this offset will serve as the target for the Jump call.
  void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE {
    LOG(FATAL) << "Not implemented on MIPS64";
    UNREACHABLE();
  }

  // Create a new literal with a given value.
  // NOTE: Force the template parameter to be explicitly specified.
  template <typename T>
  Literal* NewLiteral(typename Identity<T>::type value) {
    static_assert(std::is_integral<T>::value, "T must be an integral type.");
    return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
  }

  // Load label address using PC-relative loads. To be used with data labels in the literal /
  // jump table area only and not with regular code labels.
  void LoadLabelAddress(GpuRegister dest_reg, Mips64Label* label);

  // Create a new literal with the given data.
  Literal* NewLiteral(size_t size, const uint8_t* data);

  // Load literal using PC-relative loads.
  void LoadLiteral(GpuRegister dest_reg, LoadOperandType load_type, Literal* literal);
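
  // Typical literal usage (editor's sketch; the value and register are
  // arbitrary):
  //   Literal* lit = NewLiteral<int64_t>(INT64_C(0x123456789ABCDEF0));
  //   LoadLiteral(V0, kLoadDoubleword, lit);
  // The literal data itself is emitted after the code when the assembler
  // finalizes.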

  // Create a jump table for the given labels that will be emitted when finalizing.
  // When the table is emitted, offsets will be relative to the location of the table.
  // The table location is determined by the location of its label (the label precedes
  // the table data) and should be loaded using LoadLabelAddress().
  JumpTable* CreateJumpTable(std::vector<Mips64Label*>&& labels);
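  //
  // Sketch of the intended dispatch pattern (editor's illustration; registers
  // and the surrounding loads are placeholders):
  //   JumpTable* table = CreateJumpTable(std::move(labels));
  //   LoadLabelAddress(AT, table->GetLabel());  // Base address of the table.
  //   ...load the uint32_t entry for the case index, add it to the base,
  //   and jump through the resulting address...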

  // When `is_bare` is false, the branches will promote to long (if the range
  // of the individual branch instruction is insufficient) and the delay/
  // forbidden slots will be taken care of.
  // Use `is_bare = false` when the branch target may be out of reach of the
  // individual branch instruction. In other words, this is for general-purpose use.
  //
  // When `is_bare` is true, just the branch instructions will be generated
  // leaving delay/forbidden slot filling up to the caller and the branches
  // won't promote to long if the range is insufficient (you'll get a
  // compilation error when the range is exceeded).
  // Use `is_bare = true` when the branch target is known to be within reach
  // of the individual branch instruction. This is intended for small local
  // optimizations around delay/forbidden slots.
  // Also prefer using `is_bare = true` if the code near the branch is to be
  // patched or analyzed at run time (e.g. introspection) to
  // - show the intent and
  // - fail during compilation rather than during patching/execution if the
  //   bare branch range is insufficient but the code size and layout are
  //   expected to remain unchanged.
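  //
  // For example (editor's note): a branch over a couple of adjacent
  // instructions whose target can never go out of range may use
  // Bnec(rs, rt, &label, /* is_bare */ true), while a branch to a possibly
  // distant slow path should keep the default is_bare = false so it can
  // promote to a long branch.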
  //
  // R6 compact branches without delay/forbidden slots.
  void Bc(Mips64Label* label, bool is_bare = false);
  void Balc(Mips64Label* label, bool is_bare = false);
  // R6 compact branches with forbidden slots.
  void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Blezc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
  void Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare = false);
  void Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare = false);
  // R6 branches with delay slots.
  void Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare = false);
  void Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare = false);
  // R2 branches with delay slots that are also available on R6.
  // The `is_bare` parameter exists and is checked in these branches only to
  // prevent programming mistakes. These branches never promote to long, not
  // even if `is_bare` is false.
  void Bltz(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Bgez(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Blez(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Beqz(GpuRegister rs, Mips64Label* label, bool is_bare = false);  // R2
  void Bnez(GpuRegister rs, Mips64Label* label, bool is_bare = false);  // R2

  void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
  void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
  // If element_size_shift is negative at entry, its value will be calculated based on the offset.
  void AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
                                           int32_t& offset,
                                           int& element_size_shift);

   1038  private:
   1039   // This will be used as an argument for loads/stores
   1040   // when there is no need for implicit null checks.
   1041   struct NoImplicitNullChecker {
   1042     void operator()() const {}
   1043   };
   1044 
   1045  public:
   1046   template <typename ImplicitNullChecker = NoImplicitNullChecker>
   1047   void StoreConstToOffset(StoreOperandType type,
   1048                           int64_t value,
   1049                           GpuRegister base,
   1050                           int32_t offset,
   1051                           GpuRegister temp,
   1052                           ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
   1053     // We permit `base` and `temp` to coincide (however, we check that neither is AT),
   1054     // in which case the `base` register may be overwritten in the process.
   1055     CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
   1056     AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
   1057     GpuRegister reg;
   1058     // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp`
   1059     // to load and hold the value, but we can use AT instead, as AT hasn't been used yet.
   1060     // Otherwise, `temp` can hold the value. If `temp` is the same as the original `base`
   1061     // (that is, `base` prior to the adjustment), the original `base` register will be
   1062     // overwritten.
   1063     if (base == temp) {
   1064       temp = AT;
   1065     }
   1066 
   1067     if (type == kStoreDoubleword && IsAligned<kMips64DoublewordSize>(offset)) {
   1068       if (value == 0) {
   1069         reg = ZERO;
   1070       } else {
   1071         reg = temp;
   1072         LoadConst64(reg, value);
   1073       }
   1074       Sd(reg, base, offset);
   1075       null_checker();
   1076     } else {
   1077       uint32_t low = Low32Bits(value);
   1078       uint32_t high = High32Bits(value);
   1079       if (low == 0) {
   1080         reg = ZERO;
   1081       } else {
   1082         reg = temp;
   1083         LoadConst32(reg, low);
   1084       }
   1085       switch (type) {
   1086         case kStoreByte:
   1087           Sb(reg, base, offset);
   1088           break;
   1089         case kStoreHalfword:
   1090           Sh(reg, base, offset);
   1091           break;
   1092         case kStoreWord:
   1093           Sw(reg, base, offset);
   1094           break;
   1095         case kStoreDoubleword:
   1096           // Offset is only word-aligned: store the doubleword as two words.
   1097           CHECK_ALIGNED(offset, kMips64WordSize);
   1098           Sw(reg, base, offset);
   1099           null_checker();
   1100           if (high == 0) {
   1101             reg = ZERO;
   1102           } else {
   1103             reg = temp;
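                    // If the high word equals the low word, `temp` already holds the
                    // value from the low-word store; avoid reloading it.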
   1104             if (high != low) {
   1105               LoadConst32(reg, high);
   1106             }
   1107           }
   1108           Sw(reg, base, offset + kMips64WordSize);
   1109           break;
   1110         default:
   1111           LOG(FATAL) << "UNREACHABLE";
   1112       }
   1113       if (type != kStoreDoubleword) {
   1114         null_checker();
   1115       }
   1116     }
   1117   }
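          // Illustrative (hypothetical) use: zeroing an aligned doubleword field, e.g.
          //   StoreConstToOffset(kStoreDoubleword, 0, A0, 16, T8);
          // stores ZERO directly and leaves the temporary (T8) untouched.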
   1118 
   1119   template <typename ImplicitNullChecker = NoImplicitNullChecker>
   1120   void LoadFromOffset(LoadOperandType type,
   1121                       GpuRegister reg,
   1122                       GpuRegister base,
   1123                       int32_t offset,
   1124                       ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
   1125     AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
   1126 
   1127     switch (type) {
   1128       case kLoadSignedByte:
   1129         Lb(reg, base, offset);
   1130         break;
   1131       case kLoadUnsignedByte:
   1132         Lbu(reg, base, offset);
   1133         break;
   1134       case kLoadSignedHalfword:
   1135         Lh(reg, base, offset);
   1136         break;
   1137       case kLoadUnsignedHalfword:
   1138         Lhu(reg, base, offset);
   1139         break;
   1140       case kLoadWord:
   1141         CHECK_ALIGNED(offset, kMips64WordSize);
   1142         Lw(reg, base, offset);
   1143         break;
   1144       case kLoadUnsignedWord:
   1145         CHECK_ALIGNED(offset, kMips64WordSize);
   1146         Lwu(reg, base, offset);
   1147         break;
   1148       case kLoadDoubleword:
   1149         if (!IsAligned<kMips64DoublewordSize>(offset)) {
   1150           CHECK_ALIGNED(offset, kMips64WordSize);
   1151           Lwu(reg, base, offset);
   1152           null_checker();
   1153           Lwu(TMP2, base, offset + kMips64WordSize);
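                  // Dinsu inserts TMP2's low 32 bits into bits 63..32 of `reg`,
                  // merging the two halves into one doubleword.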
   1154           Dinsu(reg, TMP2, 32, 32);
   1155         } else {
   1156           Ld(reg, base, offset);
   1157           null_checker();
   1158         }
   1159         break;
   1160       default:
   1161         LOG(FATAL) << "UNREACHABLE";
   1162     }
   1163     if (type != kLoadDoubleword) {
   1164       null_checker();
   1165     }
   1166   }
   1167 
   1168   template <typename ImplicitNullChecker = NoImplicitNullChecker>
   1169   void LoadFpuFromOffset(LoadOperandType type,
   1170                          FpuRegister reg,
   1171                          GpuRegister base,
   1172                          int32_t offset,
   1173                          ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
   1174     int element_size_shift = -1;
   1175     if (type != kLoadQuadword) {
   1176       AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
   1177     } else {
   1178       AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
   1179     }
   1180 
   1181     switch (type) {
   1182       case kLoadWord:
   1183         CHECK_ALIGNED(offset, kMips64WordSize);
   1184         Lwc1(reg, base, offset);
   1185         null_checker();
   1186         break;
   1187       case kLoadDoubleword:
   1188         if (!IsAligned<kMips64DoublewordSize>(offset)) {
   1189           CHECK_ALIGNED(offset, kMips64WordSize);
   1190           Lwc1(reg, base, offset);
   1191           null_checker();
   1192           Lw(TMP2, base, offset + kMips64WordSize);
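                  // Mthc1 writes TMP2 into bits 63..32 of the FPR, completing the doubleword.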
   1193           Mthc1(TMP2, reg);
   1194         } else {
   1195           Ldc1(reg, base, offset);
   1196           null_checker();
   1197         }
   1198         break;
   1199       case kLoadQuadword:
   1200         switch (element_size_shift) {
   1201           case TIMES_1: LdB(static_cast<VectorRegister>(reg), base, offset); break;
   1202           case TIMES_2: LdH(static_cast<VectorRegister>(reg), base, offset); break;
   1203           case TIMES_4: LdW(static_cast<VectorRegister>(reg), base, offset); break;
   1204           case TIMES_8: LdD(static_cast<VectorRegister>(reg), base, offset); break;
   1205           default:
   1206             LOG(FATAL) << "UNREACHABLE";
   1207         }
   1208         null_checker();
   1209         break;
   1210       default:
   1211         LOG(FATAL) << "UNREACHABLE";
   1212     }
   1213   }
   1214 
   1215   template <typename ImplicitNullChecker = NoImplicitNullChecker>
   1216   void StoreToOffset(StoreOperandType type,
   1217                      GpuRegister reg,
   1218                      GpuRegister base,
   1219                      int32_t offset,
   1220                      ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
   1221     // Must not use AT as `reg`, so as not to overwrite the value being stored
   1222     // with the adjusted `base`.
   1223     CHECK_NE(reg, AT);
   1224     AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
   1225 
   1226     switch (type) {
   1227       case kStoreByte:
   1228         Sb(reg, base, offset);
   1229         break;
   1230       case kStoreHalfword:
   1231         Sh(reg, base, offset);
   1232         break;
   1233       case kStoreWord:
   1234         CHECK_ALIGNED(offset, kMips64WordSize);
   1235         Sw(reg, base, offset);
   1236         break;
   1237       case kStoreDoubleword:
   1238         if (!IsAligned<kMips64DoublewordSize>(offset)) {
   1239           CHECK_ALIGNED(offset, kMips64WordSize);
   1240           Sw(reg, base, offset);
   1241           null_checker();
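                  // Dsrl32 shifts right logically by (32 + shamt) bits; with shamt == 0 it
                  // extracts the high word of `reg` into TMP2.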
   1242           Dsrl32(TMP2, reg, 0);
   1243           Sw(TMP2, base, offset + kMips64WordSize);
   1244         } else {
   1245           Sd(reg, base, offset);
   1246           null_checker();
   1247         }
   1248         break;
   1249       default:
   1250         LOG(FATAL) << "UNREACHABLE";
   1251     }
   1252     if (type != kStoreDoubleword) {
   1253       null_checker();
   1254     }
   1255   }
   1256 
   1257   template <typename ImplicitNullChecker = NoImplicitNullChecker>
   1258   void StoreFpuToOffset(StoreOperandType type,
   1259                         FpuRegister reg,
   1260                         GpuRegister base,
   1261                         int32_t offset,
   1262                         ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
   1263     int element_size_shift = -1;
   1264     if (type != kStoreQuadword) {
   1265       AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
   1266     } else {
   1267       AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
   1268     }
   1269 
   1270     switch (type) {
   1271       case kStoreWord:
   1272         CHECK_ALIGNED(offset, kMips64WordSize);
   1273         Swc1(reg, base, offset);
   1274         null_checker();
   1275         break;
   1276       case kStoreDoubleword:
   1277         if (!IsAligned<kMips64DoublewordSize>(offset)) {
   1278           CHECK_ALIGNED(offset, kMips64WordSize);
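                  // Mfhc1 copies bits 63..32 of the FPR into TMP2; the two words are then
                  // stored separately.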
   1279           Mfhc1(TMP2, reg);
   1280           Swc1(reg, base, offset);
   1281           null_checker();
   1282           Sw(TMP2, base, offset + kMips64WordSize);
   1283         } else {
   1284           Sdc1(reg, base, offset);
   1285           null_checker();
   1286         }
   1287         break;
   1288       case kStoreQuadword:
   1289         switch (element_size_shift) {
   1290           case TIMES_1: StB(static_cast<VectorRegister>(reg), base, offset); break;
   1291           case TIMES_2: StH(static_cast<VectorRegister>(reg), base, offset); break;
   1292           case TIMES_4: StW(static_cast<VectorRegister>(reg), base, offset); break;
   1293           case TIMES_8: StD(static_cast<VectorRegister>(reg), base, offset); break;
   1294           default:
   1295             LOG(FATAL) << "UNREACHABLE";
   1296         }
   1297         null_checker();
   1298         break;
   1299       default:
   1300         LOG(FATAL) << "UNREACHABLE";
   1301     }
   1302   }
   1303 
   1304   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
   1305   void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
   1306   void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
   1307   void StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
   1308 
   1309   // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
   1310   void Emit(uint32_t value);
   1311 
   1312   //
   1313   // Overridden common assembler high-level functionality.
   1314   //
   1315 
   1316   // Emit code that will create an activation on the stack.
   1317   void BuildFrame(size_t frame_size,
   1318                   ManagedRegister method_reg,
   1319                   ArrayRef<const ManagedRegister> callee_save_regs,
   1320                   const ManagedRegisterEntrySpills& entry_spills) OVERRIDE;
   1321 
   1322   // Emit code that will remove an activation from the stack.
   1323   void RemoveFrame(size_t frame_size,
   1324                    ArrayRef<const ManagedRegister> callee_save_regs,
   1325                    bool may_suspend) OVERRIDE;
   1326 
   1327   void IncreaseFrameSize(size_t adjust) OVERRIDE;
   1328   void DecreaseFrameSize(size_t adjust) OVERRIDE;
   1329 
   1330   // Store routines.
   1331   void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE;
   1332   void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
   1333   void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE;
   1334 
   1335   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE;
   1336 
   1337   void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
   1338                                 FrameOffset fr_offs,
   1339                                 ManagedRegister mscratch) OVERRIDE;
   1340 
   1341   void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE;
   1342 
   1343   void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off,
   1344                      ManagedRegister mscratch) OVERRIDE;
   1345 
   1346   // Load routines.
   1347   void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE;
   1348 
   1349   void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE;
   1350 
   1351   void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE;
   1352 
   1353   void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs,
   1354                bool unpoison_reference) OVERRIDE;
   1355 
   1356   void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE;
   1357 
   1358   void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE;
   1359 
   1360   // Copying routines.
   1361   void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE;
   1362 
   1363   void CopyRawPtrFromThread(FrameOffset fr_offs,
   1364                             ThreadOffset64 thr_offs,
   1365                             ManagedRegister mscratch) OVERRIDE;
   1366 
   1367   void CopyRawPtrToThread(ThreadOffset64 thr_offs,
   1368                           FrameOffset fr_offs,
   1369                           ManagedRegister mscratch) OVERRIDE;
   1370 
   1371   void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE;
   1372 
   1373   void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE;
   1374 
   1375   void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch,
   1376             size_t size) OVERRIDE;
   1377 
   1378   void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src,
   1379             ManagedRegister mscratch, size_t size) OVERRIDE;
   1380 
   1381   void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch,
   1382             size_t size) OVERRIDE;
   1383 
   1384   void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset,
   1385             ManagedRegister mscratch, size_t size) OVERRIDE;
   1386 
   1387   void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset,
   1388             ManagedRegister mscratch, size_t size) OVERRIDE;
   1389 
   1390   void MemoryBarrier(ManagedRegister) OVERRIDE;
   1391 
   1392   // Sign extension.
   1393   void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE;
   1394 
   1395   // Zero extension.
   1396   void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE;
   1397 
   1398   // Exploit fast access in managed code to Thread::Current().
   1399   void GetCurrentThread(ManagedRegister tr) OVERRIDE;
   1400   void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE;
   1401 
   1402   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   1403   // value is null and null_allowed. in_reg holds a possibly stale reference
   1404   // that can be used to avoid loading the handle scope entry to see if the value is
   1405   // null.
   1406   void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
   1407                               ManagedRegister in_reg, bool null_allowed) OVERRIDE;
   1408 
   1409   // Set up out_off to hold an Object** into the handle scope, or to be null if the
   1410   // value is null and null_allowed.
   1411   void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister
   1412                               mscratch, bool null_allowed) OVERRIDE;
   1413 
   1414   // src holds a handle scope entry (Object**); load it into dst.
   1415   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE;
   1416 
   1417   // Heap::VerifyObject on src. In some cases (such as a reference to this) we
   1418   // know that src cannot be null.
   1419   void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE;
   1420   void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE;
   1421 
   1422   // Call to address held at [base+offset].
   1423   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   1424   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE;
   1425   void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE;
   1426 
   1427   // Generate code to check if Thread::Current()->exception_ is non-null
   1428   // and branch to an ExceptionSlowPath if it is.
   1429   void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE;
   1430 
   1431   // Emit slow paths queued during assembly and promote short branches to long if needed.
   1432   void FinalizeCode() OVERRIDE;
   1433 
   1434   // Emit branches and finalize all instructions.
   1435   void FinalizeInstructions(const MemoryRegion& region);
   1436 
   1437   // Returns the (always-)current location of a label. It can be used in class
   1438   // CodeGeneratorMIPS64 and must be used instead of Mips64Label::GetPosition().
   1439   uint32_t GetLabelLocation(const Mips64Label* label) const;
   1440 
   1441   // Get the final position of a label after local fixup based on the old position
   1442   // recorded before FinalizeCode().
   1443   uint32_t GetAdjustedPosition(uint32_t old_position);
   1444 
   1445   // Note that PC-relative literal loads are handled as pseudo branches because they need very
   1446   // similar relocation and may similarly expand in size to accommodate larger offsets relative
   1447   // to PC.
   1448   enum BranchCondition {
   1449     kCondLT,
   1450     kCondGE,
   1451     kCondLE,
   1452     kCondGT,
   1453     kCondLTZ,
   1454     kCondGEZ,
   1455     kCondLEZ,
   1456     kCondGTZ,
   1457     kCondEQ,
   1458     kCondNE,
   1459     kCondEQZ,
   1460     kCondNEZ,
   1461     kCondLTU,
   1462     kCondGEU,
   1463     kCondF,    // Floating-point predicate false.
   1464     kCondT,    // Floating-point predicate true.
   1465     kUncond,
   1466   };
   1467   friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);
   1468 
   1469  private:
   1470   class Branch {
   1471    public:
   1472     enum Type {
   1473       // R6 short branches (can be promoted to long).
   1474       kUncondBranch,
   1475       kCondBranch,
   1476       kCall,
   1477       // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
   1478       kBareUncondBranch,
   1479       kBareCondBranch,
   1480       kBareCall,
   1481       // R2 short branches (can't be promoted to long), delay slots filled manually.
   1482       kR2BareCondBranch,
   1483       // Near label.
   1484       kLabel,
   1485       // Near literals.
   1486       kLiteral,
   1487       kLiteralUnsigned,
   1488       kLiteralLong,
   1489       // Long branches.
   1490       kLongUncondBranch,
   1491       kLongCondBranch,
   1492       kLongCall,
   1493       // Far label.
   1494       kFarLabel,
   1495       // Far literals.
   1496       kFarLiteral,
   1497       kFarLiteralUnsigned,
   1498       kFarLiteralLong,
   1499     };
   1500 
   1501     // Bit sizes of offsets defined as enums to minimize chance of typos.
   1502     enum OffsetBits {
   1503       kOffset16 = 16,
   1504       kOffset18 = 18,
   1505       kOffset21 = 21,
   1506       kOffset23 = 23,
   1507       kOffset28 = 28,
   1508       kOffset32 = 32,
   1509     };
   1510 
   1511     static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_
   1512     static constexpr int32_t kMaxBranchLength = 32;
   1513     static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);
   1514 
   1515     struct BranchInfo {
   1516       // Branch length as a number of 4-byte-long instructions.
   1517       uint32_t length;
   1518       // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
   1519       // PC-relative offset (or its most significant 16-bit half, which goes first).
   1520       uint32_t instr_offset;
   1521       // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
   1522       // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
   1523       // instructions) from the instruction containing the offset.
   1524       uint32_t pc_org;
   1525       // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch
   1526       // and kBareCondBranch are an exception: use kOffset23 for beqzc/bnezc).
   1527       OffsetBits offset_size;
   1528       // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
   1529       // count.
   1530       int offset_shift;
   1531     };
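            // For instance, a plain R6 conditional branch (16-bit offset, shifted left
            // by 2 at run time) would be described with offset_size == kOffset18 and
            // offset_shift == 2.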
   1532     static const BranchInfo branch_info_[/* Type */];
   1533 
   1534     // Unconditional branch or call.
   1535     Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare);
   1536     // Conditional branch.
   1537     Branch(bool is_r6,
   1538            uint32_t location,
   1539            uint32_t target,
   1540            BranchCondition condition,
   1541            GpuRegister lhs_reg,
   1542            GpuRegister rhs_reg,
   1543            bool is_bare);
   1544     // Label address (in literal area) or literal.
   1545     Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type);
   1546 
   1547     // Some conditional branches with lhs == rhs are effectively NOPs, while others are
   1548     // effectively unconditional. MIPS R6 conditional branches require lhs != rhs, so we
   1549     // need a way to identify such branches in order to emit no instructions for them or
   1550     // to change them to unconditional branches.
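            // For example (on R6), Bltc(reg, reg) can never be taken (a NOP), while
            // Bgec(reg, reg) is always taken (effectively unconditional).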
   1551     static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
   1552     static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
   1553 
   1554     static BranchCondition OppositeCondition(BranchCondition cond);
   1555 
   1556     Type GetType() const;
   1557     BranchCondition GetCondition() const;
   1558     GpuRegister GetLeftRegister() const;
   1559     GpuRegister GetRightRegister() const;
   1560     uint32_t GetTarget() const;
   1561     uint32_t GetLocation() const;
   1562     uint32_t GetOldLocation() const;
   1563     uint32_t GetLength() const;
   1564     uint32_t GetOldLength() const;
   1565     uint32_t GetSize() const;
   1566     uint32_t GetOldSize() const;
   1567     uint32_t GetEndLocation() const;
   1568     uint32_t GetOldEndLocation() const;
   1569     bool IsBare() const;
   1570     bool IsLong() const;
   1571     bool IsResolved() const;
   1572 
   1573     // Returns the bit size of the signed offset that the branch instruction can handle.
   1574     OffsetBits GetOffsetSize() const;
   1575 
   1576     // Calculates the distance between two byte locations in the assembler buffer and
   1577     // returns the number of bits needed to represent the distance as a signed integer.
   1578     //
   1579     // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
   1580     // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
   1581     //
   1582     // Composite branches (made of several instructions) with longer reach have 32-bit
   1583     // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
   1584     // The composite branches cover a range of roughly PC +/- 2GB. The range is not end-to-end,
   1585     // however. Consider the following implementation of a long unconditional branch, for
   1586     // example:
   1587     //
   1588     //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
   1589     //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
   1590     //
   1591     // Both of the above instructions take 16-bit signed offsets as immediate operands.
   1592     // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
   1593     // due to sign extension. This must be compensated for by incrementing offset_31_16
   1594     // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
   1595     // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
   1596     // Therefore, the long branch range is approximately from PC - 0x80000000 to
   1597     // PC + 0x7FFF7FFF; in other words, it is shorter by 32KB on the positive side.
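            //
            // For example, to reach a target at PC + 0x18000: offset_15_0 = 0x8000
            // sign-extends to -0x8000, so offset_31_16 must be 0x0002 (not 0x0001),
            // since (0x0002 << 16) + (-0x8000) = +0x18000.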
   1598     //
   1599     // The returned values are therefore 18, 21, 23, 28 and 32. There's also a special
   1600     // case with the addiu instruction and a 16-bit offset.
   1601     static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);
   1602 
   1603     // Resolve a branch when the target is known.
   1604     void Resolve(uint32_t target);
   1605 
   1606     // Relocate a branch by a given delta if needed due to expansion of this or another
   1607     // branch at a given location by this delta (just changes location_ and target_).
   1608     void Relocate(uint32_t expand_location, uint32_t delta);
   1609 
   1610     // If the branch is short, changes its type to long.
   1611     void PromoteToLong();
   1612 
   1613     // If necessary, updates the type by promoting a short branch to a long branch
   1614     // based on the branch location and target. Returns the amount (in bytes) by
   1615     // which the branch size has increased.
   1616     // max_short_distance caps the maximum distance between location_ and target_
   1617     // that is allowed for short branches. This is for debugging/testing purposes.
   1618     // max_short_distance = 0 forces all short branches to become long.
   1619     // Use the implicit default argument when not debugging/testing.
   1620     uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());
   1621 
   1622     // Returns the location of the instruction(s) containing the offset.
   1623     uint32_t GetOffsetLocation() const;
   1624 
   1625     // Calculates and returns the offset ready for encoding in the branch instruction(s).
   1626     uint32_t GetOffset() const;
   1627 
   1628    private:
   1629     // Completes branch construction by determining and recording its type.
   1630     void InitializeType(Type initial_type, bool is_r6);
   1631     // Helper for the above.
   1632     void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
   1633 
   1634     uint32_t old_location_;      // Offset into assembler buffer in bytes.
   1635     uint32_t location_;          // Offset into assembler buffer in bytes.
   1636     uint32_t target_;            // Offset into assembler buffer in bytes.
   1637 
   1638     GpuRegister lhs_reg_;        // Left-hand side register in conditional branches or
   1639                                  // destination register in literals.
   1640     GpuRegister rhs_reg_;        // Right-hand side register in conditional branches.
   1641     BranchCondition condition_;  // Condition for conditional branches.
   1642 
   1643     Type type_;                  // Current type of the branch.
   1644     Type old_type_;              // Initial type of the branch.
   1645   };
   1646   friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
   1647   friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
   1648 
   1649   void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   1650   void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct);
   1651   void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct);
   1652   void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
   1653   void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
   1654   void EmitI26(int opcode, uint32_t imm26);
   1655   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
   1656   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
   1657   void EmitBcondR6(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
   1658   void EmitBcondR2(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint16_t imm16);
   1659   void EmitMsa3R(int operation,
   1660                  int df,
   1661                  VectorRegister wt,
   1662                  VectorRegister ws,
   1663                  VectorRegister wd,
   1664                  int minor_opcode);
   1665   void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode);
   1666   void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode);
   1667   void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df);
   1668   void EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode);
   1669   void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
   1670   void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
   1671 
   1672   void Buncond(Mips64Label* label, bool is_bare);
   1673   void Bcond(Mips64Label* label,
   1674              bool is_r6,
   1675              bool is_bare,
   1676              BranchCondition condition,
   1677              GpuRegister lhs,
   1678              GpuRegister rhs = ZERO);
   1679   void Call(Mips64Label* label, bool is_bare);
   1680   void FinalizeLabeledBranch(Mips64Label* label);
   1681 
   1682   Branch* GetBranch(uint32_t branch_id);
   1683   const Branch* GetBranch(uint32_t branch_id) const;
   1684 
   1685   void EmitLiterals();
   1686   void ReserveJumpTableSpace();
   1687   void EmitJumpTables();
   1688   void PromoteBranches();
   1689   void EmitBranch(Branch* branch);
   1690   void EmitBranches();
   1691   void PatchCFI();
   1692 
   1693   // Emits exception block.
   1694   void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
   1695 
   1696   bool HasMsa() const {
   1697     return has_msa_;
   1698   }
   1699 
   1700   // List of exception blocks to generate at the end of the code cache.
   1701   std::vector<Mips64ExceptionSlowPath> exception_blocks_;
   1702 
   1703   std::vector<Branch> branches_;
   1704 
   1705   // Whether we are appending instructions at the end of the buffer or overwriting existing ones.
   1706   bool overwriting_;
   1707   // The current overwrite location.
   1708   uint32_t overwrite_location_;
   1709 
   1710   // Use an ArenaDeque<> (a std::deque<> with arena allocation) for literal labels to allow
   1711   // insertions at the end without invalidating pointers and references to existing elements.
   1712   ArenaDeque<Literal> literals_;
   1713   ArenaDeque<Literal> long_literals_;  // 64-bit literals separated for alignment reasons.
   1714 
   1715   // Jump table list.
   1716   ArenaDeque<JumpTable> jump_tables_;
   1717 
   1718   // Data for GetAdjustedPosition(); see the description there.
   1719   uint32_t last_position_adjustment_;
   1720   uint32_t last_old_position_;
   1721   uint32_t last_branch_id_;
   1722 
   1723   const bool has_msa_;
   1724 
   1725   DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
   1726 };
   1727 
   1728 }  // namespace mips64
   1729 }  // namespace art
   1730 
   1731 #endif  // ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
   1732