/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_
#define ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_

#include <deque>
#include <utility>
#include <vector>

#include "arch/mips64/instruction_set_features_mips64.h"
#include "base/arena_containers.h"
#include "base/enums.h"
#include "base/macros.h"
#include "base/stl_util_identity.h"
#include "constants_mips64.h"
#include "globals.h"
#include "heap_poisoning.h"
#include "managed_register_mips64.h"
#include "offsets.h"
#include "utils/assembler.h"
#include "utils/jni_macro_assembler.h"
#include "utils/label.h"

namespace art {
namespace mips64 {

// Bit flags identifying which instruction-selection path TemplateLoadConst64()
// took when materializing a 64-bit constant. Each branch reports its flag via
// a->RecordLoadConst64Path(); kLoadConst64PathAllPaths is the union of all
// individual path bits.
enum LoadConst64Path {
  kLoadConst64PathZero = 0x0,
  kLoadConst64PathOri = 0x1,
  kLoadConst64PathDaddiu = 0x2,
  kLoadConst64PathLui = 0x4,
  kLoadConst64PathLuiOri = 0x8,
  kLoadConst64PathOriDahi = 0x10,
  kLoadConst64PathOriDati = 0x20,
  kLoadConst64PathLuiDahi = 0x40,
  kLoadConst64PathLuiDati = 0x80,
  kLoadConst64PathDaddiuDsrlX = 0x100,
  kLoadConst64PathOriDsllX = 0x200,
  kLoadConst64PathDaddiuDsllX = 0x400,
  kLoadConst64PathLuiOriDsllX = 0x800,
  kLoadConst64PathOriDsllXOri = 0x1000,
  kLoadConst64PathDaddiuDsllXOri = 0x2000,
  kLoadConst64PathDaddiuDahi = 0x4000,
  kLoadConst64PathDaddiuDati = 0x8000,
  kLoadConst64PathDinsu1 = 0x10000,
  kLoadConst64PathDinsu2 = 0x20000,
  kLoadConst64PathCatchAll = 0x40000,
  kLoadConst64PathAllPaths = 0x7ffff,
};

// Emits a minimal (1-2 instruction) sequence loading the 32-bit constant
// |value| into GPR |rd|, choosing among Ori/Addiu/Lui(+Ori) based on the
// constant's bit pattern.
template <typename Asm>
void TemplateLoadConst32(Asm* a, GpuRegister rd, int32_t value) {
  if (IsUint<16>(value)) {
    // Use OR with (unsigned) immediate to encode 16b unsigned int.
    a->Ori(rd, ZERO, value);
  } else if (IsInt<16>(value)) {
    // Use ADD with (signed) immediate to encode 16b signed int.
    a->Addiu(rd, ZERO, value);
  } else {
    // Set 16 most significant bits of value. The "lui" instruction
    // also clears the 16 least significant bits to zero.
    a->Lui(rd, value >> 16);
    if (value & 0xFFFF) {
      // If the 16 least significant bits are non-zero, set them
      // here.
      a->Ori(rd, rd, value);
    }
  }
}

// Returns the instruction count for loading |value| via the "replicate a
// 32-bit pattern" strategy: when the low and high 32-bit halves are equal,
// the cost is a 1- or 2-instruction 32-bit load plus one Dinsu (2 or 3
// total). Returns INT_MAX when the halves differ, i.e. the strategy does
// not apply.
static inline int InstrCountForLoadReplicatedConst32(int64_t value) {
  int32_t x = Low32Bits(value);
  int32_t y = High32Bits(value);

  if (x == y) {
    return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0)) ? 2 : 3;
  }

  return INT_MAX;
}

// Emits a near-minimal instruction sequence loading the 64-bit constant
// |value| into register |rd|, trying progressively longer sequences
// (1, 2, then 3-4 instructions). The chosen path is reported through
// a->RecordLoadConst64Path().
//
// |bit31| below is 1 when bit 31 of |value| is set. Several branches add
// bit31 to the Dahi/Dati immediates to compensate for the sign extension
// that the preceding 32-bit load (Lui/Daddiu/Ori+Lui) performs into the
// upper bits; the range checks on (value >> 32) are widened/narrowed by
// bit31 for the same reason.
template <typename Asm, typename Rtype, typename Vtype>
void TemplateLoadConst64(Asm* a, Rtype rd, Vtype value) {
  int bit31 = (value & UINT64_C(0x80000000)) != 0;
  int rep32_count = InstrCountForLoadReplicatedConst32(value);

  // Loads with 1 instruction.
  if (IsUint<16>(value)) {
    // 64-bit value can be loaded as an unsigned 16-bit number.
    a->RecordLoadConst64Path(kLoadConst64PathOri);
    a->Ori(rd, ZERO, value);
  } else if (IsInt<16>(value)) {
    // 64-bit value can be loaded as a signed 16-bit number.
    a->RecordLoadConst64Path(kLoadConst64PathDaddiu);
    a->Daddiu(rd, ZERO, value);
  } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) {
    // 64-bit value can be loaded as a signed 32-bit number which has all
    // of its 16 least significant bits set to zero.
    a->RecordLoadConst64Path(kLoadConst64PathLui);
    a->Lui(rd, value >> 16);
  } else if (IsInt<32>(value)) {
    // Loads with 2 instructions.
    // 64-bit value can be loaded as a signed 32-bit number which has some
    // or all of its 16 least significant bits set to one.
    a->RecordLoadConst64Path(kLoadConst64PathLuiOri);
    a->Lui(rd, value >> 16);
    a->Ori(rd, rd, value);
  } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) {
    // 64-bit value which consists of an unsigned 16-bit value in its
    // least significant 32-bits, and a signed 16-bit value in its
    // most significant 32-bits.
    a->RecordLoadConst64Path(kLoadConst64PathOriDahi);
    a->Ori(rd, ZERO, value);
    a->Dahi(rd, value >> 32);
  } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) {
    // 64-bit value which consists of an unsigned 16-bit value in its
    // least significant 48-bits, and a signed 16-bit value in its
    // most significant 16-bits.
    a->RecordLoadConst64Path(kLoadConst64PathOriDati);
    a->Ori(rd, ZERO, value);
    a->Dati(rd, value >> 48);
  } else if ((value & 0xFFFF) == 0 &&
             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
    // 16 LSBs (Least Significant Bits) all set to zero.
    // 48 MSBs (Most Significant Bits) hold a signed 32-bit value.
    // Lui sign-extends into bits 32-63; adding bit31 to the Dahi
    // immediate undoes that extension.
    a->RecordLoadConst64Path(kLoadConst64PathLuiDahi);
    a->Lui(rd, value >> 16);
    a->Dahi(rd, (value >> 32) + bit31);
  } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
    // 16 LSBs all set to zero.
    // 48 MSBs hold a signed value which can't be represented by signed
    // 32-bit number, and the middle 16 bits are all zero, or all one.
    a->RecordLoadConst64Path(kLoadConst64PathLuiDati);
    a->Lui(rd, value >> 16);
    a->Dati(rd, (value >> 48) + bit31);
  } else if (IsInt<16>(static_cast<int32_t>(value)) &&
             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
    // 32 LSBs contain a (sign-extended) 16-bit number.
    // 32 MSBs contain a signed 16-bit number (after the bit31
    // sign-extension compensation applied to the Dahi immediate).
    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDahi);
    a->Daddiu(rd, ZERO, value);
    a->Dahi(rd, (value >> 32) + bit31);
  } else if (IsInt<16>(static_cast<int32_t>(value)) &&
             ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
    // 48 LSBs contain a (sign-extended) 16-bit number.
    // 16 MSBs contain a signed 16-bit number.
    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDati);
    a->Daddiu(rd, ZERO, value);
    a->Dati(rd, (value >> 48) + bit31);
  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
    // 64-bit values which have their "n" MSBs set to one, and their
    // "64-n" LSBs set to zero. "n" must meet the restrictions 0 < n < 64.
    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsrlX);
    a->Daddiu(rd, ZERO, -1);
    if (shift_cnt < 32) {
      a->Dsrl(rd, rd, shift_cnt);
    } else {
      a->Dsrl32(rd, rd, shift_cnt & 31);
    }
  } else {
    int shift_cnt = CTZ(value);
    int64_t tmp = value >> shift_cnt;
    // NOTE: the OriDsllX path bit is recorded up front for this whole
    // else-branch; the more specific sub-cases below record an
    // additional, more precise path bit of their own.
    a->RecordLoadConst64Path(kLoadConst64PathOriDsllX);
    if (IsUint<16>(tmp)) {
      // Value can be computed by loading a 16-bit unsigned value, and
      // then shifting left.
      a->Ori(rd, ZERO, tmp);
      if (shift_cnt < 32) {
        a->Dsll(rd, rd, shift_cnt);
      } else {
        a->Dsll32(rd, rd, shift_cnt & 31);
      }
    } else if (IsInt<16>(tmp)) {
      // Value can be computed by loading a 16-bit signed value, and
      // then shifting left.
      a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllX);
      a->Daddiu(rd, ZERO, tmp);
      if (shift_cnt < 32) {
        a->Dsll(rd, rd, shift_cnt);
      } else {
        a->Dsll32(rd, rd, shift_cnt & 31);
      }
    } else if (rep32_count < 3) {
      // Value being loaded has 32 LSBs equal to the 32 MSBs, and the
      // value loaded into the 32 LSBs can be loaded with a single
      // MIPS instruction.
      a->LoadConst32(rd, value);
      a->Dinsu(rd, rd, 32, 32);
      a->RecordLoadConst64Path(kLoadConst64PathDinsu1);
    } else if (IsInt<32>(tmp)) {
      // Loads with 3 instructions.
      // Value can be computed by loading a 32-bit signed value, and
      // then shifting left.
      a->RecordLoadConst64Path(kLoadConst64PathLuiOriDsllX);
      a->Lui(rd, tmp >> 16);
      a->Ori(rd, rd, tmp);
      if (shift_cnt < 32) {
        a->Dsll(rd, rd, shift_cnt);
      } else {
        a->Dsll32(rd, rd, shift_cnt & 31);
      }
    } else {
      // Retry with the shift amount measured above the low 16 bits, so
      // a final Ori can fill those bits in after the shift.
      shift_cnt = 16 + CTZ(value >> 16);
      tmp = value >> shift_cnt;
      if (IsUint<16>(tmp)) {
        // Value can be computed by loading a 16-bit unsigned value,
        // shifting left, and "or"ing in another 16-bit unsigned value.
        a->RecordLoadConst64Path(kLoadConst64PathOriDsllXOri);
        a->Ori(rd, ZERO, tmp);
        if (shift_cnt < 32) {
          a->Dsll(rd, rd, shift_cnt);
        } else {
          a->Dsll32(rd, rd, shift_cnt & 31);
        }
        a->Ori(rd, rd, value);
      } else if (IsInt<16>(tmp)) {
        // Value can be computed by loading a 16-bit signed value,
        // shifting left, and "or"ing in a 16-bit unsigned value.
        a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllXOri);
        a->Daddiu(rd, ZERO, tmp);
        if (shift_cnt < 32) {
          a->Dsll(rd, rd, shift_cnt);
        } else {
          a->Dsll32(rd, rd, shift_cnt & 31);
        }
        a->Ori(rd, rd, value);
      } else if (rep32_count < 4) {
        // Value being loaded has 32 LSBs equal to the 32 MSBs, and the
        // value in the 32 LSBs requires 2 MIPS instructions to load.
        a->LoadConst32(rd, value);
        a->Dinsu(rd, rd, 32, 32);
        a->RecordLoadConst64Path(kLoadConst64PathDinsu2);
      } else {
        // Loads with 3-4 instructions.
        // Catch-all case to get any other 64-bit values which aren't
        // handled by special cases above.
        // Pre-biasing tmp2 by bit31 (and later by bit 47) cancels the
        // sign extension performed by LoadConst32 (and by Dahi).
        uint64_t tmp2 = value;
        a->RecordLoadConst64Path(kLoadConst64PathCatchAll);
        a->LoadConst32(rd, value);
        if (bit31) {
          tmp2 += UINT64_C(0x100000000);
        }
        if (((tmp2 >> 32) & 0xFFFF) != 0) {
          a->Dahi(rd, tmp2 >> 32);
        }
        if (tmp2 & UINT64_C(0x800000000000)) {
          tmp2 += UINT64_C(0x1000000000000);
        }
        if ((tmp2 >> 48) != 0) {
          a->Dati(rd, tmp2 >> 48);
        }
      }
    }
  }
}

// Fundamental MIPS64 data sizes, in bytes.
static constexpr size_t kMips64HalfwordSize = 2;
static constexpr size_t kMips64WordSize = 4;
static constexpr size_t kMips64DoublewordSize = 8;

// Width/signedness selector for the assembler's load helpers.
enum LoadOperandType {
  kLoadSignedByte,
  kLoadUnsignedByte,
  kLoadSignedHalfword,
  kLoadUnsignedHalfword,
  kLoadWord,
  kLoadUnsignedWord,
  kLoadDoubleword,
  kLoadQuadword
};

// Width selector for the assembler's store helpers.
enum StoreOperandType {
  kStoreByte,
  kStoreHalfword,
  kStoreWord,
  kStoreDoubleword,
  kStoreQuadword
};

// Used to test the values returned by ClassS/ClassD.
enum FPClassMaskType {
  kSignalingNaN = 0x001,
  kQuietNaN = 0x002,
  kNegativeInfinity = 0x004,
  kNegativeNormal = 0x008,
  kNegativeSubnormal = 0x010,
  kNegativeZero = 0x020,
  kPositiveInfinity = 0x040,
  kPositiveNormal = 0x080,
  kPositiveSubnormal = 0x100,
  kPositiveZero = 0x200,
};

// Branch target label; additionally tracks the id of the branch that most
// recently referenced it (plus one, with 0 meaning "none") so the assembler
// can compute distances between branches. Movable but not copyable.
class Mips64Label : public Label {
 public:
  Mips64Label() : prev_branch_id_plus_one_(0) {}

  Mips64Label(Mips64Label&& src)
      : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}

 private:
  uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.

  friend class Mips64Assembler;
  DISALLOW_COPY_AND_ASSIGN(Mips64Label);
};

// Assembler literal is a value embedded in code, retrieved using a PC-relative load.
class Literal {
 public:
  // Largest supported literal payload (a 64-bit value).
  static constexpr size_t kMaxSize = 8;

  // Copies |size| bytes from |data| into the literal; |size| must not
  // exceed kMaxSize.
  Literal(uint32_t size, const uint8_t* data)
      : label_(), size_(size) {
    DCHECK_LE(size, Literal::kMaxSize);
    memcpy(data_, data, size);
  }

  // Reinterprets the stored bytes as a T; T's size must match the size
  // the literal was created with.
  template <typename T>
  T GetValue() const {
    DCHECK_EQ(size_, sizeof(T));
    T value;
    memcpy(&value, data_, sizeof(T));
    return value;
  }

  uint32_t GetSize() const {
    return size_;
  }

  const uint8_t* GetData() const {
    return data_;
  }

  // Label bound to the literal's location in the emitted code.
  Mips64Label* GetLabel() {
    return &label_;
  }

  const Mips64Label* GetLabel() const {
    return &label_;
  }

 private:
  Mips64Label label_;
  const uint32_t size_;
  uint8_t data_[kMaxSize];

  DISALLOW_COPY_AND_ASSIGN(Literal);
};

// Jump table: table of labels emitted after the code and before the literals. Similar to literals.
class JumpTable {
 public:
  explicit JumpTable(std::vector<Mips64Label*>&& labels)
      : label_(), labels_(std::move(labels)) {
  }

  // Size of the emitted table in bytes: one 32-bit entry per label.
  size_t GetSize() const {
    return labels_.size() * sizeof(uint32_t);
  }

  const std::vector<Mips64Label*>& GetData() const {
    return labels_;
  }

  // Label bound to the start of the table in the emitted code.
  Mips64Label* GetLabel() {
    return &label_;
  }

  const Mips64Label* GetLabel() const {
    return &label_;
  }

 private:
  Mips64Label label_;
  std::vector<Mips64Label*> labels_;

  DISALLOW_COPY_AND_ASSIGN(JumpTable);
};

// Slowpath entered when Thread::Current()->_exception is non-null.
397 class Mips64ExceptionSlowPath { 398 public: 399 explicit Mips64ExceptionSlowPath(Mips64ManagedRegister scratch, size_t stack_adjust) 400 : scratch_(scratch), stack_adjust_(stack_adjust) {} 401 402 Mips64ExceptionSlowPath(Mips64ExceptionSlowPath&& src) 403 : scratch_(src.scratch_), 404 stack_adjust_(src.stack_adjust_), 405 exception_entry_(std::move(src.exception_entry_)) {} 406 407 private: 408 Mips64Label* Entry() { return &exception_entry_; } 409 const Mips64ManagedRegister scratch_; 410 const size_t stack_adjust_; 411 Mips64Label exception_entry_; 412 413 friend class Mips64Assembler; 414 DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); 415 }; 416 417 class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> { 418 public: 419 using JNIBase = JNIMacroAssembler<PointerSize::k64>; 420 421 explicit Mips64Assembler(ArenaAllocator* allocator, 422 const Mips64InstructionSetFeatures* instruction_set_features = nullptr) 423 : Assembler(allocator), 424 overwriting_(false), 425 overwrite_location_(0), 426 literals_(allocator->Adapter(kArenaAllocAssembler)), 427 long_literals_(allocator->Adapter(kArenaAllocAssembler)), 428 jump_tables_(allocator->Adapter(kArenaAllocAssembler)), 429 last_position_adjustment_(0), 430 last_old_position_(0), 431 last_branch_id_(0), 432 has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false) { 433 cfi().DelayEmittingAdvancePCs(); 434 } 435 436 virtual ~Mips64Assembler() { 437 for (auto& branch : branches_) { 438 CHECK(branch.IsResolved()); 439 } 440 } 441 442 size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); } 443 DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } 444 445 // Emit Machine Instructions. 
446 void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); 447 void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); 448 void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 449 void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 450 void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt); 451 void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 452 453 void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); 454 void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); 455 void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); 456 void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); 457 void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); 458 void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt); 459 void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 460 void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 461 void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 462 void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 463 void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 464 void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt); // MIPS64 465 466 void And(GpuRegister rd, GpuRegister rs, GpuRegister rt); 467 void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16); 468 void Or(GpuRegister rd, GpuRegister rs, GpuRegister rt); 469 void Ori(GpuRegister rt, GpuRegister rs, uint16_t imm16); 470 void Xor(GpuRegister rd, GpuRegister rs, GpuRegister rt); 471 void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16); 472 void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt); 473 474 void Bitswap(GpuRegister rd, GpuRegister rt); 475 void Dbitswap(GpuRegister rd, GpuRegister rt); // MIPS64 476 void Seb(GpuRegister rd, GpuRegister rt); 477 void Seh(GpuRegister rd, GpuRegister rt); 478 void Dsbh(GpuRegister rd, GpuRegister rt); // MIPS64 479 void Dshd(GpuRegister rd, GpuRegister rt); // 
MIPS64 480 void Dext(GpuRegister rs, GpuRegister rt, int pos, int size); // MIPS64 481 void Ins(GpuRegister rt, GpuRegister rs, int pos, int size); 482 void Dins(GpuRegister rt, GpuRegister rs, int pos, int size); // MIPS64 483 void Dinsm(GpuRegister rt, GpuRegister rs, int pos, int size); // MIPS64 484 void Dinsu(GpuRegister rt, GpuRegister rs, int pos, int size); // MIPS64 485 void DblIns(GpuRegister rt, GpuRegister rs, int pos, int size); // MIPS64 486 void Lsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne); 487 void Dlsa(GpuRegister rd, GpuRegister rs, GpuRegister rt, int saPlusOne); // MIPS64 488 void Wsbh(GpuRegister rd, GpuRegister rt); 489 void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); 490 void Scd(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); // MIPS64 491 void Ll(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); 492 void Lld(GpuRegister rt, GpuRegister base, int16_t imm9 = 0); // MIPS64 493 494 void Sll(GpuRegister rd, GpuRegister rt, int shamt); 495 void Srl(GpuRegister rd, GpuRegister rt, int shamt); 496 void Rotr(GpuRegister rd, GpuRegister rt, int shamt); 497 void Sra(GpuRegister rd, GpuRegister rt, int shamt); 498 void Sllv(GpuRegister rd, GpuRegister rt, GpuRegister rs); 499 void Srlv(GpuRegister rd, GpuRegister rt, GpuRegister rs); 500 void Rotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs); 501 void Srav(GpuRegister rd, GpuRegister rt, GpuRegister rs); 502 void Dsll(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 503 void Dsrl(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 504 void Drotr(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 505 void Dsra(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 506 void Dsll32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 507 void Dsrl32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 508 void Drotr32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 509 void Dsra32(GpuRegister rd, GpuRegister rt, int shamt); // MIPS64 510 void 
Dsllv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 511 void Dsrlv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 512 void Drotrv(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 513 void Dsrav(GpuRegister rd, GpuRegister rt, GpuRegister rs); // MIPS64 514 515 void Lb(GpuRegister rt, GpuRegister rs, uint16_t imm16); 516 void Lh(GpuRegister rt, GpuRegister rs, uint16_t imm16); 517 void Lw(GpuRegister rt, GpuRegister rs, uint16_t imm16); 518 void Ld(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 519 void Lbu(GpuRegister rt, GpuRegister rs, uint16_t imm16); 520 void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16); 521 void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 522 void Lwpc(GpuRegister rs, uint32_t imm19); 523 void Lwupc(GpuRegister rs, uint32_t imm19); // MIPS64 524 void Ldpc(GpuRegister rs, uint32_t imm18); // MIPS64 525 void Lui(GpuRegister rt, uint16_t imm16); 526 void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16); 527 void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 528 void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64 529 void Dati(GpuRegister rs, uint16_t imm16); // MIPS64 530 void Sync(uint32_t stype); 531 532 void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16); 533 void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16); 534 void Sw(GpuRegister rt, GpuRegister rs, uint16_t imm16); 535 void Sd(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 536 537 void Slt(GpuRegister rd, GpuRegister rs, GpuRegister rt); 538 void Sltu(GpuRegister rd, GpuRegister rs, GpuRegister rt); 539 void Slti(GpuRegister rt, GpuRegister rs, uint16_t imm16); 540 void Sltiu(GpuRegister rt, GpuRegister rs, uint16_t imm16); 541 void Seleqz(GpuRegister rd, GpuRegister rs, GpuRegister rt); 542 void Selnez(GpuRegister rd, GpuRegister rs, GpuRegister rt); 543 void Clz(GpuRegister rd, GpuRegister rs); 544 void Clo(GpuRegister rd, GpuRegister rs); 545 void Dclz(GpuRegister rd, 
GpuRegister rs); // MIPS64 546 void Dclo(GpuRegister rd, GpuRegister rs); // MIPS64 547 548 void Jalr(GpuRegister rd, GpuRegister rs); 549 void Jalr(GpuRegister rs); 550 void Jr(GpuRegister rs); 551 void Auipc(GpuRegister rs, uint16_t imm16); 552 void Addiupc(GpuRegister rs, uint32_t imm19); 553 void Bc(uint32_t imm26); 554 void Balc(uint32_t imm26); 555 void Jic(GpuRegister rt, uint16_t imm16); 556 void Jialc(GpuRegister rt, uint16_t imm16); 557 void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16); 558 void Bltzc(GpuRegister rt, uint16_t imm16); 559 void Bgtzc(GpuRegister rt, uint16_t imm16); 560 void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16); 561 void Bgezc(GpuRegister rt, uint16_t imm16); 562 void Blezc(GpuRegister rt, uint16_t imm16); 563 void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16); 564 void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16); 565 void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16); 566 void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16); 567 void Beqzc(GpuRegister rs, uint32_t imm21); 568 void Bnezc(GpuRegister rs, uint32_t imm21); 569 void Bc1eqz(FpuRegister ft, uint16_t imm16); 570 void Bc1nez(FpuRegister ft, uint16_t imm16); 571 void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R2 572 void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R2 573 void Beqz(GpuRegister rt, uint16_t imm16); // R2 574 void Bnez(GpuRegister rt, uint16_t imm16); // R2 575 void Bltz(GpuRegister rt, uint16_t imm16); // R2 576 void Bgez(GpuRegister rt, uint16_t imm16); // R2 577 void Blez(GpuRegister rt, uint16_t imm16); // R2 578 void Bgtz(GpuRegister rt, uint16_t imm16); // R2 579 580 void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 581 void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 582 void MulS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 583 void DivS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 584 void AddD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 585 void 
SubD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 586 void MulD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 587 void DivD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 588 void SqrtS(FpuRegister fd, FpuRegister fs); 589 void SqrtD(FpuRegister fd, FpuRegister fs); 590 void AbsS(FpuRegister fd, FpuRegister fs); 591 void AbsD(FpuRegister fd, FpuRegister fs); 592 void MovS(FpuRegister fd, FpuRegister fs); 593 void MovD(FpuRegister fd, FpuRegister fs); 594 void NegS(FpuRegister fd, FpuRegister fs); 595 void NegD(FpuRegister fd, FpuRegister fs); 596 void RoundLS(FpuRegister fd, FpuRegister fs); 597 void RoundLD(FpuRegister fd, FpuRegister fs); 598 void RoundWS(FpuRegister fd, FpuRegister fs); 599 void RoundWD(FpuRegister fd, FpuRegister fs); 600 void TruncLS(FpuRegister fd, FpuRegister fs); 601 void TruncLD(FpuRegister fd, FpuRegister fs); 602 void TruncWS(FpuRegister fd, FpuRegister fs); 603 void TruncWD(FpuRegister fd, FpuRegister fs); 604 void CeilLS(FpuRegister fd, FpuRegister fs); 605 void CeilLD(FpuRegister fd, FpuRegister fs); 606 void CeilWS(FpuRegister fd, FpuRegister fs); 607 void CeilWD(FpuRegister fd, FpuRegister fs); 608 void FloorLS(FpuRegister fd, FpuRegister fs); 609 void FloorLD(FpuRegister fd, FpuRegister fs); 610 void FloorWS(FpuRegister fd, FpuRegister fs); 611 void FloorWD(FpuRegister fd, FpuRegister fs); 612 void SelS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 613 void SelD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 614 void SeleqzS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 615 void SeleqzD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 616 void SelnezS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 617 void SelnezD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 618 void RintS(FpuRegister fd, FpuRegister fs); 619 void RintD(FpuRegister fd, FpuRegister fs); 620 void ClassS(FpuRegister fd, FpuRegister fs); 621 void ClassD(FpuRegister fd, FpuRegister fs); 622 void MinS(FpuRegister fd, FpuRegister fs, FpuRegister 
ft); 623 void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 624 void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 625 void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 626 void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 627 void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 628 void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 629 void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 630 void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 631 void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 632 void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 633 void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 634 void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 635 void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); 636 void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 637 void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 638 void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 639 void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 640 void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 641 void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 642 void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 643 void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 644 void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 645 void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); 646 647 void Cvtsw(FpuRegister fd, FpuRegister fs); 648 void Cvtdw(FpuRegister fd, FpuRegister fs); 649 void Cvtsd(FpuRegister fd, FpuRegister fs); 650 void Cvtds(FpuRegister fd, FpuRegister fs); 651 void Cvtsl(FpuRegister fd, FpuRegister fs); 652 void Cvtdl(FpuRegister fd, FpuRegister fs); 653 654 void Mfc1(GpuRegister rt, FpuRegister fs); 655 void Mfhc1(GpuRegister rt, FpuRegister fs); 656 void Mtc1(GpuRegister rt, FpuRegister fs); 657 void Mthc1(GpuRegister rt, FpuRegister fs); 658 void 
Dmfc1(GpuRegister rt, FpuRegister fs); // MIPS64 659 void Dmtc1(GpuRegister rt, FpuRegister fs); // MIPS64 660 void Lwc1(FpuRegister ft, GpuRegister rs, uint16_t imm16); 661 void Ldc1(FpuRegister ft, GpuRegister rs, uint16_t imm16); 662 void Swc1(FpuRegister ft, GpuRegister rs, uint16_t imm16); 663 void Sdc1(FpuRegister ft, GpuRegister rs, uint16_t imm16); 664 665 void Break(); 666 void Nop(); 667 void Move(GpuRegister rd, GpuRegister rs); 668 void Clear(GpuRegister rd); 669 void Not(GpuRegister rd, GpuRegister rs); 670 671 // MSA instructions. 672 void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt); 673 void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt); 674 void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); 675 void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); 676 677 void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 678 void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 679 void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 680 void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 681 void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 682 void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 683 void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 684 void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 685 void Asub_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 686 void Asub_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 687 void Asub_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 688 void Asub_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 689 void Asub_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 690 void Asub_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 691 void Asub_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 692 void Asub_uD(VectorRegister wd, 
VectorRegister ws, VectorRegister wt); 693 void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 694 void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 695 void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 696 void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 697 void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 698 void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 699 void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 700 void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 701 void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 702 void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 703 void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 704 void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 705 void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 706 void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 707 void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 708 void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 709 void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 710 void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 711 void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 712 void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 713 void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 714 void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 715 void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 716 void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 717 void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 718 void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 719 void Ave_sW(VectorRegister wd, 
VectorRegister ws, VectorRegister wt); 720 void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 721 void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 722 void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 723 void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 724 void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 725 void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 726 void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 727 void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 728 void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 729 void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 730 void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 731 void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 732 void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 733 void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 734 void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 735 void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 736 void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 737 void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 738 void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 739 void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 740 void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 741 void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 742 void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 743 void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 744 void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 745 void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 746 void 
Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 747 void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 748 void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 749 750 void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 751 void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 752 void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 753 void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 754 void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 755 void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 756 void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 757 void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 758 void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 759 void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 760 void FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 761 void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 762 763 void Ffint_sW(VectorRegister wd, VectorRegister ws); 764 void Ffint_sD(VectorRegister wd, VectorRegister ws); 765 void Ftint_sW(VectorRegister wd, VectorRegister ws); 766 void Ftint_sD(VectorRegister wd, VectorRegister ws); 767 768 void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 769 void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 770 void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 771 void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 772 void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 773 void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 774 void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 775 void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 776 void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 777 void 
SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 778 void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 779 void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 780 781 // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1). 782 void SlliB(VectorRegister wd, VectorRegister ws, int shamt3); 783 void SlliH(VectorRegister wd, VectorRegister ws, int shamt4); 784 void SlliW(VectorRegister wd, VectorRegister ws, int shamt5); 785 void SlliD(VectorRegister wd, VectorRegister ws, int shamt6); 786 void SraiB(VectorRegister wd, VectorRegister ws, int shamt3); 787 void SraiH(VectorRegister wd, VectorRegister ws, int shamt4); 788 void SraiW(VectorRegister wd, VectorRegister ws, int shamt5); 789 void SraiD(VectorRegister wd, VectorRegister ws, int shamt6); 790 void SrliB(VectorRegister wd, VectorRegister ws, int shamt3); 791 void SrliH(VectorRegister wd, VectorRegister ws, int shamt4); 792 void SrliW(VectorRegister wd, VectorRegister ws, int shamt5); 793 void SrliD(VectorRegister wd, VectorRegister ws, int shamt6); 794 795 void MoveV(VectorRegister wd, VectorRegister ws); 796 void SplatiB(VectorRegister wd, VectorRegister ws, int n4); 797 void SplatiH(VectorRegister wd, VectorRegister ws, int n3); 798 void SplatiW(VectorRegister wd, VectorRegister ws, int n2); 799 void SplatiD(VectorRegister wd, VectorRegister ws, int n1); 800 void Copy_sB(GpuRegister rd, VectorRegister ws, int n4); 801 void Copy_sH(GpuRegister rd, VectorRegister ws, int n3); 802 void Copy_sW(GpuRegister rd, VectorRegister ws, int n2); 803 void Copy_sD(GpuRegister rd, VectorRegister ws, int n1); 804 void Copy_uB(GpuRegister rd, VectorRegister ws, int n4); 805 void Copy_uH(GpuRegister rd, VectorRegister ws, int n3); 806 void Copy_uW(GpuRegister rd, VectorRegister ws, int n2); 807 void InsertB(VectorRegister wd, GpuRegister rs, int n4); 808 void InsertH(VectorRegister wd, GpuRegister rs, int n3); 809 void InsertW(VectorRegister 
wd, GpuRegister rs, int n2); 810 void InsertD(VectorRegister wd, GpuRegister rs, int n1); 811 void FillB(VectorRegister wd, GpuRegister rs); 812 void FillH(VectorRegister wd, GpuRegister rs); 813 void FillW(VectorRegister wd, GpuRegister rs); 814 void FillD(VectorRegister wd, GpuRegister rs); 815 816 void LdiB(VectorRegister wd, int imm8); 817 void LdiH(VectorRegister wd, int imm10); 818 void LdiW(VectorRegister wd, int imm10); 819 void LdiD(VectorRegister wd, int imm10); 820 void LdB(VectorRegister wd, GpuRegister rs, int offset); 821 void LdH(VectorRegister wd, GpuRegister rs, int offset); 822 void LdW(VectorRegister wd, GpuRegister rs, int offset); 823 void LdD(VectorRegister wd, GpuRegister rs, int offset); 824 void StB(VectorRegister wd, GpuRegister rs, int offset); 825 void StH(VectorRegister wd, GpuRegister rs, int offset); 826 void StW(VectorRegister wd, GpuRegister rs, int offset); 827 void StD(VectorRegister wd, GpuRegister rs, int offset); 828 829 void IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 830 void IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 831 void IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 832 void IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 833 void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 834 void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 835 void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 836 void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 837 void IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 838 void IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 839 void IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 840 void IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 841 void IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 842 void IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 
843 void IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 844 void IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 845 846 void MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 847 void MaddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 848 void MaddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 849 void MaddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 850 void MsubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); 851 void MsubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 852 void MsubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 853 void MsubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 854 void FmaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 855 void FmaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 856 void FmsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 857 void FmsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 858 859 void Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 860 void Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 861 void Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 862 void Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); 863 void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); 864 void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); 865 866 // Helper for replicating floating point value in all destination elements. 867 void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double); 868 869 // Higher level composite instructions. 870 int InstrCountForLoadReplicatedConst32(int64_t); 871 void LoadConst32(GpuRegister rd, int32_t value); 872 void LoadConst64(GpuRegister rd, int64_t value); // MIPS64 873 874 // This function is only used for testing purposes. 
875 void RecordLoadConst64Path(int value); 876 877 void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value); 878 void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 879 880 // 881 // Heap poisoning. 882 // 883 884 // Poison a heap reference contained in `src` and store it in `dst`. 885 void PoisonHeapReference(GpuRegister dst, GpuRegister src) { 886 // dst = -src. 887 // Negate the 32-bit ref. 888 Dsubu(dst, ZERO, src); 889 // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. 890 Dext(dst, dst, 0, 32); 891 } 892 // Poison a heap reference contained in `reg`. 893 void PoisonHeapReference(GpuRegister reg) { 894 // reg = -reg. 895 PoisonHeapReference(reg, reg); 896 } 897 // Unpoison a heap reference contained in `reg`. 898 void UnpoisonHeapReference(GpuRegister reg) { 899 // reg = -reg. 900 // Negate the 32-bit ref. 901 Dsubu(reg, ZERO, reg); 902 // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. 903 Dext(reg, reg, 0, 32); 904 } 905 // Poison a heap reference contained in `reg` if heap poisoning is enabled. 906 void MaybePoisonHeapReference(GpuRegister reg) { 907 if (kPoisonHeapReferences) { 908 PoisonHeapReference(reg); 909 } 910 } 911 // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 912 void MaybeUnpoisonHeapReference(GpuRegister reg) { 913 if (kPoisonHeapReferences) { 914 UnpoisonHeapReference(reg); 915 } 916 } 917 918 void Bind(Label* label) OVERRIDE { 919 Bind(down_cast<Mips64Label*>(label)); 920 } 921 void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { 922 UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; 923 } 924 925 void Bind(Mips64Label* label); 926 927 // Don't warn about a different virtual Bind/Jump in the base class. 928 using JNIBase::Bind; 929 using JNIBase::Jump; 930 931 // Create a new label that can be used with Jump/Bind calls. 
932 std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE { 933 LOG(FATAL) << "Not implemented on MIPS64"; 934 UNREACHABLE(); 935 } 936 // Emit an unconditional jump to the label. 937 void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { 938 LOG(FATAL) << "Not implemented on MIPS64"; 939 UNREACHABLE(); 940 } 941 // Emit a conditional jump to the label by applying a unary condition test to the register. 942 void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED, 943 JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED, 944 ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE { 945 LOG(FATAL) << "Not implemented on MIPS64"; 946 UNREACHABLE(); 947 } 948 949 // Code at this offset will serve as the target for the Jump call. 950 void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { 951 LOG(FATAL) << "Not implemented on MIPS64"; 952 UNREACHABLE(); 953 } 954 955 // Create a new literal with a given value. 956 // NOTE: Force the template parameter to be explicitly specified. 957 template <typename T> 958 Literal* NewLiteral(typename Identity<T>::type value) { 959 static_assert(std::is_integral<T>::value, "T must be an integral type."); 960 return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); 961 } 962 963 // Load label address using PC-relative loads. To be used with data labels in the literal / 964 // jump table area only and not with regular code labels. 965 void LoadLabelAddress(GpuRegister dest_reg, Mips64Label* label); 966 967 // Create a new literal with the given data. 968 Literal* NewLiteral(size_t size, const uint8_t* data); 969 970 // Load literal using PC-relative loads. 971 void LoadLiteral(GpuRegister dest_reg, LoadOperandType load_type, Literal* literal); 972 973 // Create a jump table for the given labels that will be emitted when finalizing. 974 // When the table is emitted, offsets will be relative to the location of the table. 
975 // The table location is determined by the location of its label (the label precedes 976 // the table data) and should be loaded using LoadLabelAddress(). 977 JumpTable* CreateJumpTable(std::vector<Mips64Label*>&& labels); 978 979 // When `is_bare` is false, the branches will promote to long (if the range 980 // of the individual branch instruction is insufficient) and the delay/ 981 // forbidden slots will be taken care of. 982 // Use `is_bare = false` when the branch target may be out of reach of the 983 // individual branch instruction. IOW, this is for general purpose use. 984 // 985 // When `is_bare` is true, just the branch instructions will be generated 986 // leaving delay/forbidden slot filling up to the caller and the branches 987 // won't promote to long if the range is insufficient (you'll get a 988 // compilation error when the range is exceeded). 989 // Use `is_bare = true` when the branch target is known to be within reach 990 // of the individual branch instruction. This is intended for small local 991 // optimizations around delay/forbidden slots. 992 // Also prefer using `is_bare = true` if the code near the branch is to be 993 // patched or analyzed at run time (e.g. introspection) to 994 // - show the intent and 995 // - fail during compilation rather than during patching/execution if the 996 // bare branch range is insufficent but the code size and layout are 997 // expected to remain unchanged 998 // 999 // R6 compact branches without delay/forbidden slots. 1000 void Bc(Mips64Label* label, bool is_bare = false); 1001 void Balc(Mips64Label* label, bool is_bare = false); 1002 // R6 compact branches with forbidden slots. 
1003 void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); 1004 void Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare = false); 1005 void Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare = false); 1006 void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); 1007 void Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare = false); 1008 void Blezc(GpuRegister rt, Mips64Label* label, bool is_bare = false); 1009 void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); 1010 void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); 1011 void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); 1012 void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); 1013 void Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare = false); 1014 void Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare = false); 1015 // R6 branches with delay slots. 1016 void Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare = false); 1017 void Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare = false); 1018 // R2 branches with delay slots that are also available on R6. 1019 // The `is_bare` parameter exists and is checked in these branches only to 1020 // prevent programming mistakes. These branches never promote to long, not 1021 // even if `is_bare` is false. 
  void Bltz(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Bgez(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Blez(GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);  // R2
  void Beqz(GpuRegister rs, Mips64Label* label, bool is_bare = false);  // R2
  void Bnez(GpuRegister rs, Mips64Label* label, bool is_bare = false);  // R2

  void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
  // Adjusts `base`/`offset` so that `offset` fits in the immediate field of the
  // actual load/store instruction(s) emitted afterwards.
  void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
  // If element_size_shift is negative at entry, its value will be calculated based on the offset.
  void AdjustBaseOffsetAndElementSizeShift(GpuRegister& base,
                                           int32_t& offset,
                                           int& element_size_shift);

 private:
  // This will be used as an argument for loads/stores
  // when there is no need for implicit null checks.
  struct NoImplicitNullChecker {
    void operator()() const {}
  };

 public:
  // Stores the constant `value` of the width selected by `type` to memory at
  // `base` + `offset`, using `temp` (or AT, see below) to materialize non-zero
  // constants. `null_checker` is invoked right after the first emitted store.
  template <typename ImplicitNullChecker = NoImplicitNullChecker>
  void StoreConstToOffset(StoreOperandType type,
                          int64_t value,
                          GpuRegister base,
                          int32_t offset,
                          GpuRegister temp,
                          ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
    // We permit `base` and `temp` to coincide (however, we check that neither is AT),
    // in which case the `base` register may be overwritten in the process.
    CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
    GpuRegister reg;
    // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp`
    // to load and hold the value but we can use AT instead as AT hasn't been used yet.
    // Otherwise, `temp` can be used for the value. And if `temp` is the same as the
    // original `base` (that is, `base` prior to the adjustment), the original `base`
    // register will be overwritten.
    if (base == temp) {
      temp = AT;
    }

    if (type == kStoreDoubleword && IsAligned<kMips64DoublewordSize>(offset)) {
      // Aligned 64-bit store: a single Sd suffices.
      if (value == 0) {
        reg = ZERO;
      } else {
        reg = temp;
        LoadConst64(reg, value);
      }
      Sd(reg, base, offset);
      null_checker();
    } else {
      // Narrow store, or 64-bit store split into two 32-bit halves.
      uint32_t low = Low32Bits(value);
      uint32_t high = High32Bits(value);
      if (low == 0) {
        reg = ZERO;
      } else {
        reg = temp;
        LoadConst32(reg, low);
      }
      switch (type) {
        case kStoreByte:
          Sb(reg, base, offset);
          break;
        case kStoreHalfword:
          Sh(reg, base, offset);
          break;
        case kStoreWord:
          Sw(reg, base, offset);
          break;
        case kStoreDoubleword:
          // not aligned to kMips64DoublewordSize
          CHECK_ALIGNED(offset, kMips64WordSize);
          Sw(reg, base, offset);
          null_checker();
          if (high == 0) {
            reg = ZERO;
          } else {
            reg = temp;
            if (high != low) {
              // If high == low, `reg` already holds the correct 32-bit value.
              LoadConst32(reg, high);
            }
          }
          Sw(reg, base, offset + kMips64WordSize);
          break;
        default:
          LOG(FATAL) << "UNREACHABLE";
      }
      if (type != kStoreDoubleword) {
        // For the split doubleword case null_checker() was already invoked above.
        null_checker();
      }
    }
  }

  // Loads a value of the width selected by `type` from `base` + `offset` into
  // GPR `reg`. An unaligned 64-bit load is split into two 32-bit loads combined
  // with Dinsu. `null_checker` is invoked right after the first emitted load.
  template <typename ImplicitNullChecker = NoImplicitNullChecker>
  void LoadFromOffset(LoadOperandType type,
                      GpuRegister reg,
                      GpuRegister base,
                      int32_t offset,
                      ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));

    switch (type) {
      case kLoadSignedByte:
        Lb(reg, base, offset);
        break;
      case kLoadUnsignedByte:
        Lbu(reg, base, offset);
        break;
      case kLoadSignedHalfword:
        Lh(reg, base, offset);
        break;
      case kLoadUnsignedHalfword:
        Lhu(reg, base, offset);
        break;
      case kLoadWord:
        CHECK_ALIGNED(offset, kMips64WordSize);
        Lw(reg, base, offset);
        break;
      case kLoadUnsignedWord:
        CHECK_ALIGNED(offset, kMips64WordSize);
        Lwu(reg, base, offset);
        break;
      case kLoadDoubleword:
        if (!IsAligned<kMips64DoublewordSize>(offset)) {
          CHECK_ALIGNED(offset, kMips64WordSize);
          // Low word, then high word inserted into bits 32..63.
          Lwu(reg, base, offset);
          null_checker();
          Lwu(TMP2, base, offset + kMips64WordSize);
          Dinsu(reg, TMP2, 32, 32);
        } else {
          Ld(reg, base, offset);
          null_checker();
        }
        break;
      default:
        LOG(FATAL) << "UNREACHABLE";
    }
    if (type != kLoadDoubleword) {
      null_checker();
    }
  }

  // Loads a value of the width selected by `type` from `base` + `offset` into
  // FPU/vector register `reg`. An unaligned 64-bit load is split into Lwc1 +
  // Lw/Mthc1; quadword loads dispatch on the element size derived from the offset.
  template <typename ImplicitNullChecker = NoImplicitNullChecker>
  void LoadFpuFromOffset(LoadOperandType type,
                         FpuRegister reg,
                         GpuRegister base,
                         int32_t offset,
                         ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
    int element_size_shift = -1;
    if (type != kLoadQuadword) {
      AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
    } else {
      AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
    }

    switch (type) {
      case kLoadWord:
        CHECK_ALIGNED(offset, kMips64WordSize);
        Lwc1(reg, base, offset);
        null_checker();
        break;
      case kLoadDoubleword:
        if (!IsAligned<kMips64DoublewordSize>(offset)) {
          CHECK_ALIGNED(offset, kMips64WordSize);
          Lwc1(reg, base, offset);
          null_checker();
          Lw(TMP2, base, offset + kMips64WordSize);
          Mthc1(TMP2, reg);
        } else {
          Ldc1(reg, base, offset);
          null_checker();
        }
        break;
      case kLoadQuadword:
        switch (element_size_shift) {
          case TIMES_1: LdB(static_cast<VectorRegister>(reg), base, offset); break;
          case TIMES_2: LdH(static_cast<VectorRegister>(reg), base, offset); break;
          case TIMES_4: LdW(static_cast<VectorRegister>(reg), base, offset); break;
          case TIMES_8: LdD(static_cast<VectorRegister>(reg), base, offset); break;
          default:
            LOG(FATAL) << "UNREACHABLE";
        }
        null_checker();
        break;
      default:
        LOG(FATAL) << "UNREACHABLE";
    }
  }

  // Stores GPR `reg` of the width selected by `type` to `base` + `offset`.
  // An unaligned 64-bit store is split into two Sw instructions (high half
  // extracted via Dsrl32). `null_checker` is invoked after the first store.
  template <typename ImplicitNullChecker = NoImplicitNullChecker>
  void StoreToOffset(StoreOperandType type,
                     GpuRegister reg,
                     GpuRegister base,
                     int32_t offset,
                     ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
    // Must not use AT as `reg`, so as not to overwrite the value being stored
    // with the adjusted `base`.
    CHECK_NE(reg, AT);
    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));

    switch (type) {
      case kStoreByte:
        Sb(reg, base, offset);
        break;
      case kStoreHalfword:
        Sh(reg, base, offset);
        break;
      case kStoreWord:
        CHECK_ALIGNED(offset, kMips64WordSize);
        Sw(reg, base, offset);
        break;
      case kStoreDoubleword:
        if (!IsAligned<kMips64DoublewordSize>(offset)) {
          CHECK_ALIGNED(offset, kMips64WordSize);
          Sw(reg, base, offset);
          null_checker();
          Dsrl32(TMP2, reg, 0);
          Sw(TMP2, base, offset + kMips64WordSize);
        } else {
          Sd(reg, base, offset);
          null_checker();
        }
        break;
      default:
        LOG(FATAL) << "UNREACHABLE";
    }
    if (type != kStoreDoubleword) {
      null_checker();
    }
  }

  // Stores FPU/vector register `reg` of the width selected by `type` to
  // `base` + `offset`. Mirrors LoadFpuFromOffset; note that for the unaligned
  // doubleword case Mfhc1 reads the high half before the first store.
  template <typename ImplicitNullChecker = NoImplicitNullChecker>
  void StoreFpuToOffset(StoreOperandType type,
                        FpuRegister reg,
                        GpuRegister base,
                        int32_t offset,
                        ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
    int element_size_shift = -1;
    if (type != kStoreQuadword) {
      AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
    } else {
      AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift);
    }

    switch (type) {
      case kStoreWord:
        CHECK_ALIGNED(offset, kMips64WordSize);
        Swc1(reg, base, offset);
        null_checker();
        break;
      case kStoreDoubleword:
        if (!IsAligned<kMips64DoublewordSize>(offset)) {
          CHECK_ALIGNED(offset, kMips64WordSize);
          Mfhc1(TMP2, reg);
          Swc1(reg, base, offset);
          null_checker();
          Sw(TMP2, base, offset + kMips64WordSize);
        } else {
          Sdc1(reg, base, offset);
          null_checker();
        }
        break;
      case kStoreQuadword:
        switch (element_size_shift) {
          case TIMES_1: StB(static_cast<VectorRegister>(reg), base, offset); break;
          case TIMES_2: StH(static_cast<VectorRegister>(reg), base, offset); break;
          case TIMES_4: StW(static_cast<VectorRegister>(reg), base, offset); break;
          case TIMES_8: StD(static_cast<VectorRegister>(reg), base, offset); break;
          default:
            LOG(FATAL) << "UNREACHABLE";
        }
        null_checker();
        break;
      default:
        LOG(FATAL) << "UNREACHABLE";
    }
  }

  // Non-template convenience overloads (no implicit null checking).
  void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
  void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
  void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
  void StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);

  // Emit data (e.g. encoded instruction or immediate) to the instruction stream.
  void Emit(uint32_t value);

  //
  // Overridden common assembler high-level functionality.
  //

  // Emit code that will create an activation on the stack.
1317 void BuildFrame(size_t frame_size, 1318 ManagedRegister method_reg, 1319 ArrayRef<const ManagedRegister> callee_save_regs, 1320 const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; 1321 1322 // Emit code that will remove an activation from the stack. 1323 void RemoveFrame(size_t frame_size, 1324 ArrayRef<const ManagedRegister> callee_save_regs, 1325 bool may_suspend) OVERRIDE; 1326 1327 void IncreaseFrameSize(size_t adjust) OVERRIDE; 1328 void DecreaseFrameSize(size_t adjust) OVERRIDE; 1329 1330 // Store routines. 1331 void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; 1332 void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; 1333 void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; 1334 1335 void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; 1336 1337 void StoreStackOffsetToThread(ThreadOffset64 thr_offs, 1338 FrameOffset fr_offs, 1339 ManagedRegister mscratch) OVERRIDE; 1340 1341 void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; 1342 1343 void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, 1344 ManagedRegister mscratch) OVERRIDE; 1345 1346 // Load routines. 1347 void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; 1348 1349 void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE; 1350 1351 void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; 1352 1353 void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, 1354 bool unpoison_reference) OVERRIDE; 1355 1356 void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; 1357 1358 void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE; 1359 1360 // Copying routines. 
1361 void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; 1362 1363 void CopyRawPtrFromThread(FrameOffset fr_offs, 1364 ThreadOffset64 thr_offs, 1365 ManagedRegister mscratch) OVERRIDE; 1366 1367 void CopyRawPtrToThread(ThreadOffset64 thr_offs, 1368 FrameOffset fr_offs, 1369 ManagedRegister mscratch) OVERRIDE; 1370 1371 void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; 1372 1373 void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE; 1374 1375 void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch, 1376 size_t size) OVERRIDE; 1377 1378 void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, 1379 ManagedRegister mscratch, size_t size) OVERRIDE; 1380 1381 void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch, 1382 size_t size) OVERRIDE; 1383 1384 void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, 1385 ManagedRegister mscratch, size_t size) OVERRIDE; 1386 1387 void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, 1388 ManagedRegister mscratch, size_t size) OVERRIDE; 1389 1390 void MemoryBarrier(ManagedRegister) OVERRIDE; 1391 1392 // Sign extension. 1393 void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; 1394 1395 // Zero extension. 1396 void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; 1397 1398 // Exploit fast access in managed code to Thread::Current(). 1399 void GetCurrentThread(ManagedRegister tr) OVERRIDE; 1400 void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; 1401 1402 // Set up out_reg to hold a Object** into the handle scope, or to be null if the 1403 // value is null and null_allowed. in_reg holds a possibly stale reference 1404 // that can be used to avoid loading the handle scope entry to see if the value is 1405 // null. 
1406 void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, 1407 ManagedRegister in_reg, bool null_allowed) OVERRIDE; 1408 1409 // Set up out_off to hold a Object** into the handle scope, or to be null if the 1410 // value is null and null_allowed. 1411 void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister 1412 mscratch, bool null_allowed) OVERRIDE; 1413 1414 // src holds a handle scope entry (Object**) load this into dst. 1415 void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; 1416 1417 // Heap::VerifyObject on src. In some cases (such as a reference to this) we 1418 // know that src may not be null. 1419 void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; 1420 void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; 1421 1422 // Call to address held at [base+offset]. 1423 void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; 1424 void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; 1425 void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE; 1426 1427 // Generate code to check if Thread::Current()->exception_ is non-null 1428 // and branch to a ExceptionSlowPath if it is. 1429 void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; 1430 1431 // Emit slow paths queued during assembly and promote short branches to long if needed. 1432 void FinalizeCode() OVERRIDE; 1433 1434 // Emit branches and finalize all instructions. 1435 void FinalizeInstructions(const MemoryRegion& region); 1436 1437 // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, 1438 // must be used instead of Mips64Label::GetPosition()). 1439 uint32_t GetLabelLocation(const Mips64Label* label) const; 1440 1441 // Get the final position of a label after local fixup based on the old position 1442 // recorded before FinalizeCode(). 
  uint32_t GetAdjustedPosition(uint32_t old_position);

  // Note that PC-relative literal loads are handled as pseudo branches because they need very
  // similar relocation and may similarly expand in size to accommodate for larger offsets relative
  // to PC.
  enum BranchCondition {
    kCondLT,
    kCondGE,
    kCondLE,
    kCondGT,
    kCondLTZ,
    kCondGEZ,
    kCondLEZ,
    kCondGTZ,
    kCondEQ,
    kCondNE,
    kCondEQZ,
    kCondNEZ,
    kCondLTU,
    kCondGEU,
    kCondF,   // Floating-point predicate false.
    kCondT,   // Floating-point predicate true.
    kUncond,  // Unconditional (always taken).
  };
  friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs);

 private:
  // Internal bookkeeping for one branch (or branch-like pseudo instruction: label
  // address load, literal load) between assembly and finalization: tracks its
  // location, target, condition, and current/initial encoding type so it can be
  // resolved, relocated, and promoted from short to long form.
  class Branch {
   public:
    enum Type {
      // R6 short branches (can be promoted to long).
      kUncondBranch,
      kCondBranch,
      kCall,
      // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
      kBareUncondBranch,
      kBareCondBranch,
      kBareCall,
      // R2 short branches (can't be promoted to long), delay slots filled manually.
      kR2BareCondBranch,
      // Near label.
      kLabel,
      // Near literals.
      kLiteral,
      kLiteralUnsigned,
      kLiteralLong,
      // Long branches.
      kLongUncondBranch,
      kLongCondBranch,
      kLongCall,
      // Far label.
      kFarLabel,
      // Far literals.
      kFarLiteral,
      kFarLiteralUnsigned,
      kFarLiteralLong,
    };

    // Bit sizes of offsets defined as enums to minimize chance of typos.
    enum OffsetBits {
      kOffset16 = 16,
      kOffset18 = 18,
      kOffset21 = 21,
      kOffset23 = 23,
      kOffset28 = 28,
      kOffset32 = 32,
    };

    static constexpr uint32_t kUnresolved = 0xffffffff;  // Unresolved target_.
    // Upper bound on branch expansion, as a number of 4-byte instructions.
    static constexpr int32_t kMaxBranchLength = 32;
    static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t);

    // Static per-Type encoding properties, indexed by Type in branch_info_[].
    struct BranchInfo {
      // Branch length as a number of 4-byte-long instructions.
      uint32_t length;
      // Ordinal number (0-based) of the first (or the only) instruction that contains the branch's
      // PC-relative offset (or its most significant 16-bit half, which goes first).
      uint32_t instr_offset;
      // Different MIPS instructions with PC-relative offsets apply said offsets to slightly
      // different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
      // instructions) from the instruction containing the offset.
      uint32_t pc_org;
      // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch
      // and kBareCondBranch are an exception: use kOffset23 for beqzc/bnezc).
      OffsetBits offset_size;
      // Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
      // count.
      int offset_shift;
    };
    static const BranchInfo branch_info_[/* Type */];

    // Unconditional branch or call.
    Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare);
    // Conditional branch.
    Branch(bool is_r6,
           uint32_t location,
           uint32_t target,
           BranchCondition condition,
           GpuRegister lhs_reg,
           GpuRegister rhs_reg,
           bool is_bare);
    // Label address (in literal area) or literal.
    Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type);

    // Some conditional branches with lhs = rhs are effectively NOPs, while some
    // others are effectively unconditional. MIPSR6 conditional branches require lhs != rhs.
    // So, we need a way to identify such branches in order to emit no instructions for them
    // or change them to unconditional.
    static bool IsNop(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);
    static bool IsUncond(BranchCondition condition, GpuRegister lhs, GpuRegister rhs);

    // Returns the condition with the opposite outcome (e.g. for inverting a branch).
    static BranchCondition OppositeCondition(BranchCondition cond);

    // Simple accessors for the fields recorded at construction/finalization time.
    Type GetType() const;
    BranchCondition GetCondition() const;
    GpuRegister GetLeftRegister() const;
    GpuRegister GetRightRegister() const;
    uint32_t GetTarget() const;
    uint32_t GetLocation() const;
    uint32_t GetOldLocation() const;
    uint32_t GetLength() const;
    uint32_t GetOldLength() const;
    uint32_t GetSize() const;
    uint32_t GetOldSize() const;
    uint32_t GetEndLocation() const;
    uint32_t GetOldEndLocation() const;
    bool IsBare() const;
    bool IsLong() const;
    bool IsResolved() const;

    // Returns the bit size of the signed offset that the branch instruction can handle.
    OffsetBits GetOffsetSize() const;

    // Calculates the distance between two byte locations in the assembler buffer and
    // returns the number of bits needed to represent the distance as a signed integer.
    //
    // Branch instructions have signed offsets of 16, 19 (addiupc), 21 (beqzc/bnezc),
    // and 26 (bc) bits, which are additionally shifted left 2 positions at run time.
    //
    // Composite branches (made of several instructions) with longer reach have 32-bit
    // offsets encoded as 2 16-bit "halves" in two instructions (high half goes first).
    // The composite branches cover the range of PC + ~+/-2GB. The range is not end-to-end,
    // however. Consider the following implementation of a long unconditional branch, for
    // example:
    //
    //   auipc at, offset_31_16  // at = pc + sign_extend(offset_31_16) << 16
    //   jic   at, offset_15_0   // pc = at + sign_extend(offset_15_0)
    //
    // Both of the above instructions take 16-bit signed offsets as immediate operands.
    // When bit 15 of offset_15_0 is 1, it effectively causes subtraction of 0x10000
    // due to sign extension. This must be compensated for by incrementing offset_31_16
    // by 1. offset_31_16 can only be incremented by 1 if it's not 0x7FFF. If it is
    // 0x7FFF, adding 1 will overflow the positive offset into the negative range.
    // Therefore, the long branch range is something like from PC - 0x80000000 to
    // PC + 0x7FFF7FFF, IOW, shorter by 32KB on one side.
    //
    // The returned values are therefore: 18, 21, 23, 28 and 32. There's also a special
    // case with the addiu instruction and a 16 bit offset.
    static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target);

    // Resolve a branch when the target is known.
    void Resolve(uint32_t target);

    // Relocate a branch by a given delta if needed due to expansion of this or another
    // branch at a given location by this delta (just changes location_ and target_).
    void Relocate(uint32_t expand_location, uint32_t delta);

    // If the branch is short, changes its type to long.
    void PromoteToLong();

    // If necessary, updates the type by promoting a short branch to a long branch
    // based on the branch location and target. Returns the amount (in bytes) by
    // which the branch size has increased.
    // max_short_distance caps the maximum distance between location_ and target_
    // that is allowed for short branches. This is for debugging/testing purposes.
    // max_short_distance = 0 forces all short branches to become long.
    // Use the implicit default argument when not debugging/testing.
    uint32_t PromoteIfNeeded(uint32_t max_short_distance = std::numeric_limits<uint32_t>::max());

    // Returns the location of the instruction(s) containing the offset.
    uint32_t GetOffsetLocation() const;

    // Calculates and returns the offset ready for encoding in the branch instruction(s).
    uint32_t GetOffset() const;

   private:
    // Completes branch construction by determining and recording its type.
    void InitializeType(Type initial_type, bool is_r6);
    // Helper for the above.
    void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);

    uint32_t old_location_;  // Offset into assembler buffer in bytes.
    uint32_t location_;      // Offset into assembler buffer in bytes.
    uint32_t target_;        // Offset into assembler buffer in bytes.

    GpuRegister lhs_reg_;        // Left-hand side register in conditional branches or
                                 // destination register in literals.
    GpuRegister rhs_reg_;        // Right-hand side register in conditional branches.
    BranchCondition condition_;  // Condition for conditional branches.

    Type type_;      // Current type of the branch.
    Type old_type_;  // Initial type of the branch.
  };
  friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
  friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);

  // Raw instruction encoders, one per MIPS64/MSA instruction format.
  void EmitR(int opcode, GpuRegister rs, GpuRegister rt, GpuRegister rd, int shamt, int funct);
  void EmitRsd(int opcode, GpuRegister rs, GpuRegister rd, int shamt, int funct);
  void EmitRtd(int opcode, GpuRegister rt, GpuRegister rd, int shamt, int funct);
  void EmitI(int opcode, GpuRegister rs, GpuRegister rt, uint16_t imm);
  void EmitI21(int opcode, GpuRegister rs, uint32_t imm21);
  void EmitI26(int opcode, uint32_t imm26);
  void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
  void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
  void EmitBcondR6(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
  void EmitBcondR2(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint16_t imm16);
  void EmitMsa3R(int operation,
                 int df,
                 VectorRegister wt,
                 VectorRegister ws,
                 VectorRegister wd,
                 int minor_opcode);
  void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode);
  void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode);
  void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df);
  void EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode);
  void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
  void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);

  // Record a branch/call/pseudo-branch to a (possibly unbound) label in branches_.
  void Buncond(Mips64Label* label, bool is_bare);
  void Bcond(Mips64Label* label,
             bool is_r6,
             bool is_bare,
             BranchCondition condition,
             GpuRegister lhs,
             GpuRegister rhs = ZERO);
  void Call(Mips64Label* label, bool is_bare);
  void FinalizeLabeledBranch(Mips64Label* label);

  Branch* GetBranch(uint32_t branch_id);
  const Branch* GetBranch(uint32_t branch_id) const;

  // Finalization passes (run from FinalizeCode()/FinalizeInstructions()).
  void EmitLiterals();
  void ReserveJumpTableSpace();
  void EmitJumpTables();
  void PromoteBranches();
  void EmitBranch(Branch* branch);
  void EmitBranches();
  void PatchCFI();

  // Emits exception block.
  void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);

  // Whether the target supports the MSA (SIMD) extension.
  bool HasMsa() const {
    return has_msa_;
  }

  // List of exception blocks to generate at the end of the code cache.
  std::vector<Mips64ExceptionSlowPath> exception_blocks_;

  std::vector<Branch> branches_;

  // Whether appending instructions at the end of the buffer or overwriting the existing ones.
  bool overwriting_;
  // The current overwrite location.
  uint32_t overwrite_location_;

  // Use std::deque<> for literal labels to allow insertions at the end
  // without invalidating pointers and references to existing elements.
  ArenaDeque<Literal> literals_;
  ArenaDeque<Literal> long_literals_;  // 64-bit literals separated for alignment reasons.

  // Jump table list.
  ArenaDeque<JumpTable> jump_tables_;

  // Data for GetAdjustedPosition(), see the description there.
  uint32_t last_position_adjustment_;
  uint32_t last_old_position_;
  uint32_t last_branch_id_;

  const bool has_msa_;

  DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
};

}  // namespace mips64
}  // namespace art

#endif  // ART_COMPILER_UTILS_MIPS64_ASSEMBLER_MIPS64_H_