1 // Copyright 2015, ARM Limited 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27 #ifndef VIXL_A64_SIMULATOR_A64_H_ 28 #define VIXL_A64_SIMULATOR_A64_H_ 29 30 #include "vixl/globals.h" 31 #include "vixl/utils.h" 32 #include "vixl/a64/instructions-a64.h" 33 #include "vixl/a64/assembler-a64.h" 34 #include "vixl/a64/disasm-a64.h" 35 #include "vixl/a64/instrument-a64.h" 36 37 namespace vixl { 38 39 // Debug instructions. 
//
// VIXL's macro-assembler and simulator support a few pseudo instructions to
// make debugging easier. These pseudo instructions do not exist on real
// hardware.
//
// TODO: Provide controls to prevent the macro assembler from emitting
// pseudo-instructions. This is important for ahead-of-time compilers, where the
// macro assembler is built with USE_SIMULATOR but the code will eventually be
// run on real hardware.
//
// TODO: Also consider allowing these pseudo-instructions to be disabled in the
// simulator, so that users can check that the input is a valid native code.
// (This isn't possible in all cases. Printf won't work, for example.)
//
// Each debug pseudo instruction is represented by a HLT instruction. The HLT
// immediate field is used to identify the type of debug pseudo instruction.

// HLT immediates reserved for the debug pseudo instructions. The values are
// consecutive, starting at 0xdeb0; they are written out explicitly because
// they are part of the instruction encoding.
enum DebugHltOpcodes {
  kUnreachableOpcode = 0xdeb0,
  kPrintfOpcode = 0xdeb1,
  kTraceOpcode = 0xdeb2,
  kLogOpcode = 0xdeb3,
  // Aliases delimiting the range [first, last] of debug opcodes.
  kDebugHltFirstOpcode = kUnreachableOpcode,
  kDebugHltLastOpcode = kLogOpcode
};

// Each pseudo instruction uses a custom encoding for additional arguments, as
// described below.

// Unreachable - kUnreachableOpcode
//
// Instruction which should never be executed. This is used as a guard in parts
// of the code that should not be reachable, such as in data encoded inline in
// the instructions.

// Printf - kPrintfOpcode
//  - arg_count: The number of arguments.
//  - arg_pattern: A set of PrintfArgPattern values, packed into two-bit fields.
//
// Simulate a call to printf.
//
// Floating-point and integer arguments are passed in separate sets of registers
// in AAPCS64 (even for varargs functions), so it is not possible to determine
// the type of each argument without some information about the values that were
// passed in.
This information could be retrieved from the printf format string, 86 // but the format string is not trivial to parse so we encode the relevant 87 // information with the HLT instruction. 88 // 89 // Also, the following registers are populated (as if for a native A64 call): 90 // x0: The format string 91 // x1-x7: Optional arguments, if type == CPURegister::kRegister 92 // d0-d7: Optional arguments, if type == CPURegister::kFPRegister 93 const unsigned kPrintfArgCountOffset = 1 * kInstructionSize; 94 const unsigned kPrintfArgPatternListOffset = 2 * kInstructionSize; 95 const unsigned kPrintfLength = 3 * kInstructionSize; 96 97 const unsigned kPrintfMaxArgCount = 4; 98 99 // The argument pattern is a set of two-bit-fields, each with one of the 100 // following values: 101 enum PrintfArgPattern { 102 kPrintfArgW = 1, 103 kPrintfArgX = 2, 104 // There is no kPrintfArgS because floats are always converted to doubles in C 105 // varargs calls. 106 kPrintfArgD = 3 107 }; 108 static const unsigned kPrintfArgPatternBits = 2; 109 110 // Trace - kTraceOpcode 111 // - parameter: TraceParameter stored as a uint32_t 112 // - command: TraceCommand stored as a uint32_t 113 // 114 // Allow for trace management in the generated code. This enables or disables 115 // automatic tracing of the specified information for every simulated 116 // instruction. 117 const unsigned kTraceParamsOffset = 1 * kInstructionSize; 118 const unsigned kTraceCommandOffset = 2 * kInstructionSize; 119 const unsigned kTraceLength = 3 * kInstructionSize; 120 121 // Trace parameters. 122 enum TraceParameters { 123 LOG_DISASM = 1 << 0, // Log disassembly. 124 LOG_REGS = 1 << 1, // Log general purpose registers. 125 LOG_VREGS = 1 << 2, // Log NEON and floating-point registers. 126 LOG_SYSREGS = 1 << 3, // Log the flags and system registers. 127 LOG_WRITE = 1 << 4, // Log writes to memory. 
128 129 LOG_NONE = 0, 130 LOG_STATE = LOG_REGS | LOG_VREGS | LOG_SYSREGS, 131 LOG_ALL = LOG_DISASM | LOG_STATE | LOG_WRITE 132 }; 133 134 // Trace commands. 135 enum TraceCommand { 136 TRACE_ENABLE = 1, 137 TRACE_DISABLE = 2 138 }; 139 140 // Log - kLogOpcode 141 // - parameter: TraceParameter stored as a uint32_t 142 // 143 // Print the specified information once. This mechanism is separate from Trace. 144 // In particular, _all_ of the specified registers are printed, rather than just 145 // the registers that the instruction writes. 146 // 147 // Any combination of the TraceParameters values can be used, except that 148 // LOG_DISASM is not supported for Log. 149 const unsigned kLogParamsOffset = 1 * kInstructionSize; 150 const unsigned kLogLength = 2 * kInstructionSize; 151 152 153 // Assemble the specified IEEE-754 components into the target type and apply 154 // appropriate rounding. 155 // sign: 0 = positive, 1 = negative 156 // exponent: Unbiased IEEE-754 exponent. 157 // mantissa: The mantissa of the input. The top bit (which is not encoded for 158 // normal IEEE-754 values) must not be omitted. This bit has the 159 // value 'pow(2, exponent)'. 160 // 161 // The input value is assumed to be a normalized value. That is, the input may 162 // not be infinity or NaN. If the source value is subnormal, it must be 163 // normalized before calling this function such that the highest set bit in the 164 // mantissa has the value 'pow(2, exponent)'. 165 // 166 // Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than 167 // calling a templated FPRound. 168 template <class T, int ebits, int mbits> 169 T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa, 170 FPRounding round_mode) { 171 VIXL_ASSERT((sign == 0) || (sign == 1)); 172 173 // Only FPTieEven and FPRoundOdd rounding modes are implemented. 
174 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); 175 176 // Rounding can promote subnormals to normals, and normals to infinities. For 177 // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be 178 // encodable as a float, but rounding based on the low-order mantissa bits 179 // could make it overflow. With ties-to-even rounding, this value would become 180 // an infinity. 181 182 // ---- Rounding Method ---- 183 // 184 // The exponent is irrelevant in the rounding operation, so we treat the 185 // lowest-order bit that will fit into the result ('onebit') as having 186 // the value '1'. Similarly, the highest-order bit that won't fit into 187 // the result ('halfbit') has the value '0.5'. The 'point' sits between 188 // 'onebit' and 'halfbit': 189 // 190 // These bits fit into the result. 191 // |---------------------| 192 // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 193 // || 194 // / | 195 // / halfbit 196 // onebit 197 // 198 // For subnormal outputs, the range of representable bits is smaller and 199 // the position of onebit and halfbit depends on the exponent of the 200 // input, but the method is otherwise similar. 201 // 202 // onebit(frac) 203 // | 204 // | halfbit(frac) halfbit(adjusted) 205 // | / / 206 // | | | 207 // 0b00.0 (exact) -> 0b00.0 (exact) -> 0b00 208 // 0b00.0... -> 0b00.0... -> 0b00 209 // 0b00.1 (exact) -> 0b00.0111..111 -> 0b00 210 // 0b00.1... -> 0b00.1... -> 0b01 211 // 0b01.0 (exact) -> 0b01.0 (exact) -> 0b01 212 // 0b01.0... -> 0b01.0... -> 0b01 213 // 0b01.1 (exact) -> 0b01.1 (exact) -> 0b10 214 // 0b01.1... -> 0b01.1... -> 0b10 215 // 0b10.0 (exact) -> 0b10.0 (exact) -> 0b10 216 // 0b10.0... -> 0b10.0... -> 0b10 217 // 0b10.1 (exact) -> 0b10.0111..111 -> 0b10 218 // 0b10.1... -> 0b10.1... -> 0b11 219 // 0b11.0 (exact) -> 0b11.0 (exact) -> 0b11 220 // ... 
/ | / | 221 // / | / | 222 // / | 223 // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / | 224 // 225 // mantissa = (mantissa >> shift) + halfbit(adjusted); 226 227 static const int mantissa_offset = 0; 228 static const int exponent_offset = mantissa_offset + mbits; 229 static const int sign_offset = exponent_offset + ebits; 230 VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1)); 231 232 // Bail out early for zero inputs. 233 if (mantissa == 0) { 234 return sign << sign_offset; 235 } 236 237 // If all bits in the exponent are set, the value is infinite or NaN. 238 // This is true for all binary IEEE-754 formats. 239 static const int infinite_exponent = (1 << ebits) - 1; 240 static const int max_normal_exponent = infinite_exponent - 1; 241 242 // Apply the exponent bias to encode it for the result. Doing this early makes 243 // it easy to detect values that will be infinite or subnormal. 244 exponent += max_normal_exponent >> 1; 245 246 if (exponent > max_normal_exponent) { 247 // Overflow: the input is too large for the result type to represent. 248 if (round_mode == FPTieEven) { 249 // FPTieEven rounding mode handles overflows using infinities. 250 exponent = infinite_exponent; 251 mantissa = 0; 252 } else { 253 VIXL_ASSERT(round_mode == FPRoundOdd); 254 // FPRoundOdd rounding mode handles overflows using the largest magnitude 255 // normal number. 256 exponent = max_normal_exponent; 257 mantissa = (UINT64_C(1) << exponent_offset) - 1; 258 } 259 return (sign << sign_offset) | 260 (exponent << exponent_offset) | 261 (mantissa << mantissa_offset); 262 } 263 264 // Calculate the shift required to move the top mantissa bit to the proper 265 // place in the destination type. 266 const int highest_significant_bit = 63 - CountLeadingZeros(mantissa); 267 int shift = highest_significant_bit - mbits; 268 269 if (exponent <= 0) { 270 // The output will be subnormal (before rounding). 271 // For subnormal outputs, the shift must be adjusted by the exponent. 
The +1 272 // is necessary because the exponent of a subnormal value (encoded as 0) is 273 // the same as the exponent of the smallest normal value (encoded as 1). 274 shift += -exponent + 1; 275 276 // Handle inputs that would produce a zero output. 277 // 278 // Shifts higher than highest_significant_bit+1 will always produce a zero 279 // result. A shift of exactly highest_significant_bit+1 might produce a 280 // non-zero result after rounding. 281 if (shift > (highest_significant_bit + 1)) { 282 if (round_mode == FPTieEven) { 283 // The result will always be +/-0.0. 284 return sign << sign_offset; 285 } else { 286 VIXL_ASSERT(round_mode == FPRoundOdd); 287 VIXL_ASSERT(mantissa != 0); 288 // For FPRoundOdd, if the mantissa is too small to represent and 289 // non-zero return the next "odd" value. 290 return (sign << sign_offset) | 1; 291 } 292 } 293 294 // Properly encode the exponent for a subnormal output. 295 exponent = 0; 296 } else { 297 // Clear the topmost mantissa bit, since this is not encoded in IEEE-754 298 // normal values. 299 mantissa &= ~(UINT64_C(1) << highest_significant_bit); 300 } 301 302 if (shift > 0) { 303 if (round_mode == FPTieEven) { 304 // We have to shift the mantissa to the right. Some precision is lost, so 305 // we need to apply rounding. 306 uint64_t onebit_mantissa = (mantissa >> (shift)) & 1; 307 uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1; 308 uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa); 309 uint64_t adjusted = mantissa - adjustment; 310 T halfbit_adjusted = (adjusted >> (shift-1)) & 1; 311 312 T result = (sign << sign_offset) | 313 (exponent << exponent_offset) | 314 ((mantissa >> shift) << mantissa_offset); 315 316 // A very large mantissa can overflow during rounding. If this happens, 317 // the exponent should be incremented and the mantissa set to 1.0 318 // (encoded as 0). Applying halfbit_adjusted after assembling the float 319 // has the nice side-effect that this case is handled for free. 
320 // 321 // This also handles cases where a very large finite value overflows to 322 // infinity, or where a very large subnormal value overflows to become 323 // normal. 324 return result + halfbit_adjusted; 325 } else { 326 VIXL_ASSERT(round_mode == FPRoundOdd); 327 // If any bits at position halfbit or below are set, onebit (ie. the 328 // bottom bit of the resulting mantissa) must be set. 329 uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1); 330 if (fractional_bits != 0) { 331 mantissa |= UINT64_C(1) << shift; 332 } 333 334 return (sign << sign_offset) | 335 (exponent << exponent_offset) | 336 ((mantissa >> shift) << mantissa_offset); 337 } 338 } else { 339 // We have to shift the mantissa to the left (or not at all). The input 340 // mantissa is exactly representable in the output mantissa, so apply no 341 // rounding correction. 342 return (sign << sign_offset) | 343 (exponent << exponent_offset) | 344 ((mantissa << -shift) << mantissa_offset); 345 } 346 } 347 348 349 // Representation of memory, with typed getters and setters for access. 350 class Memory { 351 public: 352 template <typename T> 353 static T AddressUntag(T address) { 354 // Cast the address using a C-style cast. A reinterpret_cast would be 355 // appropriate, but it can't cast one integral type to another. 
356 uint64_t bits = (uint64_t)address; 357 return (T)(bits & ~kAddressTagMask); 358 } 359 360 template <typename T, typename A> 361 static T Read(A address) { 362 T value; 363 address = AddressUntag(address); 364 VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || 365 (sizeof(value) == 4) || (sizeof(value) == 8) || 366 (sizeof(value) == 16)); 367 memcpy(&value, reinterpret_cast<const char *>(address), sizeof(value)); 368 return value; 369 } 370 371 template <typename T, typename A> 372 static void Write(A address, T value) { 373 address = AddressUntag(address); 374 VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) || 375 (sizeof(value) == 4) || (sizeof(value) == 8) || 376 (sizeof(value) == 16)); 377 memcpy(reinterpret_cast<char *>(address), &value, sizeof(value)); 378 } 379 }; 380 381 // Represent a register (r0-r31, v0-v31). 382 template<int kSizeInBytes> 383 class SimRegisterBase { 384 public: 385 SimRegisterBase() : written_since_last_log_(false) {} 386 387 // Write the specified value. The value is zero-extended if necessary. 388 template<typename T> 389 void Set(T new_value) { 390 VIXL_STATIC_ASSERT(sizeof(new_value) <= kSizeInBytes); 391 if (sizeof(new_value) < kSizeInBytes) { 392 // All AArch64 registers are zero-extending. 393 memset(value_ + sizeof(new_value), 0, kSizeInBytes - sizeof(new_value)); 394 } 395 memcpy(value_, &new_value, sizeof(new_value)); 396 NotifyRegisterWrite(); 397 } 398 399 // Insert a typed value into a register, leaving the rest of the register 400 // unchanged. The lane parameter indicates where in the register the value 401 // should be inserted, in the range [ 0, sizeof(value_) / sizeof(T) ), where 402 // 0 represents the least significant bits. 
403 template<typename T> 404 void Insert(int lane, T new_value) { 405 VIXL_ASSERT(lane >= 0); 406 VIXL_ASSERT((sizeof(new_value) + 407 (lane * sizeof(new_value))) <= kSizeInBytes); 408 memcpy(&value_[lane * sizeof(new_value)], &new_value, sizeof(new_value)); 409 NotifyRegisterWrite(); 410 } 411 412 // Read the value as the specified type. The value is truncated if necessary. 413 template<typename T> 414 T Get(int lane = 0) const { 415 T result; 416 VIXL_ASSERT(lane >= 0); 417 VIXL_ASSERT((sizeof(result) + (lane * sizeof(result))) <= kSizeInBytes); 418 memcpy(&result, &value_[lane * sizeof(result)], sizeof(result)); 419 return result; 420 } 421 422 // TODO: Make this return a map of updated bytes, so that we can highlight 423 // updated lanes for load-and-insert. (That never happens for scalar code, but 424 // NEON has some instructions that can update individual lanes.) 425 bool WrittenSinceLastLog() const { 426 return written_since_last_log_; 427 } 428 429 void NotifyRegisterLogged() { 430 written_since_last_log_ = false; 431 } 432 433 protected: 434 uint8_t value_[kSizeInBytes]; 435 436 // Helpers to aid with register tracing. 437 bool written_since_last_log_; 438 439 void NotifyRegisterWrite() { 440 written_since_last_log_ = true; 441 } 442 }; 443 typedef SimRegisterBase<kXRegSizeInBytes> SimRegister; // r0-r31 444 typedef SimRegisterBase<kQRegSizeInBytes> SimVRegister; // v0-v31 445 446 // Representation of a vector register, with typed getters and setters for lanes 447 // and additional information to represent lane state. 
448 class LogicVRegister { 449 public: 450 inline LogicVRegister(SimVRegister& other) // NOLINT 451 : register_(other) { 452 for (unsigned i = 0; i < sizeof(saturated_) / sizeof(saturated_[0]); i++) { 453 saturated_[i] = kNotSaturated; 454 } 455 for (unsigned i = 0; i < sizeof(round_) / sizeof(round_[0]); i++) { 456 round_[i] = 0; 457 } 458 } 459 460 int64_t Int(VectorFormat vform, int index) const { 461 int64_t element; 462 switch (LaneSizeInBitsFromFormat(vform)) { 463 case 8: element = register_.Get<int8_t>(index); break; 464 case 16: element = register_.Get<int16_t>(index); break; 465 case 32: element = register_.Get<int32_t>(index); break; 466 case 64: element = register_.Get<int64_t>(index); break; 467 default: VIXL_UNREACHABLE(); return 0; 468 } 469 return element; 470 } 471 472 uint64_t Uint(VectorFormat vform, int index) const { 473 uint64_t element; 474 switch (LaneSizeInBitsFromFormat(vform)) { 475 case 8: element = register_.Get<uint8_t>(index); break; 476 case 16: element = register_.Get<uint16_t>(index); break; 477 case 32: element = register_.Get<uint32_t>(index); break; 478 case 64: element = register_.Get<uint64_t>(index); break; 479 default: VIXL_UNREACHABLE(); return 0; 480 } 481 return element; 482 } 483 484 int64_t IntLeftJustified(VectorFormat vform, int index) const { 485 return Int(vform, index) << (64 - LaneSizeInBitsFromFormat(vform)); 486 } 487 488 uint64_t UintLeftJustified(VectorFormat vform, int index) const { 489 return Uint(vform, index) << (64 - LaneSizeInBitsFromFormat(vform)); 490 } 491 492 void SetInt(VectorFormat vform, int index, int64_t value) const { 493 switch (LaneSizeInBitsFromFormat(vform)) { 494 case 8: register_.Insert(index, static_cast<int8_t>(value)); break; 495 case 16: register_.Insert(index, static_cast<int16_t>(value)); break; 496 case 32: register_.Insert(index, static_cast<int32_t>(value)); break; 497 case 64: register_.Insert(index, static_cast<int64_t>(value)); break; 498 default: VIXL_UNREACHABLE(); return; 
499 } 500 } 501 502 void SetUint(VectorFormat vform, int index, uint64_t value) const { 503 switch (LaneSizeInBitsFromFormat(vform)) { 504 case 8: register_.Insert(index, static_cast<uint8_t>(value)); break; 505 case 16: register_.Insert(index, static_cast<uint16_t>(value)); break; 506 case 32: register_.Insert(index, static_cast<uint32_t>(value)); break; 507 case 64: register_.Insert(index, static_cast<uint64_t>(value)); break; 508 default: VIXL_UNREACHABLE(); return; 509 } 510 } 511 512 void ReadUintFromMem(VectorFormat vform, int index, uint64_t addr) const { 513 switch (LaneSizeInBitsFromFormat(vform)) { 514 case 8: register_.Insert(index, Memory::Read<uint8_t>(addr)); break; 515 case 16: register_.Insert(index, Memory::Read<uint16_t>(addr)); break; 516 case 32: register_.Insert(index, Memory::Read<uint32_t>(addr)); break; 517 case 64: register_.Insert(index, Memory::Read<uint64_t>(addr)); break; 518 default: VIXL_UNREACHABLE(); return; 519 } 520 } 521 522 void WriteUintToMem(VectorFormat vform, int index, uint64_t addr) const { 523 switch (LaneSizeInBitsFromFormat(vform)) { 524 case 8: Memory::Write<uint8_t>(addr, Uint(vform, index)); break; 525 case 16: Memory::Write<uint16_t>(addr, Uint(vform, index)); break; 526 case 32: Memory::Write<uint32_t>(addr, Uint(vform, index)); break; 527 case 64: Memory::Write<uint64_t>(addr, Uint(vform, index)); break; 528 } 529 } 530 531 template <typename T> 532 T Float(int index) const { 533 return register_.Get<T>(index); 534 } 535 536 template <typename T> 537 void SetFloat(int index, T value) const { 538 register_.Insert(index, value); 539 } 540 541 // When setting a result in a register of size less than Q, the top bits of 542 // the Q register must be cleared. 543 void ClearForWrite(VectorFormat vform) const { 544 unsigned size = RegisterSizeInBytesFromFormat(vform); 545 for (unsigned i = size; i < kQRegSizeInBytes; i++) { 546 SetUint(kFormat16B, i, 0); 547 } 548 } 549 550 // Saturation state for each lane of a vector. 
551 enum Saturation { 552 kNotSaturated = 0, 553 kSignedSatPositive = 1 << 0, 554 kSignedSatNegative = 1 << 1, 555 kSignedSatMask = kSignedSatPositive | kSignedSatNegative, 556 kSignedSatUndefined = kSignedSatMask, 557 kUnsignedSatPositive = 1 << 2, 558 kUnsignedSatNegative = 1 << 3, 559 kUnsignedSatMask = kUnsignedSatPositive | kUnsignedSatNegative, 560 kUnsignedSatUndefined = kUnsignedSatMask 561 }; 562 563 // Getters for saturation state. 564 Saturation GetSignedSaturation(int index) { 565 return static_cast<Saturation>(saturated_[index] & kSignedSatMask); 566 } 567 568 Saturation GetUnsignedSaturation(int index) { 569 return static_cast<Saturation>(saturated_[index] & kUnsignedSatMask); 570 } 571 572 // Setters for saturation state. 573 void ClearSat(int index) { 574 saturated_[index] = kNotSaturated; 575 } 576 577 void SetSignedSat(int index, bool positive) { 578 SetSatFlag(index, positive ? kSignedSatPositive : kSignedSatNegative); 579 } 580 581 void SetUnsignedSat(int index, bool positive) { 582 SetSatFlag(index, positive ? kUnsignedSatPositive : kUnsignedSatNegative); 583 } 584 585 void SetSatFlag(int index, Saturation sat) { 586 saturated_[index] = static_cast<Saturation>(saturated_[index] | sat); 587 VIXL_ASSERT((sat & kUnsignedSatMask) != kUnsignedSatUndefined); 588 VIXL_ASSERT((sat & kSignedSatMask) != kSignedSatUndefined); 589 } 590 591 // Saturate lanes of a vector based on saturation state. 
592 LogicVRegister& SignedSaturate(VectorFormat vform) { 593 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 594 Saturation sat = GetSignedSaturation(i); 595 if (sat == kSignedSatPositive) { 596 SetInt(vform, i, MaxIntFromFormat(vform)); 597 } else if (sat == kSignedSatNegative) { 598 SetInt(vform, i, MinIntFromFormat(vform)); 599 } 600 } 601 return *this; 602 } 603 604 LogicVRegister& UnsignedSaturate(VectorFormat vform) { 605 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 606 Saturation sat = GetUnsignedSaturation(i); 607 if (sat == kUnsignedSatPositive) { 608 SetUint(vform, i, MaxUintFromFormat(vform)); 609 } else if (sat == kUnsignedSatNegative) { 610 SetUint(vform, i, 0); 611 } 612 } 613 return *this; 614 } 615 616 // Getter for rounding state. 617 bool GetRounding(int index) { 618 return round_[index]; 619 } 620 621 // Setter for rounding state. 622 void SetRounding(int index, bool round) { 623 round_[index] = round; 624 } 625 626 // Round lanes of a vector based on rounding state. 627 LogicVRegister& Round(VectorFormat vform) { 628 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 629 SetInt(vform, i, Int(vform, i) + (GetRounding(i) ? 1 : 0)); 630 } 631 return *this; 632 } 633 634 // Unsigned halve lanes of a vector, and use the saturation state to set the 635 // top bit. 636 LogicVRegister& Uhalve(VectorFormat vform) { 637 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 638 uint64_t val = Uint(vform, i); 639 SetRounding(i, (val & 1) == 1); 640 val >>= 1; 641 if (GetUnsignedSaturation(i) != kNotSaturated) { 642 // If the operation causes unsigned saturation, the bit shifted into the 643 // most significant bit must be set. 644 val |= (MaxUintFromFormat(vform) >> 1) + 1; 645 } 646 SetInt(vform, i, val); 647 } 648 return *this; 649 } 650 651 // Signed halve lanes of a vector, and use the carry state to set the top bit. 
652 LogicVRegister& Halve(VectorFormat vform) { 653 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 654 int64_t val = Int(vform, i); 655 SetRounding(i, (val & 1) == 1); 656 val >>= 1; 657 if (GetSignedSaturation(i) != kNotSaturated) { 658 // If the operation causes signed saturation, the sign bit must be 659 // inverted. 660 val ^= (MaxUintFromFormat(vform) >> 1) + 1; 661 } 662 SetInt(vform, i, val); 663 } 664 return *this; 665 } 666 667 private: 668 SimVRegister& register_; 669 670 // Allocate one saturation state entry per lane; largest register is type Q, 671 // and lanes can be a minimum of one byte wide. 672 Saturation saturated_[kQRegSizeInBytes]; 673 674 // Allocate one rounding state entry per lane. 675 bool round_[kQRegSizeInBytes]; 676 }; 677 678 // The proper way to initialize a simulated system register (such as NZCV) is as 679 // follows: 680 // SimSystemRegister nzcv = SimSystemRegister::DefaultValueFor(NZCV); 681 class SimSystemRegister { 682 public: 683 // The default constructor represents a register which has no writable bits. 684 // It is not possible to set its value to anything other than 0. 685 SimSystemRegister() : value_(0), write_ignore_mask_(0xffffffff) { } 686 687 uint32_t RawValue() const { 688 return value_; 689 } 690 691 void SetRawValue(uint32_t new_value) { 692 value_ = (value_ & write_ignore_mask_) | (new_value & ~write_ignore_mask_); 693 } 694 695 uint32_t Bits(int msb, int lsb) const { 696 return unsigned_bitextract_32(msb, lsb, value_); 697 } 698 699 int32_t SignedBits(int msb, int lsb) const { 700 return signed_bitextract_32(msb, lsb, value_); 701 } 702 703 void SetBits(int msb, int lsb, uint32_t bits); 704 705 // Default system register values. 
706 static SimSystemRegister DefaultValueFor(SystemRegister id); 707 708 #define DEFINE_GETTER(Name, HighBit, LowBit, Func) \ 709 uint32_t Name() const { return Func(HighBit, LowBit); } \ 710 void Set##Name(uint32_t bits) { SetBits(HighBit, LowBit, bits); } 711 #define DEFINE_WRITE_IGNORE_MASK(Name, Mask) \ 712 static const uint32_t Name##WriteIgnoreMask = ~static_cast<uint32_t>(Mask); 713 714 SYSTEM_REGISTER_FIELDS_LIST(DEFINE_GETTER, DEFINE_WRITE_IGNORE_MASK) 715 716 #undef DEFINE_ZERO_BITS 717 #undef DEFINE_GETTER 718 719 protected: 720 // Most system registers only implement a few of the bits in the word. Other 721 // bits are "read-as-zero, write-ignored". The write_ignore_mask argument 722 // describes the bits which are not modifiable. 723 SimSystemRegister(uint32_t value, uint32_t write_ignore_mask) 724 : value_(value), write_ignore_mask_(write_ignore_mask) { } 725 726 uint32_t value_; 727 uint32_t write_ignore_mask_; 728 }; 729 730 731 class SimExclusiveLocalMonitor { 732 public: 733 SimExclusiveLocalMonitor() : kSkipClearProbability(8), seed_(0x87654321) { 734 Clear(); 735 } 736 737 // Clear the exclusive monitor (like clrex). 738 void Clear() { 739 address_ = 0; 740 size_ = 0; 741 } 742 743 // Clear the exclusive monitor most of the time. 744 void MaybeClear() { 745 if ((seed_ % kSkipClearProbability) != 0) { 746 Clear(); 747 } 748 749 // Advance seed_ using a simple linear congruential generator. 750 seed_ = (seed_ * 48271) % 2147483647; 751 } 752 753 // Mark the address range for exclusive access (like load-exclusive). 754 void MarkExclusive(uint64_t address, size_t size) { 755 address_ = address; 756 size_ = size; 757 } 758 759 // Return true if the address range is marked (like store-exclusive). 760 // This helper doesn't implicitly clear the monitor. 761 bool IsExclusive(uint64_t address, size_t size) { 762 VIXL_ASSERT(size > 0); 763 // Be pedantic: Require both the address and the size to match. 
764 return (size == size_) && (address == address_); 765 } 766 767 private: 768 uint64_t address_; 769 size_t size_; 770 771 const int kSkipClearProbability; 772 uint32_t seed_; 773 }; 774 775 776 // We can't accurate simulate the global monitor since it depends on external 777 // influences. Instead, this implementation occasionally causes accesses to 778 // fail, according to kPassProbability. 779 class SimExclusiveGlobalMonitor { 780 public: 781 SimExclusiveGlobalMonitor() : kPassProbability(8), seed_(0x87654321) {} 782 783 bool IsExclusive(uint64_t address, size_t size) { 784 USE(address); 785 USE(size); 786 787 bool pass = (seed_ % kPassProbability) != 0; 788 // Advance seed_ using a simple linear congruential generator. 789 seed_ = (seed_ * 48271) % 2147483647; 790 return pass; 791 } 792 793 private: 794 const int kPassProbability; 795 uint32_t seed_; 796 }; 797 798 799 class Simulator : public DecoderVisitor { 800 public: 801 explicit Simulator(Decoder* decoder, FILE* stream = stdout); 802 ~Simulator(); 803 804 void ResetState(); 805 806 // Run the simulator. 807 virtual void Run(); 808 void RunFrom(const Instruction* first); 809 810 // Simulation helpers. 811 const Instruction* pc() const { return pc_; } 812 void set_pc(const Instruction* new_pc) { 813 pc_ = Memory::AddressUntag(new_pc); 814 pc_modified_ = true; 815 } 816 817 void increment_pc() { 818 if (!pc_modified_) { 819 pc_ = pc_->NextInstruction(); 820 } 821 822 pc_modified_ = false; 823 } 824 825 void ExecuteInstruction() { 826 // The program counter should always be aligned. 827 VIXL_ASSERT(IsWordAligned(pc_)); 828 decoder_->Decode(pc_); 829 increment_pc(); 830 } 831 832 // Declare all Visitor functions. 833 #define DECLARE(A) virtual void Visit##A(const Instruction* instr); 834 VISITOR_LIST(DECLARE) 835 #undef DECLARE 836 837 // Integer register accessors. 838 839 // Basic accessor: Read the register as the specified type. 
840 template<typename T> 841 T reg(unsigned code, Reg31Mode r31mode = Reg31IsZeroRegister) const { 842 VIXL_ASSERT(code < kNumberOfRegisters); 843 if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { 844 T result; 845 memset(&result, 0, sizeof(result)); 846 return result; 847 } 848 return registers_[code].Get<T>(); 849 } 850 851 // Common specialized accessors for the reg() template. 852 int32_t wreg(unsigned code, 853 Reg31Mode r31mode = Reg31IsZeroRegister) const { 854 return reg<int32_t>(code, r31mode); 855 } 856 857 int64_t xreg(unsigned code, 858 Reg31Mode r31mode = Reg31IsZeroRegister) const { 859 return reg<int64_t>(code, r31mode); 860 } 861 862 // As above, with parameterized size and return type. The value is 863 // either zero-extended or truncated to fit, as required. 864 template<typename T> 865 T reg(unsigned size, unsigned code, 866 Reg31Mode r31mode = Reg31IsZeroRegister) const { 867 uint64_t raw; 868 switch (size) { 869 case kWRegSize: raw = reg<uint32_t>(code, r31mode); break; 870 case kXRegSize: raw = reg<uint64_t>(code, r31mode); break; 871 default: 872 VIXL_UNREACHABLE(); 873 return 0; 874 } 875 876 T result; 877 VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw)); 878 // Copy the result and truncate to fit. This assumes a little-endian host. 879 memcpy(&result, &raw, sizeof(result)); 880 return result; 881 } 882 883 // Use int64_t by default if T is not specified. 884 int64_t reg(unsigned size, unsigned code, 885 Reg31Mode r31mode = Reg31IsZeroRegister) const { 886 return reg<int64_t>(size, code, r31mode); 887 } 888 889 enum RegLogMode { 890 LogRegWrites, 891 NoRegLog 892 }; 893 894 // Write 'value' into an integer register. The value is zero-extended. This 895 // behaviour matches AArch64 register writes. 
896 template<typename T> 897 void set_reg(unsigned code, T value, 898 RegLogMode log_mode = LogRegWrites, 899 Reg31Mode r31mode = Reg31IsZeroRegister) { 900 VIXL_STATIC_ASSERT((sizeof(T) == kWRegSizeInBytes) || 901 (sizeof(T) == kXRegSizeInBytes)); 902 VIXL_ASSERT(code < kNumberOfRegisters); 903 904 if ((code == 31) && (r31mode == Reg31IsZeroRegister)) { 905 return; 906 } 907 908 registers_[code].Set(value); 909 910 if (log_mode == LogRegWrites) LogRegister(code, r31mode); 911 } 912 913 // Common specialized accessors for the set_reg() template. 914 void set_wreg(unsigned code, int32_t value, 915 RegLogMode log_mode = LogRegWrites, 916 Reg31Mode r31mode = Reg31IsZeroRegister) { 917 set_reg(code, value, log_mode, r31mode); 918 } 919 920 void set_xreg(unsigned code, int64_t value, 921 RegLogMode log_mode = LogRegWrites, 922 Reg31Mode r31mode = Reg31IsZeroRegister) { 923 set_reg(code, value, log_mode, r31mode); 924 } 925 926 // As above, with parameterized size and type. The value is either 927 // zero-extended or truncated to fit, as required. 928 template<typename T> 929 void set_reg(unsigned size, unsigned code, T value, 930 RegLogMode log_mode = LogRegWrites, 931 Reg31Mode r31mode = Reg31IsZeroRegister) { 932 // Zero-extend the input. 933 uint64_t raw = 0; 934 VIXL_STATIC_ASSERT(sizeof(value) <= sizeof(raw)); 935 memcpy(&raw, &value, sizeof(value)); 936 937 // Write (and possibly truncate) the value. 938 switch (size) { 939 case kWRegSize: set_reg<uint32_t>(code, raw, log_mode, r31mode); break; 940 case kXRegSize: set_reg<uint64_t>(code, raw, log_mode, r31mode); break; 941 default: 942 VIXL_UNREACHABLE(); 943 return; 944 } 945 } 946 947 // Common specialized accessors for the set_reg() template. 948 949 // Commonly-used special cases. 
950 template<typename T> 951 void set_lr(T value) { 952 set_reg(kLinkRegCode, value); 953 } 954 955 template<typename T> 956 void set_sp(T value) { 957 set_reg(31, value, LogRegWrites, Reg31IsStackPointer); 958 } 959 960 // Vector register accessors. 961 // These are equivalent to the integer register accessors, but for vector 962 // registers. 963 964 // A structure for representing a 128-bit Q register. 965 struct qreg_t { uint8_t val[kQRegSizeInBytes]; }; 966 967 // Basic accessor: read the register as the specified type. 968 template<typename T> 969 T vreg(unsigned code) const { 970 VIXL_STATIC_ASSERT((sizeof(T) == kBRegSizeInBytes) || 971 (sizeof(T) == kHRegSizeInBytes) || 972 (sizeof(T) == kSRegSizeInBytes) || 973 (sizeof(T) == kDRegSizeInBytes) || 974 (sizeof(T) == kQRegSizeInBytes)); 975 VIXL_ASSERT(code < kNumberOfVRegisters); 976 977 return vregisters_[code].Get<T>(); 978 } 979 980 // Common specialized accessors for the vreg() template. 981 int8_t breg(unsigned code) const { 982 return vreg<int8_t>(code); 983 } 984 985 int16_t hreg(unsigned code) const { 986 return vreg<int16_t>(code); 987 } 988 989 float sreg(unsigned code) const { 990 return vreg<float>(code); 991 } 992 993 uint32_t sreg_bits(unsigned code) const { 994 return vreg<uint32_t>(code); 995 } 996 997 double dreg(unsigned code) const { 998 return vreg<double>(code); 999 } 1000 1001 uint64_t dreg_bits(unsigned code) const { 1002 return vreg<uint64_t>(code); 1003 } 1004 1005 qreg_t qreg(unsigned code) const { 1006 return vreg<qreg_t>(code); 1007 } 1008 1009 // As above, with parameterized size and return type. The value is 1010 // either zero-extended or truncated to fit, as required. 
1011 template<typename T> 1012 T vreg(unsigned size, unsigned code) const { 1013 uint64_t raw = 0; 1014 T result; 1015 1016 switch (size) { 1017 case kSRegSize: raw = vreg<uint32_t>(code); break; 1018 case kDRegSize: raw = vreg<uint64_t>(code); break; 1019 default: 1020 VIXL_UNREACHABLE(); 1021 break; 1022 } 1023 1024 VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(raw)); 1025 // Copy the result and truncate to fit. This assumes a little-endian host. 1026 memcpy(&result, &raw, sizeof(result)); 1027 return result; 1028 } 1029 1030 inline SimVRegister& vreg(unsigned code) { 1031 return vregisters_[code]; 1032 } 1033 1034 // Basic accessor: Write the specified value. 1035 template<typename T> 1036 void set_vreg(unsigned code, T value, 1037 RegLogMode log_mode = LogRegWrites) { 1038 VIXL_STATIC_ASSERT((sizeof(value) == kBRegSizeInBytes) || 1039 (sizeof(value) == kHRegSizeInBytes) || 1040 (sizeof(value) == kSRegSizeInBytes) || 1041 (sizeof(value) == kDRegSizeInBytes) || 1042 (sizeof(value) == kQRegSizeInBytes)); 1043 VIXL_ASSERT(code < kNumberOfVRegisters); 1044 vregisters_[code].Set(value); 1045 1046 if (log_mode == LogRegWrites) { 1047 LogVRegister(code, GetPrintRegisterFormat(value)); 1048 } 1049 } 1050 1051 // Common specialized accessors for the set_vreg() template. 
1052 void set_breg(unsigned code, int8_t value, 1053 RegLogMode log_mode = LogRegWrites) { 1054 set_vreg(code, value, log_mode); 1055 } 1056 1057 void set_hreg(unsigned code, int16_t value, 1058 RegLogMode log_mode = LogRegWrites) { 1059 set_vreg(code, value, log_mode); 1060 } 1061 1062 void set_sreg(unsigned code, float value, 1063 RegLogMode log_mode = LogRegWrites) { 1064 set_vreg(code, value, log_mode); 1065 } 1066 1067 void set_sreg_bits(unsigned code, uint32_t value, 1068 RegLogMode log_mode = LogRegWrites) { 1069 set_vreg(code, value, log_mode); 1070 } 1071 1072 void set_dreg(unsigned code, double value, 1073 RegLogMode log_mode = LogRegWrites) { 1074 set_vreg(code, value, log_mode); 1075 } 1076 1077 void set_dreg_bits(unsigned code, uint64_t value, 1078 RegLogMode log_mode = LogRegWrites) { 1079 set_vreg(code, value, log_mode); 1080 } 1081 1082 void set_qreg(unsigned code, qreg_t value, 1083 RegLogMode log_mode = LogRegWrites) { 1084 set_vreg(code, value, log_mode); 1085 } 1086 1087 bool N() const { return nzcv_.N() != 0; } 1088 bool Z() const { return nzcv_.Z() != 0; } 1089 bool C() const { return nzcv_.C() != 0; } 1090 bool V() const { return nzcv_.V() != 0; } 1091 SimSystemRegister& nzcv() { return nzcv_; } 1092 1093 // TODO: Find a way to make the fpcr_ members return the proper types, so 1094 // these accessors are not necessary. 1095 FPRounding RMode() { return static_cast<FPRounding>(fpcr_.RMode()); } 1096 bool DN() { return fpcr_.DN() != 0; } 1097 SimSystemRegister& fpcr() { return fpcr_; } 1098 1099 // Specify relevant register formats for Print(V)Register and related helpers. 1100 enum PrintRegisterFormat { 1101 // The lane size. 
1102 kPrintRegLaneSizeB = 0 << 0, 1103 kPrintRegLaneSizeH = 1 << 0, 1104 kPrintRegLaneSizeS = 2 << 0, 1105 kPrintRegLaneSizeW = kPrintRegLaneSizeS, 1106 kPrintRegLaneSizeD = 3 << 0, 1107 kPrintRegLaneSizeX = kPrintRegLaneSizeD, 1108 kPrintRegLaneSizeQ = 4 << 0, 1109 1110 kPrintRegLaneSizeOffset = 0, 1111 kPrintRegLaneSizeMask = 7 << 0, 1112 1113 // The lane count. 1114 kPrintRegAsScalar = 0, 1115 kPrintRegAsDVector = 1 << 3, 1116 kPrintRegAsQVector = 2 << 3, 1117 1118 kPrintRegAsVectorMask = 3 << 3, 1119 1120 // Indicate floating-point format lanes. (This flag is only supported for S- 1121 // and D-sized lanes.) 1122 kPrintRegAsFP = 1 << 5, 1123 1124 // Supported combinations. 1125 1126 kPrintXReg = kPrintRegLaneSizeX | kPrintRegAsScalar, 1127 kPrintWReg = kPrintRegLaneSizeW | kPrintRegAsScalar, 1128 kPrintSReg = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP, 1129 kPrintDReg = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP, 1130 1131 kPrintReg1B = kPrintRegLaneSizeB | kPrintRegAsScalar, 1132 kPrintReg8B = kPrintRegLaneSizeB | kPrintRegAsDVector, 1133 kPrintReg16B = kPrintRegLaneSizeB | kPrintRegAsQVector, 1134 kPrintReg1H = kPrintRegLaneSizeH | kPrintRegAsScalar, 1135 kPrintReg4H = kPrintRegLaneSizeH | kPrintRegAsDVector, 1136 kPrintReg8H = kPrintRegLaneSizeH | kPrintRegAsQVector, 1137 kPrintReg1S = kPrintRegLaneSizeS | kPrintRegAsScalar, 1138 kPrintReg2S = kPrintRegLaneSizeS | kPrintRegAsDVector, 1139 kPrintReg4S = kPrintRegLaneSizeS | kPrintRegAsQVector, 1140 kPrintReg1SFP = kPrintRegLaneSizeS | kPrintRegAsScalar | kPrintRegAsFP, 1141 kPrintReg2SFP = kPrintRegLaneSizeS | kPrintRegAsDVector | kPrintRegAsFP, 1142 kPrintReg4SFP = kPrintRegLaneSizeS | kPrintRegAsQVector | kPrintRegAsFP, 1143 kPrintReg1D = kPrintRegLaneSizeD | kPrintRegAsScalar, 1144 kPrintReg2D = kPrintRegLaneSizeD | kPrintRegAsQVector, 1145 kPrintReg1DFP = kPrintRegLaneSizeD | kPrintRegAsScalar | kPrintRegAsFP, 1146 kPrintReg2DFP = kPrintRegLaneSizeD | kPrintRegAsQVector | 
kPrintRegAsFP, 1147 kPrintReg1Q = kPrintRegLaneSizeQ | kPrintRegAsScalar 1148 }; 1149 1150 unsigned GetPrintRegLaneSizeInBytesLog2(PrintRegisterFormat format) { 1151 return (format & kPrintRegLaneSizeMask) >> kPrintRegLaneSizeOffset; 1152 } 1153 1154 unsigned GetPrintRegLaneSizeInBytes(PrintRegisterFormat format) { 1155 return 1 << GetPrintRegLaneSizeInBytesLog2(format); 1156 } 1157 1158 unsigned GetPrintRegSizeInBytesLog2(PrintRegisterFormat format) { 1159 if (format & kPrintRegAsDVector) return kDRegSizeInBytesLog2; 1160 if (format & kPrintRegAsQVector) return kQRegSizeInBytesLog2; 1161 1162 // Scalar types. 1163 return GetPrintRegLaneSizeInBytesLog2(format); 1164 } 1165 1166 unsigned GetPrintRegSizeInBytes(PrintRegisterFormat format) { 1167 return 1 << GetPrintRegSizeInBytesLog2(format); 1168 } 1169 1170 unsigned GetPrintRegLaneCount(PrintRegisterFormat format) { 1171 unsigned reg_size_log2 = GetPrintRegSizeInBytesLog2(format); 1172 unsigned lane_size_log2 = GetPrintRegLaneSizeInBytesLog2(format); 1173 VIXL_ASSERT(reg_size_log2 >= lane_size_log2); 1174 return 1 << (reg_size_log2 - lane_size_log2); 1175 } 1176 1177 PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned reg_size, 1178 unsigned lane_size); 1179 1180 PrintRegisterFormat GetPrintRegisterFormatForSize(unsigned size) { 1181 return GetPrintRegisterFormatForSize(size, size); 1182 } 1183 1184 PrintRegisterFormat GetPrintRegisterFormatForSizeFP(unsigned size) { 1185 switch (size) { 1186 default: VIXL_UNREACHABLE(); return kPrintDReg; 1187 case kDRegSizeInBytes: return kPrintDReg; 1188 case kSRegSizeInBytes: return kPrintSReg; 1189 } 1190 } 1191 1192 PrintRegisterFormat GetPrintRegisterFormatTryFP(PrintRegisterFormat format) { 1193 if ((GetPrintRegLaneSizeInBytes(format) == kSRegSizeInBytes) || 1194 (GetPrintRegLaneSizeInBytes(format) == kDRegSizeInBytes)) { 1195 return static_cast<PrintRegisterFormat>(format | kPrintRegAsFP); 1196 } 1197 return format; 1198 } 1199 1200 template<typename T> 1201 
PrintRegisterFormat GetPrintRegisterFormat(T value) { 1202 return GetPrintRegisterFormatForSize(sizeof(value)); 1203 } 1204 1205 PrintRegisterFormat GetPrintRegisterFormat(double value) { 1206 VIXL_STATIC_ASSERT(sizeof(value) == kDRegSizeInBytes); 1207 return GetPrintRegisterFormatForSizeFP(sizeof(value)); 1208 } 1209 1210 PrintRegisterFormat GetPrintRegisterFormat(float value) { 1211 VIXL_STATIC_ASSERT(sizeof(value) == kSRegSizeInBytes); 1212 return GetPrintRegisterFormatForSizeFP(sizeof(value)); 1213 } 1214 1215 PrintRegisterFormat GetPrintRegisterFormat(VectorFormat vform); 1216 1217 // Print all registers of the specified types. 1218 void PrintRegisters(); 1219 void PrintVRegisters(); 1220 void PrintSystemRegisters(); 1221 1222 // As above, but only print the registers that have been updated. 1223 void PrintWrittenRegisters(); 1224 void PrintWrittenVRegisters(); 1225 1226 // As above, but respect LOG_REG and LOG_VREG. 1227 void LogWrittenRegisters() { 1228 if (trace_parameters() & LOG_REGS) PrintWrittenRegisters(); 1229 } 1230 void LogWrittenVRegisters() { 1231 if (trace_parameters() & LOG_VREGS) PrintWrittenVRegisters(); 1232 } 1233 void LogAllWrittenRegisters() { 1234 LogWrittenRegisters(); 1235 LogWrittenVRegisters(); 1236 } 1237 1238 // Print individual register values (after update). 1239 void PrintRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer); 1240 void PrintVRegister(unsigned code, PrintRegisterFormat format); 1241 void PrintSystemRegister(SystemRegister id); 1242 1243 // Like Print* (above), but respect trace_parameters(). 
1244 void LogRegister(unsigned code, Reg31Mode r31mode = Reg31IsStackPointer) { 1245 if (trace_parameters() & LOG_REGS) PrintRegister(code, r31mode); 1246 } 1247 void LogVRegister(unsigned code, PrintRegisterFormat format) { 1248 if (trace_parameters() & LOG_VREGS) PrintVRegister(code, format); 1249 } 1250 void LogSystemRegister(SystemRegister id) { 1251 if (trace_parameters() & LOG_SYSREGS) PrintSystemRegister(id); 1252 } 1253 1254 // Print memory accesses. 1255 void PrintRead(uintptr_t address, unsigned reg_code, 1256 PrintRegisterFormat format); 1257 void PrintWrite(uintptr_t address, unsigned reg_code, 1258 PrintRegisterFormat format); 1259 void PrintVRead(uintptr_t address, unsigned reg_code, 1260 PrintRegisterFormat format, unsigned lane); 1261 void PrintVWrite(uintptr_t address, unsigned reg_code, 1262 PrintRegisterFormat format, unsigned lane); 1263 1264 // Like Print* (above), but respect trace_parameters(). 1265 void LogRead(uintptr_t address, unsigned reg_code, 1266 PrintRegisterFormat format) { 1267 if (trace_parameters() & LOG_REGS) PrintRead(address, reg_code, format); 1268 } 1269 void LogWrite(uintptr_t address, unsigned reg_code, 1270 PrintRegisterFormat format) { 1271 if (trace_parameters() & LOG_WRITE) PrintWrite(address, reg_code, format); 1272 } 1273 void LogVRead(uintptr_t address, unsigned reg_code, 1274 PrintRegisterFormat format, unsigned lane = 0) { 1275 if (trace_parameters() & LOG_VREGS) { 1276 PrintVRead(address, reg_code, format, lane); 1277 } 1278 } 1279 void LogVWrite(uintptr_t address, unsigned reg_code, 1280 PrintRegisterFormat format, unsigned lane = 0) { 1281 if (trace_parameters() & LOG_WRITE) { 1282 PrintVWrite(address, reg_code, format, lane); 1283 } 1284 } 1285 1286 // Helper functions for register tracing. 
1287 void PrintRegisterRawHelper(unsigned code, Reg31Mode r31mode, 1288 int size_in_bytes = kXRegSizeInBytes); 1289 void PrintVRegisterRawHelper(unsigned code, int bytes = kQRegSizeInBytes, 1290 int lsb = 0); 1291 void PrintVRegisterFPHelper(unsigned code, unsigned lane_size_in_bytes, 1292 int lane_count = 1, int rightmost_lane = 0); 1293 1294 void DoUnreachable(const Instruction* instr); 1295 void DoTrace(const Instruction* instr); 1296 void DoLog(const Instruction* instr); 1297 1298 static const char* WRegNameForCode(unsigned code, 1299 Reg31Mode mode = Reg31IsZeroRegister); 1300 static const char* XRegNameForCode(unsigned code, 1301 Reg31Mode mode = Reg31IsZeroRegister); 1302 static const char* SRegNameForCode(unsigned code); 1303 static const char* DRegNameForCode(unsigned code); 1304 static const char* VRegNameForCode(unsigned code); 1305 1306 bool coloured_trace() const { return coloured_trace_; } 1307 void set_coloured_trace(bool value); 1308 1309 int trace_parameters() const { return trace_parameters_; } 1310 void set_trace_parameters(int parameters); 1311 1312 void set_instruction_stats(bool value); 1313 1314 // Clear the simulated local monitor to force the next store-exclusive 1315 // instruction to fail. 
1316 void ClearLocalMonitor() { 1317 local_monitor_.Clear(); 1318 } 1319 1320 void SilenceExclusiveAccessWarning() { 1321 print_exclusive_access_warning_ = false; 1322 } 1323 1324 protected: 1325 const char* clr_normal; 1326 const char* clr_flag_name; 1327 const char* clr_flag_value; 1328 const char* clr_reg_name; 1329 const char* clr_reg_value; 1330 const char* clr_vreg_name; 1331 const char* clr_vreg_value; 1332 const char* clr_memory_address; 1333 const char* clr_warning; 1334 const char* clr_warning_message; 1335 const char* clr_printf; 1336 1337 // Simulation helpers ------------------------------------ 1338 bool ConditionPassed(Condition cond) { 1339 switch (cond) { 1340 case eq: 1341 return Z(); 1342 case ne: 1343 return !Z(); 1344 case hs: 1345 return C(); 1346 case lo: 1347 return !C(); 1348 case mi: 1349 return N(); 1350 case pl: 1351 return !N(); 1352 case vs: 1353 return V(); 1354 case vc: 1355 return !V(); 1356 case hi: 1357 return C() && !Z(); 1358 case ls: 1359 return !(C() && !Z()); 1360 case ge: 1361 return N() == V(); 1362 case lt: 1363 return N() != V(); 1364 case gt: 1365 return !Z() && (N() == V()); 1366 case le: 1367 return !(!Z() && (N() == V())); 1368 case nv: 1369 VIXL_FALLTHROUGH(); 1370 case al: 1371 return true; 1372 default: 1373 VIXL_UNREACHABLE(); 1374 return false; 1375 } 1376 } 1377 1378 bool ConditionPassed(Instr cond) { 1379 return ConditionPassed(static_cast<Condition>(cond)); 1380 } 1381 1382 bool ConditionFailed(Condition cond) { 1383 return !ConditionPassed(cond); 1384 } 1385 1386 void AddSubHelper(const Instruction* instr, int64_t op2); 1387 int64_t AddWithCarry(unsigned reg_size, 1388 bool set_flags, 1389 int64_t src1, 1390 int64_t src2, 1391 int64_t carry_in = 0); 1392 void LogicalHelper(const Instruction* instr, int64_t op2); 1393 void ConditionalCompareHelper(const Instruction* instr, int64_t op2); 1394 void LoadStoreHelper(const Instruction* instr, 1395 int64_t offset, 1396 AddrMode addrmode); 1397 void 
LoadStorePairHelper(const Instruction* instr, AddrMode addrmode); 1398 uintptr_t AddressModeHelper(unsigned addr_reg, 1399 int64_t offset, 1400 AddrMode addrmode); 1401 void NEONLoadStoreMultiStructHelper(const Instruction* instr, 1402 AddrMode addr_mode); 1403 void NEONLoadStoreSingleStructHelper(const Instruction* instr, 1404 AddrMode addr_mode); 1405 1406 uint64_t AddressUntag(uint64_t address) { 1407 return address & ~kAddressTagMask; 1408 } 1409 1410 template <typename T> 1411 T* AddressUntag(T* address) { 1412 uintptr_t address_raw = reinterpret_cast<uintptr_t>(address); 1413 return reinterpret_cast<T*>(AddressUntag(address_raw)); 1414 } 1415 1416 int64_t ShiftOperand(unsigned reg_size, 1417 int64_t value, 1418 Shift shift_type, 1419 unsigned amount); 1420 int64_t Rotate(unsigned reg_width, 1421 int64_t value, 1422 Shift shift_type, 1423 unsigned amount); 1424 int64_t ExtendValue(unsigned reg_width, 1425 int64_t value, 1426 Extend extend_type, 1427 unsigned left_shift = 0); 1428 1429 enum ReverseByteMode { 1430 Reverse16 = 0, 1431 Reverse32 = 1, 1432 Reverse64 = 2 1433 }; 1434 uint64_t ReverseBytes(uint64_t value, ReverseByteMode mode); 1435 uint64_t ReverseBits(uint64_t value, unsigned num_bits); 1436 uint16_t PolynomialMult(uint8_t op1, uint8_t op2); 1437 1438 void ld1(VectorFormat vform, 1439 LogicVRegister dst, 1440 uint64_t addr); 1441 void ld1(VectorFormat vform, 1442 LogicVRegister dst, 1443 int index, 1444 uint64_t addr); 1445 void ld1r(VectorFormat vform, 1446 LogicVRegister dst, 1447 uint64_t addr); 1448 void ld2(VectorFormat vform, 1449 LogicVRegister dst1, 1450 LogicVRegister dst2, 1451 uint64_t addr); 1452 void ld2(VectorFormat vform, 1453 LogicVRegister dst1, 1454 LogicVRegister dst2, 1455 int index, 1456 uint64_t addr); 1457 void ld2r(VectorFormat vform, 1458 LogicVRegister dst1, 1459 LogicVRegister dst2, 1460 uint64_t addr); 1461 void ld3(VectorFormat vform, 1462 LogicVRegister dst1, 1463 LogicVRegister dst2, 1464 LogicVRegister dst3, 1465 
uint64_t addr); 1466 void ld3(VectorFormat vform, 1467 LogicVRegister dst1, 1468 LogicVRegister dst2, 1469 LogicVRegister dst3, 1470 int index, 1471 uint64_t addr); 1472 void ld3r(VectorFormat vform, 1473 LogicVRegister dst1, 1474 LogicVRegister dst2, 1475 LogicVRegister dst3, 1476 uint64_t addr); 1477 void ld4(VectorFormat vform, 1478 LogicVRegister dst1, 1479 LogicVRegister dst2, 1480 LogicVRegister dst3, 1481 LogicVRegister dst4, 1482 uint64_t addr); 1483 void ld4(VectorFormat vform, 1484 LogicVRegister dst1, 1485 LogicVRegister dst2, 1486 LogicVRegister dst3, 1487 LogicVRegister dst4, 1488 int index, 1489 uint64_t addr); 1490 void ld4r(VectorFormat vform, 1491 LogicVRegister dst1, 1492 LogicVRegister dst2, 1493 LogicVRegister dst3, 1494 LogicVRegister dst4, 1495 uint64_t addr); 1496 void st1(VectorFormat vform, 1497 LogicVRegister src, 1498 uint64_t addr); 1499 void st1(VectorFormat vform, 1500 LogicVRegister src, 1501 int index, 1502 uint64_t addr); 1503 void st2(VectorFormat vform, 1504 LogicVRegister src, 1505 LogicVRegister src2, 1506 uint64_t addr); 1507 void st2(VectorFormat vform, 1508 LogicVRegister src, 1509 LogicVRegister src2, 1510 int index, 1511 uint64_t addr); 1512 void st3(VectorFormat vform, 1513 LogicVRegister src, 1514 LogicVRegister src2, 1515 LogicVRegister src3, 1516 uint64_t addr); 1517 void st3(VectorFormat vform, 1518 LogicVRegister src, 1519 LogicVRegister src2, 1520 LogicVRegister src3, 1521 int index, 1522 uint64_t addr); 1523 void st4(VectorFormat vform, 1524 LogicVRegister src, 1525 LogicVRegister src2, 1526 LogicVRegister src3, 1527 LogicVRegister src4, 1528 uint64_t addr); 1529 void st4(VectorFormat vform, 1530 LogicVRegister src, 1531 LogicVRegister src2, 1532 LogicVRegister src3, 1533 LogicVRegister src4, 1534 int index, 1535 uint64_t addr); 1536 LogicVRegister cmp(VectorFormat vform, 1537 LogicVRegister dst, 1538 const LogicVRegister& src1, 1539 const LogicVRegister& src2, 1540 Condition cond); 1541 LogicVRegister 
cmp(VectorFormat vform, 1542 LogicVRegister dst, 1543 const LogicVRegister& src1, 1544 int imm, 1545 Condition cond); 1546 LogicVRegister cmptst(VectorFormat vform, 1547 LogicVRegister dst, 1548 const LogicVRegister& src1, 1549 const LogicVRegister& src2); 1550 LogicVRegister add(VectorFormat vform, 1551 LogicVRegister dst, 1552 const LogicVRegister& src1, 1553 const LogicVRegister& src2); 1554 LogicVRegister addp(VectorFormat vform, 1555 LogicVRegister dst, 1556 const LogicVRegister& src1, 1557 const LogicVRegister& src2); 1558 LogicVRegister mla(VectorFormat vform, 1559 LogicVRegister dst, 1560 const LogicVRegister& src1, 1561 const LogicVRegister& src2); 1562 LogicVRegister mls(VectorFormat vform, 1563 LogicVRegister dst, 1564 const LogicVRegister& src1, 1565 const LogicVRegister& src2); 1566 LogicVRegister mul(VectorFormat vform, 1567 LogicVRegister dst, 1568 const LogicVRegister& src1, 1569 const LogicVRegister& src2); 1570 LogicVRegister mul(VectorFormat vform, 1571 LogicVRegister dst, 1572 const LogicVRegister& src1, 1573 const LogicVRegister& src2, 1574 int index); 1575 LogicVRegister mla(VectorFormat vform, 1576 LogicVRegister dst, 1577 const LogicVRegister& src1, 1578 const LogicVRegister& src2, 1579 int index); 1580 LogicVRegister mls(VectorFormat vform, 1581 LogicVRegister dst, 1582 const LogicVRegister& src1, 1583 const LogicVRegister& src2, 1584 int index); 1585 LogicVRegister pmul(VectorFormat vform, 1586 LogicVRegister dst, 1587 const LogicVRegister& src1, 1588 const LogicVRegister& src2); 1589 1590 typedef LogicVRegister (Simulator::*ByElementOp)(VectorFormat vform, 1591 LogicVRegister dst, 1592 const LogicVRegister& src1, 1593 const LogicVRegister& src2, 1594 int index); 1595 LogicVRegister fmul(VectorFormat vform, 1596 LogicVRegister dst, 1597 const LogicVRegister& src1, 1598 const LogicVRegister& src2, 1599 int index); 1600 LogicVRegister fmla(VectorFormat vform, 1601 LogicVRegister dst, 1602 const LogicVRegister& src1, 1603 const 
LogicVRegister& src2, 1604 int index); 1605 LogicVRegister fmls(VectorFormat vform, 1606 LogicVRegister dst, 1607 const LogicVRegister& src1, 1608 const LogicVRegister& src2, 1609 int index); 1610 LogicVRegister fmulx(VectorFormat vform, 1611 LogicVRegister dst, 1612 const LogicVRegister& src1, 1613 const LogicVRegister& src2, 1614 int index); 1615 LogicVRegister smull(VectorFormat vform, 1616 LogicVRegister dst, 1617 const LogicVRegister& src1, 1618 const LogicVRegister& src2, 1619 int index); 1620 LogicVRegister smull2(VectorFormat vform, 1621 LogicVRegister dst, 1622 const LogicVRegister& src1, 1623 const LogicVRegister& src2, 1624 int index); 1625 LogicVRegister umull(VectorFormat vform, 1626 LogicVRegister dst, 1627 const LogicVRegister& src1, 1628 const LogicVRegister& src2, 1629 int index); 1630 LogicVRegister umull2(VectorFormat vform, 1631 LogicVRegister dst, 1632 const LogicVRegister& src1, 1633 const LogicVRegister& src2, 1634 int index); 1635 LogicVRegister smlal(VectorFormat vform, 1636 LogicVRegister dst, 1637 const LogicVRegister& src1, 1638 const LogicVRegister& src2, 1639 int index); 1640 LogicVRegister smlal2(VectorFormat vform, 1641 LogicVRegister dst, 1642 const LogicVRegister& src1, 1643 const LogicVRegister& src2, 1644 int index); 1645 LogicVRegister umlal(VectorFormat vform, 1646 LogicVRegister dst, 1647 const LogicVRegister& src1, 1648 const LogicVRegister& src2, 1649 int index); 1650 LogicVRegister umlal2(VectorFormat vform, 1651 LogicVRegister dst, 1652 const LogicVRegister& src1, 1653 const LogicVRegister& src2, 1654 int index); 1655 LogicVRegister smlsl(VectorFormat vform, 1656 LogicVRegister dst, 1657 const LogicVRegister& src1, 1658 const LogicVRegister& src2, 1659 int index); 1660 LogicVRegister smlsl2(VectorFormat vform, 1661 LogicVRegister dst, 1662 const LogicVRegister& src1, 1663 const LogicVRegister& src2, 1664 int index); 1665 LogicVRegister umlsl(VectorFormat vform, 1666 LogicVRegister dst, 1667 const LogicVRegister& src1, 1668 
const LogicVRegister& src2, 1669 int index); 1670 LogicVRegister umlsl2(VectorFormat vform, 1671 LogicVRegister dst, 1672 const LogicVRegister& src1, 1673 const LogicVRegister& src2, 1674 int index); 1675 LogicVRegister sqdmull(VectorFormat vform, 1676 LogicVRegister dst, 1677 const LogicVRegister& src1, 1678 const LogicVRegister& src2, 1679 int index); 1680 LogicVRegister sqdmull2(VectorFormat vform, 1681 LogicVRegister dst, 1682 const LogicVRegister& src1, 1683 const LogicVRegister& src2, 1684 int index); 1685 LogicVRegister sqdmlal(VectorFormat vform, 1686 LogicVRegister dst, 1687 const LogicVRegister& src1, 1688 const LogicVRegister& src2, 1689 int index); 1690 LogicVRegister sqdmlal2(VectorFormat vform, 1691 LogicVRegister dst, 1692 const LogicVRegister& src1, 1693 const LogicVRegister& src2, 1694 int index); 1695 LogicVRegister sqdmlsl(VectorFormat vform, 1696 LogicVRegister dst, 1697 const LogicVRegister& src1, 1698 const LogicVRegister& src2, 1699 int index); 1700 LogicVRegister sqdmlsl2(VectorFormat vform, 1701 LogicVRegister dst, 1702 const LogicVRegister& src1, 1703 const LogicVRegister& src2, 1704 int index); 1705 LogicVRegister sqdmulh(VectorFormat vform, 1706 LogicVRegister dst, 1707 const LogicVRegister& src1, 1708 const LogicVRegister& src2, 1709 int index); 1710 LogicVRegister sqrdmulh(VectorFormat vform, 1711 LogicVRegister dst, 1712 const LogicVRegister& src1, 1713 const LogicVRegister& src2, 1714 int index); 1715 LogicVRegister sub(VectorFormat vform, 1716 LogicVRegister dst, 1717 const LogicVRegister& src1, 1718 const LogicVRegister& src2); 1719 LogicVRegister and_(VectorFormat vform, 1720 LogicVRegister dst, 1721 const LogicVRegister& src1, 1722 const LogicVRegister& src2); 1723 LogicVRegister orr(VectorFormat vform, 1724 LogicVRegister dst, 1725 const LogicVRegister& src1, 1726 const LogicVRegister& src2); 1727 LogicVRegister orn(VectorFormat vform, 1728 LogicVRegister dst, 1729 const LogicVRegister& src1, 1730 const LogicVRegister& src2); 
1731 LogicVRegister eor(VectorFormat vform, 1732 LogicVRegister dst, 1733 const LogicVRegister& src1, 1734 const LogicVRegister& src2); 1735 LogicVRegister bic(VectorFormat vform, 1736 LogicVRegister dst, 1737 const LogicVRegister& src1, 1738 const LogicVRegister& src2); 1739 LogicVRegister bic(VectorFormat vform, 1740 LogicVRegister dst, 1741 const LogicVRegister& src, 1742 uint64_t imm); 1743 LogicVRegister bif(VectorFormat vform, 1744 LogicVRegister dst, 1745 const LogicVRegister& src1, 1746 const LogicVRegister& src2); 1747 LogicVRegister bit(VectorFormat vform, 1748 LogicVRegister dst, 1749 const LogicVRegister& src1, 1750 const LogicVRegister& src2); 1751 LogicVRegister bsl(VectorFormat vform, 1752 LogicVRegister dst, 1753 const LogicVRegister& src1, 1754 const LogicVRegister& src2); 1755 LogicVRegister cls(VectorFormat vform, 1756 LogicVRegister dst, 1757 const LogicVRegister& src); 1758 LogicVRegister clz(VectorFormat vform, 1759 LogicVRegister dst, 1760 const LogicVRegister& src); 1761 LogicVRegister cnt(VectorFormat vform, 1762 LogicVRegister dst, 1763 const LogicVRegister& src); 1764 LogicVRegister not_(VectorFormat vform, 1765 LogicVRegister dst, 1766 const LogicVRegister& src); 1767 LogicVRegister rbit(VectorFormat vform, 1768 LogicVRegister dst, 1769 const LogicVRegister& src); 1770 LogicVRegister rev(VectorFormat vform, 1771 LogicVRegister dst, 1772 const LogicVRegister& src, 1773 int revSize); 1774 LogicVRegister rev16(VectorFormat vform, 1775 LogicVRegister dst, 1776 const LogicVRegister& src); 1777 LogicVRegister rev32(VectorFormat vform, 1778 LogicVRegister dst, 1779 const LogicVRegister& src); 1780 LogicVRegister rev64(VectorFormat vform, 1781 LogicVRegister dst, 1782 const LogicVRegister& src); 1783 LogicVRegister addlp(VectorFormat vform, 1784 LogicVRegister dst, 1785 const LogicVRegister& src, 1786 bool is_signed, 1787 bool do_accumulate); 1788 LogicVRegister saddlp(VectorFormat vform, 1789 LogicVRegister dst, 1790 const LogicVRegister& src); 
// Declarations of vector-register logic helpers; this run contains prototypes
// only. Every function here takes the vector format first, then the
// destination register by value, then its source registers by const reference
// (plus any scalar operands), and returns a LogicVRegister.
LogicVRegister uaddlp(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister sadalp(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister uadalp(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister ext(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src1,
                   const LogicVRegister& src2,
                   int index);
// Element/immediate insert and duplicate helpers. The *_element forms take a
// source register plus a source lane index; the *_immediate forms take a
// 64-bit immediate instead.
LogicVRegister ins_element(VectorFormat vform,
                           LogicVRegister dst,
                           int dst_index,
                           const LogicVRegister& src,
                           int src_index);
LogicVRegister ins_immediate(VectorFormat vform,
                             LogicVRegister dst,
                             int dst_index,
                             uint64_t imm);
LogicVRegister dup_element(VectorFormat vform,
                           LogicVRegister dst,
                           const LogicVRegister& src,
                           int src_index);
LogicVRegister dup_immediate(VectorFormat vform,
                             LogicVRegister dst,
                             uint64_t imm);
LogicVRegister movi(VectorFormat vform,
                    LogicVRegister dst,
                    uint64_t imm);
LogicVRegister mvni(VectorFormat vform,
                    LogicVRegister dst,
                    uint64_t imm);
LogicVRegister orr(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src,
                   uint64_t imm);
LogicVRegister sshl(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister ushl(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
// NOTE(review): in the sminmax* helpers below, `max` presumably selects
// maximum (true) versus minimum (false), with smax/smin etc. as the
// fixed-direction entry points - confirm against the definitions.
LogicVRegister sminmax(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       bool max);
LogicVRegister smax(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister smin(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister sminmaxp(VectorFormat vform,
                        LogicVRegister dst,
                        int dst_index,
                        const LogicVRegister& src,
                        bool max);
LogicVRegister smaxp(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister sminp(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister addp(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister addv(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister uaddlv(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister saddlv(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister sminmaxv(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src,
                        bool max);
LogicVRegister smaxv(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister sminv(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister uxtl(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister uxtl2(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister sxtl(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister sxtl2(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
// tbl overloads: identical apart from the number of `tab` operands (one to
// four) that precede the `ind` operand.
LogicVRegister tbl(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& ind);
LogicVRegister tbl(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& tab2,
                   const LogicVRegister& ind);
LogicVRegister tbl(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& tab2,
                   const LogicVRegister& tab3,
                   const LogicVRegister& ind);
LogicVRegister tbl(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& tab2,
                   const LogicVRegister& tab3,
                   const LogicVRegister& tab4,
                   const LogicVRegister& ind);
// tbx overloads: same parameter shapes as the tbl overloads above.
LogicVRegister tbx(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& ind);
LogicVRegister tbx(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& tab2,
                   const LogicVRegister& ind);
LogicVRegister tbx(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& tab2,
                   const LogicVRegister& tab3,
                   const LogicVRegister& ind);
LogicVRegister tbx(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& tab,
                   const LogicVRegister& tab2,
                   const LogicVRegister& tab3,
                   const LogicVRegister& tab4,
                   const LogicVRegister& ind);
LogicVRegister uaddl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister uaddl2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister uaddw(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister uaddw2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister saddl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister saddl2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister saddw(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
// More vector-register logic helper prototypes (no definitions in this run).
// As elsewhere in this list: vector format first, destination register by
// value, sources by const reference, LogicVRegister returned.
LogicVRegister saddw2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister usubl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister usubl2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister usubw(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister usubw2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister ssubl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister ssubl2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister ssubw(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister ssubw2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
// NOTE(review): in the uminmax* helpers below, `max` presumably selects
// maximum (true) versus minimum (false), with umax/umin etc. as the
// fixed-direction entry points - confirm against the definitions.
LogicVRegister uminmax(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       bool max);
LogicVRegister umax(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister umin(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister uminmaxp(VectorFormat vform,
                        LogicVRegister dst,
                        int dst_index,
                        const LogicVRegister& src,
                        bool max);
LogicVRegister umaxp(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister uminp(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);
LogicVRegister uminmaxv(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src,
                        bool max);
LogicVRegister umaxv(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister uminv(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister trn1(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister trn2(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister zip1(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister zip2(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister uzp1(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister uzp2(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
// Helpers taking a single source register plus an integer `shift` amount
// (scvtf/ucvtf instead take a fractional-bit count and a rounding mode).
LogicVRegister shl(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src,
                   int shift);
LogicVRegister scvtf(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int fbits,
                     FPRounding rounding_mode);
LogicVRegister ucvtf(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int fbits,
                     FPRounding rounding_mode);
LogicVRegister sshll(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister sshll2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      int shift);
// Unlike sshll/ushll, shll and shll2 take no explicit shift argument.
LogicVRegister shll(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister shll2(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister ushll(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister ushll2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      int shift);
LogicVRegister sli(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src,
                   int shift);
LogicVRegister sri(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src,
                   int shift);
LogicVRegister sshr(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src,
                    int shift);
LogicVRegister ushr(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src,
                    int shift);
LogicVRegister ssra(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src,
                    int shift);
LogicVRegister usra(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src,
                    int shift);
LogicVRegister srsra(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister ursra(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister suqadd(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister usqadd(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister sqshl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister uqshl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister sqshlu(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      int shift);
LogicVRegister abs(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src);
// More vector-register logic helper prototypes, followed by the X-macro
// generated declarations, scalar floating-point helpers, and the first
// processor-state members.
LogicVRegister neg(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src);
// Takes separate signedness flags for the destination and the source.
LogicVRegister extractnarrow(VectorFormat vform,
                             LogicVRegister dst,
                             bool dstIsSigned,
                             const LogicVRegister& src,
                             bool srcIsSigned);
LogicVRegister xtn(VectorFormat vform,
                   LogicVRegister dst,
                   const LogicVRegister& src);
LogicVRegister sqxtn(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister uqxtn(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister sqxtun(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister absdiff(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       bool issigned);
LogicVRegister saba(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister uaba(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
// The *shrn / *shrun helpers below all share one shape: a single source
// register plus an integer `shift`.
LogicVRegister shrn(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src,
                    int shift);
LogicVRegister shrn2(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister rshrn(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     int shift);
LogicVRegister rshrn2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      int shift);
LogicVRegister uqshrn(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      int shift);
LogicVRegister uqshrn2(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src,
                       int shift);
LogicVRegister uqrshrn(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src,
                       int shift);
LogicVRegister uqrshrn2(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src,
                        int shift);
LogicVRegister sqshrn(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      int shift);
LogicVRegister sqshrn2(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src,
                       int shift);
LogicVRegister sqrshrn(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src,
                       int shift);
LogicVRegister sqrshrn2(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src,
                        int shift);
LogicVRegister sqshrun(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src,
                       int shift);
LogicVRegister sqshrun2(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src,
                        int shift);
LogicVRegister sqrshrun(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src,
                        int shift);
LogicVRegister sqrshrun2(VectorFormat vform,
                         LogicVRegister dst,
                         const LogicVRegister& src,
                         int shift);
// `round` defaults to true; sqdmulh below has the same shape without the
// flag. NOTE(review): presumably sqdmulh is the non-rounding variant built on
// this one - confirm against the definitions.
LogicVRegister sqrdmulh(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src1,
                        const LogicVRegister& src2,
                        bool round = true);
LogicVRegister sqdmulh(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2);

// X-macro list of helper names. Applying it to DEFINE_LOGIC_FUNC (below)
// declares one member function per entry, each with the signature
// (vform, dst, src1, src2) -> LogicVRegister. The list macro itself is left
// defined after this point.
#define NEON_3VREG_LOGIC_LIST(V) \
  V(addhn)                       \
  V(addhn2)                      \
  V(raddhn)                      \
  V(raddhn2)                     \
  V(subhn)                       \
  V(subhn2)                      \
  V(rsubhn)                      \
  V(rsubhn2)                     \
  V(pmull)                       \
  V(pmull2)                      \
  V(sabal)                       \
  V(sabal2)                      \
  V(uabal)                       \
  V(uabal2)                      \
  V(sabdl)                       \
  V(sabdl2)                      \
  V(uabdl)                       \
  V(uabdl2)                      \
  V(smull)                       \
  V(smull2)                      \
  V(umull)                       \
  V(umull2)                      \
  V(smlal)                       \
  V(smlal2)                      \
  V(umlal)                       \
  V(umlal2)                      \
  V(smlsl)                       \
  V(smlsl2)                      \
  V(umlsl)                       \
  V(umlsl2)                      \
  V(sqdmlal)                     \
  V(sqdmlal2)                    \
  V(sqdmlsl)                     \
  V(sqdmlsl2)                    \
  V(sqdmull)                     \
  V(sqdmull2)

// Declaration generator for the list above; undefined again right after use.
#define DEFINE_LOGIC_FUNC(FXN)                   \
  LogicVRegister FXN(VectorFormat vform,         \
                     LogicVRegister dst,         \
                     const LogicVRegister& src1, \
                     const LogicVRegister& src2);
NEON_3VREG_LOGIC_LIST(DEFINE_LOGIC_FUNC)
#undef DEFINE_LOGIC_FUNC

// X-macro list with three fields per entry: the helper name, the name of one
// of the FP* member templates declared further down (FPAdd, FPSub, ...), and
// a boolean. Only the first field is used by DECLARE_NEON_FP_VECTOR_OP here.
// NOTE(review): the other two fields are presumably consumed where the
// helpers are defined - confirm.
#define NEON_FP3SAME_LIST(V) \
  V(fadd,   FPAdd,   false)  \
  V(fsub,   FPSub,   true)   \
  V(fmul,   FPMul,   true)   \
  V(fmulx,  FPMulx,  true)   \
  V(fdiv,   FPDiv,   true)   \
  V(fmax,   FPMax,   false)  \
  V(fmin,   FPMin,   false)  \
  V(fmaxnm, FPMaxNM, false)  \
  V(fminnm, FPMinNM, false)

// For each entry, declares two overloads of FN with identical parameters:
// a member template on T and a non-template member. OP and PROCNAN are
// accepted but not referenced in these declarations.
#define DECLARE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
  template <typename T>                            \
  LogicVRegister FN(VectorFormat vform,            \
                    LogicVRegister dst,            \
                    const LogicVRegister& src1,    \
                    const LogicVRegister& src2);   \
  LogicVRegister FN(VectorFormat vform,            \
                    LogicVRegister dst,            \
                    const LogicVRegister& src1,    \
                    const LogicVRegister& src2);
NEON_FP3SAME_LIST(DECLARE_NEON_FP_VECTOR_OP)
#undef DECLARE_NEON_FP_VECTOR_OP

// X-macro list with three fields per entry: the pairwise helper name, the
// name of a helper from NEON_FP3SAME_LIST, and the name of an FP* member
// template. Only the first field is used by DECLARE_NEON_FP_PAIR_OP here.
#define NEON_FPPAIRWISE_LIST(V) \
  V(faddp,   fadd,   FPAdd)     \
  V(fmaxp,   fmax,   FPMax)     \
  V(fmaxnmp, fmaxnm, FPMaxNM)   \
  V(fminp,   fmin,   FPMin)     \
  V(fminnmp, fminnm, FPMinNM)

// For each entry, declares two overloads of FNP: one taking two source
// registers and one taking a single source register. FN and OP are accepted
// but not referenced in these declarations.
#define DECLARE_NEON_FP_PAIR_OP(FNP, FN, OP)     \
  LogicVRegister FNP(VectorFormat vform,         \
                     LogicVRegister dst,         \
                     const LogicVRegister& src1, \
                     const LogicVRegister& src2); \
  LogicVRegister FNP(VectorFormat vform,         \
                     LogicVRegister dst,         \
                     const LogicVRegister& src);
NEON_FPPAIRWISE_LIST(DECLARE_NEON_FP_PAIR_OP)
#undef DECLARE_NEON_FP_PAIR_OP

// The helpers from frecps to fabs_ below (apart from fnmul, fabscmp and
// fcmp_zero) are declared in pairs: a member template on T and a non-template
// overload with the same parameters.
template <typename T>
LogicVRegister frecps(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
LogicVRegister frecps(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src1,
                      const LogicVRegister& src2);
template <typename T>
LogicVRegister frsqrts(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2);
LogicVRegister frsqrts(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2);
template <typename T>
LogicVRegister fmla(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister fmla(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
template <typename T>
LogicVRegister fmls(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister fmls(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
LogicVRegister fnmul(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src1,
                     const LogicVRegister& src2);

// Comparison helpers; each takes the Condition to evaluate as its last
// argument.
template <typename T>
LogicVRegister fcmp(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2,
                    Condition cond);
LogicVRegister fcmp(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2,
                    Condition cond);
LogicVRegister fabscmp(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src1,
                       const LogicVRegister& src2,
                       Condition cond);
LogicVRegister fcmp_zero(VectorFormat vform,
                         LogicVRegister dst,
                         const LogicVRegister& src,
                         Condition cond);

template <typename T>
LogicVRegister fneg(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
LogicVRegister fneg(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src);
template <typename T>
LogicVRegister frecpx(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister frecpx(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
template <typename T>
LogicVRegister fabs_(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister fabs_(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister fabd(VectorFormat vform,
                    LogicVRegister dst,
                    const LogicVRegister& src1,
                    const LogicVRegister& src2);
// `inexact_exception` defaults to false.
LogicVRegister frint(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     FPRounding rounding_mode,
                     bool inexact_exception = false);
// fcvts/fcvtu: `fbits` defaults to 0.
LogicVRegister fcvts(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     FPRounding rounding_mode,
                     int fbits = 0);
LogicVRegister fcvtu(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src,
                     FPRounding rounding_mode,
                     int fbits = 0);
LogicVRegister fcvtl(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister fcvtl2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister fcvtn(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister fcvtn2(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister fcvtxn(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);
LogicVRegister fcvtxn2(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src);
LogicVRegister fsqrt(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister frsqrte(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src);
LogicVRegister frecpe(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src,
                      FPRounding rounding);
LogicVRegister ursqrte(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src);
LogicVRegister urecpe(VectorFormat vform,
                      LogicVRegister dst,
                      const LogicVRegister& src);

// Pointer to a Simulator member function taking two floats and returning a
// float; this is the type of the `Op` argument of fminmaxv below.
typedef float (Simulator::*FPMinMaxOp)(float a, float b);

LogicVRegister fminmaxv(VectorFormat vform,
                        LogicVRegister dst,
                        const LogicVRegister& src,
                        FPMinMaxOp Op);

LogicVRegister fminv(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister fmaxv(VectorFormat vform,
                     LogicVRegister dst,
                     const LogicVRegister& src);
LogicVRegister fminnmv(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src);
LogicVRegister fmaxnmv(VectorFormat vform,
                       LogicVRegister dst,
                       const LogicVRegister& src);

// CRC polynomial constants. NOTE(review): the values match the standard
// CRC-32 (0x04C11DB7) and CRC-32C / Castagnoli (0x1EDC6F41) generator
// polynomials; presumably they are what callers pass as `poly` below.
static const uint32_t CRC32_POLY = 0x04C11DB7;
static const uint32_t CRC32C_POLY = 0x1EDC6F41;
uint32_t Poly32Mod2(unsigned n, uint64_t data, uint32_t poly);
template <typename T>
uint32_t Crc32Checksum(uint32_t acc, T val, uint32_t poly);
uint32_t Crc32Checksum(uint32_t acc, uint64_t val, uint32_t poly);

void SysOp_W(int op, int64_t val);

// Scalar floating-point helper declarations (definitions are not in this
// part of the header).
template <typename T>
T FPRecipSqrtEstimate(T op);
template <typename T>
T FPRecipEstimate(T op, FPRounding rounding);
template <typename T, typename R>
R FPToFixed(T op, int fbits, bool is_signed, FPRounding rounding);

void FPCompare(double val0, double val1, FPTrapFlags trap);
double FPRoundInt(double value, FPRounding round_mode);
double FPToDouble(float value);
float FPToFloat(double value, FPRounding round_mode);
float FPToFloat(float16 value);
float16 FPToFloat16(float value, FPRounding round_mode);
float16 FPToFloat16(double value, FPRounding round_mode);
double recip_sqrt_estimate(double a);
double recip_estimate(double a);
double FPRecipSqrtEstimate(double a);
double FPRecipEstimate(double a);
double FixedToDouble(int64_t src, int fbits, FPRounding round_mode);
double UFixedToDouble(uint64_t src, int fbits, FPRounding round_mode);
float FixedToFloat(int64_t src, int fbits, FPRounding round_mode);
float UFixedToFloat(uint64_t src, int fbits, FPRounding round_mode);
int32_t FPToInt32(double value, FPRounding rmode);
int64_t FPToInt64(double value, FPRounding rmode);
uint32_t FPToUInt32(double value, FPRounding rmode);
uint64_t FPToUInt64(double value, FPRounding rmode);

// Templated scalar arithmetic helpers. FPAdd, FPSub, FPMul, FPMulx, FPDiv,
// FPMax, FPMin, FPMaxNM and FPMinNM are the names listed as the second field
// of NEON_FP3SAME_LIST above.
template <typename T>
T FPAdd(T op1, T op2);

template <typename T>
T FPDiv(T op1, T op2);

template <typename T>
T FPMax(T a, T b);

template <typename T>
T FPMaxNM(T a, T b);

template <typename T>
T FPMin(T a, T b);

template <typename T>
T FPMinNM(T a, T b);

template <typename T>
T FPMul(T op1, T op2);

template <typename T>
T FPMulx(T op1, T op2);

template <typename T>
T FPMulAdd(T a, T op1, T op2);

template <typename T>
T FPSqrt(T op);

template <typename T>
T FPSub(T op1, T op2);

template <typename T>
T FPRecipStepFused(T op1, T op2);

template <typename T>
T FPRSqrtStepFused(T op1, T op2);

// This doesn't do anything at the moment. We'll need it if we want support
// for cumulative exception bits or floating-point exceptions.
void FPProcessException() { }

bool FPProcessNaNs(const Instruction* instr);

// Pseudo Printf instruction
void DoPrintf(const Instruction* instr);

// Processor state ---------------------------------------

// Simulated monitors for exclusive access instructions.
SimExclusiveLocalMonitor local_monitor_;
SimExclusiveGlobalMonitor global_monitor_;

// Output stream.
2598 FILE* stream_; 2599 PrintDisassembler* print_disasm_; 2600 2601 // Instruction statistics instrumentation. 2602 Instrument* instrumentation_; 2603 2604 // General purpose registers. Register 31 is the stack pointer. 2605 SimRegister registers_[kNumberOfRegisters]; 2606 2607 // Vector registers 2608 SimVRegister vregisters_[kNumberOfVRegisters]; 2609 2610 // Program Status Register. 2611 // bits[31, 27]: Condition flags N, Z, C, and V. 2612 // (Negative, Zero, Carry, Overflow) 2613 SimSystemRegister nzcv_; 2614 2615 // Floating-Point Control Register 2616 SimSystemRegister fpcr_; 2617 2618 // Only a subset of FPCR features are supported by the simulator. This helper 2619 // checks that the FPCR settings are supported. 2620 // 2621 // This is checked when floating-point instructions are executed, not when 2622 // FPCR is set. This allows generated code to modify FPCR for external 2623 // functions, or to save and restore it when entering and leaving generated 2624 // code. 2625 void AssertSupportedFPCR() { 2626 VIXL_ASSERT(fpcr().FZ() == 0); // No flush-to-zero support. 2627 VIXL_ASSERT(fpcr().RMode() == FPTieEven); // Ties-to-even rounding only. 2628 2629 // The simulator does not support half-precision operations so fpcr().AHP() 2630 // is irrelevant, and is not checked here. 2631 } 2632 2633 static int CalcNFlag(uint64_t result, unsigned reg_size) { 2634 return (result >> (reg_size - 1)) & 1; 2635 } 2636 2637 static int CalcZFlag(uint64_t result) { 2638 return result == 0; 2639 } 2640 2641 static const uint32_t kConditionFlagsMask = 0xf0000000; 2642 2643 // Stack 2644 byte* stack_; 2645 static const int stack_protection_size_ = 256; 2646 // 2 KB stack. 2647 static const int stack_size_ = 2 * 1024 + 2 * stack_protection_size_; 2648 byte* stack_limit_; 2649 2650 Decoder* decoder_; 2651 // Indicates if the pc has been modified by the instruction and should not be 2652 // automatically incremented. 
2653 bool pc_modified_; 2654 const Instruction* pc_; 2655 2656 static const char* xreg_names[]; 2657 static const char* wreg_names[]; 2658 static const char* sreg_names[]; 2659 static const char* dreg_names[]; 2660 static const char* vreg_names[]; 2661 2662 static const Instruction* kEndOfSimAddress; 2663 2664 private: 2665 template <typename T> 2666 static T FPDefaultNaN(); 2667 2668 // Standard NaN processing. 2669 template <typename T> 2670 T FPProcessNaN(T op) { 2671 VIXL_ASSERT(std::isnan(op)); 2672 if (IsSignallingNaN(op)) { 2673 FPProcessException(); 2674 } 2675 return DN() ? FPDefaultNaN<T>() : ToQuietNaN(op); 2676 } 2677 2678 template <typename T> 2679 T FPProcessNaNs(T op1, T op2) { 2680 if (IsSignallingNaN(op1)) { 2681 return FPProcessNaN(op1); 2682 } else if (IsSignallingNaN(op2)) { 2683 return FPProcessNaN(op2); 2684 } else if (std::isnan(op1)) { 2685 VIXL_ASSERT(IsQuietNaN(op1)); 2686 return FPProcessNaN(op1); 2687 } else if (std::isnan(op2)) { 2688 VIXL_ASSERT(IsQuietNaN(op2)); 2689 return FPProcessNaN(op2); 2690 } else { 2691 return 0.0; 2692 } 2693 } 2694 2695 template <typename T> 2696 T FPProcessNaNs3(T op1, T op2, T op3) { 2697 if (IsSignallingNaN(op1)) { 2698 return FPProcessNaN(op1); 2699 } else if (IsSignallingNaN(op2)) { 2700 return FPProcessNaN(op2); 2701 } else if (IsSignallingNaN(op3)) { 2702 return FPProcessNaN(op3); 2703 } else if (std::isnan(op1)) { 2704 VIXL_ASSERT(IsQuietNaN(op1)); 2705 return FPProcessNaN(op1); 2706 } else if (std::isnan(op2)) { 2707 VIXL_ASSERT(IsQuietNaN(op2)); 2708 return FPProcessNaN(op2); 2709 } else if (std::isnan(op3)) { 2710 VIXL_ASSERT(IsQuietNaN(op3)); 2711 return FPProcessNaN(op3); 2712 } else { 2713 return 0.0; 2714 } 2715 } 2716 2717 bool coloured_trace_; 2718 2719 // A set of TraceParameters flags. 2720 int trace_parameters_; 2721 2722 // Indicates whether the instruction instrumentation is active. 
bool instruction_stats_;

// Indicates whether the exclusive-access warning has been printed.
bool print_exclusive_access_warning_;
// NOTE(review): presumably prints the warning tracked by the flag above; the
// definition is not visible in this header - confirm.
void PrintExclusiveAccessWarning();
};
}  // namespace vixl

#endif  // VIXL_A64_SIMULATOR_A64_H_