// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifdef VIXL_INCLUDE_SIMULATOR_AARCH64

#include <cmath>

#include "simulator-aarch64.h"

namespace vixl {
namespace aarch64 {

// Specialisation of FPDefaultNaN for double: returns the kFP64DefaultNaN
// constant.
template <>
double Simulator::FPDefaultNaN<double>() {
  return kFP64DefaultNaN;
}


// Specialisation of FPDefaultNaN for float: returns the kFP32DefaultNaN
// constant.
template <>
float Simulator::FPDefaultNaN<float>() {
  return kFP32DefaultNaN;
}

// See FPRound for a description of this function.
static inline double FPRoundToDouble(int64_t sign,
                                     int64_t exponent,
                                     uint64_t mantissa,
                                     FPRounding round_mode) {
  // FPRound produces the raw encoding (as an int64_t) for the double-precision
  // field widths; the bits are then reinterpreted as a double.
  int64_t bits =
      FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
                                                                 exponent,
                                                                 mantissa,
                                                                 round_mode);
  return RawbitsToDouble(bits);
}


// See FPRound for a description of this function.
// Single-precision counterpart of FPRoundToDouble: FPRound is instantiated
// with the float field widths and the raw bits are reinterpreted as a float.
static inline float FPRoundToFloat(int64_t sign,
                                   int64_t exponent,
                                   uint64_t mantissa,
                                   FPRounding round_mode) {
  int32_t bits =
      FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
                                                               exponent,
                                                               mantissa,
                                                               round_mode);
  return RawbitsToFloat(bits);
}


// See FPRound for a description of this function.
// Half-precision counterpart: the FPRound result is returned directly as a
// float16, with no raw-bits conversion step.
static inline float16 FPRoundToFloat16(int64_t sign,
                                       int64_t exponent,
                                       uint64_t mantissa,
                                       FPRounding round_mode) {
  return FPRound<float16,
                 kFloat16ExponentBits,
                 kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
}


// Converts the signed fixed-point value `src` (scaled by 2^-fbits, see
// UFixedToDouble) to a double by converting its magnitude and negating the
// result for negative inputs.
double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToDouble(src, fbits, round);
  } else if (src == INT64_MIN) {
    // -INT64_MIN is not representable as an int64_t, so `src` is passed
    // unchanged; its conversion to the uint64_t parameter yields 2^63.
    return -UFixedToDouble(src, fbits, round);
  } else {
    return -UFixedToDouble(-src, fbits, round);
  }
}


// Converts the unsigned fixed-point value `src` to a double. The highest set
// bit of `src` is given the weight 2^(bit position - fbits), and the result is
// rounded by FPRoundToDouble according to `round`.
double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
  // An input of 0 is a special case because the result is effectively
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
  if (src == 0) {
    return 0.0;
  }

  // Calculate the exponent. The highest significant bit will have the value
  // 2^exponent.
  const int highest_significant_bit = 63 - CountLeadingZeros(src);
  const int64_t exponent = highest_significant_bit - fbits;

  return FPRoundToDouble(0, exponent, src, round);
}


// Single-precision counterpart of FixedToDouble: converts the magnitude with
// UFixedToFloat and negates the result for negative inputs.
float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToFloat(src, fbits, round);
  } else if (src == INT64_MIN) {
    // -INT64_MIN is not representable as an int64_t, so `src` is passed
    // unchanged; its conversion to the uint64_t parameter yields 2^63.
    return -UFixedToFloat(src, fbits, round);
  } else {
    return -UFixedToFloat(-src, fbits, round);
  }
}


// Single-precision counterpart of UFixedToDouble; the rounding is performed by
// FPRoundToFloat.
float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
  // An input of 0 is a special case because the result is effectively
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
  if (src == 0) {
    return 0.0f;
  }

  // Calculate the exponent. The highest significant bit will have the value
  // 2^exponent.
  const int highest_significant_bit = 63 - CountLeadingZeros(src);
  const int32_t exponent = highest_significant_bit - fbits;

  return FPRoundToFloat(0, exponent, src, round);
}


// Widens a float to a double. NaN inputs are handled explicitly: a signalling
// NaN calls FPProcessException(), ReadDN() selects the default NaN, and
// otherwise the result is a quiet NaN built from the input's sign and payload.
// Every other class of input goes through a plain static_cast.
double Simulator::FPToDouble(float value) {
  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      if (ReadDN()) return kFP64DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred entirely, except that the top
      //    bit is forced to '1', making the result a quiet NaN. The unused
      //    (low-order) payload bits are set to 0.
      uint32_t raw = FloatToRawbits(value);

      uint64_t sign = raw >> 31;
      // All-ones value for an 11-bit exponent field.
      uint64_t exponent = (1 << 11) - 1;
      uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
      payload <<= (52 - 23);  // The unused low-order bits should be 0.
      payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.

      return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
    }

    case FP_ZERO:
    case FP_NORMAL:
    case FP_SUBNORMAL:
    case FP_INFINITE: {
      // All other inputs are preserved in a standard cast, because every value
      // representable using an IEEE-754 float is also representable using an
      // IEEE-754 double.
      return static_cast<double>(value);
    }
  }

  VIXL_UNREACHABLE();
  return static_cast<double>(value);
}


// Widens a float16 to a float by unpacking the sign, exponent and mantissa
// fields from `value` and reassembling them in the single-precision layout.
// Zeroes and infinities return directly; subnormal, NaN and normal inputs
// adjust `exponent` and `mantissa` and share the final repacking step.
float Simulator::FPToFloat(float16 value) {
  uint32_t sign = value >> 15;
  uint32_t exponent =
      ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
                                kFloat16MantissaBits,
                                value);
  uint32_t mantissa =
      ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);

  switch (Float16Classify(value)) {
    case FP_ZERO:
      return (sign == 0) ? 0.0f : -0.0f;

    case FP_INFINITE:
      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;

    case FP_SUBNORMAL: {
      // Calculate shift required to put mantissa into the most-significant bits
      // of the destination mantissa.
      int shift = CountLeadingZeros(mantissa << (32 - 10));

      // Shift mantissa and discard implicit '1'.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
      mantissa &= (1 << kFloatMantissaBits) - 1;

      // Adjust the exponent for the shift applied, and rebias.
      exponent = exponent - shift + (-15 + 127);
      break;
    }

    case FP_NAN:
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      if (ReadDN()) return kFP32DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred entirely, except that the top
      //    bit is forced to '1', making the result a quiet NaN. The unused
      //    (low-order) payload bits are set to 0.
      exponent = (1 << kFloatExponentBits) - 1;

      // Increase bits in mantissa, making low-order bits 0.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
      mantissa |= 1 << 22;  // Force a quiet NaN.
      break;

    case FP_NORMAL:
      // Increase bits in mantissa, making low-order bits 0.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

      // Change exponent bias.
      exponent += (-15 + 127);
      break;

    default:
      VIXL_UNREACHABLE();
  }
  return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
                        mantissa);
}


// Narrows a float to a float16. NaNs keep their sign and the top bits of their
// payload (made quiet), zeroes and infinities map to the corresponding float16
// encodings, and normal/subnormal values are rounded by FPRoundToFloat16.
float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
  // Only the FPTieEven rounding mode is implemented.
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  uint32_t raw = FloatToRawbits(value);
  int32_t sign = raw >> 31;
  // Biased exponent field with the single-precision bias (127) removed.
  int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
  uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      if (ReadDN()) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      // The infinity constants are used as the starting point for the result;
      // the payload bits are then ORed in.
      float16 result =
          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
      result |= (1 << 9);  // Force a quiet NaN.
      return result;
    }

    case FP_ZERO:
      return (sign == 0) ? 0 : 0x8000;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert float-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (1 << 23);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  VIXL_UNREACHABLE();
  return 0;
}


// Narrows a double to a float16. Same structure as the float overload, using
// the double-precision field positions and bias (1023).
float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
  // Only the FPTieEven rounding mode is implemented.
  VIXL_ASSERT(round_mode == FPTieEven);
  USE(round_mode);

  uint64_t raw = DoubleToRawbits(value);
  int32_t sign = raw >> 63;
  int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
  uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      if (ReadDN()) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      float16 result =
          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
      result |= (1 << 9);  // Force a quiet NaN.
      return result;
    }

    case FP_ZERO:
      return (sign == 0) ? 0 : 0x8000;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (UINT64_C(1) << 52);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  VIXL_UNREACHABLE();
  return 0;
}


// Narrows a double to a float. NaNs keep their sign and the top bits of their
// payload (made quiet), zeroes and infinities use a plain cast, and
// normal/subnormal values are rounded by FPRoundToFloat.
float Simulator::FPToFloat(double value, FPRounding round_mode) {
  // Only the FPTieEven and FPRoundOdd rounding modes are accepted.
  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
  USE(round_mode);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      if (ReadDN()) return kFP32DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The payload (mantissa) is transferred as much as possible, except
      //    that the top bit is forced to '1', making the result a quiet NaN.
      uint64_t raw = DoubleToRawbits(value);

      uint32_t sign = raw >> 63;
      // All-ones value for an 8-bit exponent field.
      uint32_t exponent = (1 << 8) - 1;
      uint32_t payload =
          static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
      payload |= (1 << 22);  // Force a quiet NaN.

      return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
    }

    case FP_ZERO:
    case FP_INFINITE: {
      // In a C++ cast, any value representable in the target type will be
      // unchanged. This is always the case for +/-0.0 and infinities.
      return static_cast<float>(value);
    }

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-float as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.
      uint64_t raw = DoubleToRawbits(value);
      // Extract the IEEE-754 double components.
      uint32_t sign = raw >> 63;
      // Extract the exponent and remove the IEEE-754 encoding bias.
      int32_t exponent =
          static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
      // Extract the mantissa and add the implicit '1' bit.
      // The implicit bit is only added for normal inputs.
      uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
      if (std::fpclassify(value) == FP_NORMAL) {
        mantissa |= (UINT64_C(1) << 52);
      }
      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
    }
  }

  VIXL_UNREACHABLE();
  return value;
}


// Loads every lane of `dst` from consecutive elements starting at `addr`.
void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.ReadUintFromMem(vform, i, addr);
    addr += LaneSizeInBytesFromFormat(vform);
  }
}


// Loads the single lane `index` of `dst` from `addr`. ClearForWrite is not
// called here.
void Simulator::ld1(VectorFormat vform,
                    LogicVRegister dst,
                    int index,
                    uint64_t addr) {
  dst.ReadUintFromMem(vform, index, addr);
}


// Loads every lane of `dst` from the same address (`addr` is not advanced).
void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.ReadUintFromMem(vform, i, addr);
  }
}


// Loads two-element structures starting at `addr1`: for each lane, the first
// element goes to `dst1` and the following element to `dst2`.
void Simulator::ld2(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr1);
    dst2.ReadUintFromMem(vform, i, addr2);
    // Step both addresses over one whole structure (two elements).
    addr1 += 2 * esize;
    addr2 += 2 * esize;
  }
}


// Loads one two-element structure at `addr1` into lane `index` of `dst1` and
// `dst2`.
void Simulator::ld2(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    int index,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  dst1.ReadUintFromMem(vform, index, addr1);
  dst2.ReadUintFromMem(vform, index, addr2);
}


// Loads one two-element structure at `addr` and writes its first element to
// every lane of `dst1` and its second element to every lane of `dst2`.
void Simulator::ld2r(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr);
    dst2.ReadUintFromMem(vform, i, addr2);
  }
}


// Loads three-element structures starting at `addr1`, distributing the
// elements of each structure to the same lane of `dst1`, `dst2` and `dst3`.
void Simulator::ld3(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  uint64_t addr3 = addr2 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr1);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
    // Step all addresses over one whole structure (three elements).
    addr1 += 3 * esize;
    addr2 += 3 * esize;
    addr3 += 3 * esize;
  }
}


// Loads one three-element structure at `addr1` into lane `index` of `dst1`,
// `dst2` and `dst3`.
void Simulator::ld3(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    int index,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  dst1.ReadUintFromMem(vform, index, addr1);
  dst2.ReadUintFromMem(vform, index, addr2);
  dst3.ReadUintFromMem(vform, index, addr3);
}


// Loads one three-element structure at `addr` and writes each of its elements
// to every lane of the corresponding destination register.
void Simulator::ld3r(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
                     uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
  }
}


// Loads four-element structures starting at `addr1`, distributing the elements
// of each structure to the same lane of `dst1`..`dst4`.
void Simulator::ld4(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    LogicVRegister dst4,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  uint64_t addr3 = addr2 + esize;
  uint64_t addr4 = addr3 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr1);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
    dst4.ReadUintFromMem(vform, i, addr4);
    // Step all addresses over one whole structure (four elements).
    addr1 += 4 * esize;
    addr2 += 4 * esize;
    addr3 += 4 * esize;
    addr4 += 4 * esize;
  }
}


// Loads one four-element structure at `addr1` into lane `index` of
// `dst1`..`dst4`.
void Simulator::ld4(VectorFormat vform,
                    LogicVRegister dst1,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    LogicVRegister dst4,
                    int index,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
  dst1.ReadUintFromMem(vform, index, addr1);
  dst2.ReadUintFromMem(vform, index, addr2);
  dst3.ReadUintFromMem(vform, index, addr3);
  dst4.ReadUintFromMem(vform, index, addr4);
}


// Loads one four-element structure at `addr` and writes each of its elements
// to every lane of the corresponding destination register.
void Simulator::ld4r(VectorFormat vform,
                     LogicVRegister dst1,
                     LogicVRegister dst2,
                     LogicVRegister dst3,
                     LogicVRegister dst4,
                     uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
    dst4.ReadUintFromMem(vform, i, addr4);
  }
}


// Stores every lane of `src` to consecutive elements starting at `addr`.
void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    src.WriteUintToMem(vform, i, addr);
    addr += LaneSizeInBytesFromFormat(vform);
  }
}


// Stores the single lane `index` of `src` to `addr`.
void Simulator::st1(VectorFormat vform,
                    LogicVRegister src,
                    int index,
                    uint64_t addr) {
  src.WriteUintToMem(vform, index, addr);
}


// Stores two-element structures starting at `addr`. Despite their names, `dst`
// and `dst2` are the registers whose lanes are written to memory.
void Simulator::st2(VectorFormat vform,
                    LogicVRegister dst,
                    LogicVRegister dst2,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.WriteUintToMem(vform, i, addr);
    dst2.WriteUintToMem(vform, i, addr2);
    // Step both addresses over one whole structure (two elements).
    addr += 2 * esize;
    addr2 += 2 * esize;
  }
}


// Stores lane `index` of `dst` and `dst2` as one two-element structure at
// `addr`.
void Simulator::st2(VectorFormat vform,
                    LogicVRegister dst,
                    LogicVRegister dst2,
                    int index,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  dst.WriteUintToMem(vform, index, addr);
  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
}


// Stores three-element structures starting at `addr`. Despite their names,
// `dst`, `dst2` and `dst3` are the registers whose lanes are written to memory.
void Simulator::st3(VectorFormat vform,
                    LogicVRegister dst,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  uint64_t addr3 = addr2 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.WriteUintToMem(vform, i, addr);
    dst2.WriteUintToMem(vform, i, addr2);
    dst3.WriteUintToMem(vform, i, addr3);
    // Step all addresses over one whole structure (three elements).
    addr += 3 * esize;
    addr2 += 3 * esize;
    addr3 += 3 * esize;
  }
}


// Stores lane `index` of `dst`, `dst2` and `dst3` as one three-element
// structure at `addr`.
void Simulator::st3(VectorFormat vform,
                    LogicVRegister dst,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    int index,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  dst.WriteUintToMem(vform, index, addr);
  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
}


// Stores four-element structures starting at `addr`. Despite their names,
// `dst`..`dst4` are the registers whose lanes are written to memory.
void Simulator::st4(VectorFormat vform,
                    LogicVRegister dst,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    LogicVRegister dst4,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  uint64_t addr3 = addr2 + esize;
  uint64_t addr4 = addr3 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.WriteUintToMem(vform, i, addr);
    dst2.WriteUintToMem(vform, i, addr2);
    dst3.WriteUintToMem(vform, i, addr3);
    dst4.WriteUintToMem(vform, i, addr4);
    // Step all addresses over one whole structure (four elements).
    addr += 4 * esize;
    addr2 += 4 * esize;
    addr3 += 4 * esize;
    addr4 += 4 * esize;
  }
}


// Stores lane `index` of `dst`..`dst4` as one four-element structure at
// `addr`.
void Simulator::st4(VectorFormat vform,
                    LogicVRegister dst,
                    LogicVRegister dst2,
                    LogicVRegister dst3,
                    LogicVRegister dst4,
                    int index,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  dst.WriteUintToMem(vform, index, addr);
  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
}


// Lane-wise comparison of `src1` and `src2`. Each lane of `dst` is set to
// MaxUintFromFormat(vform) when `cond` holds for that lane and to 0 otherwise.
// ge/gt/lt/le compare the signed lane values, eq/hi/hs the unsigned ones; any
// other condition hits VIXL_UNREACHABLE().
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              Condition cond) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t sa = src1.Int(vform, i);
    int64_t sb = src2.Int(vform, i);
    uint64_t ua = src1.Uint(vform, i);
    uint64_t ub = src2.Uint(vform, i);
    bool result = false;
    switch (cond) {
      case eq:
        result = (ua == ub);
        break;
      case ge:
        result = (sa >= sb);
        break;
      case gt:
        result = (sa > sb);
        break;
      case hi:
        result = (ua > ub);
        break;
      case hs:
        result = (ua >= ub);
        break;
      case lt:
        result = (sa < sb);
        break;
      case le:
        result = (sa <= sb);
        break;
      default:
        VIXL_UNREACHABLE();
        break;
    }
    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}


// Lane-wise comparison of `src1` against an immediate: the second operand is
// the result of dup_immediate(vform, temp, imm), and the comparison itself is
// done by the register-register overload.
LogicVRegister Simulator::cmp(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              int imm,
                              Condition cond) {
  SimVRegister temp;
  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
  return cmp(vform, dst, src1, imm_reg, cond);
}


// Lane-wise bit test: each lane of `dst` is set to MaxUintFromFormat(vform)
// when the corresponding lanes of `src1` and `src2` share at least one set
// bit, and to 0 otherwise.
LogicVRegister Simulator::cmptst(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t ua = src1.Uint(vform, i);
    uint64_t ub = src2.Uint(vform, i);
    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}


// Lane-wise addition of `src1` and `src2` into `dst`. The sum is computed on
// the UintLeftJustified() lane values and shifted back down by
// (64 - lane_size) before being stored. Overflow of that 64-bit sum is
// reported through SetUnsignedSat()/SetSignedSat() on `dst`.
// NOTE(review): this presumes UintLeftJustified() places the lane's most
// significant bit at bit 63 - confirm against LogicVRegister.
LogicVRegister Simulator::add(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for unsigned saturation.
    uint64_t ua = src1.UintLeftJustified(vform, i);
    uint64_t ub = src2.UintLeftJustified(vform, i);
    uint64_t ur = ua + ub;
    // A wrapped 64-bit sum is smaller than either operand.
    if (ur < ua) {
      dst.SetUnsignedSat(i, true);
    }

    // Test for signed saturation.
    bool pos_a = (ua >> 63) == 0;
    bool pos_b = (ub >> 63) == 0;
    bool pos_r = (ur >> 63) == 0;
    // If the signs of the operands are the same, but different from the result,
    // there was an overflow.
    if ((pos_a == pos_b) && (pos_a != pos_r)) {
      dst.SetSignedSat(i, pos_a);
    }

    dst.SetInt(vform, i, ur >> (64 - lane_size));
  }
  return dst;
}


// Pairwise add, built from uzp1 and uzp2 of (src1, src2) followed by a
// lane-wise add of the two temporaries.
LogicVRegister Simulator::addp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  uzp1(vform, temp1, src1, src2);
  uzp2(vform, temp2, src1, src2);
  add(vform, dst, temp1, temp2);
  return dst;
}


// Multiply-accumulate: dst = dst + (src1 * src2), lane-wise, using mul and
// add.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister temp;
  mul(vform, temp, src1, src2);
  add(vform, dst, dst, temp);
  return dst;
}


// Multiply-subtract: dst = dst - (src1 * src2), lane-wise, using mul and sub.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister temp;
  mul(vform, temp, src1, src2);
  sub(vform, dst, dst, temp);
  return dst;
}


// Lane-wise multiplication of the unsigned lane values of `src1` and `src2`;
// the product is stored with SetUint().
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
  }
  return dst;
}


// By-element mul: the second operand of the vector overload is
// dup_element(indexform, temp, src2, index), where indexform is
// VectorFormatFillQ(vform).
LogicVRegister Simulator::mul(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element mla: same operand preparation as the by-element mul, forwarded
// to the vector mla.
LogicVRegister Simulator::mla(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element mls: same operand preparation as the by-element mul, forwarded
// to the vector mls.
LogicVRegister Simulator::mls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2,
                              int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element smull: the second operand of the vector overload is
// dup_element(indexform, temp, src2, index), where indexform is the
// half-width, double-lane variant of VectorFormatFillQ(vform).
LogicVRegister Simulator::smull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element smull2: same operand preparation as the by-element smull,
// forwarded to the vector smull2.
LogicVRegister Simulator::smull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element umull: same operand preparation as the by-element smull,
// forwarded to the vector umull.
LogicVRegister Simulator::umull(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element umull2: same operand preparation as the by-element smull,
// forwarded to the vector umull2.
LogicVRegister Simulator::umull2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element smlal: same operand preparation as the by-element smull,
// forwarded to the vector smlal.
LogicVRegister Simulator::smlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element smlal2: same operand preparation as the by-element smull,
// forwarded to the vector smlal2.
LogicVRegister Simulator::smlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element umlal: same operand preparation as the by-element smull,
// forwarded to the vector umlal.
LogicVRegister Simulator::umlal(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element umlal2: same operand preparation as the by-element smull,
// forwarded to the vector umlal2.
LogicVRegister Simulator::umlal2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element smlsl: same operand preparation as the by-element smull,
// forwarded to the vector smlsl.
LogicVRegister Simulator::smlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element smlsl2: same operand preparation as the by-element smull,
// forwarded to the vector smlsl2.
LogicVRegister Simulator::smlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element umlsl: same operand preparation as the by-element smull,
// forwarded to the vector umlsl.
LogicVRegister Simulator::umlsl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2,
                                int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element umlsl2: same operand preparation as the by-element smull,
// forwarded to the vector umlsl2.
LogicVRegister Simulator::umlsl2(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2,
                                 int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element sqdmull: same operand preparation as the by-element smull,
// forwarded to the vector sqdmull.
LogicVRegister Simulator::sqdmull(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element sqdmull2: same operand preparation as the by-element smull,
// forwarded to the vector sqdmull2.
LogicVRegister Simulator::sqdmull2(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


// By-element sqdmlal: same operand preparation as the by-element smull,
// forwarded to the vector sqdmlal.
LogicVRegister Simulator::sqdmlal(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}


LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 1070 LogicVRegister dst, 1071 const LogicVRegister& src1, 1072 const LogicVRegister& src2, 1073 int index) { 1074 SimVRegister temp; 1075 VectorFormat indexform = 1076 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1077 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1078 } 1079 1080 1081 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 1082 LogicVRegister dst, 1083 const LogicVRegister& src1, 1084 const LogicVRegister& src2, 1085 int index) { 1086 SimVRegister temp; 1087 VectorFormat indexform = 1088 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1089 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1090 } 1091 1092 1093 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 1094 LogicVRegister dst, 1095 const LogicVRegister& src1, 1096 const LogicVRegister& src2, 1097 int index) { 1098 SimVRegister temp; 1099 VectorFormat indexform = 1100 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1101 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1102 } 1103 1104 1105 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 1106 LogicVRegister dst, 1107 const LogicVRegister& src1, 1108 const LogicVRegister& src2, 1109 int index) { 1110 SimVRegister temp; 1111 VectorFormat indexform = VectorFormatFillQ(vform); 1112 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1113 } 1114 1115 1116 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 1117 LogicVRegister dst, 1118 const LogicVRegister& src1, 1119 const LogicVRegister& src2, 1120 int index) { 1121 SimVRegister temp; 1122 VectorFormat indexform = VectorFormatFillQ(vform); 1123 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1124 } 1125 1126 1127 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const { 1128 uint16_t result = 0; 1129 uint16_t extended_op2 = op2; 1130 for 
(int i = 0; i < 8; ++i) { 1131 if ((op1 >> i) & 1) { 1132 result = result ^ (extended_op2 << i); 1133 } 1134 } 1135 return result; 1136 } 1137 1138 1139 LogicVRegister Simulator::pmul(VectorFormat vform, 1140 LogicVRegister dst, 1141 const LogicVRegister& src1, 1142 const LogicVRegister& src2) { 1143 dst.ClearForWrite(vform); 1144 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1145 dst.SetUint(vform, 1146 i, 1147 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 1148 } 1149 return dst; 1150 } 1151 1152 1153 LogicVRegister Simulator::pmull(VectorFormat vform, 1154 LogicVRegister dst, 1155 const LogicVRegister& src1, 1156 const LogicVRegister& src2) { 1157 VectorFormat vform_src = VectorFormatHalfWidth(vform); 1158 dst.ClearForWrite(vform); 1159 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1160 dst.SetUint(vform, 1161 i, 1162 PolynomialMult(src1.Uint(vform_src, i), 1163 src2.Uint(vform_src, i))); 1164 } 1165 return dst; 1166 } 1167 1168 1169 LogicVRegister Simulator::pmull2(VectorFormat vform, 1170 LogicVRegister dst, 1171 const LogicVRegister& src1, 1172 const LogicVRegister& src2) { 1173 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 1174 dst.ClearForWrite(vform); 1175 int lane_count = LaneCountFromFormat(vform); 1176 for (int i = 0; i < lane_count; i++) { 1177 dst.SetUint(vform, 1178 i, 1179 PolynomialMult(src1.Uint(vform_src, lane_count + i), 1180 src2.Uint(vform_src, lane_count + i))); 1181 } 1182 return dst; 1183 } 1184 1185 1186 LogicVRegister Simulator::sub(VectorFormat vform, 1187 LogicVRegister dst, 1188 const LogicVRegister& src1, 1189 const LogicVRegister& src2) { 1190 int lane_size = LaneSizeInBitsFromFormat(vform); 1191 dst.ClearForWrite(vform); 1192 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1193 // Test for unsigned saturation. 
1194 uint64_t ua = src1.UintLeftJustified(vform, i); 1195 uint64_t ub = src2.UintLeftJustified(vform, i); 1196 uint64_t ur = ua - ub; 1197 if (ub > ua) { 1198 dst.SetUnsignedSat(i, false); 1199 } 1200 1201 // Test for signed saturation. 1202 bool pos_a = (ua >> 63) == 0; 1203 bool pos_b = (ub >> 63) == 0; 1204 bool pos_r = (ur >> 63) == 0; 1205 // If the signs of the operands are different, and the sign of the first 1206 // operand doesn't match the result, there was an overflow. 1207 if ((pos_a != pos_b) && (pos_a != pos_r)) { 1208 dst.SetSignedSat(i, pos_a); 1209 } 1210 1211 dst.SetInt(vform, i, ur >> (64 - lane_size)); 1212 } 1213 return dst; 1214 } 1215 1216 1217 LogicVRegister Simulator::and_(VectorFormat vform, 1218 LogicVRegister dst, 1219 const LogicVRegister& src1, 1220 const LogicVRegister& src2) { 1221 dst.ClearForWrite(vform); 1222 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1223 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 1224 } 1225 return dst; 1226 } 1227 1228 1229 LogicVRegister Simulator::orr(VectorFormat vform, 1230 LogicVRegister dst, 1231 const LogicVRegister& src1, 1232 const LogicVRegister& src2) { 1233 dst.ClearForWrite(vform); 1234 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1235 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1236 } 1237 return dst; 1238 } 1239 1240 1241 LogicVRegister Simulator::orn(VectorFormat vform, 1242 LogicVRegister dst, 1243 const LogicVRegister& src1, 1244 const LogicVRegister& src2) { 1245 dst.ClearForWrite(vform); 1246 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1247 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1248 } 1249 return dst; 1250 } 1251 1252 1253 LogicVRegister Simulator::eor(VectorFormat vform, 1254 LogicVRegister dst, 1255 const LogicVRegister& src1, 1256 const LogicVRegister& src2) { 1257 dst.ClearForWrite(vform); 1258 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1259 dst.SetUint(vform, i, 
src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1260 } 1261 return dst; 1262 } 1263 1264 1265 LogicVRegister Simulator::bic(VectorFormat vform, 1266 LogicVRegister dst, 1267 const LogicVRegister& src1, 1268 const LogicVRegister& src2) { 1269 dst.ClearForWrite(vform); 1270 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1271 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1272 } 1273 return dst; 1274 } 1275 1276 1277 LogicVRegister Simulator::bic(VectorFormat vform, 1278 LogicVRegister dst, 1279 const LogicVRegister& src, 1280 uint64_t imm) { 1281 uint64_t result[16]; 1282 int laneCount = LaneCountFromFormat(vform); 1283 for (int i = 0; i < laneCount; ++i) { 1284 result[i] = src.Uint(vform, i) & ~imm; 1285 } 1286 dst.ClearForWrite(vform); 1287 for (int i = 0; i < laneCount; ++i) { 1288 dst.SetUint(vform, i, result[i]); 1289 } 1290 return dst; 1291 } 1292 1293 1294 LogicVRegister Simulator::bif(VectorFormat vform, 1295 LogicVRegister dst, 1296 const LogicVRegister& src1, 1297 const LogicVRegister& src2) { 1298 dst.ClearForWrite(vform); 1299 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1300 uint64_t operand1 = dst.Uint(vform, i); 1301 uint64_t operand2 = ~src2.Uint(vform, i); 1302 uint64_t operand3 = src1.Uint(vform, i); 1303 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1304 dst.SetUint(vform, i, result); 1305 } 1306 return dst; 1307 } 1308 1309 1310 LogicVRegister Simulator::bit(VectorFormat vform, 1311 LogicVRegister dst, 1312 const LogicVRegister& src1, 1313 const LogicVRegister& src2) { 1314 dst.ClearForWrite(vform); 1315 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1316 uint64_t operand1 = dst.Uint(vform, i); 1317 uint64_t operand2 = src2.Uint(vform, i); 1318 uint64_t operand3 = src1.Uint(vform, i); 1319 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1320 dst.SetUint(vform, i, result); 1321 } 1322 return dst; 1323 } 1324 1325 1326 LogicVRegister Simulator::bsl(VectorFormat vform, 1327 
LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t operand1 = src2.Uint(vform, i);
    uint64_t operand2 = dst.Uint(vform, i);
    uint64_t operand3 = src1.Uint(vform, i);
    // Select operand3 where operand2 is set, operand1 elsewhere.
    uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
    dst.SetUint(vform, i, result);
  }
  return dst;
}


// Lane-wise signed maximum (`max` == true) or minimum (`max` == false) of
// `src1` and `src2`.
LogicVRegister Simulator::sminmax(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool max) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t src1_val = src1.Int(vform, i);
    int64_t src2_val = src2.Int(vform, i);
    int64_t dst_val;
    if (max) {
      dst_val = (src1_val > src2_val) ? src1_val : src2_val;
    } else {
      dst_val = (src1_val < src2_val) ? src1_val : src2_val;
    }
    dst.SetInt(vform, i, dst_val);
  }
  return dst;
}


// Lane-wise signed maximum; see sminmax().
LogicVRegister Simulator::smax(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  return sminmax(vform, dst, src1, src2, true);
}


// Lane-wise signed minimum; see sminmax().
LogicVRegister Simulator::smin(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  return sminmax(vform, dst, src1, src2, false);
}


// Pairwise signed maximum/minimum. Adjacent lane pairs of `src1` fill the
// lower half of the result and adjacent pairs of `src2` fill the upper half.
// The result is staged in a local array and written to `dst` at the end.
LogicVRegister Simulator::sminmaxp(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool max) {
  int lanes = LaneCountFromFormat(vform);
  int64_t result[kMaxLanesPerVector];
  const LogicVRegister* src = &src1;
  for (int j = 0; j < 2; j++) {
    for (int i = 0; i < lanes; i += 2) {
      int64_t first_val = src->Int(vform, i);
      int64_t second_val = src->Int(vform, i + 1);
      int64_t dst_val;
      if
(max) { 1393 dst_val = (first_val > second_val) ? first_val : second_val; 1394 } else { 1395 dst_val = (first_val < second_val) ? first_val : second_val; 1396 } 1397 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1398 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1399 } 1400 src = &src2; 1401 } 1402 dst.SetIntArray(vform, result); 1403 return dst; 1404 } 1405 1406 1407 LogicVRegister Simulator::smaxp(VectorFormat vform, 1408 LogicVRegister dst, 1409 const LogicVRegister& src1, 1410 const LogicVRegister& src2) { 1411 return sminmaxp(vform, dst, src1, src2, true); 1412 } 1413 1414 1415 LogicVRegister Simulator::sminp(VectorFormat vform, 1416 LogicVRegister dst, 1417 const LogicVRegister& src1, 1418 const LogicVRegister& src2) { 1419 return sminmaxp(vform, dst, src1, src2, false); 1420 } 1421 1422 1423 LogicVRegister Simulator::addp(VectorFormat vform, 1424 LogicVRegister dst, 1425 const LogicVRegister& src) { 1426 VIXL_ASSERT(vform == kFormatD); 1427 1428 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); 1429 dst.ClearForWrite(vform); 1430 dst.SetUint(vform, 0, dst_val); 1431 return dst; 1432 } 1433 1434 1435 LogicVRegister Simulator::addv(VectorFormat vform, 1436 LogicVRegister dst, 1437 const LogicVRegister& src) { 1438 VectorFormat vform_dst = 1439 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1440 1441 1442 int64_t dst_val = 0; 1443 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1444 dst_val += src.Int(vform, i); 1445 } 1446 1447 dst.ClearForWrite(vform_dst); 1448 dst.SetInt(vform_dst, 0, dst_val); 1449 return dst; 1450 } 1451 1452 1453 LogicVRegister Simulator::saddlv(VectorFormat vform, 1454 LogicVRegister dst, 1455 const LogicVRegister& src) { 1456 VectorFormat vform_dst = 1457 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1458 1459 int64_t dst_val = 0; 1460 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1461 dst_val += src.Int(vform, i); 1462 } 1463 1464 
dst.ClearForWrite(vform_dst); 1465 dst.SetInt(vform_dst, 0, dst_val); 1466 return dst; 1467 } 1468 1469 1470 LogicVRegister Simulator::uaddlv(VectorFormat vform, 1471 LogicVRegister dst, 1472 const LogicVRegister& src) { 1473 VectorFormat vform_dst = 1474 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1475 1476 uint64_t dst_val = 0; 1477 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1478 dst_val += src.Uint(vform, i); 1479 } 1480 1481 dst.ClearForWrite(vform_dst); 1482 dst.SetUint(vform_dst, 0, dst_val); 1483 return dst; 1484 } 1485 1486 1487 LogicVRegister Simulator::sminmaxv(VectorFormat vform, 1488 LogicVRegister dst, 1489 const LogicVRegister& src, 1490 bool max) { 1491 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1492 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1493 int64_t src_val = src.Int(vform, i); 1494 if (max) { 1495 dst_val = (src_val > dst_val) ? src_val : dst_val; 1496 } else { 1497 dst_val = (src_val < dst_val) ? src_val : dst_val; 1498 } 1499 } 1500 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1501 dst.SetInt(vform, 0, dst_val); 1502 return dst; 1503 } 1504 1505 1506 LogicVRegister Simulator::smaxv(VectorFormat vform, 1507 LogicVRegister dst, 1508 const LogicVRegister& src) { 1509 sminmaxv(vform, dst, src, true); 1510 return dst; 1511 } 1512 1513 1514 LogicVRegister Simulator::sminv(VectorFormat vform, 1515 LogicVRegister dst, 1516 const LogicVRegister& src) { 1517 sminmaxv(vform, dst, src, false); 1518 return dst; 1519 } 1520 1521 1522 LogicVRegister Simulator::uminmax(VectorFormat vform, 1523 LogicVRegister dst, 1524 const LogicVRegister& src1, 1525 const LogicVRegister& src2, 1526 bool max) { 1527 dst.ClearForWrite(vform); 1528 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1529 uint64_t src1_val = src1.Uint(vform, i); 1530 uint64_t src2_val = src2.Uint(vform, i); 1531 uint64_t dst_val; 1532 if (max) { 1533 dst_val = (src1_val > src2_val) ? 
src1_val : src2_val; 1534 } else { 1535 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1536 } 1537 dst.SetUint(vform, i, dst_val); 1538 } 1539 return dst; 1540 } 1541 1542 1543 LogicVRegister Simulator::umax(VectorFormat vform, 1544 LogicVRegister dst, 1545 const LogicVRegister& src1, 1546 const LogicVRegister& src2) { 1547 return uminmax(vform, dst, src1, src2, true); 1548 } 1549 1550 1551 LogicVRegister Simulator::umin(VectorFormat vform, 1552 LogicVRegister dst, 1553 const LogicVRegister& src1, 1554 const LogicVRegister& src2) { 1555 return uminmax(vform, dst, src1, src2, false); 1556 } 1557 1558 1559 LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1560 LogicVRegister dst, 1561 const LogicVRegister& src1, 1562 const LogicVRegister& src2, 1563 bool max) { 1564 int lanes = LaneCountFromFormat(vform); 1565 uint64_t result[kMaxLanesPerVector]; 1566 const LogicVRegister* src = &src1; 1567 for (int j = 0; j < 2; j++) { 1568 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1569 uint64_t first_val = src->Uint(vform, i); 1570 uint64_t second_val = src->Uint(vform, i + 1); 1571 uint64_t dst_val; 1572 if (max) { 1573 dst_val = (first_val > second_val) ? first_val : second_val; 1574 } else { 1575 dst_val = (first_val < second_val) ? 
first_val : second_val; 1576 } 1577 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1578 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1579 } 1580 src = &src2; 1581 } 1582 dst.SetUintArray(vform, result); 1583 return dst; 1584 } 1585 1586 1587 LogicVRegister Simulator::umaxp(VectorFormat vform, 1588 LogicVRegister dst, 1589 const LogicVRegister& src1, 1590 const LogicVRegister& src2) { 1591 return uminmaxp(vform, dst, src1, src2, true); 1592 } 1593 1594 1595 LogicVRegister Simulator::uminp(VectorFormat vform, 1596 LogicVRegister dst, 1597 const LogicVRegister& src1, 1598 const LogicVRegister& src2) { 1599 return uminmaxp(vform, dst, src1, src2, false); 1600 } 1601 1602 1603 LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1604 LogicVRegister dst, 1605 const LogicVRegister& src, 1606 bool max) { 1607 uint64_t dst_val = max ? 0 : UINT64_MAX; 1608 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1609 uint64_t src_val = src.Uint(vform, i); 1610 if (max) { 1611 dst_val = (src_val > dst_val) ? src_val : dst_val; 1612 } else { 1613 dst_val = (src_val < dst_val) ? 
src_val : dst_val; 1614 } 1615 } 1616 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1617 dst.SetUint(vform, 0, dst_val); 1618 return dst; 1619 } 1620 1621 1622 LogicVRegister Simulator::umaxv(VectorFormat vform, 1623 LogicVRegister dst, 1624 const LogicVRegister& src) { 1625 uminmaxv(vform, dst, src, true); 1626 return dst; 1627 } 1628 1629 1630 LogicVRegister Simulator::uminv(VectorFormat vform, 1631 LogicVRegister dst, 1632 const LogicVRegister& src) { 1633 uminmaxv(vform, dst, src, false); 1634 return dst; 1635 } 1636 1637 1638 LogicVRegister Simulator::shl(VectorFormat vform, 1639 LogicVRegister dst, 1640 const LogicVRegister& src, 1641 int shift) { 1642 VIXL_ASSERT(shift >= 0); 1643 SimVRegister temp; 1644 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1645 return ushl(vform, dst, src, shiftreg); 1646 } 1647 1648 1649 LogicVRegister Simulator::sshll(VectorFormat vform, 1650 LogicVRegister dst, 1651 const LogicVRegister& src, 1652 int shift) { 1653 VIXL_ASSERT(shift >= 0); 1654 SimVRegister temp1, temp2; 1655 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1656 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1657 return sshl(vform, dst, extendedreg, shiftreg); 1658 } 1659 1660 1661 LogicVRegister Simulator::sshll2(VectorFormat vform, 1662 LogicVRegister dst, 1663 const LogicVRegister& src, 1664 int shift) { 1665 VIXL_ASSERT(shift >= 0); 1666 SimVRegister temp1, temp2; 1667 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1668 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1669 return sshl(vform, dst, extendedreg, shiftreg); 1670 } 1671 1672 1673 LogicVRegister Simulator::shll(VectorFormat vform, 1674 LogicVRegister dst, 1675 const LogicVRegister& src) { 1676 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1677 return sshll(vform, dst, src, shift); 1678 } 1679 1680 1681 LogicVRegister Simulator::shll2(VectorFormat vform, 1682 LogicVRegister dst, 1683 const LogicVRegister& src) { 1684 int shift = 
LaneSizeInBitsFromFormat(vform) / 2; 1685 return sshll2(vform, dst, src, shift); 1686 } 1687 1688 1689 LogicVRegister Simulator::ushll(VectorFormat vform, 1690 LogicVRegister dst, 1691 const LogicVRegister& src, 1692 int shift) { 1693 VIXL_ASSERT(shift >= 0); 1694 SimVRegister temp1, temp2; 1695 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1696 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1697 return ushl(vform, dst, extendedreg, shiftreg); 1698 } 1699 1700 1701 LogicVRegister Simulator::ushll2(VectorFormat vform, 1702 LogicVRegister dst, 1703 const LogicVRegister& src, 1704 int shift) { 1705 VIXL_ASSERT(shift >= 0); 1706 SimVRegister temp1, temp2; 1707 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1708 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1709 return ushl(vform, dst, extendedreg, shiftreg); 1710 } 1711 1712 1713 LogicVRegister Simulator::sli(VectorFormat vform, 1714 LogicVRegister dst, 1715 const LogicVRegister& src, 1716 int shift) { 1717 dst.ClearForWrite(vform); 1718 int laneCount = LaneCountFromFormat(vform); 1719 for (int i = 0; i < laneCount; i++) { 1720 uint64_t src_lane = src.Uint(vform, i); 1721 uint64_t dst_lane = dst.Uint(vform, i); 1722 uint64_t shifted = src_lane << shift; 1723 uint64_t mask = MaxUintFromFormat(vform) << shift; 1724 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1725 } 1726 return dst; 1727 } 1728 1729 1730 LogicVRegister Simulator::sqshl(VectorFormat vform, 1731 LogicVRegister dst, 1732 const LogicVRegister& src, 1733 int shift) { 1734 VIXL_ASSERT(shift >= 0); 1735 SimVRegister temp; 1736 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1737 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1738 } 1739 1740 1741 LogicVRegister Simulator::uqshl(VectorFormat vform, 1742 LogicVRegister dst, 1743 const LogicVRegister& src, 1744 int shift) { 1745 VIXL_ASSERT(shift >= 0); 1746 SimVRegister temp; 1747 LogicVRegister shiftreg = dup_immediate(vform, 
temp, shift); 1748 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1749 } 1750 1751 1752 LogicVRegister Simulator::sqshlu(VectorFormat vform, 1753 LogicVRegister dst, 1754 const LogicVRegister& src, 1755 int shift) { 1756 VIXL_ASSERT(shift >= 0); 1757 SimVRegister temp; 1758 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1759 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1760 } 1761 1762 1763 LogicVRegister Simulator::sri(VectorFormat vform, 1764 LogicVRegister dst, 1765 const LogicVRegister& src, 1766 int shift) { 1767 dst.ClearForWrite(vform); 1768 int laneCount = LaneCountFromFormat(vform); 1769 VIXL_ASSERT((shift > 0) && 1770 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1771 for (int i = 0; i < laneCount; i++) { 1772 uint64_t src_lane = src.Uint(vform, i); 1773 uint64_t dst_lane = dst.Uint(vform, i); 1774 uint64_t shifted; 1775 uint64_t mask; 1776 if (shift == 64) { 1777 shifted = 0; 1778 mask = 0; 1779 } else { 1780 shifted = src_lane >> shift; 1781 mask = MaxUintFromFormat(vform) >> shift; 1782 } 1783 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1784 } 1785 return dst; 1786 } 1787 1788 1789 LogicVRegister Simulator::ushr(VectorFormat vform, 1790 LogicVRegister dst, 1791 const LogicVRegister& src, 1792 int shift) { 1793 VIXL_ASSERT(shift >= 0); 1794 SimVRegister temp; 1795 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1796 return ushl(vform, dst, src, shiftreg); 1797 } 1798 1799 1800 LogicVRegister Simulator::sshr(VectorFormat vform, 1801 LogicVRegister dst, 1802 const LogicVRegister& src, 1803 int shift) { 1804 VIXL_ASSERT(shift >= 0); 1805 SimVRegister temp; 1806 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1807 return sshl(vform, dst, src, shiftreg); 1808 } 1809 1810 1811 LogicVRegister Simulator::ssra(VectorFormat vform, 1812 LogicVRegister dst, 1813 const LogicVRegister& src, 1814 int shift) { 1815 SimVRegister temp; 1816 LogicVRegister shifted_reg = 
sshr(vform, temp, src, shift); 1817 return add(vform, dst, dst, shifted_reg); 1818 } 1819 1820 1821 LogicVRegister Simulator::usra(VectorFormat vform, 1822 LogicVRegister dst, 1823 const LogicVRegister& src, 1824 int shift) { 1825 SimVRegister temp; 1826 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1827 return add(vform, dst, dst, shifted_reg); 1828 } 1829 1830 1831 LogicVRegister Simulator::srsra(VectorFormat vform, 1832 LogicVRegister dst, 1833 const LogicVRegister& src, 1834 int shift) { 1835 SimVRegister temp; 1836 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1837 return add(vform, dst, dst, shifted_reg); 1838 } 1839 1840 1841 LogicVRegister Simulator::ursra(VectorFormat vform, 1842 LogicVRegister dst, 1843 const LogicVRegister& src, 1844 int shift) { 1845 SimVRegister temp; 1846 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1847 return add(vform, dst, dst, shifted_reg); 1848 } 1849 1850 1851 LogicVRegister Simulator::cls(VectorFormat vform, 1852 LogicVRegister dst, 1853 const LogicVRegister& src) { 1854 uint64_t result[16]; 1855 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1856 int laneCount = LaneCountFromFormat(vform); 1857 for (int i = 0; i < laneCount; i++) { 1858 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1859 } 1860 1861 dst.ClearForWrite(vform); 1862 for (int i = 0; i < laneCount; ++i) { 1863 dst.SetUint(vform, i, result[i]); 1864 } 1865 return dst; 1866 } 1867 1868 1869 LogicVRegister Simulator::clz(VectorFormat vform, 1870 LogicVRegister dst, 1871 const LogicVRegister& src) { 1872 uint64_t result[16]; 1873 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1874 int laneCount = LaneCountFromFormat(vform); 1875 for (int i = 0; i < laneCount; i++) { 1876 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1877 } 1878 1879 dst.ClearForWrite(vform); 1880 for (int i = 0; i < laneCount; ++i) { 1881 dst.SetUint(vform, i, result[i]); 1882 
} 1883 return dst; 1884 } 1885 1886 1887 LogicVRegister Simulator::cnt(VectorFormat vform, 1888 LogicVRegister dst, 1889 const LogicVRegister& src) { 1890 uint64_t result[16]; 1891 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1892 int laneCount = LaneCountFromFormat(vform); 1893 for (int i = 0; i < laneCount; i++) { 1894 uint64_t value = src.Uint(vform, i); 1895 result[i] = 0; 1896 for (int j = 0; j < laneSizeInBits; j++) { 1897 result[i] += (value & 1); 1898 value >>= 1; 1899 } 1900 } 1901 1902 dst.ClearForWrite(vform); 1903 for (int i = 0; i < laneCount; ++i) { 1904 dst.SetUint(vform, i, result[i]); 1905 } 1906 return dst; 1907 } 1908 1909 1910 LogicVRegister Simulator::sshl(VectorFormat vform, 1911 LogicVRegister dst, 1912 const LogicVRegister& src1, 1913 const LogicVRegister& src2) { 1914 dst.ClearForWrite(vform); 1915 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1916 int8_t shift_val = src2.Int(vform, i); 1917 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1918 1919 // Set signed saturation state. 1920 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { 1921 dst.SetSignedSat(i, lj_src_val >= 0); 1922 } 1923 1924 // Set unsigned saturation state. 1925 if (lj_src_val < 0) { 1926 dst.SetUnsignedSat(i, false); 1927 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1928 (lj_src_val != 0)) { 1929 dst.SetUnsignedSat(i, true); 1930 } 1931 1932 int64_t src_val = src1.Int(vform, i); 1933 bool src_is_negative = src_val < 0; 1934 if (shift_val > 63) { 1935 dst.SetInt(vform, i, 0); 1936 } else if (shift_val < -63) { 1937 dst.SetRounding(i, src_is_negative); 1938 dst.SetInt(vform, i, src_is_negative ? -1 : 0); 1939 } else { 1940 // Use unsigned types for shifts, as behaviour is undefined for signed 1941 // lhs. 1942 uint64_t usrc_val = static_cast<uint64_t>(src_val); 1943 1944 if (shift_val < 0) { 1945 // Convert to right shift. 
1946 shift_val = -shift_val; 1947 1948 // Set rounding state by testing most-significant bit shifted out. 1949 // Rounding only needed on right shifts. 1950 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { 1951 dst.SetRounding(i, true); 1952 } 1953 1954 usrc_val >>= shift_val; 1955 1956 if (src_is_negative) { 1957 // Simulate sign-extension. 1958 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); 1959 } 1960 } else { 1961 usrc_val <<= shift_val; 1962 } 1963 dst.SetUint(vform, i, usrc_val); 1964 } 1965 } 1966 return dst; 1967 } 1968 1969 1970 LogicVRegister Simulator::ushl(VectorFormat vform, 1971 LogicVRegister dst, 1972 const LogicVRegister& src1, 1973 const LogicVRegister& src2) { 1974 dst.ClearForWrite(vform); 1975 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1976 int8_t shift_val = src2.Int(vform, i); 1977 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1978 1979 // Set saturation state. 1980 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1981 dst.SetUnsignedSat(i, true); 1982 } 1983 1984 uint64_t src_val = src1.Uint(vform, i); 1985 if ((shift_val > 63) || (shift_val < -64)) { 1986 dst.SetUint(vform, i, 0); 1987 } else { 1988 if (shift_val < 0) { 1989 // Set rounding state. Rounding only needed on right shifts. 1990 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1991 dst.SetRounding(i, true); 1992 } 1993 1994 if (shift_val == -64) { 1995 src_val = 0; 1996 } else { 1997 src_val >>= -shift_val; 1998 } 1999 } else { 2000 src_val <<= shift_val; 2001 } 2002 dst.SetUint(vform, i, src_val); 2003 } 2004 } 2005 return dst; 2006 } 2007 2008 2009 LogicVRegister Simulator::neg(VectorFormat vform, 2010 LogicVRegister dst, 2011 const LogicVRegister& src) { 2012 dst.ClearForWrite(vform); 2013 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2014 // Test for signed saturation. 
2015 int64_t sa = src.Int(vform, i); 2016 if (sa == MinIntFromFormat(vform)) { 2017 dst.SetSignedSat(i, true); 2018 } 2019 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); 2020 } 2021 return dst; 2022 } 2023 2024 2025 LogicVRegister Simulator::suqadd(VectorFormat vform, 2026 LogicVRegister dst, 2027 const LogicVRegister& src) { 2028 dst.ClearForWrite(vform); 2029 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2030 int64_t sa = dst.IntLeftJustified(vform, i); 2031 uint64_t ub = src.UintLeftJustified(vform, i); 2032 uint64_t ur = sa + ub; 2033 2034 int64_t sr; 2035 memcpy(&sr, &ur, sizeof(sr)); 2036 if (sr < sa) { // Test for signed positive saturation. 2037 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 2038 } else { 2039 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); 2040 } 2041 } 2042 return dst; 2043 } 2044 2045 2046 LogicVRegister Simulator::usqadd(VectorFormat vform, 2047 LogicVRegister dst, 2048 const LogicVRegister& src) { 2049 dst.ClearForWrite(vform); 2050 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2051 uint64_t ua = dst.UintLeftJustified(vform, i); 2052 int64_t sb = src.IntLeftJustified(vform, i); 2053 uint64_t ur = ua + sb; 2054 2055 if ((sb > 0) && (ur <= ua)) { 2056 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 2057 } else if ((sb < 0) && (ur >= ua)) { 2058 dst.SetUint(vform, i, 0); // Negative saturation. 2059 } else { 2060 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 2061 } 2062 } 2063 return dst; 2064 } 2065 2066 2067 LogicVRegister Simulator::abs(VectorFormat vform, 2068 LogicVRegister dst, 2069 const LogicVRegister& src) { 2070 dst.ClearForWrite(vform); 2071 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2072 // Test for signed saturation. 2073 int64_t sa = src.Int(vform, i); 2074 if (sa == MinIntFromFormat(vform)) { 2075 dst.SetSignedSat(i, true); 2076 } 2077 if (sa < 0) { 2078 dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa); 2079 } else { 2080 dst.SetInt(vform, i, sa); 2081 } 2082 } 2083 return dst; 2084 } 2085 2086 2087 LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 2088 LogicVRegister dst, 2089 bool dstIsSigned, 2090 const LogicVRegister& src, 2091 bool srcIsSigned) { 2092 bool upperhalf = false; 2093 VectorFormat srcform = kFormatUndefined; 2094 int64_t ssrc[8]; 2095 uint64_t usrc[8]; 2096 2097 switch (dstform) { 2098 case kFormat8B: 2099 upperhalf = false; 2100 srcform = kFormat8H; 2101 break; 2102 case kFormat16B: 2103 upperhalf = true; 2104 srcform = kFormat8H; 2105 break; 2106 case kFormat4H: 2107 upperhalf = false; 2108 srcform = kFormat4S; 2109 break; 2110 case kFormat8H: 2111 upperhalf = true; 2112 srcform = kFormat4S; 2113 break; 2114 case kFormat2S: 2115 upperhalf = false; 2116 srcform = kFormat2D; 2117 break; 2118 case kFormat4S: 2119 upperhalf = true; 2120 srcform = kFormat2D; 2121 break; 2122 case kFormatB: 2123 upperhalf = false; 2124 srcform = kFormatH; 2125 break; 2126 case kFormatH: 2127 upperhalf = false; 2128 srcform = kFormatS; 2129 break; 2130 case kFormatS: 2131 upperhalf = false; 2132 srcform = kFormatD; 2133 break; 2134 default: 2135 VIXL_UNIMPLEMENTED(); 2136 } 2137 2138 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2139 ssrc[i] = src.Int(srcform, i); 2140 usrc[i] = src.Uint(srcform, i); 2141 } 2142 2143 int offset; 2144 if (upperhalf) { 2145 offset = LaneCountFromFormat(dstform) / 2; 2146 } else { 2147 offset = 0; 2148 dst.ClearForWrite(dstform); 2149 } 2150 2151 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 2152 // Test for signed saturation 2153 if (ssrc[i] > MaxIntFromFormat(dstform)) { 2154 dst.SetSignedSat(offset + i, true); 2155 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 2156 dst.SetSignedSat(offset + i, false); 2157 } 2158 2159 // Test for unsigned saturation 2160 if (srcIsSigned) { 2161 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 2162 dst.SetUnsignedSat(offset + i, true); 
2163 } else if (ssrc[i] < 0) { 2164 dst.SetUnsignedSat(offset + i, false); 2165 } 2166 } else { 2167 if (usrc[i] > MaxUintFromFormat(dstform)) { 2168 dst.SetUnsignedSat(offset + i, true); 2169 } 2170 } 2171 2172 int64_t result; 2173 if (srcIsSigned) { 2174 result = ssrc[i] & MaxUintFromFormat(dstform); 2175 } else { 2176 result = usrc[i] & MaxUintFromFormat(dstform); 2177 } 2178 2179 if (dstIsSigned) { 2180 dst.SetInt(dstform, offset + i, result); 2181 } else { 2182 dst.SetUint(dstform, offset + i, result); 2183 } 2184 } 2185 return dst; 2186 } 2187 2188 2189 LogicVRegister Simulator::xtn(VectorFormat vform, 2190 LogicVRegister dst, 2191 const LogicVRegister& src) { 2192 return extractnarrow(vform, dst, true, src, true); 2193 } 2194 2195 2196 LogicVRegister Simulator::sqxtn(VectorFormat vform, 2197 LogicVRegister dst, 2198 const LogicVRegister& src) { 2199 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 2200 } 2201 2202 2203 LogicVRegister Simulator::sqxtun(VectorFormat vform, 2204 LogicVRegister dst, 2205 const LogicVRegister& src) { 2206 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 2207 } 2208 2209 2210 LogicVRegister Simulator::uqxtn(VectorFormat vform, 2211 LogicVRegister dst, 2212 const LogicVRegister& src) { 2213 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 2214 } 2215 2216 2217 LogicVRegister Simulator::absdiff(VectorFormat vform, 2218 LogicVRegister dst, 2219 const LogicVRegister& src1, 2220 const LogicVRegister& src2, 2221 bool issigned) { 2222 dst.ClearForWrite(vform); 2223 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2224 if (issigned) { 2225 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 2226 sr = sr > 0 ? sr : -sr; 2227 dst.SetInt(vform, i, sr); 2228 } else { 2229 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 2230 sr = sr > 0 ? 
sr : -sr; 2231 dst.SetUint(vform, i, sr); 2232 } 2233 } 2234 return dst; 2235 } 2236 2237 2238 LogicVRegister Simulator::saba(VectorFormat vform, 2239 LogicVRegister dst, 2240 const LogicVRegister& src1, 2241 const LogicVRegister& src2) { 2242 SimVRegister temp; 2243 dst.ClearForWrite(vform); 2244 absdiff(vform, temp, src1, src2, true); 2245 add(vform, dst, dst, temp); 2246 return dst; 2247 } 2248 2249 2250 LogicVRegister Simulator::uaba(VectorFormat vform, 2251 LogicVRegister dst, 2252 const LogicVRegister& src1, 2253 const LogicVRegister& src2) { 2254 SimVRegister temp; 2255 dst.ClearForWrite(vform); 2256 absdiff(vform, temp, src1, src2, false); 2257 add(vform, dst, dst, temp); 2258 return dst; 2259 } 2260 2261 2262 LogicVRegister Simulator::not_(VectorFormat vform, 2263 LogicVRegister dst, 2264 const LogicVRegister& src) { 2265 dst.ClearForWrite(vform); 2266 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2267 dst.SetUint(vform, i, ~src.Uint(vform, i)); 2268 } 2269 return dst; 2270 } 2271 2272 2273 LogicVRegister Simulator::rbit(VectorFormat vform, 2274 LogicVRegister dst, 2275 const LogicVRegister& src) { 2276 uint64_t result[16]; 2277 int laneCount = LaneCountFromFormat(vform); 2278 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 2279 uint64_t reversed_value; 2280 uint64_t value; 2281 for (int i = 0; i < laneCount; i++) { 2282 value = src.Uint(vform, i); 2283 reversed_value = 0; 2284 for (int j = 0; j < laneSizeInBits; j++) { 2285 reversed_value = (reversed_value << 1) | (value & 1); 2286 value >>= 1; 2287 } 2288 result[i] = reversed_value; 2289 } 2290 2291 dst.ClearForWrite(vform); 2292 for (int i = 0; i < laneCount; ++i) { 2293 dst.SetUint(vform, i, result[i]); 2294 } 2295 return dst; 2296 } 2297 2298 2299 LogicVRegister Simulator::rev(VectorFormat vform, 2300 LogicVRegister dst, 2301 const LogicVRegister& src, 2302 int revSize) { 2303 uint64_t result[16]; 2304 int laneCount = LaneCountFromFormat(vform); 2305 int laneSize = 
LaneSizeInBytesFromFormat(vform); 2306 int lanesPerLoop = revSize / laneSize; 2307 for (int i = 0; i < laneCount; i += lanesPerLoop) { 2308 for (int j = 0; j < lanesPerLoop; j++) { 2309 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 2310 } 2311 } 2312 dst.ClearForWrite(vform); 2313 for (int i = 0; i < laneCount; ++i) { 2314 dst.SetUint(vform, i, result[i]); 2315 } 2316 return dst; 2317 } 2318 2319 2320 LogicVRegister Simulator::rev16(VectorFormat vform, 2321 LogicVRegister dst, 2322 const LogicVRegister& src) { 2323 return rev(vform, dst, src, 2); 2324 } 2325 2326 2327 LogicVRegister Simulator::rev32(VectorFormat vform, 2328 LogicVRegister dst, 2329 const LogicVRegister& src) { 2330 return rev(vform, dst, src, 4); 2331 } 2332 2333 2334 LogicVRegister Simulator::rev64(VectorFormat vform, 2335 LogicVRegister dst, 2336 const LogicVRegister& src) { 2337 return rev(vform, dst, src, 8); 2338 } 2339 2340 2341 LogicVRegister Simulator::addlp(VectorFormat vform, 2342 LogicVRegister dst, 2343 const LogicVRegister& src, 2344 bool is_signed, 2345 bool do_accumulate) { 2346 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2347 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32); 2348 VIXL_ASSERT(LaneCountFromFormat(vform) <= 8); 2349 2350 uint64_t result[8]; 2351 int lane_count = LaneCountFromFormat(vform); 2352 for (int i = 0; i < lane_count; i++) { 2353 if (is_signed) { 2354 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + 2355 src.Int(vformsrc, 2 * i + 1)); 2356 } else { 2357 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2358 } 2359 } 2360 2361 dst.ClearForWrite(vform); 2362 for (int i = 0; i < lane_count; ++i) { 2363 if (do_accumulate) { 2364 result[i] += dst.Uint(vform, i); 2365 } 2366 dst.SetUint(vform, i, result[i]); 2367 } 2368 2369 return dst; 2370 } 2371 2372 2373 LogicVRegister Simulator::saddlp(VectorFormat vform, 2374 LogicVRegister dst, 2375 const LogicVRegister& src) { 2376 return addlp(vform, 
dst, src, true, false); 2377 } 2378 2379 2380 LogicVRegister Simulator::uaddlp(VectorFormat vform, 2381 LogicVRegister dst, 2382 const LogicVRegister& src) { 2383 return addlp(vform, dst, src, false, false); 2384 } 2385 2386 2387 LogicVRegister Simulator::sadalp(VectorFormat vform, 2388 LogicVRegister dst, 2389 const LogicVRegister& src) { 2390 return addlp(vform, dst, src, true, true); 2391 } 2392 2393 2394 LogicVRegister Simulator::uadalp(VectorFormat vform, 2395 LogicVRegister dst, 2396 const LogicVRegister& src) { 2397 return addlp(vform, dst, src, false, true); 2398 } 2399 2400 2401 LogicVRegister Simulator::ext(VectorFormat vform, 2402 LogicVRegister dst, 2403 const LogicVRegister& src1, 2404 const LogicVRegister& src2, 2405 int index) { 2406 uint8_t result[16]; 2407 int laneCount = LaneCountFromFormat(vform); 2408 for (int i = 0; i < laneCount - index; ++i) { 2409 result[i] = src1.Uint(vform, i + index); 2410 } 2411 for (int i = 0; i < index; ++i) { 2412 result[laneCount - index + i] = src2.Uint(vform, i); 2413 } 2414 dst.ClearForWrite(vform); 2415 for (int i = 0; i < laneCount; ++i) { 2416 dst.SetUint(vform, i, result[i]); 2417 } 2418 return dst; 2419 } 2420 2421 2422 LogicVRegister Simulator::dup_element(VectorFormat vform, 2423 LogicVRegister dst, 2424 const LogicVRegister& src, 2425 int src_index) { 2426 int laneCount = LaneCountFromFormat(vform); 2427 uint64_t value = src.Uint(vform, src_index); 2428 dst.ClearForWrite(vform); 2429 for (int i = 0; i < laneCount; ++i) { 2430 dst.SetUint(vform, i, value); 2431 } 2432 return dst; 2433 } 2434 2435 2436 LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2437 LogicVRegister dst, 2438 uint64_t imm) { 2439 int laneCount = LaneCountFromFormat(vform); 2440 uint64_t value = imm & MaxUintFromFormat(vform); 2441 dst.ClearForWrite(vform); 2442 for (int i = 0; i < laneCount; ++i) { 2443 dst.SetUint(vform, i, value); 2444 } 2445 return dst; 2446 } 2447 2448 2449 LogicVRegister 
Simulator::ins_element(VectorFormat vform, 2450 LogicVRegister dst, 2451 int dst_index, 2452 const LogicVRegister& src, 2453 int src_index) { 2454 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2455 return dst; 2456 } 2457 2458 2459 LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2460 LogicVRegister dst, 2461 int dst_index, 2462 uint64_t imm) { 2463 uint64_t value = imm & MaxUintFromFormat(vform); 2464 dst.SetUint(vform, dst_index, value); 2465 return dst; 2466 } 2467 2468 2469 LogicVRegister Simulator::movi(VectorFormat vform, 2470 LogicVRegister dst, 2471 uint64_t imm) { 2472 int laneCount = LaneCountFromFormat(vform); 2473 dst.ClearForWrite(vform); 2474 for (int i = 0; i < laneCount; ++i) { 2475 dst.SetUint(vform, i, imm); 2476 } 2477 return dst; 2478 } 2479 2480 2481 LogicVRegister Simulator::mvni(VectorFormat vform, 2482 LogicVRegister dst, 2483 uint64_t imm) { 2484 int laneCount = LaneCountFromFormat(vform); 2485 dst.ClearForWrite(vform); 2486 for (int i = 0; i < laneCount; ++i) { 2487 dst.SetUint(vform, i, ~imm); 2488 } 2489 return dst; 2490 } 2491 2492 2493 LogicVRegister Simulator::orr(VectorFormat vform, 2494 LogicVRegister dst, 2495 const LogicVRegister& src, 2496 uint64_t imm) { 2497 uint64_t result[16]; 2498 int laneCount = LaneCountFromFormat(vform); 2499 for (int i = 0; i < laneCount; ++i) { 2500 result[i] = src.Uint(vform, i) | imm; 2501 } 2502 dst.ClearForWrite(vform); 2503 for (int i = 0; i < laneCount; ++i) { 2504 dst.SetUint(vform, i, result[i]); 2505 } 2506 return dst; 2507 } 2508 2509 2510 LogicVRegister Simulator::uxtl(VectorFormat vform, 2511 LogicVRegister dst, 2512 const LogicVRegister& src) { 2513 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2514 2515 dst.ClearForWrite(vform); 2516 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2517 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2518 } 2519 return dst; 2520 } 2521 2522 2523 LogicVRegister Simulator::sxtl(VectorFormat vform, 2524 LogicVRegister 
dst, 2525 const LogicVRegister& src) { 2526 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2527 2528 dst.ClearForWrite(vform); 2529 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2530 dst.SetInt(vform, i, src.Int(vform_half, i)); 2531 } 2532 return dst; 2533 } 2534 2535 2536 LogicVRegister Simulator::uxtl2(VectorFormat vform, 2537 LogicVRegister dst, 2538 const LogicVRegister& src) { 2539 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2540 int lane_count = LaneCountFromFormat(vform); 2541 2542 dst.ClearForWrite(vform); 2543 for (int i = 0; i < lane_count; i++) { 2544 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2545 } 2546 return dst; 2547 } 2548 2549 2550 LogicVRegister Simulator::sxtl2(VectorFormat vform, 2551 LogicVRegister dst, 2552 const LogicVRegister& src) { 2553 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2554 int lane_count = LaneCountFromFormat(vform); 2555 2556 dst.ClearForWrite(vform); 2557 for (int i = 0; i < lane_count; i++) { 2558 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2559 } 2560 return dst; 2561 } 2562 2563 2564 LogicVRegister Simulator::shrn(VectorFormat vform, 2565 LogicVRegister dst, 2566 const LogicVRegister& src, 2567 int shift) { 2568 SimVRegister temp; 2569 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2570 VectorFormat vform_dst = vform; 2571 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2572 return extractnarrow(vform_dst, dst, false, shifted_src, false); 2573 } 2574 2575 2576 LogicVRegister Simulator::shrn2(VectorFormat vform, 2577 LogicVRegister dst, 2578 const LogicVRegister& src, 2579 int shift) { 2580 SimVRegister temp; 2581 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2582 VectorFormat vformdst = vform; 2583 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2584 return extractnarrow(vformdst, dst, false, shifted_src, false); 2585 } 2586 2587 2588 LogicVRegister 
Simulator::rshrn(VectorFormat vform, 2589 LogicVRegister dst, 2590 const LogicVRegister& src, 2591 int shift) { 2592 SimVRegister temp; 2593 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2594 VectorFormat vformdst = vform; 2595 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2596 return extractnarrow(vformdst, dst, false, shifted_src, false); 2597 } 2598 2599 2600 LogicVRegister Simulator::rshrn2(VectorFormat vform, 2601 LogicVRegister dst, 2602 const LogicVRegister& src, 2603 int shift) { 2604 SimVRegister temp; 2605 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2606 VectorFormat vformdst = vform; 2607 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2608 return extractnarrow(vformdst, dst, false, shifted_src, false); 2609 } 2610 2611 2612 LogicVRegister Simulator::Table(VectorFormat vform, 2613 LogicVRegister dst, 2614 const LogicVRegister& ind, 2615 bool zero_out_of_bounds, 2616 const LogicVRegister* tab1, 2617 const LogicVRegister* tab2, 2618 const LogicVRegister* tab3, 2619 const LogicVRegister* tab4) { 2620 VIXL_ASSERT(tab1 != NULL); 2621 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; 2622 uint64_t result[kMaxLanesPerVector]; 2623 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2624 result[i] = zero_out_of_bounds ? 
0 : dst.Uint(kFormat16B, i); 2625 } 2626 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2627 uint64_t j = ind.Uint(vform, i); 2628 int tab_idx = static_cast<int>(j >> 4); 2629 int j_idx = static_cast<int>(j & 15); 2630 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { 2631 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); 2632 } 2633 } 2634 dst.SetUintArray(vform, result); 2635 return dst; 2636 } 2637 2638 2639 LogicVRegister Simulator::tbl(VectorFormat vform, 2640 LogicVRegister dst, 2641 const LogicVRegister& tab, 2642 const LogicVRegister& ind) { 2643 return Table(vform, dst, ind, true, &tab); 2644 } 2645 2646 2647 LogicVRegister Simulator::tbl(VectorFormat vform, 2648 LogicVRegister dst, 2649 const LogicVRegister& tab, 2650 const LogicVRegister& tab2, 2651 const LogicVRegister& ind) { 2652 return Table(vform, dst, ind, true, &tab, &tab2); 2653 } 2654 2655 2656 LogicVRegister Simulator::tbl(VectorFormat vform, 2657 LogicVRegister dst, 2658 const LogicVRegister& tab, 2659 const LogicVRegister& tab2, 2660 const LogicVRegister& tab3, 2661 const LogicVRegister& ind) { 2662 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); 2663 } 2664 2665 2666 LogicVRegister Simulator::tbl(VectorFormat vform, 2667 LogicVRegister dst, 2668 const LogicVRegister& tab, 2669 const LogicVRegister& tab2, 2670 const LogicVRegister& tab3, 2671 const LogicVRegister& tab4, 2672 const LogicVRegister& ind) { 2673 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); 2674 } 2675 2676 2677 LogicVRegister Simulator::tbx(VectorFormat vform, 2678 LogicVRegister dst, 2679 const LogicVRegister& tab, 2680 const LogicVRegister& ind) { 2681 return Table(vform, dst, ind, false, &tab); 2682 } 2683 2684 2685 LogicVRegister Simulator::tbx(VectorFormat vform, 2686 LogicVRegister dst, 2687 const LogicVRegister& tab, 2688 const LogicVRegister& tab2, 2689 const LogicVRegister& ind) { 2690 return Table(vform, dst, ind, false, &tab, &tab2); 2691 } 2692 2693 2694 LogicVRegister 
Simulator::tbx(VectorFormat vform, 2695 LogicVRegister dst, 2696 const LogicVRegister& tab, 2697 const LogicVRegister& tab2, 2698 const LogicVRegister& tab3, 2699 const LogicVRegister& ind) { 2700 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); 2701 } 2702 2703 2704 LogicVRegister Simulator::tbx(VectorFormat vform, 2705 LogicVRegister dst, 2706 const LogicVRegister& tab, 2707 const LogicVRegister& tab2, 2708 const LogicVRegister& tab3, 2709 const LogicVRegister& tab4, 2710 const LogicVRegister& ind) { 2711 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); 2712 } 2713 2714 2715 LogicVRegister Simulator::uqshrn(VectorFormat vform, 2716 LogicVRegister dst, 2717 const LogicVRegister& src, 2718 int shift) { 2719 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2720 } 2721 2722 2723 LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2724 LogicVRegister dst, 2725 const LogicVRegister& src, 2726 int shift) { 2727 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2728 } 2729 2730 2731 LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2732 LogicVRegister dst, 2733 const LogicVRegister& src, 2734 int shift) { 2735 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2736 } 2737 2738 2739 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2740 LogicVRegister dst, 2741 const LogicVRegister& src, 2742 int shift) { 2743 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2744 } 2745 2746 2747 LogicVRegister Simulator::sqshrn(VectorFormat vform, 2748 LogicVRegister dst, 2749 const LogicVRegister& src, 2750 int shift) { 2751 SimVRegister temp; 2752 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2753 VectorFormat vformdst = vform; 2754 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2755 return sqxtn(vformdst, dst, shifted_src); 2756 } 2757 2758 2759 LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2760 LogicVRegister dst, 2761 const LogicVRegister& src, 2762 int shift) { 
2763 SimVRegister temp; 2764 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2765 VectorFormat vformdst = vform; 2766 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2767 return sqxtn(vformdst, dst, shifted_src); 2768 } 2769 2770 2771 LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2772 LogicVRegister dst, 2773 const LogicVRegister& src, 2774 int shift) { 2775 SimVRegister temp; 2776 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2777 VectorFormat vformdst = vform; 2778 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2779 return sqxtn(vformdst, dst, shifted_src); 2780 } 2781 2782 2783 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2784 LogicVRegister dst, 2785 const LogicVRegister& src, 2786 int shift) { 2787 SimVRegister temp; 2788 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2789 VectorFormat vformdst = vform; 2790 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2791 return sqxtn(vformdst, dst, shifted_src); 2792 } 2793 2794 2795 LogicVRegister Simulator::sqshrun(VectorFormat vform, 2796 LogicVRegister dst, 2797 const LogicVRegister& src, 2798 int shift) { 2799 SimVRegister temp; 2800 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2801 VectorFormat vformdst = vform; 2802 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2803 return sqxtun(vformdst, dst, shifted_src); 2804 } 2805 2806 2807 LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2808 LogicVRegister dst, 2809 const LogicVRegister& src, 2810 int shift) { 2811 SimVRegister temp; 2812 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2813 VectorFormat vformdst = vform; 2814 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2815 return sqxtun(vformdst, dst, shifted_src); 2816 } 2817 2818 2819 LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2820 LogicVRegister dst, 2821 
const LogicVRegister& src, 2822 int shift) { 2823 SimVRegister temp; 2824 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2825 VectorFormat vformdst = vform; 2826 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2827 return sqxtun(vformdst, dst, shifted_src); 2828 } 2829 2830 2831 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2832 LogicVRegister dst, 2833 const LogicVRegister& src, 2834 int shift) { 2835 SimVRegister temp; 2836 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2837 VectorFormat vformdst = vform; 2838 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2839 return sqxtun(vformdst, dst, shifted_src); 2840 } 2841 2842 2843 LogicVRegister Simulator::uaddl(VectorFormat vform, 2844 LogicVRegister dst, 2845 const LogicVRegister& src1, 2846 const LogicVRegister& src2) { 2847 SimVRegister temp1, temp2; 2848 uxtl(vform, temp1, src1); 2849 uxtl(vform, temp2, src2); 2850 add(vform, dst, temp1, temp2); 2851 return dst; 2852 } 2853 2854 2855 LogicVRegister Simulator::uaddl2(VectorFormat vform, 2856 LogicVRegister dst, 2857 const LogicVRegister& src1, 2858 const LogicVRegister& src2) { 2859 SimVRegister temp1, temp2; 2860 uxtl2(vform, temp1, src1); 2861 uxtl2(vform, temp2, src2); 2862 add(vform, dst, temp1, temp2); 2863 return dst; 2864 } 2865 2866 2867 LogicVRegister Simulator::uaddw(VectorFormat vform, 2868 LogicVRegister dst, 2869 const LogicVRegister& src1, 2870 const LogicVRegister& src2) { 2871 SimVRegister temp; 2872 uxtl(vform, temp, src2); 2873 add(vform, dst, src1, temp); 2874 return dst; 2875 } 2876 2877 2878 LogicVRegister Simulator::uaddw2(VectorFormat vform, 2879 LogicVRegister dst, 2880 const LogicVRegister& src1, 2881 const LogicVRegister& src2) { 2882 SimVRegister temp; 2883 uxtl2(vform, temp, src2); 2884 add(vform, dst, src1, temp); 2885 return dst; 2886 } 2887 2888 2889 LogicVRegister Simulator::saddl(VectorFormat vform, 2890 LogicVRegister 
dst, 2891 const LogicVRegister& src1, 2892 const LogicVRegister& src2) { 2893 SimVRegister temp1, temp2; 2894 sxtl(vform, temp1, src1); 2895 sxtl(vform, temp2, src2); 2896 add(vform, dst, temp1, temp2); 2897 return dst; 2898 } 2899 2900 2901 LogicVRegister Simulator::saddl2(VectorFormat vform, 2902 LogicVRegister dst, 2903 const LogicVRegister& src1, 2904 const LogicVRegister& src2) { 2905 SimVRegister temp1, temp2; 2906 sxtl2(vform, temp1, src1); 2907 sxtl2(vform, temp2, src2); 2908 add(vform, dst, temp1, temp2); 2909 return dst; 2910 } 2911 2912 2913 LogicVRegister Simulator::saddw(VectorFormat vform, 2914 LogicVRegister dst, 2915 const LogicVRegister& src1, 2916 const LogicVRegister& src2) { 2917 SimVRegister temp; 2918 sxtl(vform, temp, src2); 2919 add(vform, dst, src1, temp); 2920 return dst; 2921 } 2922 2923 2924 LogicVRegister Simulator::saddw2(VectorFormat vform, 2925 LogicVRegister dst, 2926 const LogicVRegister& src1, 2927 const LogicVRegister& src2) { 2928 SimVRegister temp; 2929 sxtl2(vform, temp, src2); 2930 add(vform, dst, src1, temp); 2931 return dst; 2932 } 2933 2934 2935 LogicVRegister Simulator::usubl(VectorFormat vform, 2936 LogicVRegister dst, 2937 const LogicVRegister& src1, 2938 const LogicVRegister& src2) { 2939 SimVRegister temp1, temp2; 2940 uxtl(vform, temp1, src1); 2941 uxtl(vform, temp2, src2); 2942 sub(vform, dst, temp1, temp2); 2943 return dst; 2944 } 2945 2946 2947 LogicVRegister Simulator::usubl2(VectorFormat vform, 2948 LogicVRegister dst, 2949 const LogicVRegister& src1, 2950 const LogicVRegister& src2) { 2951 SimVRegister temp1, temp2; 2952 uxtl2(vform, temp1, src1); 2953 uxtl2(vform, temp2, src2); 2954 sub(vform, dst, temp1, temp2); 2955 return dst; 2956 } 2957 2958 2959 LogicVRegister Simulator::usubw(VectorFormat vform, 2960 LogicVRegister dst, 2961 const LogicVRegister& src1, 2962 const LogicVRegister& src2) { 2963 SimVRegister temp; 2964 uxtl(vform, temp, src2); 2965 sub(vform, dst, src1, temp); 2966 return dst; 2967 } 2968 
2969 2970 LogicVRegister Simulator::usubw2(VectorFormat vform, 2971 LogicVRegister dst, 2972 const LogicVRegister& src1, 2973 const LogicVRegister& src2) { 2974 SimVRegister temp; 2975 uxtl2(vform, temp, src2); 2976 sub(vform, dst, src1, temp); 2977 return dst; 2978 } 2979 2980 2981 LogicVRegister Simulator::ssubl(VectorFormat vform, 2982 LogicVRegister dst, 2983 const LogicVRegister& src1, 2984 const LogicVRegister& src2) { 2985 SimVRegister temp1, temp2; 2986 sxtl(vform, temp1, src1); 2987 sxtl(vform, temp2, src2); 2988 sub(vform, dst, temp1, temp2); 2989 return dst; 2990 } 2991 2992 2993 LogicVRegister Simulator::ssubl2(VectorFormat vform, 2994 LogicVRegister dst, 2995 const LogicVRegister& src1, 2996 const LogicVRegister& src2) { 2997 SimVRegister temp1, temp2; 2998 sxtl2(vform, temp1, src1); 2999 sxtl2(vform, temp2, src2); 3000 sub(vform, dst, temp1, temp2); 3001 return dst; 3002 } 3003 3004 3005 LogicVRegister Simulator::ssubw(VectorFormat vform, 3006 LogicVRegister dst, 3007 const LogicVRegister& src1, 3008 const LogicVRegister& src2) { 3009 SimVRegister temp; 3010 sxtl(vform, temp, src2); 3011 sub(vform, dst, src1, temp); 3012 return dst; 3013 } 3014 3015 3016 LogicVRegister Simulator::ssubw2(VectorFormat vform, 3017 LogicVRegister dst, 3018 const LogicVRegister& src1, 3019 const LogicVRegister& src2) { 3020 SimVRegister temp; 3021 sxtl2(vform, temp, src2); 3022 sub(vform, dst, src1, temp); 3023 return dst; 3024 } 3025 3026 3027 LogicVRegister Simulator::uabal(VectorFormat vform, 3028 LogicVRegister dst, 3029 const LogicVRegister& src1, 3030 const LogicVRegister& src2) { 3031 SimVRegister temp1, temp2; 3032 uxtl(vform, temp1, src1); 3033 uxtl(vform, temp2, src2); 3034 uaba(vform, dst, temp1, temp2); 3035 return dst; 3036 } 3037 3038 3039 LogicVRegister Simulator::uabal2(VectorFormat vform, 3040 LogicVRegister dst, 3041 const LogicVRegister& src1, 3042 const LogicVRegister& src2) { 3043 SimVRegister temp1, temp2; 3044 uxtl2(vform, temp1, src1); 3045 
uxtl2(vform, temp2, src2); 3046 uaba(vform, dst, temp1, temp2); 3047 return dst; 3048 } 3049 3050 3051 LogicVRegister Simulator::sabal(VectorFormat vform, 3052 LogicVRegister dst, 3053 const LogicVRegister& src1, 3054 const LogicVRegister& src2) { 3055 SimVRegister temp1, temp2; 3056 sxtl(vform, temp1, src1); 3057 sxtl(vform, temp2, src2); 3058 saba(vform, dst, temp1, temp2); 3059 return dst; 3060 } 3061 3062 3063 LogicVRegister Simulator::sabal2(VectorFormat vform, 3064 LogicVRegister dst, 3065 const LogicVRegister& src1, 3066 const LogicVRegister& src2) { 3067 SimVRegister temp1, temp2; 3068 sxtl2(vform, temp1, src1); 3069 sxtl2(vform, temp2, src2); 3070 saba(vform, dst, temp1, temp2); 3071 return dst; 3072 } 3073 3074 3075 LogicVRegister Simulator::uabdl(VectorFormat vform, 3076 LogicVRegister dst, 3077 const LogicVRegister& src1, 3078 const LogicVRegister& src2) { 3079 SimVRegister temp1, temp2; 3080 uxtl(vform, temp1, src1); 3081 uxtl(vform, temp2, src2); 3082 absdiff(vform, dst, temp1, temp2, false); 3083 return dst; 3084 } 3085 3086 3087 LogicVRegister Simulator::uabdl2(VectorFormat vform, 3088 LogicVRegister dst, 3089 const LogicVRegister& src1, 3090 const LogicVRegister& src2) { 3091 SimVRegister temp1, temp2; 3092 uxtl2(vform, temp1, src1); 3093 uxtl2(vform, temp2, src2); 3094 absdiff(vform, dst, temp1, temp2, false); 3095 return dst; 3096 } 3097 3098 3099 LogicVRegister Simulator::sabdl(VectorFormat vform, 3100 LogicVRegister dst, 3101 const LogicVRegister& src1, 3102 const LogicVRegister& src2) { 3103 SimVRegister temp1, temp2; 3104 sxtl(vform, temp1, src1); 3105 sxtl(vform, temp2, src2); 3106 absdiff(vform, dst, temp1, temp2, true); 3107 return dst; 3108 } 3109 3110 3111 LogicVRegister Simulator::sabdl2(VectorFormat vform, 3112 LogicVRegister dst, 3113 const LogicVRegister& src1, 3114 const LogicVRegister& src2) { 3115 SimVRegister temp1, temp2; 3116 sxtl2(vform, temp1, src1); 3117 sxtl2(vform, temp2, src2); 3118 absdiff(vform, dst, temp1, temp2, 
true); 3119 return dst; 3120 } 3121 3122 3123 LogicVRegister Simulator::umull(VectorFormat vform, 3124 LogicVRegister dst, 3125 const LogicVRegister& src1, 3126 const LogicVRegister& src2) { 3127 SimVRegister temp1, temp2; 3128 uxtl(vform, temp1, src1); 3129 uxtl(vform, temp2, src2); 3130 mul(vform, dst, temp1, temp2); 3131 return dst; 3132 } 3133 3134 3135 LogicVRegister Simulator::umull2(VectorFormat vform, 3136 LogicVRegister dst, 3137 const LogicVRegister& src1, 3138 const LogicVRegister& src2) { 3139 SimVRegister temp1, temp2; 3140 uxtl2(vform, temp1, src1); 3141 uxtl2(vform, temp2, src2); 3142 mul(vform, dst, temp1, temp2); 3143 return dst; 3144 } 3145 3146 3147 LogicVRegister Simulator::smull(VectorFormat vform, 3148 LogicVRegister dst, 3149 const LogicVRegister& src1, 3150 const LogicVRegister& src2) { 3151 SimVRegister temp1, temp2; 3152 sxtl(vform, temp1, src1); 3153 sxtl(vform, temp2, src2); 3154 mul(vform, dst, temp1, temp2); 3155 return dst; 3156 } 3157 3158 3159 LogicVRegister Simulator::smull2(VectorFormat vform, 3160 LogicVRegister dst, 3161 const LogicVRegister& src1, 3162 const LogicVRegister& src2) { 3163 SimVRegister temp1, temp2; 3164 sxtl2(vform, temp1, src1); 3165 sxtl2(vform, temp2, src2); 3166 mul(vform, dst, temp1, temp2); 3167 return dst; 3168 } 3169 3170 3171 LogicVRegister Simulator::umlsl(VectorFormat vform, 3172 LogicVRegister dst, 3173 const LogicVRegister& src1, 3174 const LogicVRegister& src2) { 3175 SimVRegister temp1, temp2; 3176 uxtl(vform, temp1, src1); 3177 uxtl(vform, temp2, src2); 3178 mls(vform, dst, temp1, temp2); 3179 return dst; 3180 } 3181 3182 3183 LogicVRegister Simulator::umlsl2(VectorFormat vform, 3184 LogicVRegister dst, 3185 const LogicVRegister& src1, 3186 const LogicVRegister& src2) { 3187 SimVRegister temp1, temp2; 3188 uxtl2(vform, temp1, src1); 3189 uxtl2(vform, temp2, src2); 3190 mls(vform, dst, temp1, temp2); 3191 return dst; 3192 } 3193 3194 3195 LogicVRegister Simulator::smlsl(VectorFormat vform, 3196 
LogicVRegister dst, 3197 const LogicVRegister& src1, 3198 const LogicVRegister& src2) { 3199 SimVRegister temp1, temp2; 3200 sxtl(vform, temp1, src1); 3201 sxtl(vform, temp2, src2); 3202 mls(vform, dst, temp1, temp2); 3203 return dst; 3204 } 3205 3206 3207 LogicVRegister Simulator::smlsl2(VectorFormat vform, 3208 LogicVRegister dst, 3209 const LogicVRegister& src1, 3210 const LogicVRegister& src2) { 3211 SimVRegister temp1, temp2; 3212 sxtl2(vform, temp1, src1); 3213 sxtl2(vform, temp2, src2); 3214 mls(vform, dst, temp1, temp2); 3215 return dst; 3216 } 3217 3218 3219 LogicVRegister Simulator::umlal(VectorFormat vform, 3220 LogicVRegister dst, 3221 const LogicVRegister& src1, 3222 const LogicVRegister& src2) { 3223 SimVRegister temp1, temp2; 3224 uxtl(vform, temp1, src1); 3225 uxtl(vform, temp2, src2); 3226 mla(vform, dst, temp1, temp2); 3227 return dst; 3228 } 3229 3230 3231 LogicVRegister Simulator::umlal2(VectorFormat vform, 3232 LogicVRegister dst, 3233 const LogicVRegister& src1, 3234 const LogicVRegister& src2) { 3235 SimVRegister temp1, temp2; 3236 uxtl2(vform, temp1, src1); 3237 uxtl2(vform, temp2, src2); 3238 mla(vform, dst, temp1, temp2); 3239 return dst; 3240 } 3241 3242 3243 LogicVRegister Simulator::smlal(VectorFormat vform, 3244 LogicVRegister dst, 3245 const LogicVRegister& src1, 3246 const LogicVRegister& src2) { 3247 SimVRegister temp1, temp2; 3248 sxtl(vform, temp1, src1); 3249 sxtl(vform, temp2, src2); 3250 mla(vform, dst, temp1, temp2); 3251 return dst; 3252 } 3253 3254 3255 LogicVRegister Simulator::smlal2(VectorFormat vform, 3256 LogicVRegister dst, 3257 const LogicVRegister& src1, 3258 const LogicVRegister& src2) { 3259 SimVRegister temp1, temp2; 3260 sxtl2(vform, temp1, src1); 3261 sxtl2(vform, temp2, src2); 3262 mla(vform, dst, temp1, temp2); 3263 return dst; 3264 } 3265 3266 3267 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3268 LogicVRegister dst, 3269 const LogicVRegister& src1, 3270 const LogicVRegister& src2) { 3271 
SimVRegister temp; 3272 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3273 return add(vform, dst, dst, product).SignedSaturate(vform); 3274 } 3275 3276 3277 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3278 LogicVRegister dst, 3279 const LogicVRegister& src1, 3280 const LogicVRegister& src2) { 3281 SimVRegister temp; 3282 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3283 return add(vform, dst, dst, product).SignedSaturate(vform); 3284 } 3285 3286 3287 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3288 LogicVRegister dst, 3289 const LogicVRegister& src1, 3290 const LogicVRegister& src2) { 3291 SimVRegister temp; 3292 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3293 return sub(vform, dst, dst, product).SignedSaturate(vform); 3294 } 3295 3296 3297 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3298 LogicVRegister dst, 3299 const LogicVRegister& src1, 3300 const LogicVRegister& src2) { 3301 SimVRegister temp; 3302 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3303 return sub(vform, dst, dst, product).SignedSaturate(vform); 3304 } 3305 3306 3307 LogicVRegister Simulator::sqdmull(VectorFormat vform, 3308 LogicVRegister dst, 3309 const LogicVRegister& src1, 3310 const LogicVRegister& src2) { 3311 SimVRegister temp; 3312 LogicVRegister product = smull(vform, temp, src1, src2); 3313 return add(vform, dst, product, product).SignedSaturate(vform); 3314 } 3315 3316 3317 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3318 LogicVRegister dst, 3319 const LogicVRegister& src1, 3320 const LogicVRegister& src2) { 3321 SimVRegister temp; 3322 LogicVRegister product = smull2(vform, temp, src1, src2); 3323 return add(vform, dst, product, product).SignedSaturate(vform); 3324 } 3325 3326 3327 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3328 LogicVRegister dst, 3329 const LogicVRegister& src1, 3330 const LogicVRegister& src2, 3331 bool round) { 3332 // 2 * INT_32_MIN * INT_32_MIN causes 
int64_t to overflow. 3333 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 3334 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 3335 3336 int esize = LaneSizeInBitsFromFormat(vform); 3337 int round_const = round ? (1 << (esize - 2)) : 0; 3338 int64_t product; 3339 3340 dst.ClearForWrite(vform); 3341 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3342 product = src1.Int(vform, i) * src2.Int(vform, i); 3343 product += round_const; 3344 product = product >> (esize - 1); 3345 3346 if (product > MaxIntFromFormat(vform)) { 3347 product = MaxIntFromFormat(vform); 3348 } else if (product < MinIntFromFormat(vform)) { 3349 product = MinIntFromFormat(vform); 3350 } 3351 dst.SetInt(vform, i, product); 3352 } 3353 return dst; 3354 } 3355 3356 3357 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 3358 LogicVRegister dst, 3359 const LogicVRegister& src1, 3360 const LogicVRegister& src2) { 3361 return sqrdmulh(vform, dst, src1, src2, false); 3362 } 3363 3364 3365 LogicVRegister Simulator::addhn(VectorFormat vform, 3366 LogicVRegister dst, 3367 const LogicVRegister& src1, 3368 const LogicVRegister& src2) { 3369 SimVRegister temp; 3370 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3371 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3372 return dst; 3373 } 3374 3375 3376 LogicVRegister Simulator::addhn2(VectorFormat vform, 3377 LogicVRegister dst, 3378 const LogicVRegister& src1, 3379 const LogicVRegister& src2) { 3380 SimVRegister temp; 3381 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3382 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3383 return dst; 3384 } 3385 3386 3387 LogicVRegister Simulator::raddhn(VectorFormat vform, 3388 LogicVRegister dst, 3389 const LogicVRegister& src1, 3390 const LogicVRegister& src2) { 3391 SimVRegister temp; 3392 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3393 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 
3394 return dst; 3395 } 3396 3397 3398 LogicVRegister Simulator::raddhn2(VectorFormat vform, 3399 LogicVRegister dst, 3400 const LogicVRegister& src1, 3401 const LogicVRegister& src2) { 3402 SimVRegister temp; 3403 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3404 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3405 return dst; 3406 } 3407 3408 3409 LogicVRegister Simulator::subhn(VectorFormat vform, 3410 LogicVRegister dst, 3411 const LogicVRegister& src1, 3412 const LogicVRegister& src2) { 3413 SimVRegister temp; 3414 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3415 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3416 return dst; 3417 } 3418 3419 3420 LogicVRegister Simulator::subhn2(VectorFormat vform, 3421 LogicVRegister dst, 3422 const LogicVRegister& src1, 3423 const LogicVRegister& src2) { 3424 SimVRegister temp; 3425 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3426 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3427 return dst; 3428 } 3429 3430 3431 LogicVRegister Simulator::rsubhn(VectorFormat vform, 3432 LogicVRegister dst, 3433 const LogicVRegister& src1, 3434 const LogicVRegister& src2) { 3435 SimVRegister temp; 3436 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3437 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3438 return dst; 3439 } 3440 3441 3442 LogicVRegister Simulator::rsubhn2(VectorFormat vform, 3443 LogicVRegister dst, 3444 const LogicVRegister& src1, 3445 const LogicVRegister& src2) { 3446 SimVRegister temp; 3447 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3448 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3449 return dst; 3450 } 3451 3452 3453 LogicVRegister Simulator::trn1(VectorFormat vform, 3454 LogicVRegister dst, 3455 const LogicVRegister& src1, 3456 const LogicVRegister& src2) { 3457 uint64_t result[16]; 3458 int laneCount = LaneCountFromFormat(vform); 3459 int 
pairs = laneCount / 2; 3460 for (int i = 0; i < pairs; ++i) { 3461 result[2 * i] = src1.Uint(vform, 2 * i); 3462 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 3463 } 3464 3465 dst.ClearForWrite(vform); 3466 for (int i = 0; i < laneCount; ++i) { 3467 dst.SetUint(vform, i, result[i]); 3468 } 3469 return dst; 3470 } 3471 3472 3473 LogicVRegister Simulator::trn2(VectorFormat vform, 3474 LogicVRegister dst, 3475 const LogicVRegister& src1, 3476 const LogicVRegister& src2) { 3477 uint64_t result[16]; 3478 int laneCount = LaneCountFromFormat(vform); 3479 int pairs = laneCount / 2; 3480 for (int i = 0; i < pairs; ++i) { 3481 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 3482 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 3483 } 3484 3485 dst.ClearForWrite(vform); 3486 for (int i = 0; i < laneCount; ++i) { 3487 dst.SetUint(vform, i, result[i]); 3488 } 3489 return dst; 3490 } 3491 3492 3493 LogicVRegister Simulator::zip1(VectorFormat vform, 3494 LogicVRegister dst, 3495 const LogicVRegister& src1, 3496 const LogicVRegister& src2) { 3497 uint64_t result[16]; 3498 int laneCount = LaneCountFromFormat(vform); 3499 int pairs = laneCount / 2; 3500 for (int i = 0; i < pairs; ++i) { 3501 result[2 * i] = src1.Uint(vform, i); 3502 result[(2 * i) + 1] = src2.Uint(vform, i); 3503 } 3504 3505 dst.ClearForWrite(vform); 3506 for (int i = 0; i < laneCount; ++i) { 3507 dst.SetUint(vform, i, result[i]); 3508 } 3509 return dst; 3510 } 3511 3512 3513 LogicVRegister Simulator::zip2(VectorFormat vform, 3514 LogicVRegister dst, 3515 const LogicVRegister& src1, 3516 const LogicVRegister& src2) { 3517 uint64_t result[16]; 3518 int laneCount = LaneCountFromFormat(vform); 3519 int pairs = laneCount / 2; 3520 for (int i = 0; i < pairs; ++i) { 3521 result[2 * i] = src1.Uint(vform, pairs + i); 3522 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 3523 } 3524 3525 dst.ClearForWrite(vform); 3526 for (int i = 0; i < laneCount; ++i) { 3527 dst.SetUint(vform, i, result[i]); 3528 } 3529 return dst; 
3530 } 3531 3532 3533 LogicVRegister Simulator::uzp1(VectorFormat vform, 3534 LogicVRegister dst, 3535 const LogicVRegister& src1, 3536 const LogicVRegister& src2) { 3537 uint64_t result[32]; 3538 int laneCount = LaneCountFromFormat(vform); 3539 for (int i = 0; i < laneCount; ++i) { 3540 result[i] = src1.Uint(vform, i); 3541 result[laneCount + i] = src2.Uint(vform, i); 3542 } 3543 3544 dst.ClearForWrite(vform); 3545 for (int i = 0; i < laneCount; ++i) { 3546 dst.SetUint(vform, i, result[2 * i]); 3547 } 3548 return dst; 3549 } 3550 3551 3552 LogicVRegister Simulator::uzp2(VectorFormat vform, 3553 LogicVRegister dst, 3554 const LogicVRegister& src1, 3555 const LogicVRegister& src2) { 3556 uint64_t result[32]; 3557 int laneCount = LaneCountFromFormat(vform); 3558 for (int i = 0; i < laneCount; ++i) { 3559 result[i] = src1.Uint(vform, i); 3560 result[laneCount + i] = src2.Uint(vform, i); 3561 } 3562 3563 dst.ClearForWrite(vform); 3564 for (int i = 0; i < laneCount; ++i) { 3565 dst.SetUint(vform, i, result[(2 * i) + 1]); 3566 } 3567 return dst; 3568 } 3569 3570 3571 template <typename T> 3572 T Simulator::FPAdd(T op1, T op2) { 3573 T result = FPProcessNaNs(op1, op2); 3574 if (std::isnan(result)) return result; 3575 3576 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { 3577 // inf + -inf returns the default NaN. 3578 FPProcessException(); 3579 return FPDefaultNaN<T>(); 3580 } else { 3581 // Other cases should be handled by standard arithmetic. 3582 return op1 + op2; 3583 } 3584 } 3585 3586 3587 template <typename T> 3588 T Simulator::FPSub(T op1, T op2) { 3589 // NaNs should be handled elsewhere. 3590 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3591 3592 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { 3593 // inf - inf returns the default NaN. 3594 FPProcessException(); 3595 return FPDefaultNaN<T>(); 3596 } else { 3597 // Other cases should be handled by standard arithmetic. 
3598 return op1 - op2; 3599 } 3600 } 3601 3602 3603 template <typename T> 3604 T Simulator::FPMul(T op1, T op2) { 3605 // NaNs should be handled elsewhere. 3606 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3607 3608 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3609 // inf * 0.0 returns the default NaN. 3610 FPProcessException(); 3611 return FPDefaultNaN<T>(); 3612 } else { 3613 // Other cases should be handled by standard arithmetic. 3614 return op1 * op2; 3615 } 3616 } 3617 3618 3619 template <typename T> 3620 T Simulator::FPMulx(T op1, T op2) { 3621 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3622 // inf * 0.0 returns +/-2.0. 3623 T two = 2.0; 3624 return copysign(1.0, op1) * copysign(1.0, op2) * two; 3625 } 3626 return FPMul(op1, op2); 3627 } 3628 3629 3630 template <typename T> 3631 T Simulator::FPMulAdd(T a, T op1, T op2) { 3632 T result = FPProcessNaNs3(a, op1, op2); 3633 3634 T sign_a = copysign(1.0, a); 3635 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); 3636 bool isinf_prod = std::isinf(op1) || std::isinf(op2); 3637 bool operation_generates_nan = 3638 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 3639 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf 3640 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf 3641 3642 if (std::isnan(result)) { 3643 // Generated NaNs override quiet NaNs propagated from a. 3644 if (operation_generates_nan && IsQuietNaN(a)) { 3645 FPProcessException(); 3646 return FPDefaultNaN<T>(); 3647 } else { 3648 return result; 3649 } 3650 } 3651 3652 // If the operation would produce a NaN, return the default NaN. 3653 if (operation_generates_nan) { 3654 FPProcessException(); 3655 return FPDefaultNaN<T>(); 3656 } 3657 3658 // Work around broken fma implementations for exact zero results: The sign of 3659 // exact 0.0 results is positive unless both a and op1 * op2 are negative. 
3660 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { 3661 return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0; 3662 } 3663 3664 result = FusedMultiplyAdd(op1, op2, a); 3665 VIXL_ASSERT(!std::isnan(result)); 3666 3667 // Work around broken fma implementations for rounded zero results: If a is 3668 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. 3669 if ((a == 0.0) && (result == 0.0)) { 3670 return copysign(0.0, sign_prod); 3671 } 3672 3673 return result; 3674 } 3675 3676 3677 template <typename T> 3678 T Simulator::FPDiv(T op1, T op2) { 3679 // NaNs should be handled elsewhere. 3680 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3681 3682 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { 3683 // inf / inf and 0.0 / 0.0 return the default NaN. 3684 FPProcessException(); 3685 return FPDefaultNaN<T>(); 3686 } else { 3687 if (op2 == 0.0) { 3688 FPProcessException(); 3689 if (!std::isnan(op1)) { 3690 double op1_sign = copysign(1.0, op1); 3691 double op2_sign = copysign(1.0, op2); 3692 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity); 3693 } 3694 } 3695 3696 // Other cases should be handled by standard arithmetic. 3697 return op1 / op2; 3698 } 3699 } 3700 3701 3702 template <typename T> 3703 T Simulator::FPSqrt(T op) { 3704 if (std::isnan(op)) { 3705 return FPProcessNaN(op); 3706 } else if (op < 0.0) { 3707 FPProcessException(); 3708 return FPDefaultNaN<T>(); 3709 } else { 3710 return sqrt(op); 3711 } 3712 } 3713 3714 3715 template <typename T> 3716 T Simulator::FPMax(T a, T b) { 3717 T result = FPProcessNaNs(a, b); 3718 if (std::isnan(result)) return result; 3719 3720 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3721 // a and b are zero, and the sign differs: return +0.0. 3722 return 0.0; 3723 } else { 3724 return (a > b) ? 
a : b; 3725 } 3726 } 3727 3728 3729 template <typename T> 3730 T Simulator::FPMaxNM(T a, T b) { 3731 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3732 a = kFP64NegativeInfinity; 3733 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3734 b = kFP64NegativeInfinity; 3735 } 3736 3737 T result = FPProcessNaNs(a, b); 3738 return std::isnan(result) ? result : FPMax(a, b); 3739 } 3740 3741 3742 template <typename T> 3743 T Simulator::FPMin(T a, T b) { 3744 T result = FPProcessNaNs(a, b); 3745 if (std::isnan(result)) return result; 3746 3747 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3748 // a and b are zero, and the sign differs: return -0.0. 3749 return -0.0; 3750 } else { 3751 return (a < b) ? a : b; 3752 } 3753 } 3754 3755 3756 template <typename T> 3757 T Simulator::FPMinNM(T a, T b) { 3758 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3759 a = kFP64PositiveInfinity; 3760 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3761 b = kFP64PositiveInfinity; 3762 } 3763 3764 T result = FPProcessNaNs(a, b); 3765 return std::isnan(result) ? result : FPMin(a, b); 3766 } 3767 3768 3769 template <typename T> 3770 T Simulator::FPRecipStepFused(T op1, T op2) { 3771 const T two = 2.0; 3772 if ((std::isinf(op1) && (op2 == 0.0)) || 3773 ((op1 == 0.0) && (std::isinf(op2)))) { 3774 return two; 3775 } else if (std::isinf(op1) || std::isinf(op2)) { 3776 // Return +inf if signs match, otherwise -inf. 3777 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3778 : kFP64NegativeInfinity; 3779 } else { 3780 return FusedMultiplyAdd(op1, op2, two); 3781 } 3782 } 3783 3784 3785 template <typename T> 3786 T Simulator::FPRSqrtStepFused(T op1, T op2) { 3787 const T one_point_five = 1.5; 3788 const T two = 2.0; 3789 3790 if ((std::isinf(op1) && (op2 == 0.0)) || 3791 ((op1 == 0.0) && (std::isinf(op2)))) { 3792 return one_point_five; 3793 } else if (std::isinf(op1) || std::isinf(op2)) { 3794 // Return +inf if signs match, otherwise -inf. 
3795 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3796 : kFP64NegativeInfinity; 3797 } else { 3798 // The multiply-add-halve operation must be fully fused, so avoid interim 3799 // rounding by checking which operand can be losslessly divided by two 3800 // before doing the multiply-add. 3801 if (std::isnormal(op1 / two)) { 3802 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 3803 } else if (std::isnormal(op2 / two)) { 3804 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 3805 } else { 3806 // Neither operand is normal after halving: the result is dominated by 3807 // the addition term, so just return that. 3808 return one_point_five; 3809 } 3810 } 3811 } 3812 3813 3814 double Simulator::FPRoundInt(double value, FPRounding round_mode) { 3815 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3816 (value == kFP64NegativeInfinity)) { 3817 return value; 3818 } else if (std::isnan(value)) { 3819 return FPProcessNaN(value); 3820 } 3821 3822 double int_result = std::floor(value); 3823 double error = value - int_result; 3824 switch (round_mode) { 3825 case FPTieAway: { 3826 // Take care of correctly handling the range ]-0.5, -0.0], which must 3827 // yield -0.0. 3828 if ((-0.5 < value) && (value < 0.0)) { 3829 int_result = -0.0; 3830 3831 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 3832 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3833 // result is positive, round up. 3834 int_result++; 3835 } 3836 break; 3837 } 3838 case FPTieEven: { 3839 // Take care of correctly handling the range [-0.5, -0.0], which must 3840 // yield -0.0. 3841 if ((-0.5 <= value) && (value < 0.0)) { 3842 int_result = -0.0; 3843 3844 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3845 // result is odd, round up. 
3846 } else if ((error > 0.5) || 3847 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 3848 int_result++; 3849 } 3850 break; 3851 } 3852 case FPZero: { 3853 // If value>0 then we take floor(value) 3854 // otherwise, ceil(value). 3855 if (value < 0) { 3856 int_result = ceil(value); 3857 } 3858 break; 3859 } 3860 case FPNegativeInfinity: { 3861 // We always use floor(value). 3862 break; 3863 } 3864 case FPPositiveInfinity: { 3865 // Take care of correctly handling the range ]-1.0, -0.0], which must 3866 // yield -0.0. 3867 if ((-1.0 < value) && (value < 0.0)) { 3868 int_result = -0.0; 3869 3870 // If the error is non-zero, round up. 3871 } else if (error > 0.0) { 3872 int_result++; 3873 } 3874 break; 3875 } 3876 default: 3877 VIXL_UNIMPLEMENTED(); 3878 } 3879 return int_result; 3880 } 3881 3882 3883 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3884 value = FPRoundInt(value, rmode); 3885 if (value >= kWMaxInt) { 3886 return kWMaxInt; 3887 } else if (value < kWMinInt) { 3888 return kWMinInt; 3889 } 3890 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3891 } 3892 3893 3894 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3895 value = FPRoundInt(value, rmode); 3896 if (value >= kXMaxInt) { 3897 return kXMaxInt; 3898 } else if (value < kXMinInt) { 3899 return kXMinInt; 3900 } 3901 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3902 } 3903 3904 3905 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3906 value = FPRoundInt(value, rmode); 3907 if (value >= kWMaxUInt) { 3908 return kWMaxUInt; 3909 } else if (value < 0.0) { 3910 return 0; 3911 } 3912 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3913 } 3914 3915 3916 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3917 value = FPRoundInt(value, rmode); 3918 if (value >= kXMaxUInt) { 3919 return kXMaxUInt; 3920 } else if (value < 0.0) { 3921 return 0; 3922 } 3923 return std::isnan(value) ? 
0 : static_cast<uint64_t>(value); 3924 } 3925 3926 3927 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 3928 template <typename T> \ 3929 LogicVRegister Simulator::FN(VectorFormat vform, \ 3930 LogicVRegister dst, \ 3931 const LogicVRegister& src1, \ 3932 const LogicVRegister& src2) { \ 3933 dst.ClearForWrite(vform); \ 3934 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 3935 T op1 = src1.Float<T>(i); \ 3936 T op2 = src2.Float<T>(i); \ 3937 T result; \ 3938 if (PROCNAN) { \ 3939 result = FPProcessNaNs(op1, op2); \ 3940 if (!std::isnan(result)) { \ 3941 result = OP(op1, op2); \ 3942 } \ 3943 } else { \ 3944 result = OP(op1, op2); \ 3945 } \ 3946 dst.SetFloat(i, result); \ 3947 } \ 3948 return dst; \ 3949 } \ 3950 \ 3951 LogicVRegister Simulator::FN(VectorFormat vform, \ 3952 LogicVRegister dst, \ 3953 const LogicVRegister& src1, \ 3954 const LogicVRegister& src2) { \ 3955 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 3956 FN<float>(vform, dst, src1, src2); \ 3957 } else { \ 3958 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 3959 FN<double>(vform, dst, src1, src2); \ 3960 } \ 3961 return dst; \ 3962 } 3963 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 3964 #undef DEFINE_NEON_FP_VECTOR_OP 3965 3966 3967 LogicVRegister Simulator::fnmul(VectorFormat vform, 3968 LogicVRegister dst, 3969 const LogicVRegister& src1, 3970 const LogicVRegister& src2) { 3971 SimVRegister temp; 3972 LogicVRegister product = fmul(vform, temp, src1, src2); 3973 return fneg(vform, dst, product); 3974 } 3975 3976 3977 template <typename T> 3978 LogicVRegister Simulator::frecps(VectorFormat vform, 3979 LogicVRegister dst, 3980 const LogicVRegister& src1, 3981 const LogicVRegister& src2) { 3982 dst.ClearForWrite(vform); 3983 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3984 T op1 = -src1.Float<T>(i); 3985 T op2 = src2.Float<T>(i); 3986 T result = FPProcessNaNs(op1, op2); 3987 dst.SetFloat(i, std::isnan(result) ? 
result : FPRecipStepFused(op1, op2)); 3988 } 3989 return dst; 3990 } 3991 3992 3993 LogicVRegister Simulator::frecps(VectorFormat vform, 3994 LogicVRegister dst, 3995 const LogicVRegister& src1, 3996 const LogicVRegister& src2) { 3997 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3998 frecps<float>(vform, dst, src1, src2); 3999 } else { 4000 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4001 frecps<double>(vform, dst, src1, src2); 4002 } 4003 return dst; 4004 } 4005 4006 4007 template <typename T> 4008 LogicVRegister Simulator::frsqrts(VectorFormat vform, 4009 LogicVRegister dst, 4010 const LogicVRegister& src1, 4011 const LogicVRegister& src2) { 4012 dst.ClearForWrite(vform); 4013 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4014 T op1 = -src1.Float<T>(i); 4015 T op2 = src2.Float<T>(i); 4016 T result = FPProcessNaNs(op1, op2); 4017 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); 4018 } 4019 return dst; 4020 } 4021 4022 4023 LogicVRegister Simulator::frsqrts(VectorFormat vform, 4024 LogicVRegister dst, 4025 const LogicVRegister& src1, 4026 const LogicVRegister& src2) { 4027 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4028 frsqrts<float>(vform, dst, src1, src2); 4029 } else { 4030 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4031 frsqrts<double>(vform, dst, src1, src2); 4032 } 4033 return dst; 4034 } 4035 4036 4037 template <typename T> 4038 LogicVRegister Simulator::fcmp(VectorFormat vform, 4039 LogicVRegister dst, 4040 const LogicVRegister& src1, 4041 const LogicVRegister& src2, 4042 Condition cond) { 4043 dst.ClearForWrite(vform); 4044 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4045 bool result = false; 4046 T op1 = src1.Float<T>(i); 4047 T op2 = src2.Float<T>(i); 4048 T nan_result = FPProcessNaNs(op1, op2); 4049 if (!std::isnan(nan_result)) { 4050 switch (cond) { 4051 case eq: 4052 result = (op1 == op2); 4053 break; 4054 case ge: 4055 result = (op1 >= op2); 4056 break; 4057 
case gt: 4058 result = (op1 > op2); 4059 break; 4060 case le: 4061 result = (op1 <= op2); 4062 break; 4063 case lt: 4064 result = (op1 < op2); 4065 break; 4066 default: 4067 VIXL_UNREACHABLE(); 4068 break; 4069 } 4070 } 4071 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 4072 } 4073 return dst; 4074 } 4075 4076 4077 LogicVRegister Simulator::fcmp(VectorFormat vform, 4078 LogicVRegister dst, 4079 const LogicVRegister& src1, 4080 const LogicVRegister& src2, 4081 Condition cond) { 4082 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4083 fcmp<float>(vform, dst, src1, src2, cond); 4084 } else { 4085 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4086 fcmp<double>(vform, dst, src1, src2, cond); 4087 } 4088 return dst; 4089 } 4090 4091 4092 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 4093 LogicVRegister dst, 4094 const LogicVRegister& src, 4095 Condition cond) { 4096 SimVRegister temp; 4097 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4098 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); 4099 fcmp<float>(vform, dst, src, zero_reg, cond); 4100 } else { 4101 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4102 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); 4103 fcmp<double>(vform, dst, src, zero_reg, cond); 4104 } 4105 return dst; 4106 } 4107 4108 4109 LogicVRegister Simulator::fabscmp(VectorFormat vform, 4110 LogicVRegister dst, 4111 const LogicVRegister& src1, 4112 const LogicVRegister& src2, 4113 Condition cond) { 4114 SimVRegister temp1, temp2; 4115 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4116 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 4117 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 4118 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 4119 } else { 4120 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4121 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 4122 LogicVRegister abs_src2 = 
fabs_<double>(vform, temp2, src2); 4123 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 4124 } 4125 return dst; 4126 } 4127 4128 4129 template <typename T> 4130 LogicVRegister Simulator::fmla(VectorFormat vform, 4131 LogicVRegister dst, 4132 const LogicVRegister& src1, 4133 const LogicVRegister& src2) { 4134 dst.ClearForWrite(vform); 4135 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4136 T op1 = src1.Float<T>(i); 4137 T op2 = src2.Float<T>(i); 4138 T acc = dst.Float<T>(i); 4139 T result = FPMulAdd(acc, op1, op2); 4140 dst.SetFloat(i, result); 4141 } 4142 return dst; 4143 } 4144 4145 4146 LogicVRegister Simulator::fmla(VectorFormat vform, 4147 LogicVRegister dst, 4148 const LogicVRegister& src1, 4149 const LogicVRegister& src2) { 4150 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4151 fmla<float>(vform, dst, src1, src2); 4152 } else { 4153 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4154 fmla<double>(vform, dst, src1, src2); 4155 } 4156 return dst; 4157 } 4158 4159 4160 template <typename T> 4161 LogicVRegister Simulator::fmls(VectorFormat vform, 4162 LogicVRegister dst, 4163 const LogicVRegister& src1, 4164 const LogicVRegister& src2) { 4165 dst.ClearForWrite(vform); 4166 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4167 T op1 = -src1.Float<T>(i); 4168 T op2 = src2.Float<T>(i); 4169 T acc = dst.Float<T>(i); 4170 T result = FPMulAdd(acc, op1, op2); 4171 dst.SetFloat(i, result); 4172 } 4173 return dst; 4174 } 4175 4176 4177 LogicVRegister Simulator::fmls(VectorFormat vform, 4178 LogicVRegister dst, 4179 const LogicVRegister& src1, 4180 const LogicVRegister& src2) { 4181 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4182 fmls<float>(vform, dst, src1, src2); 4183 } else { 4184 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4185 fmls<double>(vform, dst, src1, src2); 4186 } 4187 return dst; 4188 } 4189 4190 4191 template <typename T> 4192 LogicVRegister Simulator::fneg(VectorFormat vform, 4193 LogicVRegister 
dst, 4194 const LogicVRegister& src) { 4195 dst.ClearForWrite(vform); 4196 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4197 T op = src.Float<T>(i); 4198 op = -op; 4199 dst.SetFloat(i, op); 4200 } 4201 return dst; 4202 } 4203 4204 4205 LogicVRegister Simulator::fneg(VectorFormat vform, 4206 LogicVRegister dst, 4207 const LogicVRegister& src) { 4208 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4209 fneg<float>(vform, dst, src); 4210 } else { 4211 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4212 fneg<double>(vform, dst, src); 4213 } 4214 return dst; 4215 } 4216 4217 4218 template <typename T> 4219 LogicVRegister Simulator::fabs_(VectorFormat vform, 4220 LogicVRegister dst, 4221 const LogicVRegister& src) { 4222 dst.ClearForWrite(vform); 4223 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4224 T op = src.Float<T>(i); 4225 if (copysign(1.0, op) < 0.0) { 4226 op = -op; 4227 } 4228 dst.SetFloat(i, op); 4229 } 4230 return dst; 4231 } 4232 4233 4234 LogicVRegister Simulator::fabs_(VectorFormat vform, 4235 LogicVRegister dst, 4236 const LogicVRegister& src) { 4237 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4238 fabs_<float>(vform, dst, src); 4239 } else { 4240 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4241 fabs_<double>(vform, dst, src); 4242 } 4243 return dst; 4244 } 4245 4246 4247 LogicVRegister Simulator::fabd(VectorFormat vform, 4248 LogicVRegister dst, 4249 const LogicVRegister& src1, 4250 const LogicVRegister& src2) { 4251 SimVRegister temp; 4252 fsub(vform, temp, src1, src2); 4253 fabs_(vform, dst, temp); 4254 return dst; 4255 } 4256 4257 4258 LogicVRegister Simulator::fsqrt(VectorFormat vform, 4259 LogicVRegister dst, 4260 const LogicVRegister& src) { 4261 dst.ClearForWrite(vform); 4262 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4263 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4264 float result = FPSqrt(src.Float<float>(i)); 4265 dst.SetFloat(i, result); 4266 } 4267 } else { 4268 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4269 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4270 double result = FPSqrt(src.Float<double>(i)); 4271 dst.SetFloat(i, result); 4272 } 4273 } 4274 return dst; 4275 } 4276 4277 4278 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4279 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4280 LogicVRegister dst, \ 4281 const LogicVRegister& src1, \ 4282 const LogicVRegister& src2) { \ 4283 SimVRegister temp1, temp2; \ 4284 uzp1(vform, temp1, src1, src2); \ 4285 uzp2(vform, temp2, src1, src2); \ 4286 FN(vform, dst, temp1, temp2); \ 4287 return dst; \ 4288 } \ 4289 \ 4290 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4291 LogicVRegister dst, \ 4292 const LogicVRegister& src) { \ 4293 if (vform == kFormatS) { \ 4294 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4295 dst.SetFloat(0, result); \ 4296 } else { \ 4297 VIXL_ASSERT(vform == kFormatD); \ 4298 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4299 dst.SetFloat(0, result); \ 4300 } \ 4301 dst.ClearForWrite(vform); \ 4302 return dst; \ 4303 } 4304 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4305 #undef DEFINE_NEON_FP_PAIR_OP 4306 4307 4308 LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4309 LogicVRegister dst, 4310 const LogicVRegister& src, 4311 FPMinMaxOp Op) { 4312 VIXL_ASSERT(vform == kFormat4S); 4313 USE(vform); 4314 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); 4315 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); 4316 float result = (this->*Op)(result1, result2); 4317 dst.ClearForWrite(kFormatS); 4318 dst.SetFloat<float>(0, result); 4319 return dst; 4320 } 4321 4322 4323 LogicVRegister Simulator::fmaxv(VectorFormat vform, 4324 LogicVRegister dst, 4325 const LogicVRegister& src) { 4326 return fminmaxv(vform, dst, src, &Simulator::FPMax); 4327 } 4328 4329 4330 LogicVRegister Simulator::fminv(VectorFormat vform, 4331 LogicVRegister dst, 4332 const 
LogicVRegister& src) { 4333 return fminmaxv(vform, dst, src, &Simulator::FPMin); 4334 } 4335 4336 4337 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 4338 LogicVRegister dst, 4339 const LogicVRegister& src) { 4340 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); 4341 } 4342 4343 4344 LogicVRegister Simulator::fminnmv(VectorFormat vform, 4345 LogicVRegister dst, 4346 const LogicVRegister& src) { 4347 return fminmaxv(vform, dst, src, &Simulator::FPMinNM); 4348 } 4349 4350 4351 LogicVRegister Simulator::fmul(VectorFormat vform, 4352 LogicVRegister dst, 4353 const LogicVRegister& src1, 4354 const LogicVRegister& src2, 4355 int index) { 4356 dst.ClearForWrite(vform); 4357 SimVRegister temp; 4358 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4359 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4360 fmul<float>(vform, dst, src1, index_reg); 4361 4362 } else { 4363 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4364 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4365 fmul<double>(vform, dst, src1, index_reg); 4366 } 4367 return dst; 4368 } 4369 4370 4371 LogicVRegister Simulator::fmla(VectorFormat vform, 4372 LogicVRegister dst, 4373 const LogicVRegister& src1, 4374 const LogicVRegister& src2, 4375 int index) { 4376 dst.ClearForWrite(vform); 4377 SimVRegister temp; 4378 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4379 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4380 fmla<float>(vform, dst, src1, index_reg); 4381 4382 } else { 4383 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4384 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4385 fmla<double>(vform, dst, src1, index_reg); 4386 } 4387 return dst; 4388 } 4389 4390 4391 LogicVRegister Simulator::fmls(VectorFormat vform, 4392 LogicVRegister dst, 4393 const LogicVRegister& src1, 4394 const LogicVRegister& src2, 4395 int index) { 4396 dst.ClearForWrite(vform); 4397 SimVRegister temp; 
4398 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4399 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4400 fmls<float>(vform, dst, src1, index_reg); 4401 4402 } else { 4403 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4404 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4405 fmls<double>(vform, dst, src1, index_reg); 4406 } 4407 return dst; 4408 } 4409 4410 4411 LogicVRegister Simulator::fmulx(VectorFormat vform, 4412 LogicVRegister dst, 4413 const LogicVRegister& src1, 4414 const LogicVRegister& src2, 4415 int index) { 4416 dst.ClearForWrite(vform); 4417 SimVRegister temp; 4418 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4419 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4420 fmulx<float>(vform, dst, src1, index_reg); 4421 4422 } else { 4423 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4424 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4425 fmulx<double>(vform, dst, src1, index_reg); 4426 } 4427 return dst; 4428 } 4429 4430 4431 LogicVRegister Simulator::frint(VectorFormat vform, 4432 LogicVRegister dst, 4433 const LogicVRegister& src, 4434 FPRounding rounding_mode, 4435 bool inexact_exception) { 4436 dst.ClearForWrite(vform); 4437 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4438 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4439 float input = src.Float<float>(i); 4440 float rounded = FPRoundInt(input, rounding_mode); 4441 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4442 FPProcessException(); 4443 } 4444 dst.SetFloat<float>(i, rounded); 4445 } 4446 } else { 4447 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4448 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4449 double input = src.Float<double>(i); 4450 double rounded = FPRoundInt(input, rounding_mode); 4451 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4452 FPProcessException(); 4453 } 4454 
dst.SetFloat<double>(i, rounded); 4455 } 4456 } 4457 return dst; 4458 } 4459 4460 4461 LogicVRegister Simulator::fcvts(VectorFormat vform, 4462 LogicVRegister dst, 4463 const LogicVRegister& src, 4464 FPRounding rounding_mode, 4465 int fbits) { 4466 dst.ClearForWrite(vform); 4467 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4468 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4469 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4470 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4471 } 4472 } else { 4473 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4474 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4475 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4476 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4477 } 4478 } 4479 return dst; 4480 } 4481 4482 4483 LogicVRegister Simulator::fcvtu(VectorFormat vform, 4484 LogicVRegister dst, 4485 const LogicVRegister& src, 4486 FPRounding rounding_mode, 4487 int fbits) { 4488 dst.ClearForWrite(vform); 4489 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4490 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4491 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4492 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4493 } 4494 } else { 4495 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4496 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4497 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4498 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4499 } 4500 } 4501 return dst; 4502 } 4503 4504 4505 LogicVRegister Simulator::fcvtl(VectorFormat vform, 4506 LogicVRegister dst, 4507 const LogicVRegister& src) { 4508 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4509 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4510 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); 4511 } 4512 } else { 4513 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4514 for (int i = LaneCountFromFormat(vform) - 1; i 
>= 0; i--) { 4515 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); 4516 } 4517 } 4518 return dst; 4519 } 4520 4521 4522 LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4523 LogicVRegister dst, 4524 const LogicVRegister& src) { 4525 int lane_count = LaneCountFromFormat(vform); 4526 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4527 for (int i = 0; i < lane_count; i++) { 4528 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); 4529 } 4530 } else { 4531 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4532 for (int i = 0; i < lane_count; i++) { 4533 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); 4534 } 4535 } 4536 return dst; 4537 } 4538 4539 4540 LogicVRegister Simulator::fcvtn(VectorFormat vform, 4541 LogicVRegister dst, 4542 const LogicVRegister& src) { 4543 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4544 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4545 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); 4546 } 4547 } else { 4548 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4549 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4550 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); 4551 } 4552 } 4553 return dst; 4554 } 4555 4556 4557 LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4558 LogicVRegister dst, 4559 const LogicVRegister& src) { 4560 int lane_count = LaneCountFromFormat(vform) / 2; 4561 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4562 for (int i = lane_count - 1; i >= 0; i--) { 4563 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); 4564 } 4565 } else { 4566 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4567 for (int i = lane_count - 1; i >= 0; i--) { 4568 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); 4569 } 4570 } 4571 return dst; 4572 } 4573 4574 4575 LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4576 LogicVRegister dst, 4577 const LogicVRegister& src) { 4578 
dst.ClearForWrite(vform); 4579 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4580 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4581 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4582 } 4583 return dst; 4584 } 4585 4586 4587 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4588 LogicVRegister dst, 4589 const LogicVRegister& src) { 4590 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4591 int lane_count = LaneCountFromFormat(vform) / 2; 4592 for (int i = lane_count - 1; i >= 0; i--) { 4593 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4594 } 4595 return dst; 4596 } 4597 4598 4599 // Based on reference C function recip_sqrt_estimate from ARM ARM. 4600 double Simulator::recip_sqrt_estimate(double a) { 4601 int q0, q1, s; 4602 double r; 4603 if (a < 0.5) { 4604 q0 = static_cast<int>(a * 512.0); 4605 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4606 } else { 4607 q1 = static_cast<int>(a * 256.0); 4608 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4609 } 4610 s = static_cast<int>(256.0 * r + 0.5); 4611 return static_cast<double>(s) / 256.0; 4612 } 4613 4614 4615 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4616 return ExtractUnsignedBitfield64(start_bit, end_bit, val); 4617 } 4618 4619 4620 template <typename T> 4621 T Simulator::FPRecipSqrtEstimate(T op) { 4622 if (std::isnan(op)) { 4623 return FPProcessNaN(op); 4624 } else if (op == 0.0) { 4625 if (copysign(1.0, op) < 0.0) { 4626 return kFP64NegativeInfinity; 4627 } else { 4628 return kFP64PositiveInfinity; 4629 } 4630 } else if (copysign(1.0, op) < 0.0) { 4631 FPProcessException(); 4632 return FPDefaultNaN<T>(); 4633 } else if (std::isinf(op)) { 4634 return 0.0; 4635 } else { 4636 uint64_t fraction; 4637 int exp, result_exp; 4638 4639 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4640 exp = FloatExp(op); 4641 fraction = FloatMantissa(op); 4642 fraction <<= 29; 4643 } else { 
4644 exp = DoubleExp(op); 4645 fraction = DoubleMantissa(op); 4646 } 4647 4648 if (exp == 0) { 4649 while (Bits(fraction, 51, 51) == 0) { 4650 fraction = Bits(fraction, 50, 0) << 1; 4651 exp -= 1; 4652 } 4653 fraction = Bits(fraction, 50, 0) << 1; 4654 } 4655 4656 double scaled; 4657 if (Bits(exp, 0, 0) == 0) { 4658 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4659 } else { 4660 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); 4661 } 4662 4663 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4664 result_exp = (380 - exp) / 2; 4665 } else { 4666 result_exp = (3068 - exp) / 2; 4667 } 4668 4669 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); 4670 4671 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4672 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4673 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 4674 return FloatPack(0, exp_bits, est_bits); 4675 } else { 4676 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 4677 } 4678 } 4679 } 4680 4681 4682 LogicVRegister Simulator::frsqrte(VectorFormat vform, 4683 LogicVRegister dst, 4684 const LogicVRegister& src) { 4685 dst.ClearForWrite(vform); 4686 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4687 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4688 float input = src.Float<float>(i); 4689 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 4690 } 4691 } else { 4692 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4693 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4694 double input = src.Float<double>(i); 4695 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 4696 } 4697 } 4698 return dst; 4699 } 4700 4701 template <typename T> 4702 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 4703 uint32_t sign; 4704 4705 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4706 sign = FloatSign(op); 4707 } else { 4708 sign = DoubleSign(op); 4709 } 4710 4711 
if (std::isnan(op)) { 4712 return FPProcessNaN(op); 4713 } else if (std::isinf(op)) { 4714 return (sign == 1) ? -0.0 : 0.0; 4715 } else if (op == 0.0) { 4716 FPProcessException(); // FPExc_DivideByZero exception. 4717 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4718 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) 4719 (std::fabs(op) < std::pow(2.0, -128.0))) || 4720 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) 4721 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 4722 bool overflow_to_inf = false; 4723 switch (rounding) { 4724 case FPTieEven: 4725 overflow_to_inf = true; 4726 break; 4727 case FPPositiveInfinity: 4728 overflow_to_inf = (sign == 0); 4729 break; 4730 case FPNegativeInfinity: 4731 overflow_to_inf = (sign == 1); 4732 break; 4733 case FPZero: 4734 overflow_to_inf = false; 4735 break; 4736 default: 4737 break; 4738 } 4739 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 4740 if (overflow_to_inf) { 4741 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4742 } else { 4743 // Return FPMaxNormal(sign). 
4744 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4745 return FloatPack(sign, 0xfe, 0x07fffff); 4746 } else { 4747 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); 4748 } 4749 } 4750 } else { 4751 uint64_t fraction; 4752 int exp, result_exp; 4753 uint32_t sign; 4754 4755 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4756 sign = FloatSign(op); 4757 exp = FloatExp(op); 4758 fraction = FloatMantissa(op); 4759 fraction <<= 29; 4760 } else { 4761 sign = DoubleSign(op); 4762 exp = DoubleExp(op); 4763 fraction = DoubleMantissa(op); 4764 } 4765 4766 if (exp == 0) { 4767 if (Bits(fraction, 51, 51) == 0) { 4768 exp -= 1; 4769 fraction = Bits(fraction, 49, 0) << 2; 4770 } else { 4771 fraction = Bits(fraction, 50, 0) << 1; 4772 } 4773 } 4774 4775 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4776 4777 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4778 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 4779 } else { 4780 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
4781 } 4782 4783 double estimate = recip_estimate(scaled); 4784 4785 fraction = DoubleMantissa(estimate); 4786 if (result_exp == 0) { 4787 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 4788 } else if (result_exp == -1) { 4789 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4790 result_exp = 0; 4791 } 4792 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4793 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4794 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4795 return FloatPack(sign, exp_bits, frac_bits); 4796 } else { 4797 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4798 } 4799 } 4800 } 4801 4802 4803 LogicVRegister Simulator::frecpe(VectorFormat vform, 4804 LogicVRegister dst, 4805 const LogicVRegister& src, 4806 FPRounding round) { 4807 dst.ClearForWrite(vform); 4808 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4809 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4810 float input = src.Float<float>(i); 4811 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4812 } 4813 } else { 4814 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4815 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4816 double input = src.Float<double>(i); 4817 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4818 } 4819 } 4820 return dst; 4821 } 4822 4823 4824 LogicVRegister Simulator::ursqrte(VectorFormat vform, 4825 LogicVRegister dst, 4826 const LogicVRegister& src) { 4827 dst.ClearForWrite(vform); 4828 uint64_t operand; 4829 uint32_t result; 4830 double dp_operand, dp_result; 4831 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4832 operand = src.Uint(vform, i); 4833 if (operand <= 0x3FFFFFFF) { 4834 result = 0xFFFFFFFF; 4835 } else { 4836 dp_operand = operand * std::pow(2.0, -32); 4837 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4838 result = static_cast<uint32_t>(dp_result); 4839 } 4840 dst.SetUint(vform, i, result); 4841 
} 4842 return dst; 4843 } 4844 4845 4846 // Based on reference C function recip_estimate from ARM ARM. 4847 double Simulator::recip_estimate(double a) { 4848 int q, s; 4849 double r; 4850 q = static_cast<int>(a * 512.0); 4851 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4852 s = static_cast<int>(256.0 * r + 0.5); 4853 return static_cast<double>(s) / 256.0; 4854 } 4855 4856 4857 LogicVRegister Simulator::urecpe(VectorFormat vform, 4858 LogicVRegister dst, 4859 const LogicVRegister& src) { 4860 dst.ClearForWrite(vform); 4861 uint64_t operand; 4862 uint32_t result; 4863 double dp_operand, dp_result; 4864 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4865 operand = src.Uint(vform, i); 4866 if (operand <= 0x7FFFFFFF) { 4867 result = 0xFFFFFFFF; 4868 } else { 4869 dp_operand = operand * std::pow(2.0, -32); 4870 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4871 result = static_cast<uint32_t>(dp_result); 4872 } 4873 dst.SetUint(vform, i, result); 4874 } 4875 return dst; 4876 } 4877 4878 template <typename T> 4879 LogicVRegister Simulator::frecpx(VectorFormat vform, 4880 LogicVRegister dst, 4881 const LogicVRegister& src) { 4882 dst.ClearForWrite(vform); 4883 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4884 T op = src.Float<T>(i); 4885 T result; 4886 if (std::isnan(op)) { 4887 result = FPProcessNaN(op); 4888 } else { 4889 int exp; 4890 uint32_t sign; 4891 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4892 sign = FloatSign(op); 4893 exp = FloatExp(op); 4894 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4895 result = FloatPack(sign, exp, 0); 4896 } else { 4897 sign = DoubleSign(op); 4898 exp = DoubleExp(op); 4899 exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4900 result = DoublePack(sign, exp, 0); 4901 } 4902 } 4903 dst.SetFloat(i, result); 4904 } 4905 return dst; 4906 } 4907 4908 4909 LogicVRegister Simulator::frecpx(VectorFormat vform, 4910 LogicVRegister dst, 4911 const LogicVRegister& src) { 4912 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4913 frecpx<float>(vform, dst, src); 4914 } else { 4915 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4916 frecpx<double>(vform, dst, src); 4917 } 4918 return dst; 4919 } 4920 4921 LogicVRegister Simulator::scvtf(VectorFormat vform, 4922 LogicVRegister dst, 4923 const LogicVRegister& src, 4924 int fbits, 4925 FPRounding round) { 4926 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4927 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4928 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4929 dst.SetFloat<float>(i, result); 4930 } else { 4931 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4932 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4933 dst.SetFloat<double>(i, result); 4934 } 4935 } 4936 return dst; 4937 } 4938 4939 4940 LogicVRegister Simulator::ucvtf(VectorFormat vform, 4941 LogicVRegister dst, 4942 const LogicVRegister& src, 4943 int fbits, 4944 FPRounding round) { 4945 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4946 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4947 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4948 dst.SetFloat<float>(i, result); 4949 } else { 4950 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4951 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4952 dst.SetFloat<double>(i, result); 4953 } 4954 } 4955 return dst; 4956 } 4957 4958 4959 } // namespace aarch64 4960 } // namespace vixl 4961 4962 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64 4963