1 // Copyright 2015, ARM Limited 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 27 #include <cmath> 28 #include "vixl/a64/simulator-a64.h" 29 30 namespace vixl { 31 32 template<> double Simulator::FPDefaultNaN<double>() { 33 return kFP64DefaultNaN; 34 } 35 36 37 template<> float Simulator::FPDefaultNaN<float>() { 38 return kFP32DefaultNaN; 39 } 40 41 // See FPRound for a description of this function. 
42 static inline double FPRoundToDouble(int64_t sign, int64_t exponent, 43 uint64_t mantissa, FPRounding round_mode) { 44 int64_t bits = 45 FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign, 46 exponent, 47 mantissa, 48 round_mode); 49 return rawbits_to_double(bits); 50 } 51 52 53 // See FPRound for a description of this function. 54 static inline float FPRoundToFloat(int64_t sign, int64_t exponent, 55 uint64_t mantissa, FPRounding round_mode) { 56 int32_t bits = 57 FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign, 58 exponent, 59 mantissa, 60 round_mode); 61 return rawbits_to_float(bits); 62 } 63 64 65 // See FPRound for a description of this function. 66 static inline float16 FPRoundToFloat16(int64_t sign, 67 int64_t exponent, 68 uint64_t mantissa, 69 FPRounding round_mode) { 70 return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>( 71 sign, exponent, mantissa, round_mode); 72 } 73 74 75 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 76 if (src >= 0) { 77 return UFixedToDouble(src, fbits, round); 78 } else { 79 // This works for all negative values, including INT64_MIN. 80 return -UFixedToDouble(-src, fbits, round); 81 } 82 } 83 84 85 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 86 // An input of 0 is a special case because the result is effectively 87 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 88 if (src == 0) { 89 return 0.0; 90 } 91 92 // Calculate the exponent. The highest significant bit will have the value 93 // 2^exponent. 
94 const int highest_significant_bit = 63 - CountLeadingZeros(src); 95 const int64_t exponent = highest_significant_bit - fbits; 96 97 return FPRoundToDouble(0, exponent, src, round); 98 } 99 100 101 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 102 if (src >= 0) { 103 return UFixedToFloat(src, fbits, round); 104 } else { 105 // This works for all negative values, including INT64_MIN. 106 return -UFixedToFloat(-src, fbits, round); 107 } 108 } 109 110 111 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 112 // An input of 0 is a special case because the result is effectively 113 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 114 if (src == 0) { 115 return 0.0f; 116 } 117 118 // Calculate the exponent. The highest significant bit will have the value 119 // 2^exponent. 120 const int highest_significant_bit = 63 - CountLeadingZeros(src); 121 const int32_t exponent = highest_significant_bit - fbits; 122 123 return FPRoundToFloat(0, exponent, src, round); 124 } 125 126 127 double Simulator::FPToDouble(float value) { 128 switch (std::fpclassify(value)) { 129 case FP_NAN: { 130 if (IsSignallingNaN(value)) { 131 FPProcessException(); 132 } 133 if (DN()) return kFP64DefaultNaN; 134 135 // Convert NaNs as the processor would: 136 // - The sign is propagated. 137 // - The payload (mantissa) is transferred entirely, except that the top 138 // bit is forced to '1', making the result a quiet NaN. The unused 139 // (low-order) payload bits are set to 0. 140 uint32_t raw = float_to_rawbits(value); 141 142 uint64_t sign = raw >> 31; 143 uint64_t exponent = (1 << 11) - 1; 144 uint64_t payload = unsigned_bitextract_64(21, 0, raw); 145 payload <<= (52 - 23); // The unused low-order bits should be 0. 146 payload |= (UINT64_C(1) << 51); // Force a quiet NaN. 
147 148 return rawbits_to_double((sign << 63) | (exponent << 52) | payload); 149 } 150 151 case FP_ZERO: 152 case FP_NORMAL: 153 case FP_SUBNORMAL: 154 case FP_INFINITE: { 155 // All other inputs are preserved in a standard cast, because every value 156 // representable using an IEEE-754 float is also representable using an 157 // IEEE-754 double. 158 return static_cast<double>(value); 159 } 160 } 161 162 VIXL_UNREACHABLE(); 163 return static_cast<double>(value); 164 } 165 166 167 float Simulator::FPToFloat(float16 value) { 168 uint32_t sign = value >> 15; 169 uint32_t exponent = unsigned_bitextract_32( 170 kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits, 171 value); 172 uint32_t mantissa = unsigned_bitextract_32( 173 kFloat16MantissaBits - 1, 0, value); 174 175 switch (float16classify(value)) { 176 case FP_ZERO: 177 return (sign == 0) ? 0.0f : -0.0f; 178 179 case FP_INFINITE: 180 return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity; 181 182 case FP_SUBNORMAL: { 183 // Calculate shift required to put mantissa into the most-significant bits 184 // of the destination mantissa. 185 int shift = CountLeadingZeros(mantissa << (32 - 10)); 186 187 // Shift mantissa and discard implicit '1'. 188 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1; 189 mantissa &= (1 << kFloatMantissaBits) - 1; 190 191 // Adjust the exponent for the shift applied, and rebias. 192 exponent = exponent - shift + (-15 + 127); 193 break; 194 } 195 196 case FP_NAN: 197 if (IsSignallingNaN(value)) { 198 FPProcessException(); 199 } 200 if (DN()) return kFP32DefaultNaN; 201 202 // Convert NaNs as the processor would: 203 // - The sign is propagated. 204 // - The payload (mantissa) is transferred entirely, except that the top 205 // bit is forced to '1', making the result a quiet NaN. The unused 206 // (low-order) payload bits are set to 0. 207 exponent = (1 << kFloatExponentBits) - 1; 208 209 // Increase bits in mantissa, making low-order bits 0. 
210 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 211 mantissa |= 1 << 22; // Force a quiet NaN. 212 break; 213 214 case FP_NORMAL: 215 // Increase bits in mantissa, making low-order bits 0. 216 mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits); 217 218 // Change exponent bias. 219 exponent += (-15 + 127); 220 break; 221 222 default: VIXL_UNREACHABLE(); 223 } 224 return rawbits_to_float((sign << 31) | 225 (exponent << kFloatMantissaBits) | 226 mantissa); 227 } 228 229 230 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) { 231 // Only the FPTieEven rounding mode is implemented. 232 VIXL_ASSERT(round_mode == FPTieEven); 233 USE(round_mode); 234 235 uint32_t raw = float_to_rawbits(value); 236 int32_t sign = raw >> 31; 237 int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127; 238 uint32_t mantissa = unsigned_bitextract_32(22, 0, raw); 239 240 switch (std::fpclassify(value)) { 241 case FP_NAN: { 242 if (IsSignallingNaN(value)) { 243 FPProcessException(); 244 } 245 if (DN()) return kFP16DefaultNaN; 246 247 // Convert NaNs as the processor would: 248 // - The sign is propagated. 249 // - The payload (mantissa) is transferred as much as possible, except 250 // that the top bit is forced to '1', making the result a quiet NaN. 251 float16 result = (sign == 0) ? kFP16PositiveInfinity 252 : kFP16NegativeInfinity; 253 result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits); 254 result |= (1 << 9); // Force a quiet NaN; 255 return result; 256 } 257 258 case FP_ZERO: 259 return (sign == 0) ? 0 : 0x8000; 260 261 case FP_INFINITE: 262 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 263 264 case FP_NORMAL: 265 case FP_SUBNORMAL: { 266 // Convert float-to-half as the processor would, assuming that FPCR.FZ 267 // (flush-to-zero) is not set. 268 269 // Add the implicit '1' bit to the mantissa. 
270 mantissa += (1 << 23); 271 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 272 } 273 } 274 275 VIXL_UNREACHABLE(); 276 return 0; 277 } 278 279 280 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) { 281 // Only the FPTieEven rounding mode is implemented. 282 VIXL_ASSERT(round_mode == FPTieEven); 283 USE(round_mode); 284 285 uint64_t raw = double_to_rawbits(value); 286 int32_t sign = raw >> 63; 287 int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023; 288 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw); 289 290 switch (std::fpclassify(value)) { 291 case FP_NAN: { 292 if (IsSignallingNaN(value)) { 293 FPProcessException(); 294 } 295 if (DN()) return kFP16DefaultNaN; 296 297 // Convert NaNs as the processor would: 298 // - The sign is propagated. 299 // - The payload (mantissa) is transferred as much as possible, except 300 // that the top bit is forced to '1', making the result a quiet NaN. 301 float16 result = (sign == 0) ? kFP16PositiveInfinity 302 : kFP16NegativeInfinity; 303 result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits); 304 result |= (1 << 9); // Force a quiet NaN; 305 return result; 306 } 307 308 case FP_ZERO: 309 return (sign == 0) ? 0 : 0x8000; 310 311 case FP_INFINITE: 312 return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity; 313 314 case FP_NORMAL: 315 case FP_SUBNORMAL: { 316 // Convert double-to-half as the processor would, assuming that FPCR.FZ 317 // (flush-to-zero) is not set. 318 319 // Add the implicit '1' bit to the mantissa. 320 mantissa += (UINT64_C(1) << 52); 321 return FPRoundToFloat16(sign, exponent, mantissa, round_mode); 322 } 323 } 324 325 VIXL_UNREACHABLE(); 326 return 0; 327 } 328 329 330 float Simulator::FPToFloat(double value, FPRounding round_mode) { 331 // Only the FPTieEven rounding mode is implemented. 
332 VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd)); 333 USE(round_mode); 334 335 switch (std::fpclassify(value)) { 336 case FP_NAN: { 337 if (IsSignallingNaN(value)) { 338 FPProcessException(); 339 } 340 if (DN()) return kFP32DefaultNaN; 341 342 // Convert NaNs as the processor would: 343 // - The sign is propagated. 344 // - The payload (mantissa) is transferred as much as possible, except 345 // that the top bit is forced to '1', making the result a quiet NaN. 346 uint64_t raw = double_to_rawbits(value); 347 348 uint32_t sign = raw >> 63; 349 uint32_t exponent = (1 << 8) - 1; 350 uint32_t payload = unsigned_bitextract_64(50, 52 - 23, raw); 351 payload |= (1 << 22); // Force a quiet NaN. 352 353 return rawbits_to_float((sign << 31) | (exponent << 23) | payload); 354 } 355 356 case FP_ZERO: 357 case FP_INFINITE: { 358 // In a C++ cast, any value representable in the target type will be 359 // unchanged. This is always the case for +/-0.0 and infinities. 360 return static_cast<float>(value); 361 } 362 363 case FP_NORMAL: 364 case FP_SUBNORMAL: { 365 // Convert double-to-float as the processor would, assuming that FPCR.FZ 366 // (flush-to-zero) is not set. 367 uint64_t raw = double_to_rawbits(value); 368 // Extract the IEEE-754 double components. 369 uint32_t sign = raw >> 63; 370 // Extract the exponent and remove the IEEE-754 encoding bias. 371 int32_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023; 372 // Extract the mantissa and add the implicit '1' bit. 
373 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw); 374 if (std::fpclassify(value) == FP_NORMAL) { 375 mantissa |= (UINT64_C(1) << 52); 376 } 377 return FPRoundToFloat(sign, exponent, mantissa, round_mode); 378 } 379 } 380 381 VIXL_UNREACHABLE(); 382 return value; 383 } 384 385 386 void Simulator::ld1(VectorFormat vform, 387 LogicVRegister dst, 388 uint64_t addr) { 389 dst.ClearForWrite(vform); 390 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 391 dst.ReadUintFromMem(vform, i, addr); 392 addr += LaneSizeInBytesFromFormat(vform); 393 } 394 } 395 396 397 void Simulator::ld1(VectorFormat vform, 398 LogicVRegister dst, 399 int index, 400 uint64_t addr) { 401 dst.ReadUintFromMem(vform, index, addr); 402 } 403 404 405 void Simulator::ld1r(VectorFormat vform, 406 LogicVRegister dst, 407 uint64_t addr) { 408 dst.ClearForWrite(vform); 409 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 410 dst.ReadUintFromMem(vform, i, addr); 411 } 412 } 413 414 415 void Simulator::ld2(VectorFormat vform, 416 LogicVRegister dst1, 417 LogicVRegister dst2, 418 uint64_t addr1) { 419 dst1.ClearForWrite(vform); 420 dst2.ClearForWrite(vform); 421 int esize = LaneSizeInBytesFromFormat(vform); 422 uint64_t addr2 = addr1 + esize; 423 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 424 dst1.ReadUintFromMem(vform, i, addr1); 425 dst2.ReadUintFromMem(vform, i, addr2); 426 addr1 += 2 * esize; 427 addr2 += 2 * esize; 428 } 429 } 430 431 432 void Simulator::ld2(VectorFormat vform, 433 LogicVRegister dst1, 434 LogicVRegister dst2, 435 int index, 436 uint64_t addr1) { 437 dst1.ClearForWrite(vform); 438 dst2.ClearForWrite(vform); 439 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 440 dst1.ReadUintFromMem(vform, index, addr1); 441 dst2.ReadUintFromMem(vform, index, addr2); 442 } 443 444 445 void Simulator::ld2r(VectorFormat vform, 446 LogicVRegister dst1, 447 LogicVRegister dst2, 448 uint64_t addr) { 449 dst1.ClearForWrite(vform); 450 dst2.ClearForWrite(vform); 451 
uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 452 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 453 dst1.ReadUintFromMem(vform, i, addr); 454 dst2.ReadUintFromMem(vform, i, addr2); 455 } 456 } 457 458 459 void Simulator::ld3(VectorFormat vform, 460 LogicVRegister dst1, 461 LogicVRegister dst2, 462 LogicVRegister dst3, 463 uint64_t addr1) { 464 dst1.ClearForWrite(vform); 465 dst2.ClearForWrite(vform); 466 dst3.ClearForWrite(vform); 467 int esize = LaneSizeInBytesFromFormat(vform); 468 uint64_t addr2 = addr1 + esize; 469 uint64_t addr3 = addr2 + esize; 470 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 471 dst1.ReadUintFromMem(vform, i, addr1); 472 dst2.ReadUintFromMem(vform, i, addr2); 473 dst3.ReadUintFromMem(vform, i, addr3); 474 addr1 += 3 * esize; 475 addr2 += 3 * esize; 476 addr3 += 3 * esize; 477 } 478 } 479 480 481 void Simulator::ld3(VectorFormat vform, 482 LogicVRegister dst1, 483 LogicVRegister dst2, 484 LogicVRegister dst3, 485 int index, 486 uint64_t addr1) { 487 dst1.ClearForWrite(vform); 488 dst2.ClearForWrite(vform); 489 dst3.ClearForWrite(vform); 490 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 491 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 492 dst1.ReadUintFromMem(vform, index, addr1); 493 dst2.ReadUintFromMem(vform, index, addr2); 494 dst3.ReadUintFromMem(vform, index, addr3); 495 } 496 497 498 void Simulator::ld3r(VectorFormat vform, 499 LogicVRegister dst1, 500 LogicVRegister dst2, 501 LogicVRegister dst3, 502 uint64_t addr) { 503 dst1.ClearForWrite(vform); 504 dst2.ClearForWrite(vform); 505 dst3.ClearForWrite(vform); 506 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 507 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 508 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 509 dst1.ReadUintFromMem(vform, i, addr); 510 dst2.ReadUintFromMem(vform, i, addr2); 511 dst3.ReadUintFromMem(vform, i, addr3); 512 } 513 } 514 515 516 void Simulator::ld4(VectorFormat vform, 
517 LogicVRegister dst1, 518 LogicVRegister dst2, 519 LogicVRegister dst3, 520 LogicVRegister dst4, 521 uint64_t addr1) { 522 dst1.ClearForWrite(vform); 523 dst2.ClearForWrite(vform); 524 dst3.ClearForWrite(vform); 525 dst4.ClearForWrite(vform); 526 int esize = LaneSizeInBytesFromFormat(vform); 527 uint64_t addr2 = addr1 + esize; 528 uint64_t addr3 = addr2 + esize; 529 uint64_t addr4 = addr3 + esize; 530 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 531 dst1.ReadUintFromMem(vform, i, addr1); 532 dst2.ReadUintFromMem(vform, i, addr2); 533 dst3.ReadUintFromMem(vform, i, addr3); 534 dst4.ReadUintFromMem(vform, i, addr4); 535 addr1 += 4 * esize; 536 addr2 += 4 * esize; 537 addr3 += 4 * esize; 538 addr4 += 4 * esize; 539 } 540 } 541 542 543 void Simulator::ld4(VectorFormat vform, 544 LogicVRegister dst1, 545 LogicVRegister dst2, 546 LogicVRegister dst3, 547 LogicVRegister dst4, 548 int index, 549 uint64_t addr1) { 550 dst1.ClearForWrite(vform); 551 dst2.ClearForWrite(vform); 552 dst3.ClearForWrite(vform); 553 dst4.ClearForWrite(vform); 554 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 555 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 556 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 557 dst1.ReadUintFromMem(vform, index, addr1); 558 dst2.ReadUintFromMem(vform, index, addr2); 559 dst3.ReadUintFromMem(vform, index, addr3); 560 dst4.ReadUintFromMem(vform, index, addr4); 561 } 562 563 564 void Simulator::ld4r(VectorFormat vform, 565 LogicVRegister dst1, 566 LogicVRegister dst2, 567 LogicVRegister dst3, 568 LogicVRegister dst4, 569 uint64_t addr) { 570 dst1.ClearForWrite(vform); 571 dst2.ClearForWrite(vform); 572 dst3.ClearForWrite(vform); 573 dst4.ClearForWrite(vform); 574 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 575 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 576 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 577 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 578 
dst1.ReadUintFromMem(vform, i, addr); 579 dst2.ReadUintFromMem(vform, i, addr2); 580 dst3.ReadUintFromMem(vform, i, addr3); 581 dst4.ReadUintFromMem(vform, i, addr4); 582 } 583 } 584 585 586 void Simulator::st1(VectorFormat vform, 587 LogicVRegister src, 588 uint64_t addr) { 589 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 590 src.WriteUintToMem(vform, i, addr); 591 addr += LaneSizeInBytesFromFormat(vform); 592 } 593 } 594 595 596 void Simulator::st1(VectorFormat vform, 597 LogicVRegister src, 598 int index, 599 uint64_t addr) { 600 src.WriteUintToMem(vform, index, addr); 601 } 602 603 604 void Simulator::st2(VectorFormat vform, 605 LogicVRegister dst, 606 LogicVRegister dst2, 607 uint64_t addr) { 608 int esize = LaneSizeInBytesFromFormat(vform); 609 uint64_t addr2 = addr + esize; 610 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 611 dst.WriteUintToMem(vform, i, addr); 612 dst2.WriteUintToMem(vform, i, addr2); 613 addr += 2 * esize; 614 addr2 += 2 * esize; 615 } 616 } 617 618 619 void Simulator::st2(VectorFormat vform, 620 LogicVRegister dst, 621 LogicVRegister dst2, 622 int index, 623 uint64_t addr) { 624 int esize = LaneSizeInBytesFromFormat(vform); 625 dst.WriteUintToMem(vform, index, addr); 626 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 627 } 628 629 630 void Simulator::st3(VectorFormat vform, 631 LogicVRegister dst, 632 LogicVRegister dst2, 633 LogicVRegister dst3, 634 uint64_t addr) { 635 int esize = LaneSizeInBytesFromFormat(vform); 636 uint64_t addr2 = addr + esize; 637 uint64_t addr3 = addr2 + esize; 638 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 639 dst.WriteUintToMem(vform, i, addr); 640 dst2.WriteUintToMem(vform, i, addr2); 641 dst3.WriteUintToMem(vform, i, addr3); 642 addr += 3 * esize; 643 addr2 += 3 * esize; 644 addr3 += 3 * esize; 645 } 646 } 647 648 649 void Simulator::st3(VectorFormat vform, 650 LogicVRegister dst, 651 LogicVRegister dst2, 652 LogicVRegister dst3, 653 int index, 654 uint64_t addr) { 655 int 
esize = LaneSizeInBytesFromFormat(vform); 656 dst.WriteUintToMem(vform, index, addr); 657 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 658 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 659 } 660 661 662 void Simulator::st4(VectorFormat vform, 663 LogicVRegister dst, 664 LogicVRegister dst2, 665 LogicVRegister dst3, 666 LogicVRegister dst4, 667 uint64_t addr) { 668 int esize = LaneSizeInBytesFromFormat(vform); 669 uint64_t addr2 = addr + esize; 670 uint64_t addr3 = addr2 + esize; 671 uint64_t addr4 = addr3 + esize; 672 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 673 dst.WriteUintToMem(vform, i, addr); 674 dst2.WriteUintToMem(vform, i, addr2); 675 dst3.WriteUintToMem(vform, i, addr3); 676 dst4.WriteUintToMem(vform, i, addr4); 677 addr += 4 * esize; 678 addr2 += 4 * esize; 679 addr3 += 4 * esize; 680 addr4 += 4 * esize; 681 } 682 } 683 684 685 void Simulator::st4(VectorFormat vform, 686 LogicVRegister dst, 687 LogicVRegister dst2, 688 LogicVRegister dst3, 689 LogicVRegister dst4, 690 int index, 691 uint64_t addr) { 692 int esize = LaneSizeInBytesFromFormat(vform); 693 dst.WriteUintToMem(vform, index, addr); 694 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 695 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 696 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 697 } 698 699 700 LogicVRegister Simulator::cmp(VectorFormat vform, 701 LogicVRegister dst, 702 const LogicVRegister& src1, 703 const LogicVRegister& src2, 704 Condition cond) { 705 dst.ClearForWrite(vform); 706 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 707 int64_t sa = src1.Int(vform, i); 708 int64_t sb = src2.Int(vform, i); 709 uint64_t ua = src1.Uint(vform, i); 710 uint64_t ub = src2.Uint(vform, i); 711 bool result = false; 712 switch (cond) { 713 case eq: result = (ua == ub); break; 714 case ge: result = (sa >= sb); break; 715 case gt: result = (sa > sb) ; break; 716 case hi: result = (ua > ub) ; break; 717 case hs: result = (ua >= ub); break; 718 case 
lt: result = (sa < sb) ; break; 719 case le: result = (sa <= sb); break; 720 default: VIXL_UNREACHABLE(); break; 721 } 722 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 723 } 724 return dst; 725 } 726 727 728 LogicVRegister Simulator::cmp(VectorFormat vform, 729 LogicVRegister dst, 730 const LogicVRegister& src1, 731 int imm, 732 Condition cond) { 733 SimVRegister temp; 734 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 735 return cmp(vform, dst, src1, imm_reg, cond); 736 } 737 738 739 LogicVRegister Simulator::cmptst(VectorFormat vform, 740 LogicVRegister dst, 741 const LogicVRegister& src1, 742 const LogicVRegister& src2) { 743 dst.ClearForWrite(vform); 744 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 745 uint64_t ua = src1.Uint(vform, i); 746 uint64_t ub = src2.Uint(vform, i); 747 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 748 } 749 return dst; 750 } 751 752 753 LogicVRegister Simulator::add(VectorFormat vform, 754 LogicVRegister dst, 755 const LogicVRegister& src1, 756 const LogicVRegister& src2) { 757 dst.ClearForWrite(vform); 758 // TODO(all): consider assigning the result of LaneCountFromFormat to a local. 759 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 760 // Test for unsigned saturation. 761 uint64_t ua = src1.UintLeftJustified(vform, i); 762 uint64_t ub = src2.UintLeftJustified(vform, i); 763 uint64_t ur = ua + ub; 764 if (ur < ua) { 765 dst.SetUnsignedSat(i, true); 766 } 767 768 // Test for signed saturation. 769 int64_t sa = src1.IntLeftJustified(vform, i); 770 int64_t sb = src2.IntLeftJustified(vform, i); 771 int64_t sr = sa + sb; 772 // If the signs of the operands are the same, but different from the result, 773 // there was an overflow. 
774 if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 775 dst.SetSignedSat(i, sa >= 0); 776 } 777 778 dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i)); 779 } 780 return dst; 781 } 782 783 784 LogicVRegister Simulator::addp(VectorFormat vform, 785 LogicVRegister dst, 786 const LogicVRegister& src1, 787 const LogicVRegister& src2) { 788 SimVRegister temp1, temp2; 789 uzp1(vform, temp1, src1, src2); 790 uzp2(vform, temp2, src1, src2); 791 add(vform, dst, temp1, temp2); 792 return dst; 793 } 794 795 796 LogicVRegister Simulator::mla(VectorFormat vform, 797 LogicVRegister dst, 798 const LogicVRegister& src1, 799 const LogicVRegister& src2) { 800 SimVRegister temp; 801 mul(vform, temp, src1, src2); 802 add(vform, dst, dst, temp); 803 return dst; 804 } 805 806 807 LogicVRegister Simulator::mls(VectorFormat vform, 808 LogicVRegister dst, 809 const LogicVRegister& src1, 810 const LogicVRegister& src2) { 811 SimVRegister temp; 812 mul(vform, temp, src1, src2); 813 sub(vform, dst, dst, temp); 814 return dst; 815 } 816 817 818 LogicVRegister Simulator::mul(VectorFormat vform, 819 LogicVRegister dst, 820 const LogicVRegister& src1, 821 const LogicVRegister& src2) { 822 dst.ClearForWrite(vform); 823 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 824 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 825 } 826 return dst; 827 } 828 829 830 LogicVRegister Simulator::mul(VectorFormat vform, 831 LogicVRegister dst, 832 const LogicVRegister& src1, 833 const LogicVRegister& src2, 834 int index) { 835 SimVRegister temp; 836 VectorFormat indexform = VectorFormatFillQ(vform); 837 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 838 } 839 840 841 LogicVRegister Simulator::mla(VectorFormat vform, 842 LogicVRegister dst, 843 const LogicVRegister& src1, 844 const LogicVRegister& src2, 845 int index) { 846 SimVRegister temp; 847 VectorFormat indexform = VectorFormatFillQ(vform); 848 return mla(vform, dst, src1, 
dup_element(indexform, temp, src2, index)); 849 } 850 851 852 LogicVRegister Simulator::mls(VectorFormat vform, 853 LogicVRegister dst, 854 const LogicVRegister& src1, 855 const LogicVRegister& src2, 856 int index) { 857 SimVRegister temp; 858 VectorFormat indexform = VectorFormatFillQ(vform); 859 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 860 } 861 862 863 LogicVRegister Simulator::smull(VectorFormat vform, 864 LogicVRegister dst, 865 const LogicVRegister& src1, 866 const LogicVRegister& src2, 867 int index) { 868 SimVRegister temp; 869 VectorFormat indexform = 870 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 871 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 872 } 873 874 875 LogicVRegister Simulator::smull2(VectorFormat vform, 876 LogicVRegister dst, 877 const LogicVRegister& src1, 878 const LogicVRegister& src2, 879 int index) { 880 SimVRegister temp; 881 VectorFormat indexform = 882 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 883 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 884 } 885 886 887 LogicVRegister Simulator::umull(VectorFormat vform, 888 LogicVRegister dst, 889 const LogicVRegister& src1, 890 const LogicVRegister& src2, 891 int index) { 892 SimVRegister temp; 893 VectorFormat indexform = 894 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 895 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 896 } 897 898 899 LogicVRegister Simulator::umull2(VectorFormat vform, 900 LogicVRegister dst, 901 const LogicVRegister& src1, 902 const LogicVRegister& src2, 903 int index) { 904 SimVRegister temp; 905 VectorFormat indexform = 906 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 907 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 908 } 909 910 911 LogicVRegister Simulator::smlal(VectorFormat vform, 912 LogicVRegister dst, 913 const LogicVRegister& src1, 914 const 
LogicVRegister& src2, 915 int index) { 916 SimVRegister temp; 917 VectorFormat indexform = 918 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 919 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 920 } 921 922 923 LogicVRegister Simulator::smlal2(VectorFormat vform, 924 LogicVRegister dst, 925 const LogicVRegister& src1, 926 const LogicVRegister& src2, 927 int index) { 928 SimVRegister temp; 929 VectorFormat indexform = 930 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 931 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 932 } 933 934 935 LogicVRegister Simulator::umlal(VectorFormat vform, 936 LogicVRegister dst, 937 const LogicVRegister& src1, 938 const LogicVRegister& src2, 939 int index) { 940 SimVRegister temp; 941 VectorFormat indexform = 942 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 943 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 944 } 945 946 947 LogicVRegister Simulator::umlal2(VectorFormat vform, 948 LogicVRegister dst, 949 const LogicVRegister& src1, 950 const LogicVRegister& src2, 951 int index) { 952 SimVRegister temp; 953 VectorFormat indexform = 954 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 955 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 956 } 957 958 959 LogicVRegister Simulator::smlsl(VectorFormat vform, 960 LogicVRegister dst, 961 const LogicVRegister& src1, 962 const LogicVRegister& src2, 963 int index) { 964 SimVRegister temp; 965 VectorFormat indexform = 966 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 967 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 968 } 969 970 971 LogicVRegister Simulator::smlsl2(VectorFormat vform, 972 LogicVRegister dst, 973 const LogicVRegister& src1, 974 const LogicVRegister& src2, 975 int index) { 976 SimVRegister temp; 977 VectorFormat indexform = 978 
VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 979 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 980 } 981 982 983 LogicVRegister Simulator::umlsl(VectorFormat vform, 984 LogicVRegister dst, 985 const LogicVRegister& src1, 986 const LogicVRegister& src2, 987 int index) { 988 SimVRegister temp; 989 VectorFormat indexform = 990 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 991 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 992 } 993 994 995 LogicVRegister Simulator::umlsl2(VectorFormat vform, 996 LogicVRegister dst, 997 const LogicVRegister& src1, 998 const LogicVRegister& src2, 999 int index) { 1000 SimVRegister temp; 1001 VectorFormat indexform = 1002 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1003 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1004 } 1005 1006 1007 LogicVRegister Simulator::sqdmull(VectorFormat vform, 1008 LogicVRegister dst, 1009 const LogicVRegister& src1, 1010 const LogicVRegister& src2, 1011 int index) { 1012 SimVRegister temp; 1013 VectorFormat indexform = 1014 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1015 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1016 } 1017 1018 1019 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 1020 LogicVRegister dst, 1021 const LogicVRegister& src1, 1022 const LogicVRegister& src2, 1023 int index) { 1024 SimVRegister temp; 1025 VectorFormat indexform = 1026 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1027 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1028 } 1029 1030 1031 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 1032 LogicVRegister dst, 1033 const LogicVRegister& src1, 1034 const LogicVRegister& src2, 1035 int index) { 1036 SimVRegister temp; 1037 VectorFormat indexform = 1038 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1039 return sqdmlal(vform, dst, 
src1, dup_element(indexform, temp, src2, index)); 1040 } 1041 1042 1043 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 1044 LogicVRegister dst, 1045 const LogicVRegister& src1, 1046 const LogicVRegister& src2, 1047 int index) { 1048 SimVRegister temp; 1049 VectorFormat indexform = 1050 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1051 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1052 } 1053 1054 1055 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 1056 LogicVRegister dst, 1057 const LogicVRegister& src1, 1058 const LogicVRegister& src2, 1059 int index) { 1060 SimVRegister temp; 1061 VectorFormat indexform = 1062 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1063 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1064 } 1065 1066 1067 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 1068 LogicVRegister dst, 1069 const LogicVRegister& src1, 1070 const LogicVRegister& src2, 1071 int index) { 1072 SimVRegister temp; 1073 VectorFormat indexform = 1074 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 1075 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1076 } 1077 1078 1079 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 1080 LogicVRegister dst, 1081 const LogicVRegister& src1, 1082 const LogicVRegister& src2, 1083 int index) { 1084 SimVRegister temp; 1085 VectorFormat indexform = VectorFormatFillQ(vform); 1086 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1087 } 1088 1089 1090 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 1091 LogicVRegister dst, 1092 const LogicVRegister& src1, 1093 const LogicVRegister& src2, 1094 int index) { 1095 SimVRegister temp; 1096 VectorFormat indexform = VectorFormatFillQ(vform); 1097 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 1098 } 1099 1100 1101 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) { 1102 
uint16_t result = 0; 1103 uint16_t extended_op2 = op2; 1104 for (int i = 0; i < 8; ++i) { 1105 if ((op1 >> i) & 1) { 1106 result = result ^ (extended_op2 << i); 1107 } 1108 } 1109 return result; 1110 } 1111 1112 1113 LogicVRegister Simulator::pmul(VectorFormat vform, 1114 LogicVRegister dst, 1115 const LogicVRegister& src1, 1116 const LogicVRegister& src2) { 1117 dst.ClearForWrite(vform); 1118 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1119 dst.SetUint(vform, i, 1120 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 1121 } 1122 return dst; 1123 } 1124 1125 1126 LogicVRegister Simulator::pmull(VectorFormat vform, 1127 LogicVRegister dst, 1128 const LogicVRegister& src1, 1129 const LogicVRegister& src2) { 1130 VectorFormat vform_src = VectorFormatHalfWidth(vform); 1131 dst.ClearForWrite(vform); 1132 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1133 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i), 1134 src2.Uint(vform_src, i))); 1135 } 1136 return dst; 1137 } 1138 1139 1140 LogicVRegister Simulator::pmull2(VectorFormat vform, 1141 LogicVRegister dst, 1142 const LogicVRegister& src1, 1143 const LogicVRegister& src2) { 1144 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 1145 dst.ClearForWrite(vform); 1146 int lane_count = LaneCountFromFormat(vform); 1147 for (int i = 0; i < lane_count; i++) { 1148 dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i), 1149 src2.Uint(vform_src, lane_count + i))); 1150 } 1151 return dst; 1152 } 1153 1154 1155 LogicVRegister Simulator::sub(VectorFormat vform, 1156 LogicVRegister dst, 1157 const LogicVRegister& src1, 1158 const LogicVRegister& src2) { 1159 dst.ClearForWrite(vform); 1160 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1161 // Test for unsigned saturation. 1162 if (src2.Uint(vform, i) > src1.Uint(vform, i)) { 1163 dst.SetUnsignedSat(i, false); 1164 } 1165 1166 // Test for signed saturation. 
1167 int64_t sa = src1.IntLeftJustified(vform, i); 1168 int64_t sb = src2.IntLeftJustified(vform, i); 1169 int64_t sr = sa - sb; 1170 // If the signs of the operands are different, and the sign of the first 1171 // operand doesn't match the result, there was an overflow. 1172 if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) { 1173 dst.SetSignedSat(i, sr < 0); 1174 } 1175 1176 dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i)); 1177 } 1178 return dst; 1179 } 1180 1181 1182 LogicVRegister Simulator::and_(VectorFormat vform, 1183 LogicVRegister dst, 1184 const LogicVRegister& src1, 1185 const LogicVRegister& src2) { 1186 dst.ClearForWrite(vform); 1187 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1188 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 1189 } 1190 return dst; 1191 } 1192 1193 1194 LogicVRegister Simulator::orr(VectorFormat vform, 1195 LogicVRegister dst, 1196 const LogicVRegister& src1, 1197 const LogicVRegister& src2) { 1198 dst.ClearForWrite(vform); 1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1200 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1201 } 1202 return dst; 1203 } 1204 1205 1206 LogicVRegister Simulator::orn(VectorFormat vform, 1207 LogicVRegister dst, 1208 const LogicVRegister& src1, 1209 const LogicVRegister& src2) { 1210 dst.ClearForWrite(vform); 1211 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1212 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1213 } 1214 return dst; 1215 } 1216 1217 1218 LogicVRegister Simulator::eor(VectorFormat vform, 1219 LogicVRegister dst, 1220 const LogicVRegister& src1, 1221 const LogicVRegister& src2) { 1222 dst.ClearForWrite(vform); 1223 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1224 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1225 } 1226 return dst; 1227 } 1228 1229 1230 LogicVRegister Simulator::bic(VectorFormat vform, 1231 LogicVRegister dst, 1232 const 
LogicVRegister& src1, 1233 const LogicVRegister& src2) { 1234 dst.ClearForWrite(vform); 1235 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1236 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1237 } 1238 return dst; 1239 } 1240 1241 1242 LogicVRegister Simulator::bic(VectorFormat vform, 1243 LogicVRegister dst, 1244 const LogicVRegister& src, 1245 uint64_t imm) { 1246 uint64_t result[16]; 1247 int laneCount = LaneCountFromFormat(vform); 1248 for (int i = 0; i < laneCount; ++i) { 1249 result[i] = src.Uint(vform, i) & ~imm; 1250 } 1251 dst.ClearForWrite(vform); 1252 for (int i = 0; i < laneCount; ++i) { 1253 dst.SetUint(vform, i, result[i]); 1254 } 1255 return dst; 1256 } 1257 1258 1259 LogicVRegister Simulator::bif(VectorFormat vform, 1260 LogicVRegister dst, 1261 const LogicVRegister& src1, 1262 const LogicVRegister& src2) { 1263 dst.ClearForWrite(vform); 1264 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1265 uint64_t operand1 = dst.Uint(vform, i); 1266 uint64_t operand2 = ~src2.Uint(vform, i); 1267 uint64_t operand3 = src1.Uint(vform, i); 1268 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1269 dst.SetUint(vform, i, result); 1270 } 1271 return dst; 1272 } 1273 1274 1275 LogicVRegister Simulator::bit(VectorFormat vform, 1276 LogicVRegister dst, 1277 const LogicVRegister& src1, 1278 const LogicVRegister& src2) { 1279 dst.ClearForWrite(vform); 1280 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1281 uint64_t operand1 = dst.Uint(vform, i); 1282 uint64_t operand2 = src2.Uint(vform, i); 1283 uint64_t operand3 = src1.Uint(vform, i); 1284 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1285 dst.SetUint(vform, i, result); 1286 } 1287 return dst; 1288 } 1289 1290 1291 LogicVRegister Simulator::bsl(VectorFormat vform, 1292 LogicVRegister dst, 1293 const LogicVRegister& src1, 1294 const LogicVRegister& src2) { 1295 dst.ClearForWrite(vform); 1296 for (int i = 0; i < LaneCountFromFormat(vform); 
i++) { 1297 uint64_t operand1 = src2.Uint(vform, i); 1298 uint64_t operand2 = dst.Uint(vform, i); 1299 uint64_t operand3 = src1.Uint(vform, i); 1300 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1301 dst.SetUint(vform, i, result); 1302 } 1303 return dst; 1304 } 1305 1306 1307 LogicVRegister Simulator::sminmax(VectorFormat vform, 1308 LogicVRegister dst, 1309 const LogicVRegister& src1, 1310 const LogicVRegister& src2, 1311 bool max) { 1312 dst.ClearForWrite(vform); 1313 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1314 int64_t src1_val = src1.Int(vform, i); 1315 int64_t src2_val = src2.Int(vform, i); 1316 int64_t dst_val; 1317 if (max == true) { 1318 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1319 } else { 1320 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1321 } 1322 dst.SetInt(vform, i, dst_val); 1323 } 1324 return dst; 1325 } 1326 1327 1328 LogicVRegister Simulator::smax(VectorFormat vform, 1329 LogicVRegister dst, 1330 const LogicVRegister& src1, 1331 const LogicVRegister& src2) { 1332 return sminmax(vform, dst, src1, src2, true); 1333 } 1334 1335 1336 LogicVRegister Simulator::smin(VectorFormat vform, 1337 LogicVRegister dst, 1338 const LogicVRegister& src1, 1339 const LogicVRegister& src2) { 1340 return sminmax(vform, dst, src1, src2, false); 1341 } 1342 1343 1344 LogicVRegister Simulator::sminmaxp(VectorFormat vform, 1345 LogicVRegister dst, 1346 int dst_index, 1347 const LogicVRegister& src, 1348 bool max) { 1349 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1350 int64_t src1_val = src.Int(vform, i); 1351 int64_t src2_val = src.Int(vform, i + 1); 1352 int64_t dst_val; 1353 if (max == true) { 1354 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1355 } else { 1356 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1357 } 1358 dst.SetInt(vform, dst_index + (i >> 1), dst_val); 1359 } 1360 return dst; 1361 } 1362 1363 1364 LogicVRegister Simulator::smaxp(VectorFormat vform, 1365 LogicVRegister dst, 1366 const LogicVRegister& src1, 1367 const LogicVRegister& src2) { 1368 dst.ClearForWrite(vform); 1369 sminmaxp(vform, dst, 0, src1, true); 1370 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); 1371 return dst; 1372 } 1373 1374 1375 LogicVRegister Simulator::sminp(VectorFormat vform, 1376 LogicVRegister dst, 1377 const LogicVRegister& src1, 1378 const LogicVRegister& src2) { 1379 dst.ClearForWrite(vform); 1380 sminmaxp(vform, dst, 0, src1, false); 1381 sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); 1382 return dst; 1383 } 1384 1385 1386 LogicVRegister Simulator::addp(VectorFormat vform, 1387 LogicVRegister dst, 1388 const LogicVRegister& src) { 1389 VIXL_ASSERT(vform == kFormatD); 1390 1391 int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1); 1392 dst.ClearForWrite(vform); 1393 dst.SetInt(vform, 0, dst_val); 1394 return dst; 1395 } 1396 1397 1398 LogicVRegister Simulator::addv(VectorFormat vform, 1399 LogicVRegister dst, 1400 const LogicVRegister& src) { 1401 VectorFormat vform_dst 1402 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1403 1404 1405 int64_t dst_val = 0; 1406 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1407 dst_val += src.Int(vform, i); 1408 } 1409 1410 dst.ClearForWrite(vform_dst); 1411 dst.SetInt(vform_dst, 0, dst_val); 1412 return dst; 1413 } 1414 1415 1416 LogicVRegister Simulator::saddlv(VectorFormat vform, 1417 LogicVRegister dst, 1418 const LogicVRegister& src) { 1419 VectorFormat vform_dst 1420 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1421 1422 int64_t dst_val = 0; 1423 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1424 dst_val += src.Int(vform, i); 1425 } 1426 1427 dst.ClearForWrite(vform_dst); 1428 dst.SetInt(vform_dst, 0, 
dst_val); 1429 return dst; 1430 } 1431 1432 1433 LogicVRegister Simulator::uaddlv(VectorFormat vform, 1434 LogicVRegister dst, 1435 const LogicVRegister& src) { 1436 VectorFormat vform_dst 1437 = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1438 1439 uint64_t dst_val = 0; 1440 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1441 dst_val += src.Uint(vform, i); 1442 } 1443 1444 dst.ClearForWrite(vform_dst); 1445 dst.SetUint(vform_dst, 0, dst_val); 1446 return dst; 1447 } 1448 1449 1450 LogicVRegister Simulator::sminmaxv(VectorFormat vform, 1451 LogicVRegister dst, 1452 const LogicVRegister& src, 1453 bool max) { 1454 dst.ClearForWrite(vform); 1455 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1456 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1457 dst.SetInt(vform, i, 0); 1458 int64_t src_val = src.Int(vform, i); 1459 if (max == true) { 1460 dst_val = (src_val > dst_val) ? src_val : dst_val; 1461 } else { 1462 dst_val = (src_val < dst_val) ? src_val : dst_val; 1463 } 1464 } 1465 dst.SetInt(vform, 0, dst_val); 1466 return dst; 1467 } 1468 1469 1470 LogicVRegister Simulator::smaxv(VectorFormat vform, 1471 LogicVRegister dst, 1472 const LogicVRegister& src) { 1473 sminmaxv(vform, dst, src, true); 1474 return dst; 1475 } 1476 1477 1478 LogicVRegister Simulator::sminv(VectorFormat vform, 1479 LogicVRegister dst, 1480 const LogicVRegister& src) { 1481 sminmaxv(vform, dst, src, false); 1482 return dst; 1483 } 1484 1485 1486 LogicVRegister Simulator::uminmax(VectorFormat vform, 1487 LogicVRegister dst, 1488 const LogicVRegister& src1, 1489 const LogicVRegister& src2, 1490 bool max) { 1491 dst.ClearForWrite(vform); 1492 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1493 uint64_t src1_val = src1.Uint(vform, i); 1494 uint64_t src2_val = src2.Uint(vform, i); 1495 uint64_t dst_val; 1496 if (max == true) { 1497 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1498 } else { 1499 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1500 } 1501 dst.SetUint(vform, i, dst_val); 1502 } 1503 return dst; 1504 } 1505 1506 1507 LogicVRegister Simulator::umax(VectorFormat vform, 1508 LogicVRegister dst, 1509 const LogicVRegister& src1, 1510 const LogicVRegister& src2) { 1511 return uminmax(vform, dst, src1, src2, true); 1512 } 1513 1514 1515 LogicVRegister Simulator::umin(VectorFormat vform, 1516 LogicVRegister dst, 1517 const LogicVRegister& src1, 1518 const LogicVRegister& src2) { 1519 return uminmax(vform, dst, src1, src2, false); 1520 } 1521 1522 1523 LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1524 LogicVRegister dst, 1525 int dst_index, 1526 const LogicVRegister& src, 1527 bool max) { 1528 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1529 uint64_t src1_val = src.Uint(vform, i); 1530 uint64_t src2_val = src.Uint(vform, i + 1); 1531 uint64_t dst_val; 1532 if (max == true) { 1533 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1534 } else { 1535 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1536 } 1537 dst.SetUint(vform, dst_index + (i >> 1), dst_val); 1538 } 1539 return dst; 1540 } 1541 1542 1543 LogicVRegister Simulator::umaxp(VectorFormat vform, 1544 LogicVRegister dst, 1545 const LogicVRegister& src1, 1546 const LogicVRegister& src2) { 1547 dst.ClearForWrite(vform); 1548 uminmaxp(vform, dst, 0, src1, true); 1549 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true); 1550 return dst; 1551 } 1552 1553 1554 LogicVRegister Simulator::uminp(VectorFormat vform, 1555 LogicVRegister dst, 1556 const LogicVRegister& src1, 1557 const LogicVRegister& src2) { 1558 dst.ClearForWrite(vform); 1559 uminmaxp(vform, dst, 0, src1, false); 1560 uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false); 1561 return dst; 1562 } 1563 1564 1565 LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1566 LogicVRegister dst, 1567 const LogicVRegister& src, 1568 bool max) { 1569 dst.ClearForWrite(vform); 1570 uint64_t dst_val = max ? 
0 : UINT64_MAX; 1571 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1572 dst.SetUint(vform, i, 0); 1573 uint64_t src_val = src.Uint(vform, i); 1574 if (max == true) { 1575 dst_val = (src_val > dst_val) ? src_val : dst_val; 1576 } else { 1577 dst_val = (src_val < dst_val) ? src_val : dst_val; 1578 } 1579 } 1580 dst.SetUint(vform, 0, dst_val); 1581 return dst; 1582 } 1583 1584 1585 LogicVRegister Simulator::umaxv(VectorFormat vform, 1586 LogicVRegister dst, 1587 const LogicVRegister& src) { 1588 uminmaxv(vform, dst, src, true); 1589 return dst; 1590 } 1591 1592 1593 LogicVRegister Simulator::uminv(VectorFormat vform, 1594 LogicVRegister dst, 1595 const LogicVRegister& src) { 1596 uminmaxv(vform, dst, src, false); 1597 return dst; 1598 } 1599 1600 1601 LogicVRegister Simulator::shl(VectorFormat vform, 1602 LogicVRegister dst, 1603 const LogicVRegister& src, 1604 int shift) { 1605 VIXL_ASSERT(shift >= 0); 1606 SimVRegister temp; 1607 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1608 return ushl(vform, dst, src, shiftreg); 1609 } 1610 1611 1612 LogicVRegister Simulator::sshll(VectorFormat vform, 1613 LogicVRegister dst, 1614 const LogicVRegister& src, 1615 int shift) { 1616 VIXL_ASSERT(shift >= 0); 1617 SimVRegister temp1, temp2; 1618 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1619 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1620 return sshl(vform, dst, extendedreg, shiftreg); 1621 } 1622 1623 1624 LogicVRegister Simulator::sshll2(VectorFormat vform, 1625 LogicVRegister dst, 1626 const LogicVRegister& src, 1627 int shift) { 1628 VIXL_ASSERT(shift >= 0); 1629 SimVRegister temp1, temp2; 1630 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1631 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1632 return sshl(vform, dst, extendedreg, shiftreg); 1633 } 1634 1635 1636 LogicVRegister Simulator::shll(VectorFormat vform, 1637 LogicVRegister dst, 1638 const LogicVRegister& src) { 1639 int shift = 
LaneSizeInBitsFromFormat(vform) / 2; 1640 return sshll(vform, dst, src, shift); 1641 } 1642 1643 1644 LogicVRegister Simulator::shll2(VectorFormat vform, 1645 LogicVRegister dst, 1646 const LogicVRegister& src) { 1647 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1648 return sshll2(vform, dst, src, shift); 1649 } 1650 1651 1652 LogicVRegister Simulator::ushll(VectorFormat vform, 1653 LogicVRegister dst, 1654 const LogicVRegister& src, 1655 int shift) { 1656 VIXL_ASSERT(shift >= 0); 1657 SimVRegister temp1, temp2; 1658 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1659 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1660 return ushl(vform, dst, extendedreg, shiftreg); 1661 } 1662 1663 1664 LogicVRegister Simulator::ushll2(VectorFormat vform, 1665 LogicVRegister dst, 1666 const LogicVRegister& src, 1667 int shift) { 1668 VIXL_ASSERT(shift >= 0); 1669 SimVRegister temp1, temp2; 1670 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1671 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1672 return ushl(vform, dst, extendedreg, shiftreg); 1673 } 1674 1675 1676 LogicVRegister Simulator::sli(VectorFormat vform, 1677 LogicVRegister dst, 1678 const LogicVRegister& src, 1679 int shift) { 1680 dst.ClearForWrite(vform); 1681 int laneCount = LaneCountFromFormat(vform); 1682 for (int i = 0; i < laneCount; i++) { 1683 uint64_t src_lane = src.Uint(vform, i); 1684 uint64_t dst_lane = dst.Uint(vform, i); 1685 uint64_t shifted = src_lane << shift; 1686 uint64_t mask = MaxUintFromFormat(vform) << shift; 1687 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1688 } 1689 return dst; 1690 } 1691 1692 1693 LogicVRegister Simulator::sqshl(VectorFormat vform, 1694 LogicVRegister dst, 1695 const LogicVRegister& src, 1696 int shift) { 1697 VIXL_ASSERT(shift >= 0); 1698 SimVRegister temp; 1699 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1700 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1701 } 1702 1703 1704 
// Unsigned saturating shift left by an immediate: ushl() with a broadcast
// shift amount, followed by UnsignedSaturate().
LogicVRegister Simulator::uqshl(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister temp;
  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
  return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
}


// Signed shift left by an immediate with unsigned saturation: sshl() with a
// broadcast shift amount, followed by UnsignedSaturate().
LogicVRegister Simulator::sqshlu(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src,
                                 int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister temp;
  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
  return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
}


// Shift right and insert: each dst lane keeps its top `shift` bits and takes
// its remaining bits from (src >> shift). `shift` must be in
// [1, lane size in bits].
LogicVRegister Simulator::sri(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int shift) {
  dst.ClearForWrite(vform);
  int laneCount = LaneCountFromFormat(vform);
  VIXL_ASSERT((shift > 0) &&
              (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
  for (int i = 0; i < laneCount; i++) {
    uint64_t src_lane = src.Uint(vform, i);
    uint64_t dst_lane = dst.Uint(vform, i);
    uint64_t shifted;
    uint64_t mask;
    if (shift == 64) {
      // A 64-bit shift of a 64-bit value is undefined in C++; nothing is
      // inserted in this case.
      shifted = 0;
      mask = 0;
    } else {
      shifted = src_lane >> shift;
      mask = MaxUintFromFormat(vform) >> shift;
    }
    dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
  }
  return dst;
}


// Unsigned shift right by an immediate: ushl() with every lane of the shift
// register set to -shift.
LogicVRegister Simulator::ushr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister temp;
  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
  return ushl(vform, dst, src, shiftreg);
}


// Signed shift right by an immediate: sshl() with every lane of the shift
// register set to -shift.
LogicVRegister Simulator::sshr(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  VIXL_ASSERT(shift >= 0);
  SimVRegister temp;
  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
  return sshl(vform, dst, src, shiftreg);
}


// Signed shift right and accumulate: dst += sshr(src, shift).
LogicVRegister Simulator::ssra(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  SimVRegister temp;
  LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
  return add(vform, dst, dst, shifted_reg);
}


// Unsigned shift right and accumulate: dst += ushr(src, shift).
LogicVRegister Simulator::usra(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src,
                               int shift) {
  SimVRegister temp;
  LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
  return add(vform, dst, dst, shifted_reg);
}


// As ssra(), with Round() applied to the shifted value before accumulating.
LogicVRegister Simulator::srsra(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister temp;
  LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
  return add(vform, dst, dst, shifted_reg);
}


// As usra(), with Round() applied to the shifted value before accumulating.
LogicVRegister Simulator::ursra(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                int shift) {
  SimVRegister temp;
  LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
  return add(vform, dst, dst, shifted_reg);
}


// Lane-wise CountLeadingSignBits(). Results are buffered before dst is
// touched, so dst and src may be the same register.
LogicVRegister Simulator::cls(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  uint64_t result[16];
  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
  int laneCount = LaneCountFromFormat(vform);
  for (int i = 0; i < laneCount; i++) {
    result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}


// Lane-wise CountLeadingZeros(). Results are buffered before dst is touched,
// so dst and src may be the same register.
LogicVRegister Simulator::clz(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  uint64_t result[16];
  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
  int laneCount = LaneCountFromFormat(vform);
  for (int i = 0; i < laneCount; i++) {
    result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}


// Lane-wise population count (number of set bits). Results are buffered
// before dst is touched, so dst and src may be the same register.
LogicVRegister Simulator::cnt(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  uint64_t result[16];
  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
  int laneCount = LaneCountFromFormat(vform);
  for (int i = 0; i < laneCount; i++) {
    uint64_t value = src.Uint(vform, i);
    result[i] = 0;
    for (int j = 0; j < laneSizeInBits; j++) {
      result[i] += (value & 1);
      value >>= 1;
    }
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}


// Signed shift by register: each src1 lane is shifted by the signed amount in
// the low byte of the matching src2 lane (left if positive, arithmetic right
// if negative). Saturation and rounding state are recorded on dst so that
// callers can apply SignedSaturate()/UnsignedSaturate()/Round() afterwards.
LogicVRegister Simulator::sshl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int8_t shift_val = src2.Int(vform, i);
    int64_t lj_src_val = src1.IntLeftJustified(vform, i);

    // Set signed saturation state.
    if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
        (lj_src_val != 0)) {
      dst.SetSignedSat(i, lj_src_val >= 0);
    }

    // Set unsigned saturation state.
    if (lj_src_val < 0) {
      dst.SetUnsignedSat(i, false);
    } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
               (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    int64_t src_val = src1.Int(vform, i);
    if (shift_val > 63) {
      dst.SetInt(vform, i, 0);
    } else if (shift_val < -63) {
      dst.SetRounding(i, src_val < 0);
      dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
    } else {
      if (shift_val < 0) {
        // Set rounding state. Rounding only needed on right shifts.
        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }
        src_val >>= -shift_val;
      } else {
        src_val <<= shift_val;
      }
      dst.SetInt(vform, i, src_val);
    }
  }
  return dst;
}


// Unsigned shift by register: each src1 lane is shifted by the signed amount
// in the low byte of the matching src2 lane (left if positive, logical right
// if negative). Unsigned saturation and rounding state are recorded on dst.
LogicVRegister Simulator::ushl(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int8_t shift_val = src2.Int(vform, i);
    uint64_t lj_src_val = src1.UintLeftJustified(vform, i);

    // Set saturation state.
    if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
      dst.SetUnsignedSat(i, true);
    }

    uint64_t src_val = src1.Uint(vform, i);
    if ((shift_val > 63) || (shift_val < -64)) {
      dst.SetUint(vform, i, 0);
    } else {
      if (shift_val < 0) {
        // Set rounding state. Rounding only needed on right shifts.
        if (((src_val >> (-shift_val - 1)) & 1) == 1) {
          dst.SetRounding(i, true);
        }

        if (shift_val == -64) {
          // A 64-bit shift of a 64-bit value is undefined in C++.
          src_val = 0;
        } else {
          src_val >>= -shift_val;
        }
      } else {
        src_val <<= shift_val;
      }
      dst.SetUint(vform, i, src_val);
    }
  }
  return dst;
}


// Lane-wise negation. A lane holding the minimum value of the format has no
// representable negation; signed saturation is flagged for it.
LogicVRegister Simulator::neg(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for signed saturation.
    int64_t sa = src.Int(vform, i);
    if (sa == MinIntFromFormat(vform)) {
      dst.SetSignedSat(i, true);
    }
    // -INT64_MIN overflows, which is undefined behaviour in C++. Keep the
    // bit pattern (the two's complement wrap-around result) in that case.
    dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
  }
  return dst;
}


// Signed saturating accumulate of an unsigned value: dst += src, with dst
// treated as signed and src as unsigned, clamped to the maximum signed value
// of the format on overflow.
LogicVRegister Simulator::suqadd(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t sa = dst.IntLeftJustified(vform, i);
    uint64_t ub = src.UintLeftJustified(vform, i);
    int64_t sr = sa + ub;

    if (sr < sa) {  // Test for signed positive saturation.
      dst.SetInt(vform, i, MaxIntFromFormat(vform));
    } else {
      dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
    }
  }
  return dst;
}


// Unsigned saturating accumulate of a signed value: dst += src, with dst
// treated as unsigned and src as signed, clamped to [0, max unsigned value of
// the format].
LogicVRegister Simulator::usqadd(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t ua = dst.UintLeftJustified(vform, i);
    int64_t sb = src.IntLeftJustified(vform, i);
    uint64_t ur = ua + sb;

    if ((sb > 0) && (ur <= ua)) {
      dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
    } else if ((sb < 0) && (ur >= ua)) {
      dst.SetUint(vform, i, 0);                         // Negative saturation.
    } else {
      dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
    }
  }
  return dst;
}


// Lane-wise absolute value. A lane holding the minimum value of the format
// has no representable absolute value; signed saturation is flagged for it.
LogicVRegister Simulator::abs(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for signed saturation.
    int64_t sa = src.Int(vform, i);
    if (sa == MinIntFromFormat(vform)) {
      dst.SetSignedSat(i, true);
    }
    if (sa < 0) {
      // -INT64_MIN overflows, which is undefined behaviour in C++. Keep the
      // bit pattern (the two's complement wrap-around result) in that case.
      dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
    } else {
      dst.SetInt(vform, i, sa);
    }
  }
  return dst;
}


// Narrows each lane of src (twice the lane size of dstform) to dstform,
// keeping only the low bits. Signed and unsigned saturation state is recorded
// on dst for callers that saturate afterwards. For the 16B/8H/4S destination
// formats the results go to the upper half of dst and the lower half is
// preserved; for all other formats they start at lane 0.
LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
                                        LogicVRegister dst,
                                        bool dstIsSigned,
                                        const LogicVRegister& src,
                                        bool srcIsSigned) {
  bool upperhalf = false;
  VectorFormat srcform = kFormatUndefined;
  int64_t ssrc[8];
  uint64_t usrc[8];

  switch (dstform) {
    case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
    case kFormat16B: upperhalf = true;  srcform = kFormat8H; break;
    case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
    case kFormat8H : upperhalf = true;  srcform = kFormat4S; break;
    case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
    case kFormat4S : upperhalf = true;  srcform = kFormat2D; break;
    case kFormatB  : upperhalf = false; srcform = kFormatH;  break;
    case kFormatH  : upperhalf = false; srcform = kFormatS;  break;
    case kFormatS  : upperhalf = false; srcform = kFormatD;  break;
    default:VIXL_UNIMPLEMENTED();
  }

  // Snapshot the source lanes before dst is modified.
  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    ssrc[i] = src.Int(srcform, i);
    usrc[i] = src.Uint(srcform, i);
  }

  int offset;
  if (upperhalf) {
    offset = LaneCountFromFormat(dstform) / 2;
  } else {
    offset = 0;
    dst.ClearForWrite(dstform);
  }

  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
    // Test for signed saturation
    if (ssrc[i] > MaxIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, true);
    } else if (ssrc[i] < MinIntFromFormat(dstform)) {
      dst.SetSignedSat(offset + i, false);
    }

    // Test for unsigned saturation
    if (srcIsSigned) {
      if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
        dst.SetUnsignedSat(offset + i, true);
      } else if (ssrc[i] < 0) {
        dst.SetUnsignedSat(offset + i, false);
      }
    } else {
      if (usrc[i] > MaxUintFromFormat(dstform)) {
        dst.SetUnsignedSat(offset + i, true);
      }
    }

    int64_t result;
    if (srcIsSigned) {
      result = ssrc[i] & MaxUintFromFormat(dstform);
    } else {
      result = usrc[i] & MaxUintFromFormat(dstform);
    }

    if (dstIsSigned) {
      dst.SetInt(dstform, offset + i, result);
    } else {
      dst.SetUint(dstform, offset + i, result);
    }
  }
  return dst;
}


// Extract narrow (truncating, no saturation applied).
LogicVRegister Simulator::xtn(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src) {
  return extractnarrow(vform, dst, true, src, true);
}


// Signed extract narrow with signed saturation.
LogicVRegister Simulator::sqxtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
}


// Signed extract narrow with unsigned saturation.
LogicVRegister Simulator::sqxtun(VectorFormat vform,
                                 LogicVRegister dst,
                                 const LogicVRegister& src) {
  return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
}


// Unsigned extract narrow with unsigned saturation.
LogicVRegister Simulator::uqxtn(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
}


// Lane-wise absolute difference |src1 - src2|, with the lanes read as signed
// (issigned == true) or unsigned values.
// NOTE(review): the difference is held in an int64_t, so 64-bit lanes could
// overflow or be misread as negative in the unsigned case. Presumably callers
// only pass lane sizes of 32 bits or less; confirm against the callers.
LogicVRegister Simulator::absdiff(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2,
                                  bool issigned) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    if (issigned) {
      int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
      sr = sr > 0 ? sr : -sr;
      dst.SetInt(vform, i, sr);
    } else {
      int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
      sr = sr > 0 ? sr : -sr;
      dst.SetUint(vform, i, sr);
    }
  }
  return dst;
}


// Signed absolute difference and accumulate: dst += |src1 - src2|.
LogicVRegister Simulator::saba(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister temp;
  dst.ClearForWrite(vform);
  absdiff(vform, temp, src1, src2, true);
  add(vform, dst, dst, temp);
  return dst;
}


// Unsigned absolute difference and accumulate: dst += |src1 - src2|.
LogicVRegister Simulator::uaba(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister temp;
  dst.ClearForWrite(vform);
  absdiff(vform, temp, src1, src2, false);
  add(vform, dst, dst, temp);
  return dst;
}


// Lane-wise bitwise NOT: dst = ~src.
LogicVRegister Simulator::not_(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, ~src.Uint(vform, i));
  }
  return dst;
}


// Reverses the bit order within each lane. Results are buffered before dst is
// touched, so dst and src may be the same register.
LogicVRegister Simulator::rbit(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src) {
  uint64_t result[16];
  int laneCount = LaneCountFromFormat(vform);
  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
  uint64_t reversed_value;
  uint64_t value;
  for (int i = 0; i < laneCount; i++) {
    value = src.Uint(vform, i);
    reversed_value = 0;
    for (int j = 0; j < laneSizeInBits; j++) {
      // Move the lowest remaining bit of value onto the bottom of the result.
      reversed_value = (reversed_value << 1) | (value & 1);
      value >>= 1;
    }
    result[i] = reversed_value;
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}


// Reverses the order of the lanes inside each group of revSize bytes.
// Results are buffered before dst is touched, so dst and src may be the same
// register.
LogicVRegister Simulator::rev(VectorFormat vform,
                              LogicVRegister dst,
                              const LogicVRegister& src,
                              int revSize) {
  uint64_t result[16];
  int laneCount = LaneCountFromFormat(vform);
  int laneSize = LaneSizeInBytesFromFormat(vform);
  int lanesPerLoop =  revSize / laneSize;
  for (int i = 0; i < laneCount; i += lanesPerLoop) {
    for (int j = 0; j < lanesPerLoop; j++) {
      result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
    }
  }
  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    dst.SetUint(vform, i, result[i]);
  }
  return dst;
}


// Reverses the lanes within each 2-byte group.
LogicVRegister Simulator::rev16(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  return rev(vform, dst, src, 2);
}


// Reverses the lanes within each 4-byte group.
LogicVRegister Simulator::rev32(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  return rev(vform, dst, src, 4);
}


// Reverses the lanes within each 8-byte group.
LogicVRegister Simulator::rev64(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src) {
  return rev(vform, dst, src, 8);
}


// Add long pairwise: destination lane i (format vform) receives the sum of
// source lanes 2i and 2i + 1, read in the half-width, double-lane-count
// format as signed or unsigned values. With do_accumulate the sum is added to
// the existing destination lane instead of replacing it. The sums are
// buffered before dst is touched, so dst and src may be the same register.
LogicVRegister Simulator::addlp(VectorFormat vform,
                                LogicVRegister dst,
                                const LogicVRegister& src,
                                bool is_signed,
                                bool do_accumulate) {
  VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);

  // Only the array matching is_signed is written and later read.
  int64_t  sr[16];
  uint64_t ur[16];

  int laneCount = LaneCountFromFormat(vform);
  for (int i = 0; i < laneCount; ++i) {
    if (is_signed) {
      sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
    } else {
      ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
    }
  }

  dst.ClearForWrite(vform);
  for (int i = 0; i < laneCount; ++i) {
    if (do_accumulate) {
      if (is_signed) {
        dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
      } else {
        dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
      }
    } else {
      if (is_signed) {
        dst.SetInt(vform, i, sr[i]);
      } else {
        dst.SetUint(vform, i, ur[i]);
      }
    }
  }

  return dst;
}


LogicVRegister Simulator::saddlp(VectorFormat vform,
LogicVRegister dst, 2302 const LogicVRegister& src) { 2303 return addlp(vform, dst, src, true, false); 2304 } 2305 2306 2307 LogicVRegister Simulator::uaddlp(VectorFormat vform, 2308 LogicVRegister dst, 2309 const LogicVRegister& src) { 2310 return addlp(vform, dst, src, false, false); 2311 } 2312 2313 2314 LogicVRegister Simulator::sadalp(VectorFormat vform, 2315 LogicVRegister dst, 2316 const LogicVRegister& src) { 2317 return addlp(vform, dst, src, true, true); 2318 } 2319 2320 2321 LogicVRegister Simulator::uadalp(VectorFormat vform, 2322 LogicVRegister dst, 2323 const LogicVRegister& src) { 2324 return addlp(vform, dst, src, false, true); 2325 } 2326 2327 2328 LogicVRegister Simulator::ext(VectorFormat vform, 2329 LogicVRegister dst, 2330 const LogicVRegister& src1, 2331 const LogicVRegister& src2, 2332 int index) { 2333 uint8_t result[16]; 2334 int laneCount = LaneCountFromFormat(vform); 2335 for (int i = 0; i < laneCount - index; ++i) { 2336 result[i] = src1.Uint(vform, i + index); 2337 } 2338 for (int i = 0; i < index; ++i) { 2339 result[laneCount - index + i] = src2.Uint(vform, i); 2340 } 2341 dst.ClearForWrite(vform); 2342 for (int i = 0; i < laneCount; ++i) { 2343 dst.SetUint(vform, i, result[i]); 2344 } 2345 return dst; 2346 } 2347 2348 2349 LogicVRegister Simulator::dup_element(VectorFormat vform, 2350 LogicVRegister dst, 2351 const LogicVRegister& src, 2352 int src_index) { 2353 int laneCount = LaneCountFromFormat(vform); 2354 uint64_t value = src.Uint(vform, src_index); 2355 dst.ClearForWrite(vform); 2356 for (int i = 0; i < laneCount; ++i) { 2357 dst.SetUint(vform, i, value); 2358 } 2359 return dst; 2360 } 2361 2362 2363 LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2364 LogicVRegister dst, 2365 uint64_t imm) { 2366 int laneCount = LaneCountFromFormat(vform); 2367 uint64_t value = imm & MaxUintFromFormat(vform); 2368 dst.ClearForWrite(vform); 2369 for (int i = 0; i < laneCount; ++i) { 2370 dst.SetUint(vform, i, value); 2371 } 2372 
return dst; 2373 } 2374 2375 2376 LogicVRegister Simulator::ins_element(VectorFormat vform, 2377 LogicVRegister dst, 2378 int dst_index, 2379 const LogicVRegister& src, 2380 int src_index) { 2381 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2382 return dst; 2383 } 2384 2385 2386 LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2387 LogicVRegister dst, 2388 int dst_index, 2389 uint64_t imm) { 2390 uint64_t value = imm & MaxUintFromFormat(vform); 2391 dst.SetUint(vform, dst_index, value); 2392 return dst; 2393 } 2394 2395 2396 LogicVRegister Simulator::movi(VectorFormat vform, 2397 LogicVRegister dst, 2398 uint64_t imm) { 2399 int laneCount = LaneCountFromFormat(vform); 2400 dst.ClearForWrite(vform); 2401 for (int i = 0; i < laneCount; ++i) { 2402 dst.SetUint(vform, i, imm); 2403 } 2404 return dst; 2405 } 2406 2407 2408 LogicVRegister Simulator::mvni(VectorFormat vform, 2409 LogicVRegister dst, 2410 uint64_t imm) { 2411 int laneCount = LaneCountFromFormat(vform); 2412 dst.ClearForWrite(vform); 2413 for (int i = 0; i < laneCount; ++i) { 2414 dst.SetUint(vform, i, ~imm); 2415 } 2416 return dst; 2417 } 2418 2419 2420 LogicVRegister Simulator::orr(VectorFormat vform, 2421 LogicVRegister dst, 2422 const LogicVRegister& src, 2423 uint64_t imm) { 2424 uint64_t result[16]; 2425 int laneCount = LaneCountFromFormat(vform); 2426 for (int i = 0; i < laneCount; ++i) { 2427 result[i] = src.Uint(vform, i) | imm; 2428 } 2429 dst.ClearForWrite(vform); 2430 for (int i = 0; i < laneCount; ++i) { 2431 dst.SetUint(vform, i, result[i]); 2432 } 2433 return dst; 2434 } 2435 2436 2437 LogicVRegister Simulator::uxtl(VectorFormat vform, 2438 LogicVRegister dst, 2439 const LogicVRegister& src) { 2440 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2441 2442 dst.ClearForWrite(vform); 2443 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2444 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2445 } 2446 return dst; 2447 } 2448 2449 2450 LogicVRegister 
Simulator::sxtl(VectorFormat vform, 2451 LogicVRegister dst, 2452 const LogicVRegister& src) { 2453 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2454 2455 dst.ClearForWrite(vform); 2456 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2457 dst.SetInt(vform, i, src.Int(vform_half, i)); 2458 } 2459 return dst; 2460 } 2461 2462 2463 LogicVRegister Simulator::uxtl2(VectorFormat vform, 2464 LogicVRegister dst, 2465 const LogicVRegister& src) { 2466 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2467 int lane_count = LaneCountFromFormat(vform); 2468 2469 dst.ClearForWrite(vform); 2470 for (int i = 0; i < lane_count; i++) { 2471 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2472 } 2473 return dst; 2474 } 2475 2476 2477 LogicVRegister Simulator::sxtl2(VectorFormat vform, 2478 LogicVRegister dst, 2479 const LogicVRegister& src) { 2480 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2481 int lane_count = LaneCountFromFormat(vform); 2482 2483 dst.ClearForWrite(vform); 2484 for (int i = 0; i < lane_count; i++) { 2485 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2486 } 2487 return dst; 2488 } 2489 2490 2491 LogicVRegister Simulator::shrn(VectorFormat vform, 2492 LogicVRegister dst, 2493 const LogicVRegister& src, 2494 int shift) { 2495 SimVRegister temp; 2496 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2497 VectorFormat vform_dst = vform; 2498 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2499 return extractnarrow(vform_dst, dst, false, shifted_src, false); 2500 } 2501 2502 2503 LogicVRegister Simulator::shrn2(VectorFormat vform, 2504 LogicVRegister dst, 2505 const LogicVRegister& src, 2506 int shift) { 2507 SimVRegister temp; 2508 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2509 VectorFormat vformdst = vform; 2510 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2511 return extractnarrow(vformdst, dst, false, shifted_src, false); 2512 } 
2513 2514 2515 LogicVRegister Simulator::rshrn(VectorFormat vform, 2516 LogicVRegister dst, 2517 const LogicVRegister& src, 2518 int shift) { 2519 SimVRegister temp; 2520 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2521 VectorFormat vformdst = vform; 2522 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2523 return extractnarrow(vformdst, dst, false, shifted_src, false); 2524 } 2525 2526 2527 LogicVRegister Simulator::rshrn2(VectorFormat vform, 2528 LogicVRegister dst, 2529 const LogicVRegister& src, 2530 int shift) { 2531 SimVRegister temp; 2532 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2533 VectorFormat vformdst = vform; 2534 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2535 return extractnarrow(vformdst, dst, false, shifted_src, false); 2536 } 2537 2538 2539 LogicVRegister Simulator::tbl(VectorFormat vform, 2540 LogicVRegister dst, 2541 const LogicVRegister& tab, 2542 const LogicVRegister& ind) { 2543 movi(vform, dst, 0); 2544 return tbx(vform, dst, tab, ind); 2545 } 2546 2547 2548 LogicVRegister Simulator::tbl(VectorFormat vform, 2549 LogicVRegister dst, 2550 const LogicVRegister& tab, 2551 const LogicVRegister& tab2, 2552 const LogicVRegister& ind) { 2553 movi(vform, dst, 0); 2554 return tbx(vform, dst, tab, tab2, ind); 2555 } 2556 2557 2558 LogicVRegister Simulator::tbl(VectorFormat vform, 2559 LogicVRegister dst, 2560 const LogicVRegister& tab, 2561 const LogicVRegister& tab2, 2562 const LogicVRegister& tab3, 2563 const LogicVRegister& ind) { 2564 movi(vform, dst, 0); 2565 return tbx(vform, dst, tab, tab2, tab3, ind); 2566 } 2567 2568 2569 LogicVRegister Simulator::tbl(VectorFormat vform, 2570 LogicVRegister dst, 2571 const LogicVRegister& tab, 2572 const LogicVRegister& tab2, 2573 const LogicVRegister& tab3, 2574 const LogicVRegister& tab4, 2575 const LogicVRegister& ind) { 2576 movi(vform, dst, 0); 2577 return tbx(vform, dst, tab, tab2, tab3, 
tab4, ind); 2578 } 2579 2580 2581 LogicVRegister Simulator::tbx(VectorFormat vform, 2582 LogicVRegister dst, 2583 const LogicVRegister& tab, 2584 const LogicVRegister& ind) { 2585 dst.ClearForWrite(vform); 2586 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2587 unsigned j = ind.Uint(vform, i); 2588 switch (j >> 4) { 2589 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2590 } 2591 } 2592 return dst; 2593 } 2594 2595 2596 LogicVRegister Simulator::tbx(VectorFormat vform, 2597 LogicVRegister dst, 2598 const LogicVRegister& tab, 2599 const LogicVRegister& tab2, 2600 const LogicVRegister& ind) { 2601 dst.ClearForWrite(vform); 2602 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2603 unsigned j = ind.Uint(vform, i); 2604 switch (j >> 4) { 2605 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2606 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2607 } 2608 } 2609 return dst; 2610 } 2611 2612 2613 LogicVRegister Simulator::tbx(VectorFormat vform, 2614 LogicVRegister dst, 2615 const LogicVRegister& tab, 2616 const LogicVRegister& tab2, 2617 const LogicVRegister& tab3, 2618 const LogicVRegister& ind) { 2619 dst.ClearForWrite(vform); 2620 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2621 unsigned j = ind.Uint(vform, i); 2622 switch (j >> 4) { 2623 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2624 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2625 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break; 2626 } 2627 } 2628 return dst; 2629 } 2630 2631 2632 LogicVRegister Simulator::tbx(VectorFormat vform, 2633 LogicVRegister dst, 2634 const LogicVRegister& tab, 2635 const LogicVRegister& tab2, 2636 const LogicVRegister& tab3, 2637 const LogicVRegister& tab4, 2638 const LogicVRegister& ind) { 2639 dst.ClearForWrite(vform); 2640 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2641 unsigned j = ind.Uint(vform, i); 2642 switch (j >> 
4) { 2643 case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break; 2644 case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break; 2645 case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break; 2646 case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break; 2647 } 2648 } 2649 return dst; 2650 } 2651 2652 2653 LogicVRegister Simulator::uqshrn(VectorFormat vform, 2654 LogicVRegister dst, 2655 const LogicVRegister& src, 2656 int shift) { 2657 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2658 } 2659 2660 2661 LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2662 LogicVRegister dst, 2663 const LogicVRegister& src, 2664 int shift) { 2665 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2666 } 2667 2668 2669 LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2670 LogicVRegister dst, 2671 const LogicVRegister& src, 2672 int shift) { 2673 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2674 } 2675 2676 2677 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2678 LogicVRegister dst, 2679 const LogicVRegister& src, 2680 int shift) { 2681 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2682 } 2683 2684 2685 LogicVRegister Simulator::sqshrn(VectorFormat vform, 2686 LogicVRegister dst, 2687 const LogicVRegister& src, 2688 int shift) { 2689 SimVRegister temp; 2690 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2691 VectorFormat vformdst = vform; 2692 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2693 return sqxtn(vformdst, dst, shifted_src); 2694 } 2695 2696 2697 LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2698 LogicVRegister dst, 2699 const LogicVRegister& src, 2700 int shift) { 2701 SimVRegister temp; 2702 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2703 VectorFormat vformdst = vform; 2704 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2705 return sqxtn(vformdst, dst, shifted_src); 
2706 } 2707 2708 2709 LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2710 LogicVRegister dst, 2711 const LogicVRegister& src, 2712 int shift) { 2713 SimVRegister temp; 2714 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2715 VectorFormat vformdst = vform; 2716 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2717 return sqxtn(vformdst, dst, shifted_src); 2718 } 2719 2720 2721 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2722 LogicVRegister dst, 2723 const LogicVRegister& src, 2724 int shift) { 2725 SimVRegister temp; 2726 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2727 VectorFormat vformdst = vform; 2728 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2729 return sqxtn(vformdst, dst, shifted_src); 2730 } 2731 2732 2733 LogicVRegister Simulator::sqshrun(VectorFormat vform, 2734 LogicVRegister dst, 2735 const LogicVRegister& src, 2736 int shift) { 2737 SimVRegister temp; 2738 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2739 VectorFormat vformdst = vform; 2740 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2741 return sqxtun(vformdst, dst, shifted_src); 2742 } 2743 2744 2745 LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2746 LogicVRegister dst, 2747 const LogicVRegister& src, 2748 int shift) { 2749 SimVRegister temp; 2750 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2751 VectorFormat vformdst = vform; 2752 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2753 return sqxtun(vformdst, dst, shifted_src); 2754 } 2755 2756 2757 LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2758 LogicVRegister dst, 2759 const LogicVRegister& src, 2760 int shift) { 2761 SimVRegister temp; 2762 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2763 VectorFormat vformdst = vform; 2764 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2765 return 
sqxtun(vformdst, dst, shifted_src); 2766 } 2767 2768 2769 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2770 LogicVRegister dst, 2771 const LogicVRegister& src, 2772 int shift) { 2773 SimVRegister temp; 2774 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2775 VectorFormat vformdst = vform; 2776 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2777 return sqxtun(vformdst, dst, shifted_src); 2778 } 2779 2780 2781 LogicVRegister Simulator::uaddl(VectorFormat vform, 2782 LogicVRegister dst, 2783 const LogicVRegister& src1, 2784 const LogicVRegister& src2) { 2785 SimVRegister temp1, temp2; 2786 uxtl(vform, temp1, src1); 2787 uxtl(vform, temp2, src2); 2788 add(vform, dst, temp1, temp2); 2789 return dst; 2790 } 2791 2792 2793 LogicVRegister Simulator::uaddl2(VectorFormat vform, 2794 LogicVRegister dst, 2795 const LogicVRegister& src1, 2796 const LogicVRegister& src2) { 2797 SimVRegister temp1, temp2; 2798 uxtl2(vform, temp1, src1); 2799 uxtl2(vform, temp2, src2); 2800 add(vform, dst, temp1, temp2); 2801 return dst; 2802 } 2803 2804 2805 LogicVRegister Simulator::uaddw(VectorFormat vform, 2806 LogicVRegister dst, 2807 const LogicVRegister& src1, 2808 const LogicVRegister& src2) { 2809 SimVRegister temp; 2810 uxtl(vform, temp, src2); 2811 add(vform, dst, src1, temp); 2812 return dst; 2813 } 2814 2815 2816 LogicVRegister Simulator::uaddw2(VectorFormat vform, 2817 LogicVRegister dst, 2818 const LogicVRegister& src1, 2819 const LogicVRegister& src2) { 2820 SimVRegister temp; 2821 uxtl2(vform, temp, src2); 2822 add(vform, dst, src1, temp); 2823 return dst; 2824 } 2825 2826 2827 LogicVRegister Simulator::saddl(VectorFormat vform, 2828 LogicVRegister dst, 2829 const LogicVRegister& src1, 2830 const LogicVRegister& src2) { 2831 SimVRegister temp1, temp2; 2832 sxtl(vform, temp1, src1); 2833 sxtl(vform, temp2, src2); 2834 add(vform, dst, temp1, temp2); 2835 return dst; 2836 } 2837 2838 2839 LogicVRegister 
Simulator::saddl2(VectorFormat vform, 2840 LogicVRegister dst, 2841 const LogicVRegister& src1, 2842 const LogicVRegister& src2) { 2843 SimVRegister temp1, temp2; 2844 sxtl2(vform, temp1, src1); 2845 sxtl2(vform, temp2, src2); 2846 add(vform, dst, temp1, temp2); 2847 return dst; 2848 } 2849 2850 2851 LogicVRegister Simulator::saddw(VectorFormat vform, 2852 LogicVRegister dst, 2853 const LogicVRegister& src1, 2854 const LogicVRegister& src2) { 2855 SimVRegister temp; 2856 sxtl(vform, temp, src2); 2857 add(vform, dst, src1, temp); 2858 return dst; 2859 } 2860 2861 2862 LogicVRegister Simulator::saddw2(VectorFormat vform, 2863 LogicVRegister dst, 2864 const LogicVRegister& src1, 2865 const LogicVRegister& src2) { 2866 SimVRegister temp; 2867 sxtl2(vform, temp, src2); 2868 add(vform, dst, src1, temp); 2869 return dst; 2870 } 2871 2872 2873 LogicVRegister Simulator::usubl(VectorFormat vform, 2874 LogicVRegister dst, 2875 const LogicVRegister& src1, 2876 const LogicVRegister& src2) { 2877 SimVRegister temp1, temp2; 2878 uxtl(vform, temp1, src1); 2879 uxtl(vform, temp2, src2); 2880 sub(vform, dst, temp1, temp2); 2881 return dst; 2882 } 2883 2884 2885 LogicVRegister Simulator::usubl2(VectorFormat vform, 2886 LogicVRegister dst, 2887 const LogicVRegister& src1, 2888 const LogicVRegister& src2) { 2889 SimVRegister temp1, temp2; 2890 uxtl2(vform, temp1, src1); 2891 uxtl2(vform, temp2, src2); 2892 sub(vform, dst, temp1, temp2); 2893 return dst; 2894 } 2895 2896 2897 LogicVRegister Simulator::usubw(VectorFormat vform, 2898 LogicVRegister dst, 2899 const LogicVRegister& src1, 2900 const LogicVRegister& src2) { 2901 SimVRegister temp; 2902 uxtl(vform, temp, src2); 2903 sub(vform, dst, src1, temp); 2904 return dst; 2905 } 2906 2907 2908 LogicVRegister Simulator::usubw2(VectorFormat vform, 2909 LogicVRegister dst, 2910 const LogicVRegister& src1, 2911 const LogicVRegister& src2) { 2912 SimVRegister temp; 2913 uxtl2(vform, temp, src2); 2914 sub(vform, dst, src1, temp); 2915 return 
dst; 2916 } 2917 2918 2919 LogicVRegister Simulator::ssubl(VectorFormat vform, 2920 LogicVRegister dst, 2921 const LogicVRegister& src1, 2922 const LogicVRegister& src2) { 2923 SimVRegister temp1, temp2; 2924 sxtl(vform, temp1, src1); 2925 sxtl(vform, temp2, src2); 2926 sub(vform, dst, temp1, temp2); 2927 return dst; 2928 } 2929 2930 2931 LogicVRegister Simulator::ssubl2(VectorFormat vform, 2932 LogicVRegister dst, 2933 const LogicVRegister& src1, 2934 const LogicVRegister& src2) { 2935 SimVRegister temp1, temp2; 2936 sxtl2(vform, temp1, src1); 2937 sxtl2(vform, temp2, src2); 2938 sub(vform, dst, temp1, temp2); 2939 return dst; 2940 } 2941 2942 2943 LogicVRegister Simulator::ssubw(VectorFormat vform, 2944 LogicVRegister dst, 2945 const LogicVRegister& src1, 2946 const LogicVRegister& src2) { 2947 SimVRegister temp; 2948 sxtl(vform, temp, src2); 2949 sub(vform, dst, src1, temp); 2950 return dst; 2951 } 2952 2953 2954 LogicVRegister Simulator::ssubw2(VectorFormat vform, 2955 LogicVRegister dst, 2956 const LogicVRegister& src1, 2957 const LogicVRegister& src2) { 2958 SimVRegister temp; 2959 sxtl2(vform, temp, src2); 2960 sub(vform, dst, src1, temp); 2961 return dst; 2962 } 2963 2964 2965 LogicVRegister Simulator::uabal(VectorFormat vform, 2966 LogicVRegister dst, 2967 const LogicVRegister& src1, 2968 const LogicVRegister& src2) { 2969 SimVRegister temp1, temp2; 2970 uxtl(vform, temp1, src1); 2971 uxtl(vform, temp2, src2); 2972 uaba(vform, dst, temp1, temp2); 2973 return dst; 2974 } 2975 2976 2977 LogicVRegister Simulator::uabal2(VectorFormat vform, 2978 LogicVRegister dst, 2979 const LogicVRegister& src1, 2980 const LogicVRegister& src2) { 2981 SimVRegister temp1, temp2; 2982 uxtl2(vform, temp1, src1); 2983 uxtl2(vform, temp2, src2); 2984 uaba(vform, dst, temp1, temp2); 2985 return dst; 2986 } 2987 2988 2989 LogicVRegister Simulator::sabal(VectorFormat vform, 2990 LogicVRegister dst, 2991 const LogicVRegister& src1, 2992 const LogicVRegister& src2) { 2993 SimVRegister 
temp1, temp2; 2994 sxtl(vform, temp1, src1); 2995 sxtl(vform, temp2, src2); 2996 saba(vform, dst, temp1, temp2); 2997 return dst; 2998 } 2999 3000 3001 LogicVRegister Simulator::sabal2(VectorFormat vform, 3002 LogicVRegister dst, 3003 const LogicVRegister& src1, 3004 const LogicVRegister& src2) { 3005 SimVRegister temp1, temp2; 3006 sxtl2(vform, temp1, src1); 3007 sxtl2(vform, temp2, src2); 3008 saba(vform, dst, temp1, temp2); 3009 return dst; 3010 } 3011 3012 3013 LogicVRegister Simulator::uabdl(VectorFormat vform, 3014 LogicVRegister dst, 3015 const LogicVRegister& src1, 3016 const LogicVRegister& src2) { 3017 SimVRegister temp1, temp2; 3018 uxtl(vform, temp1, src1); 3019 uxtl(vform, temp2, src2); 3020 absdiff(vform, dst, temp1, temp2, false); 3021 return dst; 3022 } 3023 3024 3025 LogicVRegister Simulator::uabdl2(VectorFormat vform, 3026 LogicVRegister dst, 3027 const LogicVRegister& src1, 3028 const LogicVRegister& src2) { 3029 SimVRegister temp1, temp2; 3030 uxtl2(vform, temp1, src1); 3031 uxtl2(vform, temp2, src2); 3032 absdiff(vform, dst, temp1, temp2, false); 3033 return dst; 3034 } 3035 3036 3037 LogicVRegister Simulator::sabdl(VectorFormat vform, 3038 LogicVRegister dst, 3039 const LogicVRegister& src1, 3040 const LogicVRegister& src2) { 3041 SimVRegister temp1, temp2; 3042 sxtl(vform, temp1, src1); 3043 sxtl(vform, temp2, src2); 3044 absdiff(vform, dst, temp1, temp2, true); 3045 return dst; 3046 } 3047 3048 3049 LogicVRegister Simulator::sabdl2(VectorFormat vform, 3050 LogicVRegister dst, 3051 const LogicVRegister& src1, 3052 const LogicVRegister& src2) { 3053 SimVRegister temp1, temp2; 3054 sxtl2(vform, temp1, src1); 3055 sxtl2(vform, temp2, src2); 3056 absdiff(vform, dst, temp1, temp2, true); 3057 return dst; 3058 } 3059 3060 3061 LogicVRegister Simulator::umull(VectorFormat vform, 3062 LogicVRegister dst, 3063 const LogicVRegister& src1, 3064 const LogicVRegister& src2) { 3065 SimVRegister temp1, temp2; 3066 uxtl(vform, temp1, src1); 3067 uxtl(vform, 
temp2, src2); 3068 mul(vform, dst, temp1, temp2); 3069 return dst; 3070 } 3071 3072 3073 LogicVRegister Simulator::umull2(VectorFormat vform, 3074 LogicVRegister dst, 3075 const LogicVRegister& src1, 3076 const LogicVRegister& src2) { 3077 SimVRegister temp1, temp2; 3078 uxtl2(vform, temp1, src1); 3079 uxtl2(vform, temp2, src2); 3080 mul(vform, dst, temp1, temp2); 3081 return dst; 3082 } 3083 3084 3085 LogicVRegister Simulator::smull(VectorFormat vform, 3086 LogicVRegister dst, 3087 const LogicVRegister& src1, 3088 const LogicVRegister& src2) { 3089 SimVRegister temp1, temp2; 3090 sxtl(vform, temp1, src1); 3091 sxtl(vform, temp2, src2); 3092 mul(vform, dst, temp1, temp2); 3093 return dst; 3094 } 3095 3096 3097 LogicVRegister Simulator::smull2(VectorFormat vform, 3098 LogicVRegister dst, 3099 const LogicVRegister& src1, 3100 const LogicVRegister& src2) { 3101 SimVRegister temp1, temp2; 3102 sxtl2(vform, temp1, src1); 3103 sxtl2(vform, temp2, src2); 3104 mul(vform, dst, temp1, temp2); 3105 return dst; 3106 } 3107 3108 3109 LogicVRegister Simulator::umlsl(VectorFormat vform, 3110 LogicVRegister dst, 3111 const LogicVRegister& src1, 3112 const LogicVRegister& src2) { 3113 SimVRegister temp1, temp2; 3114 uxtl(vform, temp1, src1); 3115 uxtl(vform, temp2, src2); 3116 mls(vform, dst, temp1, temp2); 3117 return dst; 3118 } 3119 3120 3121 LogicVRegister Simulator::umlsl2(VectorFormat vform, 3122 LogicVRegister dst, 3123 const LogicVRegister& src1, 3124 const LogicVRegister& src2) { 3125 SimVRegister temp1, temp2; 3126 uxtl2(vform, temp1, src1); 3127 uxtl2(vform, temp2, src2); 3128 mls(vform, dst, temp1, temp2); 3129 return dst; 3130 } 3131 3132 3133 LogicVRegister Simulator::smlsl(VectorFormat vform, 3134 LogicVRegister dst, 3135 const LogicVRegister& src1, 3136 const LogicVRegister& src2) { 3137 SimVRegister temp1, temp2; 3138 sxtl(vform, temp1, src1); 3139 sxtl(vform, temp2, src2); 3140 mls(vform, dst, temp1, temp2); 3141 return dst; 3142 } 3143 3144 3145 LogicVRegister 
Simulator::smlsl2(VectorFormat vform, 3146 LogicVRegister dst, 3147 const LogicVRegister& src1, 3148 const LogicVRegister& src2) { 3149 SimVRegister temp1, temp2; 3150 sxtl2(vform, temp1, src1); 3151 sxtl2(vform, temp2, src2); 3152 mls(vform, dst, temp1, temp2); 3153 return dst; 3154 } 3155 3156 3157 LogicVRegister Simulator::umlal(VectorFormat vform, 3158 LogicVRegister dst, 3159 const LogicVRegister& src1, 3160 const LogicVRegister& src2) { 3161 SimVRegister temp1, temp2; 3162 uxtl(vform, temp1, src1); 3163 uxtl(vform, temp2, src2); 3164 mla(vform, dst, temp1, temp2); 3165 return dst; 3166 } 3167 3168 3169 LogicVRegister Simulator::umlal2(VectorFormat vform, 3170 LogicVRegister dst, 3171 const LogicVRegister& src1, 3172 const LogicVRegister& src2) { 3173 SimVRegister temp1, temp2; 3174 uxtl2(vform, temp1, src1); 3175 uxtl2(vform, temp2, src2); 3176 mla(vform, dst, temp1, temp2); 3177 return dst; 3178 } 3179 3180 3181 LogicVRegister Simulator::smlal(VectorFormat vform, 3182 LogicVRegister dst, 3183 const LogicVRegister& src1, 3184 const LogicVRegister& src2) { 3185 SimVRegister temp1, temp2; 3186 sxtl(vform, temp1, src1); 3187 sxtl(vform, temp2, src2); 3188 mla(vform, dst, temp1, temp2); 3189 return dst; 3190 } 3191 3192 3193 LogicVRegister Simulator::smlal2(VectorFormat vform, 3194 LogicVRegister dst, 3195 const LogicVRegister& src1, 3196 const LogicVRegister& src2) { 3197 SimVRegister temp1, temp2; 3198 sxtl2(vform, temp1, src1); 3199 sxtl2(vform, temp2, src2); 3200 mla(vform, dst, temp1, temp2); 3201 return dst; 3202 } 3203 3204 3205 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3206 LogicVRegister dst, 3207 const LogicVRegister& src1, 3208 const LogicVRegister& src2) { 3209 SimVRegister temp; 3210 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3211 return add(vform, dst, dst, product).SignedSaturate(vform); 3212 } 3213 3214 3215 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3216 LogicVRegister dst, 3217 const LogicVRegister& src1, 
3218 const LogicVRegister& src2) { 3219 SimVRegister temp; 3220 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3221 return add(vform, dst, dst, product).SignedSaturate(vform); 3222 } 3223 3224 3225 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3226 LogicVRegister dst, 3227 const LogicVRegister& src1, 3228 const LogicVRegister& src2) { 3229 SimVRegister temp; 3230 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3231 return sub(vform, dst, dst, product).SignedSaturate(vform); 3232 } 3233 3234 3235 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3236 LogicVRegister dst, 3237 const LogicVRegister& src1, 3238 const LogicVRegister& src2) { 3239 SimVRegister temp; 3240 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3241 return sub(vform, dst, dst, product).SignedSaturate(vform); 3242 } 3243 3244 3245 LogicVRegister Simulator::sqdmull(VectorFormat vform, 3246 LogicVRegister dst, 3247 const LogicVRegister& src1, 3248 const LogicVRegister& src2) { 3249 SimVRegister temp; 3250 LogicVRegister product = smull(vform, temp, src1, src2); 3251 return add(vform, dst, product, product).SignedSaturate(vform); 3252 } 3253 3254 3255 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3256 LogicVRegister dst, 3257 const LogicVRegister& src1, 3258 const LogicVRegister& src2) { 3259 SimVRegister temp; 3260 LogicVRegister product = smull2(vform, temp, src1, src2); 3261 return add(vform, dst, product, product).SignedSaturate(vform); 3262 } 3263 3264 3265 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3266 LogicVRegister dst, 3267 const LogicVRegister& src1, 3268 const LogicVRegister& src2, 3269 bool round) { 3270 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow. 3271 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 3272 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 3273 3274 int esize = LaneSizeInBitsFromFormat(vform); 3275 int round_const = round ? 
(1 << (esize - 2)) : 0; 3276 int64_t product; 3277 3278 dst.ClearForWrite(vform); 3279 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3280 product = src1.Int(vform, i) * src2.Int(vform, i); 3281 product += round_const; 3282 product = product >> (esize - 1); 3283 3284 if (product > MaxIntFromFormat(vform)) { 3285 product = MaxIntFromFormat(vform); 3286 } else if (product < MinIntFromFormat(vform)) { 3287 product = MinIntFromFormat(vform); 3288 } 3289 dst.SetInt(vform, i, product); 3290 } 3291 return dst; 3292 } 3293 3294 3295 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 3296 LogicVRegister dst, 3297 const LogicVRegister& src1, 3298 const LogicVRegister& src2) { 3299 return sqrdmulh(vform, dst, src1, src2, false); 3300 } 3301 3302 3303 LogicVRegister Simulator::addhn(VectorFormat vform, 3304 LogicVRegister dst, 3305 const LogicVRegister& src1, 3306 const LogicVRegister& src2) { 3307 SimVRegister temp; 3308 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3309 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3310 return dst; 3311 } 3312 3313 3314 LogicVRegister Simulator::addhn2(VectorFormat vform, 3315 LogicVRegister dst, 3316 const LogicVRegister& src1, 3317 const LogicVRegister& src2) { 3318 SimVRegister temp; 3319 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3320 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3321 return dst; 3322 } 3323 3324 3325 LogicVRegister Simulator::raddhn(VectorFormat vform, 3326 LogicVRegister dst, 3327 const LogicVRegister& src1, 3328 const LogicVRegister& src2) { 3329 SimVRegister temp; 3330 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3331 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3332 return dst; 3333 } 3334 3335 3336 LogicVRegister Simulator::raddhn2(VectorFormat vform, 3337 LogicVRegister dst, 3338 const LogicVRegister& src1, 3339 const LogicVRegister& src2) { 3340 SimVRegister temp; 3341 
add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3342 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3343 return dst; 3344 } 3345 3346 3347 LogicVRegister Simulator::subhn(VectorFormat vform, 3348 LogicVRegister dst, 3349 const LogicVRegister& src1, 3350 const LogicVRegister& src2) { 3351 SimVRegister temp; 3352 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3353 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3354 return dst; 3355 } 3356 3357 3358 LogicVRegister Simulator::subhn2(VectorFormat vform, 3359 LogicVRegister dst, 3360 const LogicVRegister& src1, 3361 const LogicVRegister& src2) { 3362 SimVRegister temp; 3363 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3364 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3365 return dst; 3366 } 3367 3368 3369 LogicVRegister Simulator::rsubhn(VectorFormat vform, 3370 LogicVRegister dst, 3371 const LogicVRegister& src1, 3372 const LogicVRegister& src2) { 3373 SimVRegister temp; 3374 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3375 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3376 return dst; 3377 } 3378 3379 3380 LogicVRegister Simulator::rsubhn2(VectorFormat vform, 3381 LogicVRegister dst, 3382 const LogicVRegister& src1, 3383 const LogicVRegister& src2) { 3384 SimVRegister temp; 3385 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3386 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3387 return dst; 3388 } 3389 3390 3391 LogicVRegister Simulator::trn1(VectorFormat vform, 3392 LogicVRegister dst, 3393 const LogicVRegister& src1, 3394 const LogicVRegister& src2) { 3395 uint64_t result[16]; 3396 int laneCount = LaneCountFromFormat(vform); 3397 int pairs = laneCount / 2; 3398 for (int i = 0; i < pairs; ++i) { 3399 result[2 * i] = src1.Uint(vform, 2 * i); 3400 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 3401 } 3402 3403 dst.ClearForWrite(vform); 3404 for (int 
i = 0; i < laneCount; ++i) { 3405 dst.SetUint(vform, i, result[i]); 3406 } 3407 return dst; 3408 } 3409 3410 3411 LogicVRegister Simulator::trn2(VectorFormat vform, 3412 LogicVRegister dst, 3413 const LogicVRegister& src1, 3414 const LogicVRegister& src2) { 3415 uint64_t result[16]; 3416 int laneCount = LaneCountFromFormat(vform); 3417 int pairs = laneCount / 2; 3418 for (int i = 0; i < pairs; ++i) { 3419 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 3420 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 3421 } 3422 3423 dst.ClearForWrite(vform); 3424 for (int i = 0; i < laneCount; ++i) { 3425 dst.SetUint(vform, i, result[i]); 3426 } 3427 return dst; 3428 } 3429 3430 3431 LogicVRegister Simulator::zip1(VectorFormat vform, 3432 LogicVRegister dst, 3433 const LogicVRegister& src1, 3434 const LogicVRegister& src2) { 3435 uint64_t result[16]; 3436 int laneCount = LaneCountFromFormat(vform); 3437 int pairs = laneCount / 2; 3438 for (int i = 0; i < pairs; ++i) { 3439 result[2 * i] = src1.Uint(vform, i); 3440 result[(2 * i) + 1] = src2.Uint(vform, i); 3441 } 3442 3443 dst.ClearForWrite(vform); 3444 for (int i = 0; i < laneCount; ++i) { 3445 dst.SetUint(vform, i, result[i]); 3446 } 3447 return dst; 3448 } 3449 3450 3451 LogicVRegister Simulator::zip2(VectorFormat vform, 3452 LogicVRegister dst, 3453 const LogicVRegister& src1, 3454 const LogicVRegister& src2) { 3455 uint64_t result[16]; 3456 int laneCount = LaneCountFromFormat(vform); 3457 int pairs = laneCount / 2; 3458 for (int i = 0; i < pairs; ++i) { 3459 result[2 * i] = src1.Uint(vform, pairs + i); 3460 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 3461 } 3462 3463 dst.ClearForWrite(vform); 3464 for (int i = 0; i < laneCount; ++i) { 3465 dst.SetUint(vform, i, result[i]); 3466 } 3467 return dst; 3468 } 3469 3470 3471 LogicVRegister Simulator::uzp1(VectorFormat vform, 3472 LogicVRegister dst, 3473 const LogicVRegister& src1, 3474 const LogicVRegister& src2) { 3475 uint64_t result[32]; 3476 int laneCount = 
LaneCountFromFormat(vform); 3477 for (int i = 0; i < laneCount; ++i) { 3478 result[i] = src1.Uint(vform, i); 3479 result[laneCount + i] = src2.Uint(vform, i); 3480 } 3481 3482 dst.ClearForWrite(vform); 3483 for (int i = 0; i < laneCount; ++i) { 3484 dst.SetUint(vform, i, result[2 * i]); 3485 } 3486 return dst; 3487 } 3488 3489 3490 LogicVRegister Simulator::uzp2(VectorFormat vform, 3491 LogicVRegister dst, 3492 const LogicVRegister& src1, 3493 const LogicVRegister& src2) { 3494 uint64_t result[32]; 3495 int laneCount = LaneCountFromFormat(vform); 3496 for (int i = 0; i < laneCount; ++i) { 3497 result[i] = src1.Uint(vform, i); 3498 result[laneCount + i] = src2.Uint(vform, i); 3499 } 3500 3501 dst.ClearForWrite(vform); 3502 for (int i = 0; i < laneCount; ++i) { 3503 dst.SetUint(vform, i, result[ (2 * i) + 1]); 3504 } 3505 return dst; 3506 } 3507 3508 3509 template <typename T> 3510 T Simulator::FPAdd(T op1, T op2) { 3511 T result = FPProcessNaNs(op1, op2); 3512 if (std::isnan(result)) return result; 3513 3514 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { 3515 // inf + -inf returns the default NaN. 3516 FPProcessException(); 3517 return FPDefaultNaN<T>(); 3518 } else { 3519 // Other cases should be handled by standard arithmetic. 3520 return op1 + op2; 3521 } 3522 } 3523 3524 3525 template <typename T> 3526 T Simulator::FPSub(T op1, T op2) { 3527 // NaNs should be handled elsewhere. 3528 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3529 3530 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { 3531 // inf - inf returns the default NaN. 3532 FPProcessException(); 3533 return FPDefaultNaN<T>(); 3534 } else { 3535 // Other cases should be handled by standard arithmetic. 3536 return op1 - op2; 3537 } 3538 } 3539 3540 3541 template <typename T> 3542 T Simulator::FPMul(T op1, T op2) { 3543 // NaNs should be handled elsewhere. 
3544 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3545 3546 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3547 // inf * 0.0 returns the default NaN. 3548 FPProcessException(); 3549 return FPDefaultNaN<T>(); 3550 } else { 3551 // Other cases should be handled by standard arithmetic. 3552 return op1 * op2; 3553 } 3554 } 3555 3556 3557 template<typename T> 3558 T Simulator::FPMulx(T op1, T op2) { 3559 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 3560 // inf * 0.0 returns +/-2.0. 3561 T two = 2.0; 3562 return copysign(1.0, op1) * copysign(1.0, op2) * two; 3563 } 3564 return FPMul(op1, op2); 3565 } 3566 3567 3568 template<typename T> 3569 T Simulator::FPMulAdd(T a, T op1, T op2) { 3570 T result = FPProcessNaNs3(a, op1, op2); 3571 3572 T sign_a = copysign(1.0, a); 3573 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2); 3574 bool isinf_prod = std::isinf(op1) || std::isinf(op2); 3575 bool operation_generates_nan = 3576 (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0 3577 (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf 3578 (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf 3579 3580 if (std::isnan(result)) { 3581 // Generated NaNs override quiet NaNs propagated from a. 3582 if (operation_generates_nan && IsQuietNaN(a)) { 3583 FPProcessException(); 3584 return FPDefaultNaN<T>(); 3585 } else { 3586 return result; 3587 } 3588 } 3589 3590 // If the operation would produce a NaN, return the default NaN. 3591 if (operation_generates_nan) { 3592 FPProcessException(); 3593 return FPDefaultNaN<T>(); 3594 } 3595 3596 // Work around broken fma implementations for exact zero results: The sign of 3597 // exact 0.0 results is positive unless both a and op1 * op2 are negative. 3598 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) { 3599 return ((sign_a < 0) && (sign_prod < 0)) ? 
-0.0 : 0.0; 3600 } 3601 3602 result = FusedMultiplyAdd(op1, op2, a); 3603 VIXL_ASSERT(!std::isnan(result)); 3604 3605 // Work around broken fma implementations for rounded zero results: If a is 3606 // 0.0, the sign of the result is the sign of op1 * op2 before rounding. 3607 if ((a == 0.0) && (result == 0.0)) { 3608 return copysign(0.0, sign_prod); 3609 } 3610 3611 return result; 3612 } 3613 3614 3615 template <typename T> 3616 T Simulator::FPDiv(T op1, T op2) { 3617 // NaNs should be handled elsewhere. 3618 VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2)); 3619 3620 if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) { 3621 // inf / inf and 0.0 / 0.0 return the default NaN. 3622 FPProcessException(); 3623 return FPDefaultNaN<T>(); 3624 } else { 3625 if (op2 == 0.0) FPProcessException(); 3626 3627 // Other cases should be handled by standard arithmetic. 3628 return op1 / op2; 3629 } 3630 } 3631 3632 3633 template <typename T> 3634 T Simulator::FPSqrt(T op) { 3635 if (std::isnan(op)) { 3636 return FPProcessNaN(op); 3637 } else if (op < 0.0) { 3638 FPProcessException(); 3639 return FPDefaultNaN<T>(); 3640 } else { 3641 return sqrt(op); 3642 } 3643 } 3644 3645 3646 template <typename T> 3647 T Simulator::FPMax(T a, T b) { 3648 T result = FPProcessNaNs(a, b); 3649 if (std::isnan(result)) return result; 3650 3651 if ((a == 0.0) && (b == 0.0) && 3652 (copysign(1.0, a) != copysign(1.0, b))) { 3653 // a and b are zero, and the sign differs: return +0.0. 3654 return 0.0; 3655 } else { 3656 return (a > b) ? a : b; 3657 } 3658 } 3659 3660 3661 template <typename T> 3662 T Simulator::FPMaxNM(T a, T b) { 3663 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3664 a = kFP64NegativeInfinity; 3665 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3666 b = kFP64NegativeInfinity; 3667 } 3668 3669 T result = FPProcessNaNs(a, b); 3670 return std::isnan(result) ? 
result : FPMax(a, b); 3671 } 3672 3673 3674 template <typename T> 3675 T Simulator::FPMin(T a, T b) { 3676 T result = FPProcessNaNs(a, b); 3677 if (std::isnan(result)) return result; 3678 3679 if ((a == 0.0) && (b == 0.0) && 3680 (copysign(1.0, a) != copysign(1.0, b))) { 3681 // a and b are zero, and the sign differs: return -0.0. 3682 return -0.0; 3683 } else { 3684 return (a < b) ? a : b; 3685 } 3686 } 3687 3688 3689 template <typename T> 3690 T Simulator::FPMinNM(T a, T b) { 3691 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3692 a = kFP64PositiveInfinity; 3693 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3694 b = kFP64PositiveInfinity; 3695 } 3696 3697 T result = FPProcessNaNs(a, b); 3698 return std::isnan(result) ? result : FPMin(a, b); 3699 } 3700 3701 3702 template <typename T> 3703 T Simulator::FPRecipStepFused(T op1, T op2) { 3704 const T two = 2.0; 3705 if ((std::isinf(op1) && (op2 == 0.0)) 3706 || ((op1 == 0.0) && (std::isinf(op2)))) { 3707 return two; 3708 } else if (std::isinf(op1) || std::isinf(op2)) { 3709 // Return +inf if signs match, otherwise -inf. 3710 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3711 : kFP64NegativeInfinity; 3712 } else { 3713 return FusedMultiplyAdd(op1, op2, two); 3714 } 3715 } 3716 3717 3718 template <typename T> 3719 T Simulator::FPRSqrtStepFused(T op1, T op2) { 3720 const T one_point_five = 1.5; 3721 const T two = 2.0; 3722 3723 if ((std::isinf(op1) && (op2 == 0.0)) 3724 || ((op1 == 0.0) && (std::isinf(op2)))) { 3725 return one_point_five; 3726 } else if (std::isinf(op1) || std::isinf(op2)) { 3727 // Return +inf if signs match, otherwise -inf. 3728 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3729 : kFP64NegativeInfinity; 3730 } else { 3731 // The multiply-add-halve operation must be fully fused, so avoid interim 3732 // rounding by checking which operand can be losslessly divided by two 3733 // before doing the multiply-add. 
3734 if (std::isnormal(op1 / two)) { 3735 return FusedMultiplyAdd(op1 / two, op2, one_point_five); 3736 } else if (std::isnormal(op2 / two)) { 3737 return FusedMultiplyAdd(op1, op2 / two, one_point_five); 3738 } else { 3739 // Neither operand is normal after halving: the result is dominated by 3740 // the addition term, so just return that. 3741 return one_point_five; 3742 } 3743 } 3744 } 3745 3746 3747 double Simulator::FPRoundInt(double value, FPRounding round_mode) { 3748 if ((value == 0.0) || (value == kFP64PositiveInfinity) || 3749 (value == kFP64NegativeInfinity)) { 3750 return value; 3751 } else if (std::isnan(value)) { 3752 return FPProcessNaN(value); 3753 } 3754 3755 double int_result = std::floor(value); 3756 double error = value - int_result; 3757 switch (round_mode) { 3758 case FPTieAway: { 3759 // Take care of correctly handling the range ]-0.5, -0.0], which must 3760 // yield -0.0. 3761 if ((-0.5 < value) && (value < 0.0)) { 3762 int_result = -0.0; 3763 3764 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) { 3765 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3766 // result is positive, round up. 3767 int_result++; 3768 } 3769 break; 3770 } 3771 case FPTieEven: { 3772 // Take care of correctly handling the range [-0.5, -0.0], which must 3773 // yield -0.0. 3774 if ((-0.5 <= value) && (value < 0.0)) { 3775 int_result = -0.0; 3776 3777 // If the error is greater than 0.5, or is equal to 0.5 and the integer 3778 // result is odd, round up. 3779 } else if ((error > 0.5) || 3780 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 3781 int_result++; 3782 } 3783 break; 3784 } 3785 case FPZero: { 3786 // If value>0 then we take floor(value) 3787 // otherwise, ceil(value). 3788 if (value < 0) { 3789 int_result = ceil(value); 3790 } 3791 break; 3792 } 3793 case FPNegativeInfinity: { 3794 // We always use floor(value). 
3795 break; 3796 } 3797 case FPPositiveInfinity: { 3798 // Take care of correctly handling the range ]-1.0, -0.0], which must 3799 // yield -0.0. 3800 if ((-1.0 < value) && (value < 0.0)) { 3801 int_result = -0.0; 3802 3803 // If the error is non-zero, round up. 3804 } else if (error > 0.0) { 3805 int_result++; 3806 } 3807 break; 3808 } 3809 default: VIXL_UNIMPLEMENTED(); 3810 } 3811 return int_result; 3812 } 3813 3814 3815 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3816 value = FPRoundInt(value, rmode); 3817 if (value >= kWMaxInt) { 3818 return kWMaxInt; 3819 } else if (value < kWMinInt) { 3820 return kWMinInt; 3821 } 3822 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3823 } 3824 3825 3826 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3827 value = FPRoundInt(value, rmode); 3828 if (value >= kXMaxInt) { 3829 return kXMaxInt; 3830 } else if (value < kXMinInt) { 3831 return kXMinInt; 3832 } 3833 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3834 } 3835 3836 3837 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3838 value = FPRoundInt(value, rmode); 3839 if (value >= kWMaxUInt) { 3840 return kWMaxUInt; 3841 } else if (value < 0.0) { 3842 return 0; 3843 } 3844 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3845 } 3846 3847 3848 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3849 value = FPRoundInt(value, rmode); 3850 if (value >= kXMaxUInt) { 3851 return kXMaxUInt; 3852 } else if (value < 0.0) { 3853 return 0; 3854 } 3855 return std::isnan(value) ? 
0 : static_cast<uint64_t>(value); 3856 } 3857 3858 3859 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 3860 template <typename T> \ 3861 LogicVRegister Simulator::FN(VectorFormat vform, \ 3862 LogicVRegister dst, \ 3863 const LogicVRegister& src1, \ 3864 const LogicVRegister& src2) { \ 3865 dst.ClearForWrite(vform); \ 3866 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 3867 T op1 = src1.Float<T>(i); \ 3868 T op2 = src2.Float<T>(i); \ 3869 T result; \ 3870 if (PROCNAN) { \ 3871 result = FPProcessNaNs(op1, op2); \ 3872 if (!std::isnan(result)) { \ 3873 result = OP(op1, op2); \ 3874 } \ 3875 } else { \ 3876 result = OP(op1, op2); \ 3877 } \ 3878 dst.SetFloat(i, result); \ 3879 } \ 3880 return dst; \ 3881 } \ 3882 \ 3883 LogicVRegister Simulator::FN(VectorFormat vform, \ 3884 LogicVRegister dst, \ 3885 const LogicVRegister& src1, \ 3886 const LogicVRegister& src2) { \ 3887 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 3888 FN<float>(vform, dst, src1, src2); \ 3889 } else { \ 3890 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 3891 FN<double>(vform, dst, src1, src2); \ 3892 } \ 3893 return dst; \ 3894 } 3895 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 3896 #undef DEFINE_NEON_FP_VECTOR_OP 3897 3898 3899 LogicVRegister Simulator::fnmul(VectorFormat vform, 3900 LogicVRegister dst, 3901 const LogicVRegister& src1, 3902 const LogicVRegister& src2) { 3903 SimVRegister temp; 3904 LogicVRegister product = fmul(vform, temp, src1, src2); 3905 return fneg(vform, dst, product); 3906 } 3907 3908 3909 template <typename T> 3910 LogicVRegister Simulator::frecps(VectorFormat vform, 3911 LogicVRegister dst, 3912 const LogicVRegister& src1, 3913 const LogicVRegister& src2) { 3914 dst.ClearForWrite(vform); 3915 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3916 T op1 = -src1.Float<T>(i); 3917 T op2 = src2.Float<T>(i); 3918 T result = FPProcessNaNs(op1, op2); 3919 dst.SetFloat(i, std::isnan(result) ? 
result : FPRecipStepFused(op1, op2)); 3920 } 3921 return dst; 3922 } 3923 3924 3925 LogicVRegister Simulator::frecps(VectorFormat vform, 3926 LogicVRegister dst, 3927 const LogicVRegister& src1, 3928 const LogicVRegister& src2) { 3929 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3930 frecps<float>(vform, dst, src1, src2); 3931 } else { 3932 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3933 frecps<double>(vform, dst, src1, src2); 3934 } 3935 return dst; 3936 } 3937 3938 3939 template <typename T> 3940 LogicVRegister Simulator::frsqrts(VectorFormat vform, 3941 LogicVRegister dst, 3942 const LogicVRegister& src1, 3943 const LogicVRegister& src2) { 3944 dst.ClearForWrite(vform); 3945 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3946 T op1 = -src1.Float<T>(i); 3947 T op2 = src2.Float<T>(i); 3948 T result = FPProcessNaNs(op1, op2); 3949 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); 3950 } 3951 return dst; 3952 } 3953 3954 3955 LogicVRegister Simulator::frsqrts(VectorFormat vform, 3956 LogicVRegister dst, 3957 const LogicVRegister& src1, 3958 const LogicVRegister& src2) { 3959 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 3960 frsqrts<float>(vform, dst, src1, src2); 3961 } else { 3962 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 3963 frsqrts<double>(vform, dst, src1, src2); 3964 } 3965 return dst; 3966 } 3967 3968 3969 template <typename T> 3970 LogicVRegister Simulator::fcmp(VectorFormat vform, 3971 LogicVRegister dst, 3972 const LogicVRegister& src1, 3973 const LogicVRegister& src2, 3974 Condition cond) { 3975 dst.ClearForWrite(vform); 3976 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3977 bool result = false; 3978 T op1 = src1.Float<T>(i); 3979 T op2 = src2.Float<T>(i); 3980 T nan_result = FPProcessNaNs(op1, op2); 3981 if (!std::isnan(nan_result)) { 3982 switch (cond) { 3983 case eq: result = (op1 == op2); break; 3984 case ge: result = (op1 >= op2); break; 3985 case gt: result = 
(op1 > op2) ; break; 3986 case le: result = (op1 <= op2); break; 3987 case lt: result = (op1 < op2) ; break; 3988 default: VIXL_UNREACHABLE(); break; 3989 } 3990 } 3991 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 3992 } 3993 return dst; 3994 } 3995 3996 3997 LogicVRegister Simulator::fcmp(VectorFormat vform, 3998 LogicVRegister dst, 3999 const LogicVRegister& src1, 4000 const LogicVRegister& src2, 4001 Condition cond) { 4002 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4003 fcmp<float>(vform, dst, src1, src2, cond); 4004 } else { 4005 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4006 fcmp<double>(vform, dst, src1, src2, cond); 4007 } 4008 return dst; 4009 } 4010 4011 4012 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 4013 LogicVRegister dst, 4014 const LogicVRegister& src, 4015 Condition cond) { 4016 SimVRegister temp; 4017 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4018 LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0)); 4019 fcmp<float>(vform, dst, src, zero_reg, cond); 4020 } else { 4021 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4022 LogicVRegister zero_reg = dup_immediate(vform, temp, 4023 double_to_rawbits(0.0)); 4024 fcmp<double>(vform, dst, src, zero_reg, cond); 4025 } 4026 return dst; 4027 } 4028 4029 4030 LogicVRegister Simulator::fabscmp(VectorFormat vform, 4031 LogicVRegister dst, 4032 const LogicVRegister& src1, 4033 const LogicVRegister& src2, 4034 Condition cond) { 4035 SimVRegister temp1, temp2; 4036 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4037 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 4038 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 4039 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 4040 } else { 4041 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4042 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 4043 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); 4044 
fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 4045 } 4046 return dst; 4047 } 4048 4049 4050 template <typename T> 4051 LogicVRegister Simulator::fmla(VectorFormat vform, 4052 LogicVRegister dst, 4053 const LogicVRegister& src1, 4054 const LogicVRegister& src2) { 4055 dst.ClearForWrite(vform); 4056 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4057 T op1 = src1.Float<T>(i); 4058 T op2 = src2.Float<T>(i); 4059 T acc = dst.Float<T>(i); 4060 T result = FPMulAdd(acc, op1, op2); 4061 dst.SetFloat(i, result); 4062 } 4063 return dst; 4064 } 4065 4066 4067 LogicVRegister Simulator::fmla(VectorFormat vform, 4068 LogicVRegister dst, 4069 const LogicVRegister& src1, 4070 const LogicVRegister& src2) { 4071 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4072 fmla<float>(vform, dst, src1, src2); 4073 } else { 4074 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4075 fmla<double>(vform, dst, src1, src2); 4076 } 4077 return dst; 4078 } 4079 4080 4081 template <typename T> 4082 LogicVRegister Simulator::fmls(VectorFormat vform, 4083 LogicVRegister dst, 4084 const LogicVRegister& src1, 4085 const LogicVRegister& src2) { 4086 dst.ClearForWrite(vform); 4087 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4088 T op1 = -src1.Float<T>(i); 4089 T op2 = src2.Float<T>(i); 4090 T acc = dst.Float<T>(i); 4091 T result = FPMulAdd(acc, op1, op2); 4092 dst.SetFloat(i, result); 4093 } 4094 return dst; 4095 } 4096 4097 4098 LogicVRegister Simulator::fmls(VectorFormat vform, 4099 LogicVRegister dst, 4100 const LogicVRegister& src1, 4101 const LogicVRegister& src2) { 4102 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4103 fmls<float>(vform, dst, src1, src2); 4104 } else { 4105 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4106 fmls<double>(vform, dst, src1, src2); 4107 } 4108 return dst; 4109 } 4110 4111 4112 template <typename T> 4113 LogicVRegister Simulator::fneg(VectorFormat vform, 4114 LogicVRegister dst, 4115 const LogicVRegister& src) { 
4116 dst.ClearForWrite(vform); 4117 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4118 T op = src.Float<T>(i); 4119 op = -op; 4120 dst.SetFloat(i, op); 4121 } 4122 return dst; 4123 } 4124 4125 4126 LogicVRegister Simulator::fneg(VectorFormat vform, 4127 LogicVRegister dst, 4128 const LogicVRegister& src) { 4129 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4130 fneg<float>(vform, dst, src); 4131 } else { 4132 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4133 fneg<double>(vform, dst, src); 4134 } 4135 return dst; 4136 } 4137 4138 4139 template <typename T> 4140 LogicVRegister Simulator::fabs_(VectorFormat vform, 4141 LogicVRegister dst, 4142 const LogicVRegister& src) { 4143 dst.ClearForWrite(vform); 4144 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4145 T op = src.Float<T>(i); 4146 if (copysign(1.0, op) < 0.0) { 4147 op = -op; 4148 } 4149 dst.SetFloat(i, op); 4150 } 4151 return dst; 4152 } 4153 4154 4155 LogicVRegister Simulator::fabs_(VectorFormat vform, 4156 LogicVRegister dst, 4157 const LogicVRegister& src) { 4158 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4159 fabs_<float>(vform, dst, src); 4160 } else { 4161 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4162 fabs_<double>(vform, dst, src); 4163 } 4164 return dst; 4165 } 4166 4167 4168 LogicVRegister Simulator::fabd(VectorFormat vform, 4169 LogicVRegister dst, 4170 const LogicVRegister& src1, 4171 const LogicVRegister& src2) { 4172 SimVRegister temp; 4173 fsub(vform, temp, src1, src2); 4174 fabs_(vform, dst, temp); 4175 return dst; 4176 } 4177 4178 4179 LogicVRegister Simulator::fsqrt(VectorFormat vform, 4180 LogicVRegister dst, 4181 const LogicVRegister& src) { 4182 dst.ClearForWrite(vform); 4183 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4184 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4185 float result = FPSqrt(src.Float<float>(i)); 4186 dst.SetFloat(i, result); 4187 } 4188 } else { 4189 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4190 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4191 double result = FPSqrt(src.Float<double>(i)); 4192 dst.SetFloat(i, result); 4193 } 4194 } 4195 return dst; 4196 } 4197 4198 4199 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4200 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4201 LogicVRegister dst, \ 4202 const LogicVRegister& src1, \ 4203 const LogicVRegister& src2) { \ 4204 SimVRegister temp1, temp2; \ 4205 uzp1(vform, temp1, src1, src2); \ 4206 uzp2(vform, temp2, src1, src2); \ 4207 FN(vform, dst, temp1, temp2); \ 4208 return dst; \ 4209 } \ 4210 \ 4211 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4212 LogicVRegister dst, \ 4213 const LogicVRegister& src) { \ 4214 if (vform == kFormatS) { \ 4215 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4216 dst.SetFloat(0, result); \ 4217 } else { \ 4218 VIXL_ASSERT(vform == kFormatD); \ 4219 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4220 dst.SetFloat(0, result); \ 4221 } \ 4222 dst.ClearForWrite(vform); \ 4223 return dst; \ 4224 } 4225 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4226 #undef DEFINE_NEON_FP_PAIR_OP 4227 4228 4229 LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4230 LogicVRegister dst, 4231 const LogicVRegister& src, 4232 FPMinMaxOp Op) { 4233 VIXL_ASSERT(vform == kFormat4S); 4234 USE(vform); 4235 float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1)); 4236 float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3)); 4237 float result = (this->*Op)(result1, result2); 4238 dst.ClearForWrite(kFormatS); 4239 dst.SetFloat<float>(0, result); 4240 return dst; 4241 } 4242 4243 4244 LogicVRegister Simulator::fmaxv(VectorFormat vform, 4245 LogicVRegister dst, 4246 const LogicVRegister& src) { 4247 return fminmaxv(vform, dst, src, &Simulator::FPMax); 4248 } 4249 4250 4251 LogicVRegister Simulator::fminv(VectorFormat vform, 4252 LogicVRegister dst, 4253 const 
LogicVRegister& src) { 4254 return fminmaxv(vform, dst, src, &Simulator::FPMin); 4255 } 4256 4257 4258 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 4259 LogicVRegister dst, 4260 const LogicVRegister& src) { 4261 return fminmaxv(vform, dst, src, &Simulator::FPMaxNM); 4262 } 4263 4264 4265 LogicVRegister Simulator::fminnmv(VectorFormat vform, 4266 LogicVRegister dst, 4267 const LogicVRegister& src) { 4268 return fminmaxv(vform, dst, src, &Simulator::FPMinNM); 4269 } 4270 4271 4272 LogicVRegister Simulator::fmul(VectorFormat vform, 4273 LogicVRegister dst, 4274 const LogicVRegister& src1, 4275 const LogicVRegister& src2, 4276 int index) { 4277 dst.ClearForWrite(vform); 4278 SimVRegister temp; 4279 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4280 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4281 fmul<float>(vform, dst, src1, index_reg); 4282 4283 } else { 4284 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4285 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4286 fmul<double>(vform, dst, src1, index_reg); 4287 } 4288 return dst; 4289 } 4290 4291 4292 LogicVRegister Simulator::fmla(VectorFormat vform, 4293 LogicVRegister dst, 4294 const LogicVRegister& src1, 4295 const LogicVRegister& src2, 4296 int index) { 4297 dst.ClearForWrite(vform); 4298 SimVRegister temp; 4299 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4300 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4301 fmla<float>(vform, dst, src1, index_reg); 4302 4303 } else { 4304 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4305 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4306 fmla<double>(vform, dst, src1, index_reg); 4307 } 4308 return dst; 4309 } 4310 4311 4312 LogicVRegister Simulator::fmls(VectorFormat vform, 4313 LogicVRegister dst, 4314 const LogicVRegister& src1, 4315 const LogicVRegister& src2, 4316 int index) { 4317 dst.ClearForWrite(vform); 4318 SimVRegister temp; 
4319 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4320 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4321 fmls<float>(vform, dst, src1, index_reg); 4322 4323 } else { 4324 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4325 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4326 fmls<double>(vform, dst, src1, index_reg); 4327 } 4328 return dst; 4329 } 4330 4331 4332 LogicVRegister Simulator::fmulx(VectorFormat vform, 4333 LogicVRegister dst, 4334 const LogicVRegister& src1, 4335 const LogicVRegister& src2, 4336 int index) { 4337 dst.ClearForWrite(vform); 4338 SimVRegister temp; 4339 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4340 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4341 fmulx<float>(vform, dst, src1, index_reg); 4342 4343 } else { 4344 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4345 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4346 fmulx<double>(vform, dst, src1, index_reg); 4347 } 4348 return dst; 4349 } 4350 4351 4352 LogicVRegister Simulator::frint(VectorFormat vform, 4353 LogicVRegister dst, 4354 const LogicVRegister& src, 4355 FPRounding rounding_mode, 4356 bool inexact_exception) { 4357 dst.ClearForWrite(vform); 4358 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4359 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4360 float input = src.Float<float>(i); 4361 float rounded = FPRoundInt(input, rounding_mode); 4362 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4363 FPProcessException(); 4364 } 4365 dst.SetFloat<float>(i, rounded); 4366 } 4367 } else { 4368 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4369 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4370 double input = src.Float<double>(i); 4371 double rounded = FPRoundInt(input, rounding_mode); 4372 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 4373 FPProcessException(); 4374 } 4375 
dst.SetFloat<double>(i, rounded); 4376 } 4377 } 4378 return dst; 4379 } 4380 4381 4382 LogicVRegister Simulator::fcvts(VectorFormat vform, 4383 LogicVRegister dst, 4384 const LogicVRegister& src, 4385 FPRounding rounding_mode, 4386 int fbits) { 4387 dst.ClearForWrite(vform); 4388 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4389 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4390 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4391 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4392 } 4393 } else { 4394 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4395 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4396 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4397 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4398 } 4399 } 4400 return dst; 4401 } 4402 4403 4404 LogicVRegister Simulator::fcvtu(VectorFormat vform, 4405 LogicVRegister dst, 4406 const LogicVRegister& src, 4407 FPRounding rounding_mode, 4408 int fbits) { 4409 dst.ClearForWrite(vform); 4410 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4411 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4412 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4413 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4414 } 4415 } else { 4416 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4417 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4418 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4419 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4420 } 4421 } 4422 return dst; 4423 } 4424 4425 4426 LogicVRegister Simulator::fcvtl(VectorFormat vform, 4427 LogicVRegister dst, 4428 const LogicVRegister& src) { 4429 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4430 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4431 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); 4432 } 4433 } else { 4434 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4435 for (int i = LaneCountFromFormat(vform) - 1; i 
>= 0; i--) { 4436 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); 4437 } 4438 } 4439 return dst; 4440 } 4441 4442 4443 LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4444 LogicVRegister dst, 4445 const LogicVRegister& src) { 4446 int lane_count = LaneCountFromFormat(vform); 4447 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4448 for (int i = 0; i < lane_count; i++) { 4449 dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); 4450 } 4451 } else { 4452 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4453 for (int i = 0; i < lane_count; i++) { 4454 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); 4455 } 4456 } 4457 return dst; 4458 } 4459 4460 4461 LogicVRegister Simulator::fcvtn(VectorFormat vform, 4462 LogicVRegister dst, 4463 const LogicVRegister& src) { 4464 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4465 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4466 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); 4467 } 4468 } else { 4469 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4470 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4471 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); 4472 } 4473 } 4474 return dst; 4475 } 4476 4477 4478 LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4479 LogicVRegister dst, 4480 const LogicVRegister& src) { 4481 int lane_count = LaneCountFromFormat(vform) / 2; 4482 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4483 for (int i = lane_count - 1; i >= 0; i--) { 4484 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); 4485 } 4486 } else { 4487 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4488 for (int i = lane_count - 1; i >= 0; i--) { 4489 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); 4490 } 4491 } 4492 return dst; 4493 } 4494 4495 4496 LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4497 LogicVRegister dst, 4498 const LogicVRegister& src) { 4499 
dst.ClearForWrite(vform); 4500 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4501 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4502 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4503 } 4504 return dst; 4505 } 4506 4507 4508 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4509 LogicVRegister dst, 4510 const LogicVRegister& src) { 4511 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4512 int lane_count = LaneCountFromFormat(vform) / 2; 4513 for (int i = lane_count - 1; i >= 0; i--) { 4514 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); 4515 } 4516 return dst; 4517 } 4518 4519 4520 // Based on reference C function recip_sqrt_estimate from ARM ARM. 4521 double Simulator::recip_sqrt_estimate(double a) { 4522 int q0, q1, s; 4523 double r; 4524 if (a < 0.5) { 4525 q0 = static_cast<int>(a * 512.0); 4526 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4527 } else { 4528 q1 = static_cast<int>(a * 256.0); 4529 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4530 } 4531 s = static_cast<int>(256.0 * r + 0.5); 4532 return static_cast<double>(s) / 256.0; 4533 } 4534 4535 4536 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4537 return unsigned_bitextract_64(start_bit, end_bit, val); 4538 } 4539 4540 4541 template <typename T> 4542 T Simulator::FPRecipSqrtEstimate(T op) { 4543 if (std::isnan(op)) { 4544 return FPProcessNaN(op); 4545 } else if (op == 0.0) { 4546 if (copysign(1.0, op) < 0.0) { 4547 return kFP64NegativeInfinity; 4548 } else { 4549 return kFP64PositiveInfinity; 4550 } 4551 } else if (copysign(1.0, op) < 0.0) { 4552 FPProcessException(); 4553 return FPDefaultNaN<T>(); 4554 } else if (std::isinf(op)) { 4555 return 0.0; 4556 } else { 4557 uint64_t fraction; 4558 int exp, result_exp; 4559 4560 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4561 exp = float_exp(op); 4562 fraction = float_mantissa(op); 4563 fraction <<= 29; 4564 } else { 
4565 exp = double_exp(op); 4566 fraction = double_mantissa(op); 4567 } 4568 4569 if (exp == 0) { 4570 while (Bits(fraction, 51, 51) == 0) { 4571 fraction = Bits(fraction, 50, 0) << 1; 4572 exp -= 1; 4573 } 4574 fraction = Bits(fraction, 50, 0) << 1; 4575 } 4576 4577 double scaled; 4578 if (Bits(exp, 0, 0) == 0) { 4579 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); 4580 } else { 4581 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44); 4582 } 4583 4584 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4585 result_exp = (380 - exp) / 2; 4586 } else { 4587 result_exp = (3068 - exp) / 2; 4588 } 4589 4590 double estimate = recip_sqrt_estimate(scaled); 4591 4592 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4593 return float_pack(0, Bits(result_exp, 7, 0), 4594 Bits(double_to_rawbits(estimate), 51, 29)); 4595 } else { 4596 return double_pack(0, Bits(result_exp, 10, 0), 4597 Bits(double_to_rawbits(estimate), 51, 0)); 4598 } 4599 } 4600 } 4601 4602 4603 LogicVRegister Simulator::frsqrte(VectorFormat vform, 4604 LogicVRegister dst, 4605 const LogicVRegister& src) { 4606 dst.ClearForWrite(vform); 4607 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4608 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4609 float input = src.Float<float>(i); 4610 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 4611 } 4612 } else { 4613 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4614 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4615 double input = src.Float<double>(i); 4616 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 4617 } 4618 } 4619 return dst; 4620 } 4621 4622 template <typename T> 4623 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 4624 uint32_t sign; 4625 4626 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4627 sign = float_sign(op); 4628 } else { 4629 sign = double_sign(op); 4630 } 4631 4632 if (std::isnan(op)) { 4633 return FPProcessNaN(op); 4634 } else if 
(std::isinf(op)) { 4635 return (sign == 1) ? -0.0 : 0.0; 4636 } else if (op == 0.0) { 4637 FPProcessException(); // FPExc_DivideByZero exception. 4638 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4639 } else if (((sizeof(T) == sizeof(float)) && // NOLINT(runtime/sizeof) 4640 (std::fabs(op) < std::pow(2.0, -128.0))) || 4641 ((sizeof(T) == sizeof(double)) && // NOLINT(runtime/sizeof) 4642 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 4643 bool overflow_to_inf = false; 4644 switch (rounding) { 4645 case FPTieEven: overflow_to_inf = true; break; 4646 case FPPositiveInfinity: overflow_to_inf = (sign == 0); break; 4647 case FPNegativeInfinity: overflow_to_inf = (sign == 1); break; 4648 case FPZero: overflow_to_inf = false; break; 4649 default: break; 4650 } 4651 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 4652 if (overflow_to_inf) { 4653 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 4654 } else { 4655 // Return FPMaxNormal(sign). 4656 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4657 return float_pack(sign, 0xfe, 0x07fffff); 4658 } else { 4659 return double_pack(sign, 0x7fe, 0x0fffffffffffffl); 4660 } 4661 } 4662 } else { 4663 uint64_t fraction; 4664 int exp, result_exp; 4665 uint32_t sign; 4666 4667 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4668 sign = float_sign(op); 4669 exp = float_exp(op); 4670 fraction = float_mantissa(op); 4671 fraction <<= 29; 4672 } else { 4673 sign = double_sign(op); 4674 exp = double_exp(op); 4675 fraction = double_mantissa(op); 4676 } 4677 4678 if (exp == 0) { 4679 if (Bits(fraction, 51, 51) == 0) { 4680 exp -= 1; 4681 fraction = Bits(fraction, 49, 0) << 2; 4682 } else { 4683 fraction = Bits(fraction, 50, 0) << 1; 4684 } 4685 } 4686 4687 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); 4688 4689 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4690 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 
4691 } else { 4692 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 4693 } 4694 4695 double estimate = recip_estimate(scaled); 4696 4697 fraction = double_mantissa(estimate); 4698 if (result_exp == 0) { 4699 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 4700 } else if (result_exp == -1) { 4701 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4702 result_exp = 0; 4703 } 4704 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4705 return float_pack(sign, Bits(result_exp, 7, 0), Bits(fraction, 51, 29)); 4706 } else { 4707 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4708 } 4709 } 4710 } 4711 4712 4713 LogicVRegister Simulator::frecpe(VectorFormat vform, 4714 LogicVRegister dst, 4715 const LogicVRegister& src, 4716 FPRounding round) { 4717 dst.ClearForWrite(vform); 4718 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4719 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4720 float input = src.Float<float>(i); 4721 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4722 } 4723 } else { 4724 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4725 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4726 double input = src.Float<double>(i); 4727 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4728 } 4729 } 4730 return dst; 4731 } 4732 4733 4734 LogicVRegister Simulator::ursqrte(VectorFormat vform, 4735 LogicVRegister dst, 4736 const LogicVRegister& src) { 4737 dst.ClearForWrite(vform); 4738 uint32_t operand, result; 4739 double dp_operand, dp_result; 4740 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4741 operand = src.Uint(vform, i); 4742 if (operand <= 0x3FFFFFFF) { 4743 result = 0xFFFFFFFF; 4744 } else { 4745 dp_operand = operand * std::pow(2.0, -32); 4746 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4747 result = static_cast<uint32_t>(dp_result); 4748 } 4749 dst.SetUint(vform, i, result); 4750 } 4751 return dst; 4752 } 4753 4754 
4755 // Based on reference C function recip_estimate from ARM ARM. 4756 double Simulator::recip_estimate(double a) { 4757 int q, s; 4758 double r; 4759 q = static_cast<int>(a * 512.0); 4760 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4761 s = static_cast<int>(256.0 * r + 0.5); 4762 return static_cast<double>(s) / 256.0; 4763 } 4764 4765 4766 LogicVRegister Simulator::urecpe(VectorFormat vform, 4767 LogicVRegister dst, 4768 const LogicVRegister& src) { 4769 dst.ClearForWrite(vform); 4770 uint32_t operand, result; 4771 double dp_operand, dp_result; 4772 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4773 operand = src.Uint(vform, i); 4774 if (operand <= 0x7FFFFFFF) { 4775 result = 0xFFFFFFFF; 4776 } else { 4777 dp_operand = operand * std::pow(2.0, -32); 4778 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4779 result = static_cast<uint32_t>(dp_result); 4780 } 4781 dst.SetUint(vform, i, result); 4782 } 4783 return dst; 4784 } 4785 4786 template <typename T> 4787 LogicVRegister Simulator::frecpx(VectorFormat vform, 4788 LogicVRegister dst, 4789 const LogicVRegister& src) { 4790 dst.ClearForWrite(vform); 4791 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4792 T op = src.Float<T>(i); 4793 T result; 4794 if (std::isnan(op)) { 4795 result = FPProcessNaN(op); 4796 } else { 4797 int exp; 4798 uint32_t sign; 4799 if (sizeof(T) == sizeof(float)) { // NOLINT(runtime/sizeof) 4800 sign = float_sign(op); 4801 exp = float_exp(op); 4802 exp = (exp == 0) ? (0xFF - 1) : Bits(~exp, 7, 0); 4803 result = float_pack(sign, exp, 0); 4804 } else { 4805 sign = double_sign(op); 4806 exp = double_exp(op); 4807 exp = (exp == 0) ? 
(0x7FF - 1) : Bits(~exp, 10, 0); 4808 result = double_pack(sign, exp, 0); 4809 } 4810 } 4811 dst.SetFloat(i, result); 4812 } 4813 return dst; 4814 } 4815 4816 4817 LogicVRegister Simulator::frecpx(VectorFormat vform, 4818 LogicVRegister dst, 4819 const LogicVRegister& src) { 4820 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4821 frecpx<float>(vform, dst, src); 4822 } else { 4823 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4824 frecpx<double>(vform, dst, src); 4825 } 4826 return dst; 4827 } 4828 4829 LogicVRegister Simulator::scvtf(VectorFormat vform, 4830 LogicVRegister dst, 4831 const LogicVRegister& src, 4832 int fbits, 4833 FPRounding round) { 4834 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4835 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4836 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4837 dst.SetFloat<float>(i, result); 4838 } else { 4839 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4840 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4841 dst.SetFloat<double>(i, result); 4842 } 4843 } 4844 return dst; 4845 } 4846 4847 4848 LogicVRegister Simulator::ucvtf(VectorFormat vform, 4849 LogicVRegister dst, 4850 const LogicVRegister& src, 4851 int fbits, 4852 FPRounding round) { 4853 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4854 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4855 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4856 dst.SetFloat<float>(i, result); 4857 } else { 4858 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4859 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4860 dst.SetFloat<double>(i, result); 4861 } 4862 } 4863 return dst; 4864 } 4865 4866 4867 } // namespace vixl 4868