1 // Copyright 2015, VIXL authors 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are met: 6 // 7 // * Redistributions of source code must retain the above copyright notice, 8 // this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above copyright notice, 10 // this list of conditions and the following disclaimer in the documentation 11 // and/or other materials provided with the distribution. 12 // * Neither the name of ARM Limited nor the names of its contributors may be 13 // used to endorse or promote products derived from this software without 14 // specific prior written permission. 15 // 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND 17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64 28 29 #include <cmath> 30 31 #include "simulator-aarch64.h" 32 33 namespace vixl { 34 namespace aarch64 { 35 36 using vixl::internal::SimFloat16; 37 38 template <typename T> 39 bool IsFloat64() { 40 return false; 41 } 42 template <> 43 bool IsFloat64<double>() { 44 return true; 45 } 46 47 template <typename T> 48 bool IsFloat32() { 49 return false; 50 } 51 template <> 52 bool IsFloat32<float>() { 53 return true; 54 } 55 56 template <typename T> 57 bool IsFloat16() { 58 return false; 59 } 60 template <> 61 bool IsFloat16<Float16>() { 62 return true; 63 } 64 template <> 65 bool IsFloat16<SimFloat16>() { 66 return true; 67 } 68 69 template <> 70 double Simulator::FPDefaultNaN<double>() { 71 return kFP64DefaultNaN; 72 } 73 74 75 template <> 76 float Simulator::FPDefaultNaN<float>() { 77 return kFP32DefaultNaN; 78 } 79 80 81 template <> 82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() { 83 return SimFloat16(kFP16DefaultNaN); 84 } 85 86 87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) { 88 if (src >= 0) { 89 return UFixedToDouble(src, fbits, round); 90 } else if (src == INT64_MIN) { 91 return -UFixedToDouble(src, fbits, round); 92 } else { 93 return -UFixedToDouble(-src, fbits, round); 94 } 95 } 96 97 98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) { 99 // An input of 0 is a special case because the result is effectively 100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 101 if (src == 0) { 102 return 0.0; 103 } 104 105 // Calculate the exponent. The highest significant bit will have the value 106 // 2^exponent. 
107 const int highest_significant_bit = 63 - CountLeadingZeros(src); 108 const int64_t exponent = highest_significant_bit - fbits; 109 110 return FPRoundToDouble(0, exponent, src, round); 111 } 112 113 114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) { 115 if (src >= 0) { 116 return UFixedToFloat(src, fbits, round); 117 } else if (src == INT64_MIN) { 118 return -UFixedToFloat(src, fbits, round); 119 } else { 120 return -UFixedToFloat(-src, fbits, round); 121 } 122 } 123 124 125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) { 126 // An input of 0 is a special case because the result is effectively 127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 128 if (src == 0) { 129 return 0.0f; 130 } 131 132 // Calculate the exponent. The highest significant bit will have the value 133 // 2^exponent. 134 const int highest_significant_bit = 63 - CountLeadingZeros(src); 135 const int32_t exponent = highest_significant_bit - fbits; 136 137 return FPRoundToFloat(0, exponent, src, round); 138 } 139 140 141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) { 142 if (src >= 0) { 143 return UFixedToFloat16(src, fbits, round); 144 } else if (src == INT64_MIN) { 145 return -UFixedToFloat16(src, fbits, round); 146 } else { 147 return -UFixedToFloat16(-src, fbits, round); 148 } 149 } 150 151 152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src, 153 int fbits, 154 FPRounding round) { 155 // An input of 0 is a special case because the result is effectively 156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit. 157 if (src == 0) { 158 return 0.0f; 159 } 160 161 // Calculate the exponent. The highest significant bit will have the value 162 // 2^exponent. 
163 const int highest_significant_bit = 63 - CountLeadingZeros(src); 164 const int16_t exponent = highest_significant_bit - fbits; 165 166 return FPRoundToFloat16(0, exponent, src, round); 167 } 168 169 170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 171 dst.ClearForWrite(vform); 172 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 173 dst.ReadUintFromMem(vform, i, addr); 174 addr += LaneSizeInBytesFromFormat(vform); 175 } 176 } 177 178 179 void Simulator::ld1(VectorFormat vform, 180 LogicVRegister dst, 181 int index, 182 uint64_t addr) { 183 dst.ReadUintFromMem(vform, index, addr); 184 } 185 186 187 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) { 188 dst.ClearForWrite(vform); 189 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 190 dst.ReadUintFromMem(vform, i, addr); 191 } 192 } 193 194 195 void Simulator::ld2(VectorFormat vform, 196 LogicVRegister dst1, 197 LogicVRegister dst2, 198 uint64_t addr1) { 199 dst1.ClearForWrite(vform); 200 dst2.ClearForWrite(vform); 201 int esize = LaneSizeInBytesFromFormat(vform); 202 uint64_t addr2 = addr1 + esize; 203 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 204 dst1.ReadUintFromMem(vform, i, addr1); 205 dst2.ReadUintFromMem(vform, i, addr2); 206 addr1 += 2 * esize; 207 addr2 += 2 * esize; 208 } 209 } 210 211 212 void Simulator::ld2(VectorFormat vform, 213 LogicVRegister dst1, 214 LogicVRegister dst2, 215 int index, 216 uint64_t addr1) { 217 dst1.ClearForWrite(vform); 218 dst2.ClearForWrite(vform); 219 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 220 dst1.ReadUintFromMem(vform, index, addr1); 221 dst2.ReadUintFromMem(vform, index, addr2); 222 } 223 224 225 void Simulator::ld2r(VectorFormat vform, 226 LogicVRegister dst1, 227 LogicVRegister dst2, 228 uint64_t addr) { 229 dst1.ClearForWrite(vform); 230 dst2.ClearForWrite(vform); 231 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 232 for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { 233 dst1.ReadUintFromMem(vform, i, addr); 234 dst2.ReadUintFromMem(vform, i, addr2); 235 } 236 } 237 238 239 void Simulator::ld3(VectorFormat vform, 240 LogicVRegister dst1, 241 LogicVRegister dst2, 242 LogicVRegister dst3, 243 uint64_t addr1) { 244 dst1.ClearForWrite(vform); 245 dst2.ClearForWrite(vform); 246 dst3.ClearForWrite(vform); 247 int esize = LaneSizeInBytesFromFormat(vform); 248 uint64_t addr2 = addr1 + esize; 249 uint64_t addr3 = addr2 + esize; 250 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 251 dst1.ReadUintFromMem(vform, i, addr1); 252 dst2.ReadUintFromMem(vform, i, addr2); 253 dst3.ReadUintFromMem(vform, i, addr3); 254 addr1 += 3 * esize; 255 addr2 += 3 * esize; 256 addr3 += 3 * esize; 257 } 258 } 259 260 261 void Simulator::ld3(VectorFormat vform, 262 LogicVRegister dst1, 263 LogicVRegister dst2, 264 LogicVRegister dst3, 265 int index, 266 uint64_t addr1) { 267 dst1.ClearForWrite(vform); 268 dst2.ClearForWrite(vform); 269 dst3.ClearForWrite(vform); 270 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 271 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 272 dst1.ReadUintFromMem(vform, index, addr1); 273 dst2.ReadUintFromMem(vform, index, addr2); 274 dst3.ReadUintFromMem(vform, index, addr3); 275 } 276 277 278 void Simulator::ld3r(VectorFormat vform, 279 LogicVRegister dst1, 280 LogicVRegister dst2, 281 LogicVRegister dst3, 282 uint64_t addr) { 283 dst1.ClearForWrite(vform); 284 dst2.ClearForWrite(vform); 285 dst3.ClearForWrite(vform); 286 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 287 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 288 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 289 dst1.ReadUintFromMem(vform, i, addr); 290 dst2.ReadUintFromMem(vform, i, addr2); 291 dst3.ReadUintFromMem(vform, i, addr3); 292 } 293 } 294 295 296 void Simulator::ld4(VectorFormat vform, 297 LogicVRegister dst1, 298 LogicVRegister dst2, 299 LogicVRegister dst3, 300 
LogicVRegister dst4, 301 uint64_t addr1) { 302 dst1.ClearForWrite(vform); 303 dst2.ClearForWrite(vform); 304 dst3.ClearForWrite(vform); 305 dst4.ClearForWrite(vform); 306 int esize = LaneSizeInBytesFromFormat(vform); 307 uint64_t addr2 = addr1 + esize; 308 uint64_t addr3 = addr2 + esize; 309 uint64_t addr4 = addr3 + esize; 310 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 311 dst1.ReadUintFromMem(vform, i, addr1); 312 dst2.ReadUintFromMem(vform, i, addr2); 313 dst3.ReadUintFromMem(vform, i, addr3); 314 dst4.ReadUintFromMem(vform, i, addr4); 315 addr1 += 4 * esize; 316 addr2 += 4 * esize; 317 addr3 += 4 * esize; 318 addr4 += 4 * esize; 319 } 320 } 321 322 323 void Simulator::ld4(VectorFormat vform, 324 LogicVRegister dst1, 325 LogicVRegister dst2, 326 LogicVRegister dst3, 327 LogicVRegister dst4, 328 int index, 329 uint64_t addr1) { 330 dst1.ClearForWrite(vform); 331 dst2.ClearForWrite(vform); 332 dst3.ClearForWrite(vform); 333 dst4.ClearForWrite(vform); 334 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform); 335 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 336 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 337 dst1.ReadUintFromMem(vform, index, addr1); 338 dst2.ReadUintFromMem(vform, index, addr2); 339 dst3.ReadUintFromMem(vform, index, addr3); 340 dst4.ReadUintFromMem(vform, index, addr4); 341 } 342 343 344 void Simulator::ld4r(VectorFormat vform, 345 LogicVRegister dst1, 346 LogicVRegister dst2, 347 LogicVRegister dst3, 348 LogicVRegister dst4, 349 uint64_t addr) { 350 dst1.ClearForWrite(vform); 351 dst2.ClearForWrite(vform); 352 dst3.ClearForWrite(vform); 353 dst4.ClearForWrite(vform); 354 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform); 355 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform); 356 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform); 357 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 358 dst1.ReadUintFromMem(vform, i, addr); 359 dst2.ReadUintFromMem(vform, i, addr2); 360 
dst3.ReadUintFromMem(vform, i, addr3); 361 dst4.ReadUintFromMem(vform, i, addr4); 362 } 363 } 364 365 366 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) { 367 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 368 src.WriteUintToMem(vform, i, addr); 369 addr += LaneSizeInBytesFromFormat(vform); 370 } 371 } 372 373 374 void Simulator::st1(VectorFormat vform, 375 LogicVRegister src, 376 int index, 377 uint64_t addr) { 378 src.WriteUintToMem(vform, index, addr); 379 } 380 381 382 void Simulator::st2(VectorFormat vform, 383 LogicVRegister dst, 384 LogicVRegister dst2, 385 uint64_t addr) { 386 int esize = LaneSizeInBytesFromFormat(vform); 387 uint64_t addr2 = addr + esize; 388 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 389 dst.WriteUintToMem(vform, i, addr); 390 dst2.WriteUintToMem(vform, i, addr2); 391 addr += 2 * esize; 392 addr2 += 2 * esize; 393 } 394 } 395 396 397 void Simulator::st2(VectorFormat vform, 398 LogicVRegister dst, 399 LogicVRegister dst2, 400 int index, 401 uint64_t addr) { 402 int esize = LaneSizeInBytesFromFormat(vform); 403 dst.WriteUintToMem(vform, index, addr); 404 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 405 } 406 407 408 void Simulator::st3(VectorFormat vform, 409 LogicVRegister dst, 410 LogicVRegister dst2, 411 LogicVRegister dst3, 412 uint64_t addr) { 413 int esize = LaneSizeInBytesFromFormat(vform); 414 uint64_t addr2 = addr + esize; 415 uint64_t addr3 = addr2 + esize; 416 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 417 dst.WriteUintToMem(vform, i, addr); 418 dst2.WriteUintToMem(vform, i, addr2); 419 dst3.WriteUintToMem(vform, i, addr3); 420 addr += 3 * esize; 421 addr2 += 3 * esize; 422 addr3 += 3 * esize; 423 } 424 } 425 426 427 void Simulator::st3(VectorFormat vform, 428 LogicVRegister dst, 429 LogicVRegister dst2, 430 LogicVRegister dst3, 431 int index, 432 uint64_t addr) { 433 int esize = LaneSizeInBytesFromFormat(vform); 434 dst.WriteUintToMem(vform, index, addr); 435 
dst2.WriteUintToMem(vform, index, addr + 1 * esize); 436 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 437 } 438 439 440 void Simulator::st4(VectorFormat vform, 441 LogicVRegister dst, 442 LogicVRegister dst2, 443 LogicVRegister dst3, 444 LogicVRegister dst4, 445 uint64_t addr) { 446 int esize = LaneSizeInBytesFromFormat(vform); 447 uint64_t addr2 = addr + esize; 448 uint64_t addr3 = addr2 + esize; 449 uint64_t addr4 = addr3 + esize; 450 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 451 dst.WriteUintToMem(vform, i, addr); 452 dst2.WriteUintToMem(vform, i, addr2); 453 dst3.WriteUintToMem(vform, i, addr3); 454 dst4.WriteUintToMem(vform, i, addr4); 455 addr += 4 * esize; 456 addr2 += 4 * esize; 457 addr3 += 4 * esize; 458 addr4 += 4 * esize; 459 } 460 } 461 462 463 void Simulator::st4(VectorFormat vform, 464 LogicVRegister dst, 465 LogicVRegister dst2, 466 LogicVRegister dst3, 467 LogicVRegister dst4, 468 int index, 469 uint64_t addr) { 470 int esize = LaneSizeInBytesFromFormat(vform); 471 dst.WriteUintToMem(vform, index, addr); 472 dst2.WriteUintToMem(vform, index, addr + 1 * esize); 473 dst3.WriteUintToMem(vform, index, addr + 2 * esize); 474 dst4.WriteUintToMem(vform, index, addr + 3 * esize); 475 } 476 477 478 LogicVRegister Simulator::cmp(VectorFormat vform, 479 LogicVRegister dst, 480 const LogicVRegister& src1, 481 const LogicVRegister& src2, 482 Condition cond) { 483 dst.ClearForWrite(vform); 484 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 485 int64_t sa = src1.Int(vform, i); 486 int64_t sb = src2.Int(vform, i); 487 uint64_t ua = src1.Uint(vform, i); 488 uint64_t ub = src2.Uint(vform, i); 489 bool result = false; 490 switch (cond) { 491 case eq: 492 result = (ua == ub); 493 break; 494 case ge: 495 result = (sa >= sb); 496 break; 497 case gt: 498 result = (sa > sb); 499 break; 500 case hi: 501 result = (ua > ub); 502 break; 503 case hs: 504 result = (ua >= ub); 505 break; 506 case lt: 507 result = (sa < sb); 508 break; 509 case le: 
510 result = (sa <= sb); 511 break; 512 default: 513 VIXL_UNREACHABLE(); 514 break; 515 } 516 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 517 } 518 return dst; 519 } 520 521 522 LogicVRegister Simulator::cmp(VectorFormat vform, 523 LogicVRegister dst, 524 const LogicVRegister& src1, 525 int imm, 526 Condition cond) { 527 SimVRegister temp; 528 LogicVRegister imm_reg = dup_immediate(vform, temp, imm); 529 return cmp(vform, dst, src1, imm_reg, cond); 530 } 531 532 533 LogicVRegister Simulator::cmptst(VectorFormat vform, 534 LogicVRegister dst, 535 const LogicVRegister& src1, 536 const LogicVRegister& src2) { 537 dst.ClearForWrite(vform); 538 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 539 uint64_t ua = src1.Uint(vform, i); 540 uint64_t ub = src2.Uint(vform, i); 541 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0); 542 } 543 return dst; 544 } 545 546 547 LogicVRegister Simulator::add(VectorFormat vform, 548 LogicVRegister dst, 549 const LogicVRegister& src1, 550 const LogicVRegister& src2) { 551 int lane_size = LaneSizeInBitsFromFormat(vform); 552 dst.ClearForWrite(vform); 553 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 554 // Test for unsigned saturation. 555 uint64_t ua = src1.UintLeftJustified(vform, i); 556 uint64_t ub = src2.UintLeftJustified(vform, i); 557 uint64_t ur = ua + ub; 558 if (ur < ua) { 559 dst.SetUnsignedSat(i, true); 560 } 561 562 // Test for signed saturation. 563 bool pos_a = (ua >> 63) == 0; 564 bool pos_b = (ub >> 63) == 0; 565 bool pos_r = (ur >> 63) == 0; 566 // If the signs of the operands are the same, but different from the result, 567 // there was an overflow. 
568 if ((pos_a == pos_b) && (pos_a != pos_r)) { 569 dst.SetSignedSat(i, pos_a); 570 } 571 572 dst.SetInt(vform, i, ur >> (64 - lane_size)); 573 } 574 return dst; 575 } 576 577 578 LogicVRegister Simulator::addp(VectorFormat vform, 579 LogicVRegister dst, 580 const LogicVRegister& src1, 581 const LogicVRegister& src2) { 582 SimVRegister temp1, temp2; 583 uzp1(vform, temp1, src1, src2); 584 uzp2(vform, temp2, src1, src2); 585 add(vform, dst, temp1, temp2); 586 return dst; 587 } 588 589 590 LogicVRegister Simulator::mla(VectorFormat vform, 591 LogicVRegister dst, 592 const LogicVRegister& src1, 593 const LogicVRegister& src2) { 594 SimVRegister temp; 595 mul(vform, temp, src1, src2); 596 add(vform, dst, dst, temp); 597 return dst; 598 } 599 600 601 LogicVRegister Simulator::mls(VectorFormat vform, 602 LogicVRegister dst, 603 const LogicVRegister& src1, 604 const LogicVRegister& src2) { 605 SimVRegister temp; 606 mul(vform, temp, src1, src2); 607 sub(vform, dst, dst, temp); 608 return dst; 609 } 610 611 612 LogicVRegister Simulator::mul(VectorFormat vform, 613 LogicVRegister dst, 614 const LogicVRegister& src1, 615 const LogicVRegister& src2) { 616 dst.ClearForWrite(vform); 617 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 618 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i)); 619 } 620 return dst; 621 } 622 623 624 LogicVRegister Simulator::mul(VectorFormat vform, 625 LogicVRegister dst, 626 const LogicVRegister& src1, 627 const LogicVRegister& src2, 628 int index) { 629 SimVRegister temp; 630 VectorFormat indexform = VectorFormatFillQ(vform); 631 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index)); 632 } 633 634 635 LogicVRegister Simulator::mla(VectorFormat vform, 636 LogicVRegister dst, 637 const LogicVRegister& src1, 638 const LogicVRegister& src2, 639 int index) { 640 SimVRegister temp; 641 VectorFormat indexform = VectorFormatFillQ(vform); 642 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index)); 643 
} 644 645 646 LogicVRegister Simulator::mls(VectorFormat vform, 647 LogicVRegister dst, 648 const LogicVRegister& src1, 649 const LogicVRegister& src2, 650 int index) { 651 SimVRegister temp; 652 VectorFormat indexform = VectorFormatFillQ(vform); 653 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index)); 654 } 655 656 657 LogicVRegister Simulator::smull(VectorFormat vform, 658 LogicVRegister dst, 659 const LogicVRegister& src1, 660 const LogicVRegister& src2, 661 int index) { 662 SimVRegister temp; 663 VectorFormat indexform = 664 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 665 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 666 } 667 668 669 LogicVRegister Simulator::smull2(VectorFormat vform, 670 LogicVRegister dst, 671 const LogicVRegister& src1, 672 const LogicVRegister& src2, 673 int index) { 674 SimVRegister temp; 675 VectorFormat indexform = 676 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 677 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 678 } 679 680 681 LogicVRegister Simulator::umull(VectorFormat vform, 682 LogicVRegister dst, 683 const LogicVRegister& src1, 684 const LogicVRegister& src2, 685 int index) { 686 SimVRegister temp; 687 VectorFormat indexform = 688 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 689 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 690 } 691 692 693 LogicVRegister Simulator::umull2(VectorFormat vform, 694 LogicVRegister dst, 695 const LogicVRegister& src1, 696 const LogicVRegister& src2, 697 int index) { 698 SimVRegister temp; 699 VectorFormat indexform = 700 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 701 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 702 } 703 704 705 LogicVRegister Simulator::smlal(VectorFormat vform, 706 LogicVRegister dst, 707 const LogicVRegister& src1, 708 const LogicVRegister& src2, 709 int index) { 710 SimVRegister 
temp; 711 VectorFormat indexform = 712 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 713 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 714 } 715 716 717 LogicVRegister Simulator::smlal2(VectorFormat vform, 718 LogicVRegister dst, 719 const LogicVRegister& src1, 720 const LogicVRegister& src2, 721 int index) { 722 SimVRegister temp; 723 VectorFormat indexform = 724 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 725 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 726 } 727 728 729 LogicVRegister Simulator::umlal(VectorFormat vform, 730 LogicVRegister dst, 731 const LogicVRegister& src1, 732 const LogicVRegister& src2, 733 int index) { 734 SimVRegister temp; 735 VectorFormat indexform = 736 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 737 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 738 } 739 740 741 LogicVRegister Simulator::umlal2(VectorFormat vform, 742 LogicVRegister dst, 743 const LogicVRegister& src1, 744 const LogicVRegister& src2, 745 int index) { 746 SimVRegister temp; 747 VectorFormat indexform = 748 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 749 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 750 } 751 752 753 LogicVRegister Simulator::smlsl(VectorFormat vform, 754 LogicVRegister dst, 755 const LogicVRegister& src1, 756 const LogicVRegister& src2, 757 int index) { 758 SimVRegister temp; 759 VectorFormat indexform = 760 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 761 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 762 } 763 764 765 LogicVRegister Simulator::smlsl2(VectorFormat vform, 766 LogicVRegister dst, 767 const LogicVRegister& src1, 768 const LogicVRegister& src2, 769 int index) { 770 SimVRegister temp; 771 VectorFormat indexform = 772 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 773 return smlsl2(vform, dst, src1, 
dup_element(indexform, temp, src2, index)); 774 } 775 776 777 LogicVRegister Simulator::umlsl(VectorFormat vform, 778 LogicVRegister dst, 779 const LogicVRegister& src1, 780 const LogicVRegister& src2, 781 int index) { 782 SimVRegister temp; 783 VectorFormat indexform = 784 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 785 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 786 } 787 788 789 LogicVRegister Simulator::umlsl2(VectorFormat vform, 790 LogicVRegister dst, 791 const LogicVRegister& src1, 792 const LogicVRegister& src2, 793 int index) { 794 SimVRegister temp; 795 VectorFormat indexform = 796 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 797 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 798 } 799 800 801 LogicVRegister Simulator::sqdmull(VectorFormat vform, 802 LogicVRegister dst, 803 const LogicVRegister& src1, 804 const LogicVRegister& src2, 805 int index) { 806 SimVRegister temp; 807 VectorFormat indexform = 808 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 809 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index)); 810 } 811 812 813 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 814 LogicVRegister dst, 815 const LogicVRegister& src1, 816 const LogicVRegister& src2, 817 int index) { 818 SimVRegister temp; 819 VectorFormat indexform = 820 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 821 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 822 } 823 824 825 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 826 LogicVRegister dst, 827 const LogicVRegister& src1, 828 const LogicVRegister& src2, 829 int index) { 830 SimVRegister temp; 831 VectorFormat indexform = 832 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 833 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index)); 834 } 835 836 837 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 838 
LogicVRegister dst, 839 const LogicVRegister& src1, 840 const LogicVRegister& src2, 841 int index) { 842 SimVRegister temp; 843 VectorFormat indexform = 844 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 845 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 846 } 847 848 849 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 850 LogicVRegister dst, 851 const LogicVRegister& src1, 852 const LogicVRegister& src2, 853 int index) { 854 SimVRegister temp; 855 VectorFormat indexform = 856 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 857 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index)); 858 } 859 860 861 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 862 LogicVRegister dst, 863 const LogicVRegister& src1, 864 const LogicVRegister& src2, 865 int index) { 866 SimVRegister temp; 867 VectorFormat indexform = 868 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform)); 869 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index)); 870 } 871 872 873 LogicVRegister Simulator::sqdmulh(VectorFormat vform, 874 LogicVRegister dst, 875 const LogicVRegister& src1, 876 const LogicVRegister& src2, 877 int index) { 878 SimVRegister temp; 879 VectorFormat indexform = VectorFormatFillQ(vform); 880 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 881 } 882 883 884 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 885 LogicVRegister dst, 886 const LogicVRegister& src1, 887 const LogicVRegister& src2, 888 int index) { 889 SimVRegister temp; 890 VectorFormat indexform = VectorFormatFillQ(vform); 891 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 892 } 893 894 895 LogicVRegister Simulator::sdot(VectorFormat vform, 896 LogicVRegister dst, 897 const LogicVRegister& src1, 898 const LogicVRegister& src2, 899 int index) { 900 SimVRegister temp; 901 VectorFormat indexform = VectorFormatFillQ(vform); 902 return sdot(vform, 
dst, src1, dup_element(indexform, temp, src2, index)); 903 } 904 905 906 LogicVRegister Simulator::sqrdmlah(VectorFormat vform, 907 LogicVRegister dst, 908 const LogicVRegister& src1, 909 const LogicVRegister& src2, 910 int index) { 911 SimVRegister temp; 912 VectorFormat indexform = VectorFormatFillQ(vform); 913 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index)); 914 } 915 916 917 LogicVRegister Simulator::udot(VectorFormat vform, 918 LogicVRegister dst, 919 const LogicVRegister& src1, 920 const LogicVRegister& src2, 921 int index) { 922 SimVRegister temp; 923 VectorFormat indexform = VectorFormatFillQ(vform); 924 return udot(vform, dst, src1, dup_element(indexform, temp, src2, index)); 925 } 926 927 928 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform, 929 LogicVRegister dst, 930 const LogicVRegister& src1, 931 const LogicVRegister& src2, 932 int index) { 933 SimVRegister temp; 934 VectorFormat indexform = VectorFormatFillQ(vform); 935 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index)); 936 } 937 938 939 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const { 940 uint16_t result = 0; 941 uint16_t extended_op2 = op2; 942 for (int i = 0; i < 8; ++i) { 943 if ((op1 >> i) & 1) { 944 result = result ^ (extended_op2 << i); 945 } 946 } 947 return result; 948 } 949 950 951 LogicVRegister Simulator::pmul(VectorFormat vform, 952 LogicVRegister dst, 953 const LogicVRegister& src1, 954 const LogicVRegister& src2) { 955 dst.ClearForWrite(vform); 956 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 957 dst.SetUint(vform, 958 i, 959 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i))); 960 } 961 return dst; 962 } 963 964 965 LogicVRegister Simulator::pmull(VectorFormat vform, 966 LogicVRegister dst, 967 const LogicVRegister& src1, 968 const LogicVRegister& src2) { 969 VectorFormat vform_src = VectorFormatHalfWidth(vform); 970 dst.ClearForWrite(vform); 971 for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { 972 dst.SetUint(vform, 973 i, 974 PolynomialMult(src1.Uint(vform_src, i), 975 src2.Uint(vform_src, i))); 976 } 977 return dst; 978 } 979 980 981 LogicVRegister Simulator::pmull2(VectorFormat vform, 982 LogicVRegister dst, 983 const LogicVRegister& src1, 984 const LogicVRegister& src2) { 985 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform); 986 dst.ClearForWrite(vform); 987 int lane_count = LaneCountFromFormat(vform); 988 for (int i = 0; i < lane_count; i++) { 989 dst.SetUint(vform, 990 i, 991 PolynomialMult(src1.Uint(vform_src, lane_count + i), 992 src2.Uint(vform_src, lane_count + i))); 993 } 994 return dst; 995 } 996 997 998 LogicVRegister Simulator::sub(VectorFormat vform, 999 LogicVRegister dst, 1000 const LogicVRegister& src1, 1001 const LogicVRegister& src2) { 1002 int lane_size = LaneSizeInBitsFromFormat(vform); 1003 dst.ClearForWrite(vform); 1004 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1005 // Test for unsigned saturation. 1006 uint64_t ua = src1.UintLeftJustified(vform, i); 1007 uint64_t ub = src2.UintLeftJustified(vform, i); 1008 uint64_t ur = ua - ub; 1009 if (ub > ua) { 1010 dst.SetUnsignedSat(i, false); 1011 } 1012 1013 // Test for signed saturation. 1014 bool pos_a = (ua >> 63) == 0; 1015 bool pos_b = (ub >> 63) == 0; 1016 bool pos_r = (ur >> 63) == 0; 1017 // If the signs of the operands are different, and the sign of the first 1018 // operand doesn't match the result, there was an overflow. 
1019 if ((pos_a != pos_b) && (pos_a != pos_r)) { 1020 dst.SetSignedSat(i, pos_a); 1021 } 1022 1023 dst.SetInt(vform, i, ur >> (64 - lane_size)); 1024 } 1025 return dst; 1026 } 1027 1028 1029 LogicVRegister Simulator::and_(VectorFormat vform, 1030 LogicVRegister dst, 1031 const LogicVRegister& src1, 1032 const LogicVRegister& src2) { 1033 dst.ClearForWrite(vform); 1034 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1035 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i)); 1036 } 1037 return dst; 1038 } 1039 1040 1041 LogicVRegister Simulator::orr(VectorFormat vform, 1042 LogicVRegister dst, 1043 const LogicVRegister& src1, 1044 const LogicVRegister& src2) { 1045 dst.ClearForWrite(vform); 1046 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1047 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i)); 1048 } 1049 return dst; 1050 } 1051 1052 1053 LogicVRegister Simulator::orn(VectorFormat vform, 1054 LogicVRegister dst, 1055 const LogicVRegister& src1, 1056 const LogicVRegister& src2) { 1057 dst.ClearForWrite(vform); 1058 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1059 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i)); 1060 } 1061 return dst; 1062 } 1063 1064 1065 LogicVRegister Simulator::eor(VectorFormat vform, 1066 LogicVRegister dst, 1067 const LogicVRegister& src1, 1068 const LogicVRegister& src2) { 1069 dst.ClearForWrite(vform); 1070 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1071 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i)); 1072 } 1073 return dst; 1074 } 1075 1076 1077 LogicVRegister Simulator::bic(VectorFormat vform, 1078 LogicVRegister dst, 1079 const LogicVRegister& src1, 1080 const LogicVRegister& src2) { 1081 dst.ClearForWrite(vform); 1082 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1083 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i)); 1084 } 1085 return dst; 1086 } 1087 1088 1089 LogicVRegister Simulator::bic(VectorFormat vform, 
1090 LogicVRegister dst, 1091 const LogicVRegister& src, 1092 uint64_t imm) { 1093 uint64_t result[16]; 1094 int laneCount = LaneCountFromFormat(vform); 1095 for (int i = 0; i < laneCount; ++i) { 1096 result[i] = src.Uint(vform, i) & ~imm; 1097 } 1098 dst.ClearForWrite(vform); 1099 for (int i = 0; i < laneCount; ++i) { 1100 dst.SetUint(vform, i, result[i]); 1101 } 1102 return dst; 1103 } 1104 1105 1106 LogicVRegister Simulator::bif(VectorFormat vform, 1107 LogicVRegister dst, 1108 const LogicVRegister& src1, 1109 const LogicVRegister& src2) { 1110 dst.ClearForWrite(vform); 1111 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1112 uint64_t operand1 = dst.Uint(vform, i); 1113 uint64_t operand2 = ~src2.Uint(vform, i); 1114 uint64_t operand3 = src1.Uint(vform, i); 1115 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1116 dst.SetUint(vform, i, result); 1117 } 1118 return dst; 1119 } 1120 1121 1122 LogicVRegister Simulator::bit(VectorFormat vform, 1123 LogicVRegister dst, 1124 const LogicVRegister& src1, 1125 const LogicVRegister& src2) { 1126 dst.ClearForWrite(vform); 1127 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1128 uint64_t operand1 = dst.Uint(vform, i); 1129 uint64_t operand2 = src2.Uint(vform, i); 1130 uint64_t operand3 = src1.Uint(vform, i); 1131 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1132 dst.SetUint(vform, i, result); 1133 } 1134 return dst; 1135 } 1136 1137 1138 LogicVRegister Simulator::bsl(VectorFormat vform, 1139 LogicVRegister dst, 1140 const LogicVRegister& src1, 1141 const LogicVRegister& src2) { 1142 dst.ClearForWrite(vform); 1143 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1144 uint64_t operand1 = src2.Uint(vform, i); 1145 uint64_t operand2 = dst.Uint(vform, i); 1146 uint64_t operand3 = src1.Uint(vform, i); 1147 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1148 dst.SetUint(vform, i, result); 1149 } 1150 return dst; 1151 } 1152 1153 1154 LogicVRegister 
Simulator::sminmax(VectorFormat vform, 1155 LogicVRegister dst, 1156 const LogicVRegister& src1, 1157 const LogicVRegister& src2, 1158 bool max) { 1159 dst.ClearForWrite(vform); 1160 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1161 int64_t src1_val = src1.Int(vform, i); 1162 int64_t src2_val = src2.Int(vform, i); 1163 int64_t dst_val; 1164 if (max) { 1165 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1166 } else { 1167 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1168 } 1169 dst.SetInt(vform, i, dst_val); 1170 } 1171 return dst; 1172 } 1173 1174 1175 LogicVRegister Simulator::smax(VectorFormat vform, 1176 LogicVRegister dst, 1177 const LogicVRegister& src1, 1178 const LogicVRegister& src2) { 1179 return sminmax(vform, dst, src1, src2, true); 1180 } 1181 1182 1183 LogicVRegister Simulator::smin(VectorFormat vform, 1184 LogicVRegister dst, 1185 const LogicVRegister& src1, 1186 const LogicVRegister& src2) { 1187 return sminmax(vform, dst, src1, src2, false); 1188 } 1189 1190 1191 LogicVRegister Simulator::sminmaxp(VectorFormat vform, 1192 LogicVRegister dst, 1193 const LogicVRegister& src1, 1194 const LogicVRegister& src2, 1195 bool max) { 1196 int lanes = LaneCountFromFormat(vform); 1197 int64_t result[kMaxLanesPerVector]; 1198 const LogicVRegister* src = &src1; 1199 for (int j = 0; j < 2; j++) { 1200 for (int i = 0; i < lanes; i += 2) { 1201 int64_t first_val = src->Int(vform, i); 1202 int64_t second_val = src->Int(vform, i + 1); 1203 int64_t dst_val; 1204 if (max) { 1205 dst_val = (first_val > second_val) ? first_val : second_val; 1206 } else { 1207 dst_val = (first_val < second_val) ? 
first_val : second_val; 1208 } 1209 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1210 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1211 } 1212 src = &src2; 1213 } 1214 dst.SetIntArray(vform, result); 1215 return dst; 1216 } 1217 1218 1219 LogicVRegister Simulator::smaxp(VectorFormat vform, 1220 LogicVRegister dst, 1221 const LogicVRegister& src1, 1222 const LogicVRegister& src2) { 1223 return sminmaxp(vform, dst, src1, src2, true); 1224 } 1225 1226 1227 LogicVRegister Simulator::sminp(VectorFormat vform, 1228 LogicVRegister dst, 1229 const LogicVRegister& src1, 1230 const LogicVRegister& src2) { 1231 return sminmaxp(vform, dst, src1, src2, false); 1232 } 1233 1234 1235 LogicVRegister Simulator::addp(VectorFormat vform, 1236 LogicVRegister dst, 1237 const LogicVRegister& src) { 1238 VIXL_ASSERT(vform == kFormatD); 1239 1240 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); 1241 dst.ClearForWrite(vform); 1242 dst.SetUint(vform, 0, dst_val); 1243 return dst; 1244 } 1245 1246 1247 LogicVRegister Simulator::addv(VectorFormat vform, 1248 LogicVRegister dst, 1249 const LogicVRegister& src) { 1250 VectorFormat vform_dst = 1251 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1252 1253 1254 int64_t dst_val = 0; 1255 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1256 dst_val += src.Int(vform, i); 1257 } 1258 1259 dst.ClearForWrite(vform_dst); 1260 dst.SetInt(vform_dst, 0, dst_val); 1261 return dst; 1262 } 1263 1264 1265 LogicVRegister Simulator::saddlv(VectorFormat vform, 1266 LogicVRegister dst, 1267 const LogicVRegister& src) { 1268 VectorFormat vform_dst = 1269 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1270 1271 int64_t dst_val = 0; 1272 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1273 dst_val += src.Int(vform, i); 1274 } 1275 1276 dst.ClearForWrite(vform_dst); 1277 dst.SetInt(vform_dst, 0, dst_val); 1278 return dst; 1279 } 1280 1281 1282 LogicVRegister Simulator::uaddlv(VectorFormat 
vform, 1283 LogicVRegister dst, 1284 const LogicVRegister& src) { 1285 VectorFormat vform_dst = 1286 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1287 1288 uint64_t dst_val = 0; 1289 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1290 dst_val += src.Uint(vform, i); 1291 } 1292 1293 dst.ClearForWrite(vform_dst); 1294 dst.SetUint(vform_dst, 0, dst_val); 1295 return dst; 1296 } 1297 1298 1299 LogicVRegister Simulator::sminmaxv(VectorFormat vform, 1300 LogicVRegister dst, 1301 const LogicVRegister& src, 1302 bool max) { 1303 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1304 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1305 int64_t src_val = src.Int(vform, i); 1306 if (max) { 1307 dst_val = (src_val > dst_val) ? src_val : dst_val; 1308 } else { 1309 dst_val = (src_val < dst_val) ? src_val : dst_val; 1310 } 1311 } 1312 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1313 dst.SetInt(vform, 0, dst_val); 1314 return dst; 1315 } 1316 1317 1318 LogicVRegister Simulator::smaxv(VectorFormat vform, 1319 LogicVRegister dst, 1320 const LogicVRegister& src) { 1321 sminmaxv(vform, dst, src, true); 1322 return dst; 1323 } 1324 1325 1326 LogicVRegister Simulator::sminv(VectorFormat vform, 1327 LogicVRegister dst, 1328 const LogicVRegister& src) { 1329 sminmaxv(vform, dst, src, false); 1330 return dst; 1331 } 1332 1333 1334 LogicVRegister Simulator::uminmax(VectorFormat vform, 1335 LogicVRegister dst, 1336 const LogicVRegister& src1, 1337 const LogicVRegister& src2, 1338 bool max) { 1339 dst.ClearForWrite(vform); 1340 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1341 uint64_t src1_val = src1.Uint(vform, i); 1342 uint64_t src2_val = src2.Uint(vform, i); 1343 uint64_t dst_val; 1344 if (max) { 1345 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1346 } else { 1347 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1348 } 1349 dst.SetUint(vform, i, dst_val); 1350 } 1351 return dst; 1352 } 1353 1354 1355 LogicVRegister Simulator::umax(VectorFormat vform, 1356 LogicVRegister dst, 1357 const LogicVRegister& src1, 1358 const LogicVRegister& src2) { 1359 return uminmax(vform, dst, src1, src2, true); 1360 } 1361 1362 1363 LogicVRegister Simulator::umin(VectorFormat vform, 1364 LogicVRegister dst, 1365 const LogicVRegister& src1, 1366 const LogicVRegister& src2) { 1367 return uminmax(vform, dst, src1, src2, false); 1368 } 1369 1370 1371 LogicVRegister Simulator::uminmaxp(VectorFormat vform, 1372 LogicVRegister dst, 1373 const LogicVRegister& src1, 1374 const LogicVRegister& src2, 1375 bool max) { 1376 int lanes = LaneCountFromFormat(vform); 1377 uint64_t result[kMaxLanesPerVector]; 1378 const LogicVRegister* src = &src1; 1379 for (int j = 0; j < 2; j++) { 1380 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1381 uint64_t first_val = src->Uint(vform, i); 1382 uint64_t second_val = src->Uint(vform, i + 1); 1383 uint64_t dst_val; 1384 if (max) { 1385 dst_val = (first_val > second_val) ? first_val : second_val; 1386 } else { 1387 dst_val = (first_val < second_val) ? 
first_val : second_val; 1388 } 1389 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector); 1390 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1391 } 1392 src = &src2; 1393 } 1394 dst.SetUintArray(vform, result); 1395 return dst; 1396 } 1397 1398 1399 LogicVRegister Simulator::umaxp(VectorFormat vform, 1400 LogicVRegister dst, 1401 const LogicVRegister& src1, 1402 const LogicVRegister& src2) { 1403 return uminmaxp(vform, dst, src1, src2, true); 1404 } 1405 1406 1407 LogicVRegister Simulator::uminp(VectorFormat vform, 1408 LogicVRegister dst, 1409 const LogicVRegister& src1, 1410 const LogicVRegister& src2) { 1411 return uminmaxp(vform, dst, src1, src2, false); 1412 } 1413 1414 1415 LogicVRegister Simulator::uminmaxv(VectorFormat vform, 1416 LogicVRegister dst, 1417 const LogicVRegister& src, 1418 bool max) { 1419 uint64_t dst_val = max ? 0 : UINT64_MAX; 1420 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1421 uint64_t src_val = src.Uint(vform, i); 1422 if (max) { 1423 dst_val = (src_val > dst_val) ? src_val : dst_val; 1424 } else { 1425 dst_val = (src_val < dst_val) ? 
src_val : dst_val; 1426 } 1427 } 1428 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1429 dst.SetUint(vform, 0, dst_val); 1430 return dst; 1431 } 1432 1433 1434 LogicVRegister Simulator::umaxv(VectorFormat vform, 1435 LogicVRegister dst, 1436 const LogicVRegister& src) { 1437 uminmaxv(vform, dst, src, true); 1438 return dst; 1439 } 1440 1441 1442 LogicVRegister Simulator::uminv(VectorFormat vform, 1443 LogicVRegister dst, 1444 const LogicVRegister& src) { 1445 uminmaxv(vform, dst, src, false); 1446 return dst; 1447 } 1448 1449 1450 LogicVRegister Simulator::shl(VectorFormat vform, 1451 LogicVRegister dst, 1452 const LogicVRegister& src, 1453 int shift) { 1454 VIXL_ASSERT(shift >= 0); 1455 SimVRegister temp; 1456 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1457 return ushl(vform, dst, src, shiftreg); 1458 } 1459 1460 1461 LogicVRegister Simulator::sshll(VectorFormat vform, 1462 LogicVRegister dst, 1463 const LogicVRegister& src, 1464 int shift) { 1465 VIXL_ASSERT(shift >= 0); 1466 SimVRegister temp1, temp2; 1467 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1468 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1469 return sshl(vform, dst, extendedreg, shiftreg); 1470 } 1471 1472 1473 LogicVRegister Simulator::sshll2(VectorFormat vform, 1474 LogicVRegister dst, 1475 const LogicVRegister& src, 1476 int shift) { 1477 VIXL_ASSERT(shift >= 0); 1478 SimVRegister temp1, temp2; 1479 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1480 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1481 return sshl(vform, dst, extendedreg, shiftreg); 1482 } 1483 1484 1485 LogicVRegister Simulator::shll(VectorFormat vform, 1486 LogicVRegister dst, 1487 const LogicVRegister& src) { 1488 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1489 return sshll(vform, dst, src, shift); 1490 } 1491 1492 1493 LogicVRegister Simulator::shll2(VectorFormat vform, 1494 LogicVRegister dst, 1495 const LogicVRegister& src) { 1496 int shift = 
LaneSizeInBitsFromFormat(vform) / 2; 1497 return sshll2(vform, dst, src, shift); 1498 } 1499 1500 1501 LogicVRegister Simulator::ushll(VectorFormat vform, 1502 LogicVRegister dst, 1503 const LogicVRegister& src, 1504 int shift) { 1505 VIXL_ASSERT(shift >= 0); 1506 SimVRegister temp1, temp2; 1507 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1508 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1509 return ushl(vform, dst, extendedreg, shiftreg); 1510 } 1511 1512 1513 LogicVRegister Simulator::ushll2(VectorFormat vform, 1514 LogicVRegister dst, 1515 const LogicVRegister& src, 1516 int shift) { 1517 VIXL_ASSERT(shift >= 0); 1518 SimVRegister temp1, temp2; 1519 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1520 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1521 return ushl(vform, dst, extendedreg, shiftreg); 1522 } 1523 1524 1525 LogicVRegister Simulator::sli(VectorFormat vform, 1526 LogicVRegister dst, 1527 const LogicVRegister& src, 1528 int shift) { 1529 dst.ClearForWrite(vform); 1530 int laneCount = LaneCountFromFormat(vform); 1531 for (int i = 0; i < laneCount; i++) { 1532 uint64_t src_lane = src.Uint(vform, i); 1533 uint64_t dst_lane = dst.Uint(vform, i); 1534 uint64_t shifted = src_lane << shift; 1535 uint64_t mask = MaxUintFromFormat(vform) << shift; 1536 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1537 } 1538 return dst; 1539 } 1540 1541 1542 LogicVRegister Simulator::sqshl(VectorFormat vform, 1543 LogicVRegister dst, 1544 const LogicVRegister& src, 1545 int shift) { 1546 VIXL_ASSERT(shift >= 0); 1547 SimVRegister temp; 1548 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1549 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1550 } 1551 1552 1553 LogicVRegister Simulator::uqshl(VectorFormat vform, 1554 LogicVRegister dst, 1555 const LogicVRegister& src, 1556 int shift) { 1557 VIXL_ASSERT(shift >= 0); 1558 SimVRegister temp; 1559 LogicVRegister shiftreg = dup_immediate(vform, 
temp, shift); 1560 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1561 } 1562 1563 1564 LogicVRegister Simulator::sqshlu(VectorFormat vform, 1565 LogicVRegister dst, 1566 const LogicVRegister& src, 1567 int shift) { 1568 VIXL_ASSERT(shift >= 0); 1569 SimVRegister temp; 1570 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1571 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1572 } 1573 1574 1575 LogicVRegister Simulator::sri(VectorFormat vform, 1576 LogicVRegister dst, 1577 const LogicVRegister& src, 1578 int shift) { 1579 dst.ClearForWrite(vform); 1580 int laneCount = LaneCountFromFormat(vform); 1581 VIXL_ASSERT((shift > 0) && 1582 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1583 for (int i = 0; i < laneCount; i++) { 1584 uint64_t src_lane = src.Uint(vform, i); 1585 uint64_t dst_lane = dst.Uint(vform, i); 1586 uint64_t shifted; 1587 uint64_t mask; 1588 if (shift == 64) { 1589 shifted = 0; 1590 mask = 0; 1591 } else { 1592 shifted = src_lane >> shift; 1593 mask = MaxUintFromFormat(vform) >> shift; 1594 } 1595 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1596 } 1597 return dst; 1598 } 1599 1600 1601 LogicVRegister Simulator::ushr(VectorFormat vform, 1602 LogicVRegister dst, 1603 const LogicVRegister& src, 1604 int shift) { 1605 VIXL_ASSERT(shift >= 0); 1606 SimVRegister temp; 1607 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1608 return ushl(vform, dst, src, shiftreg); 1609 } 1610 1611 1612 LogicVRegister Simulator::sshr(VectorFormat vform, 1613 LogicVRegister dst, 1614 const LogicVRegister& src, 1615 int shift) { 1616 VIXL_ASSERT(shift >= 0); 1617 SimVRegister temp; 1618 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1619 return sshl(vform, dst, src, shiftreg); 1620 } 1621 1622 1623 LogicVRegister Simulator::ssra(VectorFormat vform, 1624 LogicVRegister dst, 1625 const LogicVRegister& src, 1626 int shift) { 1627 SimVRegister temp; 1628 LogicVRegister shifted_reg = 
sshr(vform, temp, src, shift); 1629 return add(vform, dst, dst, shifted_reg); 1630 } 1631 1632 1633 LogicVRegister Simulator::usra(VectorFormat vform, 1634 LogicVRegister dst, 1635 const LogicVRegister& src, 1636 int shift) { 1637 SimVRegister temp; 1638 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1639 return add(vform, dst, dst, shifted_reg); 1640 } 1641 1642 1643 LogicVRegister Simulator::srsra(VectorFormat vform, 1644 LogicVRegister dst, 1645 const LogicVRegister& src, 1646 int shift) { 1647 SimVRegister temp; 1648 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1649 return add(vform, dst, dst, shifted_reg); 1650 } 1651 1652 1653 LogicVRegister Simulator::ursra(VectorFormat vform, 1654 LogicVRegister dst, 1655 const LogicVRegister& src, 1656 int shift) { 1657 SimVRegister temp; 1658 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1659 return add(vform, dst, dst, shifted_reg); 1660 } 1661 1662 1663 LogicVRegister Simulator::cls(VectorFormat vform, 1664 LogicVRegister dst, 1665 const LogicVRegister& src) { 1666 uint64_t result[16]; 1667 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1668 int laneCount = LaneCountFromFormat(vform); 1669 for (int i = 0; i < laneCount; i++) { 1670 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1671 } 1672 1673 dst.ClearForWrite(vform); 1674 for (int i = 0; i < laneCount; ++i) { 1675 dst.SetUint(vform, i, result[i]); 1676 } 1677 return dst; 1678 } 1679 1680 1681 LogicVRegister Simulator::clz(VectorFormat vform, 1682 LogicVRegister dst, 1683 const LogicVRegister& src) { 1684 uint64_t result[16]; 1685 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1686 int laneCount = LaneCountFromFormat(vform); 1687 for (int i = 0; i < laneCount; i++) { 1688 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1689 } 1690 1691 dst.ClearForWrite(vform); 1692 for (int i = 0; i < laneCount; ++i) { 1693 dst.SetUint(vform, i, result[i]); 1694 
} 1695 return dst; 1696 } 1697 1698 1699 LogicVRegister Simulator::cnt(VectorFormat vform, 1700 LogicVRegister dst, 1701 const LogicVRegister& src) { 1702 uint64_t result[16]; 1703 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1704 int laneCount = LaneCountFromFormat(vform); 1705 for (int i = 0; i < laneCount; i++) { 1706 uint64_t value = src.Uint(vform, i); 1707 result[i] = 0; 1708 for (int j = 0; j < laneSizeInBits; j++) { 1709 result[i] += (value & 1); 1710 value >>= 1; 1711 } 1712 } 1713 1714 dst.ClearForWrite(vform); 1715 for (int i = 0; i < laneCount; ++i) { 1716 dst.SetUint(vform, i, result[i]); 1717 } 1718 return dst; 1719 } 1720 1721 1722 LogicVRegister Simulator::sshl(VectorFormat vform, 1723 LogicVRegister dst, 1724 const LogicVRegister& src1, 1725 const LogicVRegister& src2) { 1726 dst.ClearForWrite(vform); 1727 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1728 int8_t shift_val = src2.Int(vform, i); 1729 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1730 1731 // Set signed saturation state. 1732 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) { 1733 dst.SetSignedSat(i, lj_src_val >= 0); 1734 } 1735 1736 // Set unsigned saturation state. 1737 if (lj_src_val < 0) { 1738 dst.SetUnsignedSat(i, false); 1739 } else if ((shift_val > CountLeadingZeros(lj_src_val)) && 1740 (lj_src_val != 0)) { 1741 dst.SetUnsignedSat(i, true); 1742 } 1743 1744 int64_t src_val = src1.Int(vform, i); 1745 bool src_is_negative = src_val < 0; 1746 if (shift_val > 63) { 1747 dst.SetInt(vform, i, 0); 1748 } else if (shift_val < -63) { 1749 dst.SetRounding(i, src_is_negative); 1750 dst.SetInt(vform, i, src_is_negative ? -1 : 0); 1751 } else { 1752 // Use unsigned types for shifts, as behaviour is undefined for signed 1753 // lhs. 1754 uint64_t usrc_val = static_cast<uint64_t>(src_val); 1755 1756 if (shift_val < 0) { 1757 // Convert to right shift. 
1758 shift_val = -shift_val; 1759 1760 // Set rounding state by testing most-significant bit shifted out. 1761 // Rounding only needed on right shifts. 1762 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { 1763 dst.SetRounding(i, true); 1764 } 1765 1766 usrc_val >>= shift_val; 1767 1768 if (src_is_negative) { 1769 // Simulate sign-extension. 1770 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); 1771 } 1772 } else { 1773 usrc_val <<= shift_val; 1774 } 1775 dst.SetUint(vform, i, usrc_val); 1776 } 1777 } 1778 return dst; 1779 } 1780 1781 1782 LogicVRegister Simulator::ushl(VectorFormat vform, 1783 LogicVRegister dst, 1784 const LogicVRegister& src1, 1785 const LogicVRegister& src2) { 1786 dst.ClearForWrite(vform); 1787 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1788 int8_t shift_val = src2.Int(vform, i); 1789 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1790 1791 // Set saturation state. 1792 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) { 1793 dst.SetUnsignedSat(i, true); 1794 } 1795 1796 uint64_t src_val = src1.Uint(vform, i); 1797 if ((shift_val > 63) || (shift_val < -64)) { 1798 dst.SetUint(vform, i, 0); 1799 } else { 1800 if (shift_val < 0) { 1801 // Set rounding state. Rounding only needed on right shifts. 1802 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1803 dst.SetRounding(i, true); 1804 } 1805 1806 if (shift_val == -64) { 1807 src_val = 0; 1808 } else { 1809 src_val >>= -shift_val; 1810 } 1811 } else { 1812 src_val <<= shift_val; 1813 } 1814 dst.SetUint(vform, i, src_val); 1815 } 1816 } 1817 return dst; 1818 } 1819 1820 1821 LogicVRegister Simulator::neg(VectorFormat vform, 1822 LogicVRegister dst, 1823 const LogicVRegister& src) { 1824 dst.ClearForWrite(vform); 1825 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1826 // Test for signed saturation. 
1827 int64_t sa = src.Int(vform, i); 1828 if (sa == MinIntFromFormat(vform)) { 1829 dst.SetSignedSat(i, true); 1830 } 1831 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa); 1832 } 1833 return dst; 1834 } 1835 1836 1837 LogicVRegister Simulator::suqadd(VectorFormat vform, 1838 LogicVRegister dst, 1839 const LogicVRegister& src) { 1840 dst.ClearForWrite(vform); 1841 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1842 int64_t sa = dst.IntLeftJustified(vform, i); 1843 uint64_t ub = src.UintLeftJustified(vform, i); 1844 uint64_t ur = sa + ub; 1845 1846 int64_t sr; 1847 memcpy(&sr, &ur, sizeof(sr)); 1848 if (sr < sa) { // Test for signed positive saturation. 1849 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 1850 } else { 1851 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); 1852 } 1853 } 1854 return dst; 1855 } 1856 1857 1858 LogicVRegister Simulator::usqadd(VectorFormat vform, 1859 LogicVRegister dst, 1860 const LogicVRegister& src) { 1861 dst.ClearForWrite(vform); 1862 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1863 uint64_t ua = dst.UintLeftJustified(vform, i); 1864 int64_t sb = src.IntLeftJustified(vform, i); 1865 uint64_t ur = ua + sb; 1866 1867 if ((sb > 0) && (ur <= ua)) { 1868 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 1869 } else if ((sb < 0) && (ur >= ua)) { 1870 dst.SetUint(vform, i, 0); // Negative saturation. 1871 } else { 1872 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 1873 } 1874 } 1875 return dst; 1876 } 1877 1878 1879 LogicVRegister Simulator::abs(VectorFormat vform, 1880 LogicVRegister dst, 1881 const LogicVRegister& src) { 1882 dst.ClearForWrite(vform); 1883 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1884 // Test for signed saturation. 1885 int64_t sa = src.Int(vform, i); 1886 if (sa == MinIntFromFormat(vform)) { 1887 dst.SetSignedSat(i, true); 1888 } 1889 if (sa < 0) { 1890 dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa); 1891 } else { 1892 dst.SetInt(vform, i, sa); 1893 } 1894 } 1895 return dst; 1896 } 1897 1898 1899 LogicVRegister Simulator::extractnarrow(VectorFormat dstform, 1900 LogicVRegister dst, 1901 bool dstIsSigned, 1902 const LogicVRegister& src, 1903 bool srcIsSigned) { 1904 bool upperhalf = false; 1905 VectorFormat srcform = kFormatUndefined; 1906 int64_t ssrc[8]; 1907 uint64_t usrc[8]; 1908 1909 switch (dstform) { 1910 case kFormat8B: 1911 upperhalf = false; 1912 srcform = kFormat8H; 1913 break; 1914 case kFormat16B: 1915 upperhalf = true; 1916 srcform = kFormat8H; 1917 break; 1918 case kFormat4H: 1919 upperhalf = false; 1920 srcform = kFormat4S; 1921 break; 1922 case kFormat8H: 1923 upperhalf = true; 1924 srcform = kFormat4S; 1925 break; 1926 case kFormat2S: 1927 upperhalf = false; 1928 srcform = kFormat2D; 1929 break; 1930 case kFormat4S: 1931 upperhalf = true; 1932 srcform = kFormat2D; 1933 break; 1934 case kFormatB: 1935 upperhalf = false; 1936 srcform = kFormatH; 1937 break; 1938 case kFormatH: 1939 upperhalf = false; 1940 srcform = kFormatS; 1941 break; 1942 case kFormatS: 1943 upperhalf = false; 1944 srcform = kFormatD; 1945 break; 1946 default: 1947 VIXL_UNIMPLEMENTED(); 1948 } 1949 1950 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1951 ssrc[i] = src.Int(srcform, i); 1952 usrc[i] = src.Uint(srcform, i); 1953 } 1954 1955 int offset; 1956 if (upperhalf) { 1957 offset = LaneCountFromFormat(dstform) / 2; 1958 } else { 1959 offset = 0; 1960 dst.ClearForWrite(dstform); 1961 } 1962 1963 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1964 // Test for signed saturation 1965 if (ssrc[i] > MaxIntFromFormat(dstform)) { 1966 dst.SetSignedSat(offset + i, true); 1967 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 1968 dst.SetSignedSat(offset + i, false); 1969 } 1970 1971 // Test for unsigned saturation 1972 if (srcIsSigned) { 1973 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 1974 dst.SetUnsignedSat(offset + i, true); 
1975 } else if (ssrc[i] < 0) { 1976 dst.SetUnsignedSat(offset + i, false); 1977 } 1978 } else { 1979 if (usrc[i] > MaxUintFromFormat(dstform)) { 1980 dst.SetUnsignedSat(offset + i, true); 1981 } 1982 } 1983 1984 int64_t result; 1985 if (srcIsSigned) { 1986 result = ssrc[i] & MaxUintFromFormat(dstform); 1987 } else { 1988 result = usrc[i] & MaxUintFromFormat(dstform); 1989 } 1990 1991 if (dstIsSigned) { 1992 dst.SetInt(dstform, offset + i, result); 1993 } else { 1994 dst.SetUint(dstform, offset + i, result); 1995 } 1996 } 1997 return dst; 1998 } 1999 2000 2001 LogicVRegister Simulator::xtn(VectorFormat vform, 2002 LogicVRegister dst, 2003 const LogicVRegister& src) { 2004 return extractnarrow(vform, dst, true, src, true); 2005 } 2006 2007 2008 LogicVRegister Simulator::sqxtn(VectorFormat vform, 2009 LogicVRegister dst, 2010 const LogicVRegister& src) { 2011 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform); 2012 } 2013 2014 2015 LogicVRegister Simulator::sqxtun(VectorFormat vform, 2016 LogicVRegister dst, 2017 const LogicVRegister& src) { 2018 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 2019 } 2020 2021 2022 LogicVRegister Simulator::uqxtn(VectorFormat vform, 2023 LogicVRegister dst, 2024 const LogicVRegister& src) { 2025 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 2026 } 2027 2028 2029 LogicVRegister Simulator::absdiff(VectorFormat vform, 2030 LogicVRegister dst, 2031 const LogicVRegister& src1, 2032 const LogicVRegister& src2, 2033 bool issigned) { 2034 dst.ClearForWrite(vform); 2035 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2036 if (issigned) { 2037 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 2038 sr = sr > 0 ? sr : -sr; 2039 dst.SetInt(vform, i, sr); 2040 } else { 2041 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 2042 sr = sr > 0 ? 
sr : -sr; 2043 dst.SetUint(vform, i, sr); 2044 } 2045 } 2046 return dst; 2047 } 2048 2049 2050 LogicVRegister Simulator::saba(VectorFormat vform, 2051 LogicVRegister dst, 2052 const LogicVRegister& src1, 2053 const LogicVRegister& src2) { 2054 SimVRegister temp; 2055 dst.ClearForWrite(vform); 2056 absdiff(vform, temp, src1, src2, true); 2057 add(vform, dst, dst, temp); 2058 return dst; 2059 } 2060 2061 2062 LogicVRegister Simulator::uaba(VectorFormat vform, 2063 LogicVRegister dst, 2064 const LogicVRegister& src1, 2065 const LogicVRegister& src2) { 2066 SimVRegister temp; 2067 dst.ClearForWrite(vform); 2068 absdiff(vform, temp, src1, src2, false); 2069 add(vform, dst, dst, temp); 2070 return dst; 2071 } 2072 2073 2074 LogicVRegister Simulator::not_(VectorFormat vform, 2075 LogicVRegister dst, 2076 const LogicVRegister& src) { 2077 dst.ClearForWrite(vform); 2078 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2079 dst.SetUint(vform, i, ~src.Uint(vform, i)); 2080 } 2081 return dst; 2082 } 2083 2084 2085 LogicVRegister Simulator::rbit(VectorFormat vform, 2086 LogicVRegister dst, 2087 const LogicVRegister& src) { 2088 uint64_t result[16]; 2089 int laneCount = LaneCountFromFormat(vform); 2090 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 2091 uint64_t reversed_value; 2092 uint64_t value; 2093 for (int i = 0; i < laneCount; i++) { 2094 value = src.Uint(vform, i); 2095 reversed_value = 0; 2096 for (int j = 0; j < laneSizeInBits; j++) { 2097 reversed_value = (reversed_value << 1) | (value & 1); 2098 value >>= 1; 2099 } 2100 result[i] = reversed_value; 2101 } 2102 2103 dst.ClearForWrite(vform); 2104 for (int i = 0; i < laneCount; ++i) { 2105 dst.SetUint(vform, i, result[i]); 2106 } 2107 return dst; 2108 } 2109 2110 2111 LogicVRegister Simulator::rev(VectorFormat vform, 2112 LogicVRegister dst, 2113 const LogicVRegister& src, 2114 int revSize) { 2115 uint64_t result[16]; 2116 int laneCount = LaneCountFromFormat(vform); 2117 int laneSize = 
LaneSizeInBytesFromFormat(vform); 2118 int lanesPerLoop = revSize / laneSize; 2119 for (int i = 0; i < laneCount; i += lanesPerLoop) { 2120 for (int j = 0; j < lanesPerLoop; j++) { 2121 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 2122 } 2123 } 2124 dst.ClearForWrite(vform); 2125 for (int i = 0; i < laneCount; ++i) { 2126 dst.SetUint(vform, i, result[i]); 2127 } 2128 return dst; 2129 } 2130 2131 2132 LogicVRegister Simulator::rev16(VectorFormat vform, 2133 LogicVRegister dst, 2134 const LogicVRegister& src) { 2135 return rev(vform, dst, src, 2); 2136 } 2137 2138 2139 LogicVRegister Simulator::rev32(VectorFormat vform, 2140 LogicVRegister dst, 2141 const LogicVRegister& src) { 2142 return rev(vform, dst, src, 4); 2143 } 2144 2145 2146 LogicVRegister Simulator::rev64(VectorFormat vform, 2147 LogicVRegister dst, 2148 const LogicVRegister& src) { 2149 return rev(vform, dst, src, 8); 2150 } 2151 2152 2153 LogicVRegister Simulator::addlp(VectorFormat vform, 2154 LogicVRegister dst, 2155 const LogicVRegister& src, 2156 bool is_signed, 2157 bool do_accumulate) { 2158 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 2159 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32); 2160 VIXL_ASSERT(LaneCountFromFormat(vform) <= 8); 2161 2162 uint64_t result[8]; 2163 int lane_count = LaneCountFromFormat(vform); 2164 for (int i = 0; i < lane_count; i++) { 2165 if (is_signed) { 2166 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + 2167 src.Int(vformsrc, 2 * i + 1)); 2168 } else { 2169 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 2170 } 2171 } 2172 2173 dst.ClearForWrite(vform); 2174 for (int i = 0; i < lane_count; ++i) { 2175 if (do_accumulate) { 2176 result[i] += dst.Uint(vform, i); 2177 } 2178 dst.SetUint(vform, i, result[i]); 2179 } 2180 2181 return dst; 2182 } 2183 2184 2185 LogicVRegister Simulator::saddlp(VectorFormat vform, 2186 LogicVRegister dst, 2187 const LogicVRegister& src) { 2188 return addlp(vform, 
dst, src, true, false); 2189 } 2190 2191 2192 LogicVRegister Simulator::uaddlp(VectorFormat vform, 2193 LogicVRegister dst, 2194 const LogicVRegister& src) { 2195 return addlp(vform, dst, src, false, false); 2196 } 2197 2198 2199 LogicVRegister Simulator::sadalp(VectorFormat vform, 2200 LogicVRegister dst, 2201 const LogicVRegister& src) { 2202 return addlp(vform, dst, src, true, true); 2203 } 2204 2205 2206 LogicVRegister Simulator::uadalp(VectorFormat vform, 2207 LogicVRegister dst, 2208 const LogicVRegister& src) { 2209 return addlp(vform, dst, src, false, true); 2210 } 2211 2212 2213 LogicVRegister Simulator::ext(VectorFormat vform, 2214 LogicVRegister dst, 2215 const LogicVRegister& src1, 2216 const LogicVRegister& src2, 2217 int index) { 2218 uint8_t result[16]; 2219 int laneCount = LaneCountFromFormat(vform); 2220 for (int i = 0; i < laneCount - index; ++i) { 2221 result[i] = src1.Uint(vform, i + index); 2222 } 2223 for (int i = 0; i < index; ++i) { 2224 result[laneCount - index + i] = src2.Uint(vform, i); 2225 } 2226 dst.ClearForWrite(vform); 2227 for (int i = 0; i < laneCount; ++i) { 2228 dst.SetUint(vform, i, result[i]); 2229 } 2230 return dst; 2231 } 2232 2233 template <typename T> 2234 LogicVRegister Simulator::fcadd(VectorFormat vform, 2235 LogicVRegister dst, // d 2236 const LogicVRegister& src1, // n 2237 const LogicVRegister& src2, // m 2238 int rot) { 2239 int elements = LaneCountFromFormat(vform); 2240 2241 T element1, element3; 2242 rot = (rot == 1) ? 
270 : 90; 2243 2244 // Loop example: 2245 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) 2246 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) 2247 2248 for (int e = 0; e <= (elements / 2) - 1; e++) { 2249 switch (rot) { 2250 case 90: 2251 element1 = FPNeg(src2.Float<T>(e * 2 + 1)); 2252 element3 = src2.Float<T>(e * 2); 2253 break; 2254 case 270: 2255 element1 = src2.Float<T>(e * 2 + 1); 2256 element3 = FPNeg(src2.Float<T>(e * 2)); 2257 break; 2258 default: 2259 VIXL_UNREACHABLE(); 2260 return dst; // prevents "element(n) may be unintialized" errors 2261 } 2262 dst.ClearForWrite(vform); 2263 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1)); 2264 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3)); 2265 } 2266 return dst; 2267 } 2268 2269 2270 LogicVRegister Simulator::fcadd(VectorFormat vform, 2271 LogicVRegister dst, // d 2272 const LogicVRegister& src1, // n 2273 const LogicVRegister& src2, // m 2274 int rot) { 2275 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 2276 VIXL_UNIMPLEMENTED(); 2277 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 2278 fcadd<float>(vform, dst, src1, src2, rot); 2279 } else { 2280 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 2281 fcadd<double>(vform, dst, src1, src2, rot); 2282 } 2283 return dst; 2284 } 2285 2286 2287 template <typename T> 2288 LogicVRegister Simulator::fcmla(VectorFormat vform, 2289 LogicVRegister dst, // d 2290 const LogicVRegister& src1, // n 2291 const LogicVRegister& src2, // m 2292 int index, 2293 int rot) { 2294 int elements = LaneCountFromFormat(vform); 2295 2296 T element1, element2, element3, element4; 2297 rot *= 90; 2298 2299 // Loop example: 2300 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) 2301 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) 2302 2303 for (int e = 0; e <= (elements / 2) - 1; e++) { 2304 switch (rot) { 2305 case 0: 2306 element1 = 
src2.Float<T>(index * 2); 2307 element2 = src1.Float<T>(e * 2); 2308 element3 = src2.Float<T>(index * 2 + 1); 2309 element4 = src1.Float<T>(e * 2); 2310 break; 2311 case 90: 2312 element1 = FPNeg(src2.Float<T>(index * 2 + 1)); 2313 element2 = src1.Float<T>(e * 2 + 1); 2314 element3 = src2.Float<T>(index * 2); 2315 element4 = src1.Float<T>(e * 2 + 1); 2316 break; 2317 case 180: 2318 element1 = FPNeg(src2.Float<T>(index * 2)); 2319 element2 = src1.Float<T>(e * 2); 2320 element3 = FPNeg(src2.Float<T>(index * 2 + 1)); 2321 element4 = src1.Float<T>(e * 2); 2322 break; 2323 case 270: 2324 element1 = src2.Float<T>(index * 2 + 1); 2325 element2 = src1.Float<T>(e * 2 + 1); 2326 element3 = FPNeg(src2.Float<T>(index * 2)); 2327 element4 = src1.Float<T>(e * 2 + 1); 2328 break; 2329 default: 2330 VIXL_UNREACHABLE(); 2331 return dst; // prevents "element(n) may be unintialized" errors 2332 } 2333 dst.ClearForWrite(vform); 2334 dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); 2335 dst.SetFloat<T>(e * 2 + 1, 2336 FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); 2337 } 2338 return dst; 2339 } 2340 2341 2342 template <typename T> 2343 LogicVRegister Simulator::fcmla(VectorFormat vform, 2344 LogicVRegister dst, // d 2345 const LogicVRegister& src1, // n 2346 const LogicVRegister& src2, // m 2347 int rot) { 2348 int elements = LaneCountFromFormat(vform); 2349 2350 T element1, element2, element3, element4; 2351 rot *= 90; 2352 2353 // Loop example: 2354 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i) 2355 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i) 2356 2357 for (int e = 0; e <= (elements / 2) - 1; e++) { 2358 switch (rot) { 2359 case 0: 2360 element1 = src2.Float<T>(e * 2); 2361 element2 = src1.Float<T>(e * 2); 2362 element3 = src2.Float<T>(e * 2 + 1); 2363 element4 = src1.Float<T>(e * 2); 2364 break; 2365 case 90: 2366 element1 = FPNeg(src2.Float<T>(e * 2 + 1)); 2367 element2 = src1.Float<T>(e * 2 
+ 1); 2368 element3 = src2.Float<T>(e * 2); 2369 element4 = src1.Float<T>(e * 2 + 1); 2370 break; 2371 case 180: 2372 element1 = FPNeg(src2.Float<T>(e * 2)); 2373 element2 = src1.Float<T>(e * 2); 2374 element3 = FPNeg(src2.Float<T>(e * 2 + 1)); 2375 element4 = src1.Float<T>(e * 2); 2376 break; 2377 case 270: 2378 element1 = src2.Float<T>(e * 2 + 1); 2379 element2 = src1.Float<T>(e * 2 + 1); 2380 element3 = FPNeg(src2.Float<T>(e * 2)); 2381 element4 = src1.Float<T>(e * 2 + 1); 2382 break; 2383 default: 2384 VIXL_UNREACHABLE(); 2385 return dst; // prevents "element(n) may be unintialized" errors 2386 } 2387 dst.ClearForWrite(vform); 2388 dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1)); 2389 dst.SetFloat<T>(e * 2 + 1, 2390 FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3)); 2391 } 2392 return dst; 2393 } 2394 2395 2396 LogicVRegister Simulator::fcmla(VectorFormat vform, 2397 LogicVRegister dst, // d 2398 const LogicVRegister& src1, // n 2399 const LogicVRegister& src2, // m 2400 int rot) { 2401 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 2402 VIXL_UNIMPLEMENTED(); 2403 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 2404 fcmla<float>(vform, dst, src1, src2, rot); 2405 } else { 2406 fcmla<double>(vform, dst, src1, src2, rot); 2407 } 2408 return dst; 2409 } 2410 2411 2412 LogicVRegister Simulator::fcmla(VectorFormat vform, 2413 LogicVRegister dst, // d 2414 const LogicVRegister& src1, // n 2415 const LogicVRegister& src2, // m 2416 int index, 2417 int rot) { 2418 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 2419 VIXL_UNIMPLEMENTED(); 2420 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 2421 fcmla<float>(vform, dst, src1, src2, index, rot); 2422 } else { 2423 fcmla<double>(vform, dst, src1, src2, index, rot); 2424 } 2425 return dst; 2426 } 2427 2428 2429 LogicVRegister Simulator::dup_element(VectorFormat vform, 2430 LogicVRegister dst, 2431 const LogicVRegister& src, 2432 int src_index) { 2433 int 
laneCount = LaneCountFromFormat(vform); 2434 uint64_t value = src.Uint(vform, src_index); 2435 dst.ClearForWrite(vform); 2436 for (int i = 0; i < laneCount; ++i) { 2437 dst.SetUint(vform, i, value); 2438 } 2439 return dst; 2440 } 2441 2442 2443 LogicVRegister Simulator::dup_immediate(VectorFormat vform, 2444 LogicVRegister dst, 2445 uint64_t imm) { 2446 int laneCount = LaneCountFromFormat(vform); 2447 uint64_t value = imm & MaxUintFromFormat(vform); 2448 dst.ClearForWrite(vform); 2449 for (int i = 0; i < laneCount; ++i) { 2450 dst.SetUint(vform, i, value); 2451 } 2452 return dst; 2453 } 2454 2455 2456 LogicVRegister Simulator::ins_element(VectorFormat vform, 2457 LogicVRegister dst, 2458 int dst_index, 2459 const LogicVRegister& src, 2460 int src_index) { 2461 dst.SetUint(vform, dst_index, src.Uint(vform, src_index)); 2462 return dst; 2463 } 2464 2465 2466 LogicVRegister Simulator::ins_immediate(VectorFormat vform, 2467 LogicVRegister dst, 2468 int dst_index, 2469 uint64_t imm) { 2470 uint64_t value = imm & MaxUintFromFormat(vform); 2471 dst.SetUint(vform, dst_index, value); 2472 return dst; 2473 } 2474 2475 2476 LogicVRegister Simulator::movi(VectorFormat vform, 2477 LogicVRegister dst, 2478 uint64_t imm) { 2479 int laneCount = LaneCountFromFormat(vform); 2480 dst.ClearForWrite(vform); 2481 for (int i = 0; i < laneCount; ++i) { 2482 dst.SetUint(vform, i, imm); 2483 } 2484 return dst; 2485 } 2486 2487 2488 LogicVRegister Simulator::mvni(VectorFormat vform, 2489 LogicVRegister dst, 2490 uint64_t imm) { 2491 int laneCount = LaneCountFromFormat(vform); 2492 dst.ClearForWrite(vform); 2493 for (int i = 0; i < laneCount; ++i) { 2494 dst.SetUint(vform, i, ~imm); 2495 } 2496 return dst; 2497 } 2498 2499 2500 LogicVRegister Simulator::orr(VectorFormat vform, 2501 LogicVRegister dst, 2502 const LogicVRegister& src, 2503 uint64_t imm) { 2504 uint64_t result[16]; 2505 int laneCount = LaneCountFromFormat(vform); 2506 for (int i = 0; i < laneCount; ++i) { 2507 result[i] = 
src.Uint(vform, i) | imm; 2508 } 2509 dst.ClearForWrite(vform); 2510 for (int i = 0; i < laneCount; ++i) { 2511 dst.SetUint(vform, i, result[i]); 2512 } 2513 return dst; 2514 } 2515 2516 2517 LogicVRegister Simulator::uxtl(VectorFormat vform, 2518 LogicVRegister dst, 2519 const LogicVRegister& src) { 2520 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2521 2522 dst.ClearForWrite(vform); 2523 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2524 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2525 } 2526 return dst; 2527 } 2528 2529 2530 LogicVRegister Simulator::sxtl(VectorFormat vform, 2531 LogicVRegister dst, 2532 const LogicVRegister& src) { 2533 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2534 2535 dst.ClearForWrite(vform); 2536 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2537 dst.SetInt(vform, i, src.Int(vform_half, i)); 2538 } 2539 return dst; 2540 } 2541 2542 2543 LogicVRegister Simulator::uxtl2(VectorFormat vform, 2544 LogicVRegister dst, 2545 const LogicVRegister& src) { 2546 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2547 int lane_count = LaneCountFromFormat(vform); 2548 2549 dst.ClearForWrite(vform); 2550 for (int i = 0; i < lane_count; i++) { 2551 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2552 } 2553 return dst; 2554 } 2555 2556 2557 LogicVRegister Simulator::sxtl2(VectorFormat vform, 2558 LogicVRegister dst, 2559 const LogicVRegister& src) { 2560 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2561 int lane_count = LaneCountFromFormat(vform); 2562 2563 dst.ClearForWrite(vform); 2564 for (int i = 0; i < lane_count; i++) { 2565 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2566 } 2567 return dst; 2568 } 2569 2570 2571 LogicVRegister Simulator::shrn(VectorFormat vform, 2572 LogicVRegister dst, 2573 const LogicVRegister& src, 2574 int shift) { 2575 SimVRegister temp; 2576 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2577 VectorFormat vform_dst = vform; 
2578 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2579 return extractnarrow(vform_dst, dst, false, shifted_src, false); 2580 } 2581 2582 2583 LogicVRegister Simulator::shrn2(VectorFormat vform, 2584 LogicVRegister dst, 2585 const LogicVRegister& src, 2586 int shift) { 2587 SimVRegister temp; 2588 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2589 VectorFormat vformdst = vform; 2590 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2591 return extractnarrow(vformdst, dst, false, shifted_src, false); 2592 } 2593 2594 2595 LogicVRegister Simulator::rshrn(VectorFormat vform, 2596 LogicVRegister dst, 2597 const LogicVRegister& src, 2598 int shift) { 2599 SimVRegister temp; 2600 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2601 VectorFormat vformdst = vform; 2602 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2603 return extractnarrow(vformdst, dst, false, shifted_src, false); 2604 } 2605 2606 2607 LogicVRegister Simulator::rshrn2(VectorFormat vform, 2608 LogicVRegister dst, 2609 const LogicVRegister& src, 2610 int shift) { 2611 SimVRegister temp; 2612 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2613 VectorFormat vformdst = vform; 2614 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2615 return extractnarrow(vformdst, dst, false, shifted_src, false); 2616 } 2617 2618 2619 LogicVRegister Simulator::Table(VectorFormat vform, 2620 LogicVRegister dst, 2621 const LogicVRegister& ind, 2622 bool zero_out_of_bounds, 2623 const LogicVRegister* tab1, 2624 const LogicVRegister* tab2, 2625 const LogicVRegister* tab3, 2626 const LogicVRegister* tab4) { 2627 VIXL_ASSERT(tab1 != NULL); 2628 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; 2629 uint64_t result[kMaxLanesPerVector]; 2630 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2631 result[i] = zero_out_of_bounds ? 
0 : dst.Uint(kFormat16B, i); 2632 } 2633 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2634 uint64_t j = ind.Uint(vform, i); 2635 int tab_idx = static_cast<int>(j >> 4); 2636 int j_idx = static_cast<int>(j & 15); 2637 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) { 2638 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); 2639 } 2640 } 2641 dst.SetUintArray(vform, result); 2642 return dst; 2643 } 2644 2645 2646 LogicVRegister Simulator::tbl(VectorFormat vform, 2647 LogicVRegister dst, 2648 const LogicVRegister& tab, 2649 const LogicVRegister& ind) { 2650 return Table(vform, dst, ind, true, &tab); 2651 } 2652 2653 2654 LogicVRegister Simulator::tbl(VectorFormat vform, 2655 LogicVRegister dst, 2656 const LogicVRegister& tab, 2657 const LogicVRegister& tab2, 2658 const LogicVRegister& ind) { 2659 return Table(vform, dst, ind, true, &tab, &tab2); 2660 } 2661 2662 2663 LogicVRegister Simulator::tbl(VectorFormat vform, 2664 LogicVRegister dst, 2665 const LogicVRegister& tab, 2666 const LogicVRegister& tab2, 2667 const LogicVRegister& tab3, 2668 const LogicVRegister& ind) { 2669 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); 2670 } 2671 2672 2673 LogicVRegister Simulator::tbl(VectorFormat vform, 2674 LogicVRegister dst, 2675 const LogicVRegister& tab, 2676 const LogicVRegister& tab2, 2677 const LogicVRegister& tab3, 2678 const LogicVRegister& tab4, 2679 const LogicVRegister& ind) { 2680 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); 2681 } 2682 2683 2684 LogicVRegister Simulator::tbx(VectorFormat vform, 2685 LogicVRegister dst, 2686 const LogicVRegister& tab, 2687 const LogicVRegister& ind) { 2688 return Table(vform, dst, ind, false, &tab); 2689 } 2690 2691 2692 LogicVRegister Simulator::tbx(VectorFormat vform, 2693 LogicVRegister dst, 2694 const LogicVRegister& tab, 2695 const LogicVRegister& tab2, 2696 const LogicVRegister& ind) { 2697 return Table(vform, dst, ind, false, &tab, &tab2); 2698 } 2699 2700 2701 LogicVRegister 
Simulator::tbx(VectorFormat vform, 2702 LogicVRegister dst, 2703 const LogicVRegister& tab, 2704 const LogicVRegister& tab2, 2705 const LogicVRegister& tab3, 2706 const LogicVRegister& ind) { 2707 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); 2708 } 2709 2710 2711 LogicVRegister Simulator::tbx(VectorFormat vform, 2712 LogicVRegister dst, 2713 const LogicVRegister& tab, 2714 const LogicVRegister& tab2, 2715 const LogicVRegister& tab3, 2716 const LogicVRegister& tab4, 2717 const LogicVRegister& ind) { 2718 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); 2719 } 2720 2721 2722 LogicVRegister Simulator::uqshrn(VectorFormat vform, 2723 LogicVRegister dst, 2724 const LogicVRegister& src, 2725 int shift) { 2726 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2727 } 2728 2729 2730 LogicVRegister Simulator::uqshrn2(VectorFormat vform, 2731 LogicVRegister dst, 2732 const LogicVRegister& src, 2733 int shift) { 2734 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2735 } 2736 2737 2738 LogicVRegister Simulator::uqrshrn(VectorFormat vform, 2739 LogicVRegister dst, 2740 const LogicVRegister& src, 2741 int shift) { 2742 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2743 } 2744 2745 2746 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, 2747 LogicVRegister dst, 2748 const LogicVRegister& src, 2749 int shift) { 2750 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2751 } 2752 2753 2754 LogicVRegister Simulator::sqshrn(VectorFormat vform, 2755 LogicVRegister dst, 2756 const LogicVRegister& src, 2757 int shift) { 2758 SimVRegister temp; 2759 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2760 VectorFormat vformdst = vform; 2761 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2762 return sqxtn(vformdst, dst, shifted_src); 2763 } 2764 2765 2766 LogicVRegister Simulator::sqshrn2(VectorFormat vform, 2767 LogicVRegister dst, 2768 const LogicVRegister& src, 2769 int shift) { 
2770 SimVRegister temp; 2771 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2772 VectorFormat vformdst = vform; 2773 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2774 return sqxtn(vformdst, dst, shifted_src); 2775 } 2776 2777 2778 LogicVRegister Simulator::sqrshrn(VectorFormat vform, 2779 LogicVRegister dst, 2780 const LogicVRegister& src, 2781 int shift) { 2782 SimVRegister temp; 2783 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2784 VectorFormat vformdst = vform; 2785 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2786 return sqxtn(vformdst, dst, shifted_src); 2787 } 2788 2789 2790 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, 2791 LogicVRegister dst, 2792 const LogicVRegister& src, 2793 int shift) { 2794 SimVRegister temp; 2795 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2796 VectorFormat vformdst = vform; 2797 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2798 return sqxtn(vformdst, dst, shifted_src); 2799 } 2800 2801 2802 LogicVRegister Simulator::sqshrun(VectorFormat vform, 2803 LogicVRegister dst, 2804 const LogicVRegister& src, 2805 int shift) { 2806 SimVRegister temp; 2807 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2808 VectorFormat vformdst = vform; 2809 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2810 return sqxtun(vformdst, dst, shifted_src); 2811 } 2812 2813 2814 LogicVRegister Simulator::sqshrun2(VectorFormat vform, 2815 LogicVRegister dst, 2816 const LogicVRegister& src, 2817 int shift) { 2818 SimVRegister temp; 2819 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2820 VectorFormat vformdst = vform; 2821 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2822 return sqxtun(vformdst, dst, shifted_src); 2823 } 2824 2825 2826 LogicVRegister Simulator::sqrshrun(VectorFormat vform, 2827 LogicVRegister dst, 2828 
const LogicVRegister& src, 2829 int shift) { 2830 SimVRegister temp; 2831 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2832 VectorFormat vformdst = vform; 2833 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2834 return sqxtun(vformdst, dst, shifted_src); 2835 } 2836 2837 2838 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, 2839 LogicVRegister dst, 2840 const LogicVRegister& src, 2841 int shift) { 2842 SimVRegister temp; 2843 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2844 VectorFormat vformdst = vform; 2845 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2846 return sqxtun(vformdst, dst, shifted_src); 2847 } 2848 2849 2850 LogicVRegister Simulator::uaddl(VectorFormat vform, 2851 LogicVRegister dst, 2852 const LogicVRegister& src1, 2853 const LogicVRegister& src2) { 2854 SimVRegister temp1, temp2; 2855 uxtl(vform, temp1, src1); 2856 uxtl(vform, temp2, src2); 2857 add(vform, dst, temp1, temp2); 2858 return dst; 2859 } 2860 2861 2862 LogicVRegister Simulator::uaddl2(VectorFormat vform, 2863 LogicVRegister dst, 2864 const LogicVRegister& src1, 2865 const LogicVRegister& src2) { 2866 SimVRegister temp1, temp2; 2867 uxtl2(vform, temp1, src1); 2868 uxtl2(vform, temp2, src2); 2869 add(vform, dst, temp1, temp2); 2870 return dst; 2871 } 2872 2873 2874 LogicVRegister Simulator::uaddw(VectorFormat vform, 2875 LogicVRegister dst, 2876 const LogicVRegister& src1, 2877 const LogicVRegister& src2) { 2878 SimVRegister temp; 2879 uxtl(vform, temp, src2); 2880 add(vform, dst, src1, temp); 2881 return dst; 2882 } 2883 2884 2885 LogicVRegister Simulator::uaddw2(VectorFormat vform, 2886 LogicVRegister dst, 2887 const LogicVRegister& src1, 2888 const LogicVRegister& src2) { 2889 SimVRegister temp; 2890 uxtl2(vform, temp, src2); 2891 add(vform, dst, src1, temp); 2892 return dst; 2893 } 2894 2895 2896 LogicVRegister Simulator::saddl(VectorFormat vform, 2897 LogicVRegister 
dst, 2898 const LogicVRegister& src1, 2899 const LogicVRegister& src2) { 2900 SimVRegister temp1, temp2; 2901 sxtl(vform, temp1, src1); 2902 sxtl(vform, temp2, src2); 2903 add(vform, dst, temp1, temp2); 2904 return dst; 2905 } 2906 2907 2908 LogicVRegister Simulator::saddl2(VectorFormat vform, 2909 LogicVRegister dst, 2910 const LogicVRegister& src1, 2911 const LogicVRegister& src2) { 2912 SimVRegister temp1, temp2; 2913 sxtl2(vform, temp1, src1); 2914 sxtl2(vform, temp2, src2); 2915 add(vform, dst, temp1, temp2); 2916 return dst; 2917 } 2918 2919 2920 LogicVRegister Simulator::saddw(VectorFormat vform, 2921 LogicVRegister dst, 2922 const LogicVRegister& src1, 2923 const LogicVRegister& src2) { 2924 SimVRegister temp; 2925 sxtl(vform, temp, src2); 2926 add(vform, dst, src1, temp); 2927 return dst; 2928 } 2929 2930 2931 LogicVRegister Simulator::saddw2(VectorFormat vform, 2932 LogicVRegister dst, 2933 const LogicVRegister& src1, 2934 const LogicVRegister& src2) { 2935 SimVRegister temp; 2936 sxtl2(vform, temp, src2); 2937 add(vform, dst, src1, temp); 2938 return dst; 2939 } 2940 2941 2942 LogicVRegister Simulator::usubl(VectorFormat vform, 2943 LogicVRegister dst, 2944 const LogicVRegister& src1, 2945 const LogicVRegister& src2) { 2946 SimVRegister temp1, temp2; 2947 uxtl(vform, temp1, src1); 2948 uxtl(vform, temp2, src2); 2949 sub(vform, dst, temp1, temp2); 2950 return dst; 2951 } 2952 2953 2954 LogicVRegister Simulator::usubl2(VectorFormat vform, 2955 LogicVRegister dst, 2956 const LogicVRegister& src1, 2957 const LogicVRegister& src2) { 2958 SimVRegister temp1, temp2; 2959 uxtl2(vform, temp1, src1); 2960 uxtl2(vform, temp2, src2); 2961 sub(vform, dst, temp1, temp2); 2962 return dst; 2963 } 2964 2965 2966 LogicVRegister Simulator::usubw(VectorFormat vform, 2967 LogicVRegister dst, 2968 const LogicVRegister& src1, 2969 const LogicVRegister& src2) { 2970 SimVRegister temp; 2971 uxtl(vform, temp, src2); 2972 sub(vform, dst, src1, temp); 2973 return dst; 2974 } 2975 
2976 2977 LogicVRegister Simulator::usubw2(VectorFormat vform, 2978 LogicVRegister dst, 2979 const LogicVRegister& src1, 2980 const LogicVRegister& src2) { 2981 SimVRegister temp; 2982 uxtl2(vform, temp, src2); 2983 sub(vform, dst, src1, temp); 2984 return dst; 2985 } 2986 2987 2988 LogicVRegister Simulator::ssubl(VectorFormat vform, 2989 LogicVRegister dst, 2990 const LogicVRegister& src1, 2991 const LogicVRegister& src2) { 2992 SimVRegister temp1, temp2; 2993 sxtl(vform, temp1, src1); 2994 sxtl(vform, temp2, src2); 2995 sub(vform, dst, temp1, temp2); 2996 return dst; 2997 } 2998 2999 3000 LogicVRegister Simulator::ssubl2(VectorFormat vform, 3001 LogicVRegister dst, 3002 const LogicVRegister& src1, 3003 const LogicVRegister& src2) { 3004 SimVRegister temp1, temp2; 3005 sxtl2(vform, temp1, src1); 3006 sxtl2(vform, temp2, src2); 3007 sub(vform, dst, temp1, temp2); 3008 return dst; 3009 } 3010 3011 3012 LogicVRegister Simulator::ssubw(VectorFormat vform, 3013 LogicVRegister dst, 3014 const LogicVRegister& src1, 3015 const LogicVRegister& src2) { 3016 SimVRegister temp; 3017 sxtl(vform, temp, src2); 3018 sub(vform, dst, src1, temp); 3019 return dst; 3020 } 3021 3022 3023 LogicVRegister Simulator::ssubw2(VectorFormat vform, 3024 LogicVRegister dst, 3025 const LogicVRegister& src1, 3026 const LogicVRegister& src2) { 3027 SimVRegister temp; 3028 sxtl2(vform, temp, src2); 3029 sub(vform, dst, src1, temp); 3030 return dst; 3031 } 3032 3033 3034 LogicVRegister Simulator::uabal(VectorFormat vform, 3035 LogicVRegister dst, 3036 const LogicVRegister& src1, 3037 const LogicVRegister& src2) { 3038 SimVRegister temp1, temp2; 3039 uxtl(vform, temp1, src1); 3040 uxtl(vform, temp2, src2); 3041 uaba(vform, dst, temp1, temp2); 3042 return dst; 3043 } 3044 3045 3046 LogicVRegister Simulator::uabal2(VectorFormat vform, 3047 LogicVRegister dst, 3048 const LogicVRegister& src1, 3049 const LogicVRegister& src2) { 3050 SimVRegister temp1, temp2; 3051 uxtl2(vform, temp1, src1); 3052 
uxtl2(vform, temp2, src2); 3053 uaba(vform, dst, temp1, temp2); 3054 return dst; 3055 } 3056 3057 3058 LogicVRegister Simulator::sabal(VectorFormat vform, 3059 LogicVRegister dst, 3060 const LogicVRegister& src1, 3061 const LogicVRegister& src2) { 3062 SimVRegister temp1, temp2; 3063 sxtl(vform, temp1, src1); 3064 sxtl(vform, temp2, src2); 3065 saba(vform, dst, temp1, temp2); 3066 return dst; 3067 } 3068 3069 3070 LogicVRegister Simulator::sabal2(VectorFormat vform, 3071 LogicVRegister dst, 3072 const LogicVRegister& src1, 3073 const LogicVRegister& src2) { 3074 SimVRegister temp1, temp2; 3075 sxtl2(vform, temp1, src1); 3076 sxtl2(vform, temp2, src2); 3077 saba(vform, dst, temp1, temp2); 3078 return dst; 3079 } 3080 3081 3082 LogicVRegister Simulator::uabdl(VectorFormat vform, 3083 LogicVRegister dst, 3084 const LogicVRegister& src1, 3085 const LogicVRegister& src2) { 3086 SimVRegister temp1, temp2; 3087 uxtl(vform, temp1, src1); 3088 uxtl(vform, temp2, src2); 3089 absdiff(vform, dst, temp1, temp2, false); 3090 return dst; 3091 } 3092 3093 3094 LogicVRegister Simulator::uabdl2(VectorFormat vform, 3095 LogicVRegister dst, 3096 const LogicVRegister& src1, 3097 const LogicVRegister& src2) { 3098 SimVRegister temp1, temp2; 3099 uxtl2(vform, temp1, src1); 3100 uxtl2(vform, temp2, src2); 3101 absdiff(vform, dst, temp1, temp2, false); 3102 return dst; 3103 } 3104 3105 3106 LogicVRegister Simulator::sabdl(VectorFormat vform, 3107 LogicVRegister dst, 3108 const LogicVRegister& src1, 3109 const LogicVRegister& src2) { 3110 SimVRegister temp1, temp2; 3111 sxtl(vform, temp1, src1); 3112 sxtl(vform, temp2, src2); 3113 absdiff(vform, dst, temp1, temp2, true); 3114 return dst; 3115 } 3116 3117 3118 LogicVRegister Simulator::sabdl2(VectorFormat vform, 3119 LogicVRegister dst, 3120 const LogicVRegister& src1, 3121 const LogicVRegister& src2) { 3122 SimVRegister temp1, temp2; 3123 sxtl2(vform, temp1, src1); 3124 sxtl2(vform, temp2, src2); 3125 absdiff(vform, dst, temp1, temp2, 
true); 3126 return dst; 3127 } 3128 3129 3130 LogicVRegister Simulator::umull(VectorFormat vform, 3131 LogicVRegister dst, 3132 const LogicVRegister& src1, 3133 const LogicVRegister& src2) { 3134 SimVRegister temp1, temp2; 3135 uxtl(vform, temp1, src1); 3136 uxtl(vform, temp2, src2); 3137 mul(vform, dst, temp1, temp2); 3138 return dst; 3139 } 3140 3141 3142 LogicVRegister Simulator::umull2(VectorFormat vform, 3143 LogicVRegister dst, 3144 const LogicVRegister& src1, 3145 const LogicVRegister& src2) { 3146 SimVRegister temp1, temp2; 3147 uxtl2(vform, temp1, src1); 3148 uxtl2(vform, temp2, src2); 3149 mul(vform, dst, temp1, temp2); 3150 return dst; 3151 } 3152 3153 3154 LogicVRegister Simulator::smull(VectorFormat vform, 3155 LogicVRegister dst, 3156 const LogicVRegister& src1, 3157 const LogicVRegister& src2) { 3158 SimVRegister temp1, temp2; 3159 sxtl(vform, temp1, src1); 3160 sxtl(vform, temp2, src2); 3161 mul(vform, dst, temp1, temp2); 3162 return dst; 3163 } 3164 3165 3166 LogicVRegister Simulator::smull2(VectorFormat vform, 3167 LogicVRegister dst, 3168 const LogicVRegister& src1, 3169 const LogicVRegister& src2) { 3170 SimVRegister temp1, temp2; 3171 sxtl2(vform, temp1, src1); 3172 sxtl2(vform, temp2, src2); 3173 mul(vform, dst, temp1, temp2); 3174 return dst; 3175 } 3176 3177 3178 LogicVRegister Simulator::umlsl(VectorFormat vform, 3179 LogicVRegister dst, 3180 const LogicVRegister& src1, 3181 const LogicVRegister& src2) { 3182 SimVRegister temp1, temp2; 3183 uxtl(vform, temp1, src1); 3184 uxtl(vform, temp2, src2); 3185 mls(vform, dst, temp1, temp2); 3186 return dst; 3187 } 3188 3189 3190 LogicVRegister Simulator::umlsl2(VectorFormat vform, 3191 LogicVRegister dst, 3192 const LogicVRegister& src1, 3193 const LogicVRegister& src2) { 3194 SimVRegister temp1, temp2; 3195 uxtl2(vform, temp1, src1); 3196 uxtl2(vform, temp2, src2); 3197 mls(vform, dst, temp1, temp2); 3198 return dst; 3199 } 3200 3201 3202 LogicVRegister Simulator::smlsl(VectorFormat vform, 3203 
LogicVRegister dst, 3204 const LogicVRegister& src1, 3205 const LogicVRegister& src2) { 3206 SimVRegister temp1, temp2; 3207 sxtl(vform, temp1, src1); 3208 sxtl(vform, temp2, src2); 3209 mls(vform, dst, temp1, temp2); 3210 return dst; 3211 } 3212 3213 3214 LogicVRegister Simulator::smlsl2(VectorFormat vform, 3215 LogicVRegister dst, 3216 const LogicVRegister& src1, 3217 const LogicVRegister& src2) { 3218 SimVRegister temp1, temp2; 3219 sxtl2(vform, temp1, src1); 3220 sxtl2(vform, temp2, src2); 3221 mls(vform, dst, temp1, temp2); 3222 return dst; 3223 } 3224 3225 3226 LogicVRegister Simulator::umlal(VectorFormat vform, 3227 LogicVRegister dst, 3228 const LogicVRegister& src1, 3229 const LogicVRegister& src2) { 3230 SimVRegister temp1, temp2; 3231 uxtl(vform, temp1, src1); 3232 uxtl(vform, temp2, src2); 3233 mla(vform, dst, temp1, temp2); 3234 return dst; 3235 } 3236 3237 3238 LogicVRegister Simulator::umlal2(VectorFormat vform, 3239 LogicVRegister dst, 3240 const LogicVRegister& src1, 3241 const LogicVRegister& src2) { 3242 SimVRegister temp1, temp2; 3243 uxtl2(vform, temp1, src1); 3244 uxtl2(vform, temp2, src2); 3245 mla(vform, dst, temp1, temp2); 3246 return dst; 3247 } 3248 3249 3250 LogicVRegister Simulator::smlal(VectorFormat vform, 3251 LogicVRegister dst, 3252 const LogicVRegister& src1, 3253 const LogicVRegister& src2) { 3254 SimVRegister temp1, temp2; 3255 sxtl(vform, temp1, src1); 3256 sxtl(vform, temp2, src2); 3257 mla(vform, dst, temp1, temp2); 3258 return dst; 3259 } 3260 3261 3262 LogicVRegister Simulator::smlal2(VectorFormat vform, 3263 LogicVRegister dst, 3264 const LogicVRegister& src1, 3265 const LogicVRegister& src2) { 3266 SimVRegister temp1, temp2; 3267 sxtl2(vform, temp1, src1); 3268 sxtl2(vform, temp2, src2); 3269 mla(vform, dst, temp1, temp2); 3270 return dst; 3271 } 3272 3273 3274 LogicVRegister Simulator::sqdmlal(VectorFormat vform, 3275 LogicVRegister dst, 3276 const LogicVRegister& src1, 3277 const LogicVRegister& src2) { 3278 
SimVRegister temp; 3279 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3280 return add(vform, dst, dst, product).SignedSaturate(vform); 3281 } 3282 3283 3284 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, 3285 LogicVRegister dst, 3286 const LogicVRegister& src1, 3287 const LogicVRegister& src2) { 3288 SimVRegister temp; 3289 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3290 return add(vform, dst, dst, product).SignedSaturate(vform); 3291 } 3292 3293 3294 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, 3295 LogicVRegister dst, 3296 const LogicVRegister& src1, 3297 const LogicVRegister& src2) { 3298 SimVRegister temp; 3299 LogicVRegister product = sqdmull(vform, temp, src1, src2); 3300 return sub(vform, dst, dst, product).SignedSaturate(vform); 3301 } 3302 3303 3304 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, 3305 LogicVRegister dst, 3306 const LogicVRegister& src1, 3307 const LogicVRegister& src2) { 3308 SimVRegister temp; 3309 LogicVRegister product = sqdmull2(vform, temp, src1, src2); 3310 return sub(vform, dst, dst, product).SignedSaturate(vform); 3311 } 3312 3313 3314 LogicVRegister Simulator::sqdmull(VectorFormat vform, 3315 LogicVRegister dst, 3316 const LogicVRegister& src1, 3317 const LogicVRegister& src2) { 3318 SimVRegister temp; 3319 LogicVRegister product = smull(vform, temp, src1, src2); 3320 return add(vform, dst, product, product).SignedSaturate(vform); 3321 } 3322 3323 3324 LogicVRegister Simulator::sqdmull2(VectorFormat vform, 3325 LogicVRegister dst, 3326 const LogicVRegister& src1, 3327 const LogicVRegister& src2) { 3328 SimVRegister temp; 3329 LogicVRegister product = smull2(vform, temp, src1, src2); 3330 return add(vform, dst, product, product).SignedSaturate(vform); 3331 } 3332 3333 3334 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, 3335 LogicVRegister dst, 3336 const LogicVRegister& src1, 3337 const LogicVRegister& src2, 3338 bool round) { 3339 // 2 * INT_32_MIN * INT_32_MIN causes 
int64_t to overflow. 3340 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 3341 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 3342 3343 int esize = LaneSizeInBitsFromFormat(vform); 3344 int round_const = round ? (1 << (esize - 2)) : 0; 3345 int64_t product; 3346 3347 dst.ClearForWrite(vform); 3348 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3349 product = src1.Int(vform, i) * src2.Int(vform, i); 3350 product += round_const; 3351 product = product >> (esize - 1); 3352 3353 if (product > MaxIntFromFormat(vform)) { 3354 product = MaxIntFromFormat(vform); 3355 } else if (product < MinIntFromFormat(vform)) { 3356 product = MinIntFromFormat(vform); 3357 } 3358 dst.SetInt(vform, i, product); 3359 } 3360 return dst; 3361 } 3362 3363 3364 LogicVRegister Simulator::dot(VectorFormat vform, 3365 LogicVRegister dst, 3366 const LogicVRegister& src1, 3367 const LogicVRegister& src2, 3368 bool is_signed) { 3369 VectorFormat quarter_vform = 3370 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform)); 3371 3372 dst.ClearForWrite(vform); 3373 for (int e = 0; e < LaneCountFromFormat(vform); e++) { 3374 int64_t result = 0; 3375 int64_t element1, element2; 3376 for (int i = 0; i < 4; i++) { 3377 int index = 4 * e + i; 3378 if (is_signed) { 3379 element1 = src1.Int(quarter_vform, index); 3380 element2 = src2.Int(quarter_vform, index); 3381 } else { 3382 element1 = src1.Uint(quarter_vform, index); 3383 element2 = src2.Uint(quarter_vform, index); 3384 } 3385 result += element1 * element2; 3386 } 3387 3388 result += dst.Int(vform, e); 3389 dst.SetInt(vform, e, result); 3390 } 3391 return dst; 3392 } 3393 3394 3395 LogicVRegister Simulator::sdot(VectorFormat vform, 3396 LogicVRegister dst, 3397 const LogicVRegister& src1, 3398 const LogicVRegister& src2) { 3399 return dot(vform, dst, src1, src2, true); 3400 } 3401 3402 3403 LogicVRegister Simulator::udot(VectorFormat vform, 3404 LogicVRegister dst, 3405 const 
LogicVRegister& src1,
                               const LogicVRegister& src2) {
  return dot(vform, dst, src1, src2, false);
}


// Saturating (optionally rounding) doubling multiply accumulate/subtract,
// returning the high half.
//
// For each lane, dst is scaled up by 2^(esize - 1), the product src1 * src2 is
// added (or subtracted when sub_op is true), the rounding constant is added
// when round is true, and the total is shifted right by (esize - 1). The result
// is clamped to [MinIntFromFormat(vform), MaxIntFromFormat(vform)] before being
// written back to dst.
LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
                                    LogicVRegister dst,
                                    const LogicVRegister& src1,
                                    const LogicVRegister& src2,
                                    bool round,
                                    bool sub_op) {
  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
  // To avoid this, we use:
  //   ((dst << (esize - 1)) + (src1 * src2) + (1 << (esize - 2)))
  //       >> (esize - 1)
  // which is same as:
  //   ((dst << esize) + (2 * src1 * src2) + (1 << (esize - 1))) >> esize.

  int esize = LaneSizeInBitsFromFormat(vform);
  int round_const = round ? (1 << (esize - 2)) : 0;
  // The accumulator may be negative, and left-shifting a negative signed value
  // is undefined behaviour before C++20. Multiplying by the equivalent power
  // of two gives the same result for every in-range lane value.
  const int64_t accum_scale = static_cast<int64_t>(1) << (esize - 1);

  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t accum = dst.Int(vform, i) * accum_scale;
    if (sub_op) {
      accum -= src1.Int(vform, i) * src2.Int(vform, i);
    } else {
      accum += src1.Int(vform, i) * src2.Int(vform, i);
    }
    accum += round_const;
    accum = accum >> (esize - 1);

    // Saturate to the signed range of the lane.
    if (accum > MaxIntFromFormat(vform)) {
      accum = MaxIntFromFormat(vform);
    } else if (accum < MinIntFromFormat(vform)) {
      accum = MinIntFromFormat(vform);
    }
    dst.SetInt(vform, i, accum);
  }
  return dst;
}


// Accumulating variant: sqrdmlash() with sub_op cleared.
LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  return sqrdmlash(vform, dst, src1, src2, round, false);
}


// Subtracting variant: sqrdmlash() with sub_op set.
LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
                                   LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2,
                                   bool round) {
  return sqrdmlash(vform, dst, src1, src2, round, true);
}


LogicVRegister Simulator::sqdmulh(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  return sqrdmulh(vform,
dst, src1, src2, false); 3472 } 3473 3474 3475 LogicVRegister Simulator::addhn(VectorFormat vform, 3476 LogicVRegister dst, 3477 const LogicVRegister& src1, 3478 const LogicVRegister& src2) { 3479 SimVRegister temp; 3480 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3481 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3482 return dst; 3483 } 3484 3485 3486 LogicVRegister Simulator::addhn2(VectorFormat vform, 3487 LogicVRegister dst, 3488 const LogicVRegister& src1, 3489 const LogicVRegister& src2) { 3490 SimVRegister temp; 3491 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3492 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3493 return dst; 3494 } 3495 3496 3497 LogicVRegister Simulator::raddhn(VectorFormat vform, 3498 LogicVRegister dst, 3499 const LogicVRegister& src1, 3500 const LogicVRegister& src2) { 3501 SimVRegister temp; 3502 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 3503 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3504 return dst; 3505 } 3506 3507 3508 LogicVRegister Simulator::raddhn2(VectorFormat vform, 3509 LogicVRegister dst, 3510 const LogicVRegister& src1, 3511 const LogicVRegister& src2) { 3512 SimVRegister temp; 3513 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3514 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3515 return dst; 3516 } 3517 3518 3519 LogicVRegister Simulator::subhn(VectorFormat vform, 3520 LogicVRegister dst, 3521 const LogicVRegister& src1, 3522 const LogicVRegister& src2) { 3523 SimVRegister temp; 3524 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3525 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3526 return dst; 3527 } 3528 3529 3530 LogicVRegister Simulator::subhn2(VectorFormat vform, 3531 LogicVRegister dst, 3532 const LogicVRegister& src1, 3533 const LogicVRegister& src2) { 3534 SimVRegister temp; 3535 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, 
src2); 3536 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3537 return dst; 3538 } 3539 3540 3541 LogicVRegister Simulator::rsubhn(VectorFormat vform, 3542 LogicVRegister dst, 3543 const LogicVRegister& src1, 3544 const LogicVRegister& src2) { 3545 SimVRegister temp; 3546 sub(VectorFormatDoubleWidth(vform), temp, src1, src2); 3547 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3548 return dst; 3549 } 3550 3551 3552 LogicVRegister Simulator::rsubhn2(VectorFormat vform, 3553 LogicVRegister dst, 3554 const LogicVRegister& src1, 3555 const LogicVRegister& src2) { 3556 SimVRegister temp; 3557 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 3558 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 3559 return dst; 3560 } 3561 3562 3563 LogicVRegister Simulator::trn1(VectorFormat vform, 3564 LogicVRegister dst, 3565 const LogicVRegister& src1, 3566 const LogicVRegister& src2) { 3567 uint64_t result[16]; 3568 int laneCount = LaneCountFromFormat(vform); 3569 int pairs = laneCount / 2; 3570 for (int i = 0; i < pairs; ++i) { 3571 result[2 * i] = src1.Uint(vform, 2 * i); 3572 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 3573 } 3574 3575 dst.ClearForWrite(vform); 3576 for (int i = 0; i < laneCount; ++i) { 3577 dst.SetUint(vform, i, result[i]); 3578 } 3579 return dst; 3580 } 3581 3582 3583 LogicVRegister Simulator::trn2(VectorFormat vform, 3584 LogicVRegister dst, 3585 const LogicVRegister& src1, 3586 const LogicVRegister& src2) { 3587 uint64_t result[16]; 3588 int laneCount = LaneCountFromFormat(vform); 3589 int pairs = laneCount / 2; 3590 for (int i = 0; i < pairs; ++i) { 3591 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 3592 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 3593 } 3594 3595 dst.ClearForWrite(vform); 3596 for (int i = 0; i < laneCount; ++i) { 3597 dst.SetUint(vform, i, result[i]); 3598 } 3599 return dst; 3600 } 3601 3602 3603 LogicVRegister Simulator::zip1(VectorFormat vform, 3604 LogicVRegister 
dst, 3605 const LogicVRegister& src1, 3606 const LogicVRegister& src2) { 3607 uint64_t result[16]; 3608 int laneCount = LaneCountFromFormat(vform); 3609 int pairs = laneCount / 2; 3610 for (int i = 0; i < pairs; ++i) { 3611 result[2 * i] = src1.Uint(vform, i); 3612 result[(2 * i) + 1] = src2.Uint(vform, i); 3613 } 3614 3615 dst.ClearForWrite(vform); 3616 for (int i = 0; i < laneCount; ++i) { 3617 dst.SetUint(vform, i, result[i]); 3618 } 3619 return dst; 3620 } 3621 3622 3623 LogicVRegister Simulator::zip2(VectorFormat vform, 3624 LogicVRegister dst, 3625 const LogicVRegister& src1, 3626 const LogicVRegister& src2) { 3627 uint64_t result[16]; 3628 int laneCount = LaneCountFromFormat(vform); 3629 int pairs = laneCount / 2; 3630 for (int i = 0; i < pairs; ++i) { 3631 result[2 * i] = src1.Uint(vform, pairs + i); 3632 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 3633 } 3634 3635 dst.ClearForWrite(vform); 3636 for (int i = 0; i < laneCount; ++i) { 3637 dst.SetUint(vform, i, result[i]); 3638 } 3639 return dst; 3640 } 3641 3642 3643 LogicVRegister Simulator::uzp1(VectorFormat vform, 3644 LogicVRegister dst, 3645 const LogicVRegister& src1, 3646 const LogicVRegister& src2) { 3647 uint64_t result[32]; 3648 int laneCount = LaneCountFromFormat(vform); 3649 for (int i = 0; i < laneCount; ++i) { 3650 result[i] = src1.Uint(vform, i); 3651 result[laneCount + i] = src2.Uint(vform, i); 3652 } 3653 3654 dst.ClearForWrite(vform); 3655 for (int i = 0; i < laneCount; ++i) { 3656 dst.SetUint(vform, i, result[2 * i]); 3657 } 3658 return dst; 3659 } 3660 3661 3662 LogicVRegister Simulator::uzp2(VectorFormat vform, 3663 LogicVRegister dst, 3664 const LogicVRegister& src1, 3665 const LogicVRegister& src2) { 3666 uint64_t result[32]; 3667 int laneCount = LaneCountFromFormat(vform); 3668 for (int i = 0; i < laneCount; ++i) { 3669 result[i] = src1.Uint(vform, i); 3670 result[laneCount + i] = src2.Uint(vform, i); 3671 } 3672 3673 dst.ClearForWrite(vform); 3674 for (int i = 0; i < 
laneCount; ++i) { 3675 dst.SetUint(vform, i, result[(2 * i) + 1]); 3676 } 3677 return dst; 3678 } 3679 3680 3681 template <typename T> 3682 T Simulator::FPNeg(T op) { 3683 return -op; 3684 } 3685 3686 template <typename T> 3687 T Simulator::FPAdd(T op1, T op2) { 3688 T result = FPProcessNaNs(op1, op2); 3689 if (IsNaN(result)) { 3690 return result; 3691 } 3692 3693 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) { 3694 // inf + -inf returns the default NaN. 3695 FPProcessException(); 3696 return FPDefaultNaN<T>(); 3697 } else { 3698 // Other cases should be handled by standard arithmetic. 3699 return op1 + op2; 3700 } 3701 } 3702 3703 3704 template <typename T> 3705 T Simulator::FPSub(T op1, T op2) { 3706 // NaNs should be handled elsewhere. 3707 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); 3708 3709 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) { 3710 // inf - inf returns the default NaN. 3711 FPProcessException(); 3712 return FPDefaultNaN<T>(); 3713 } else { 3714 // Other cases should be handled by standard arithmetic. 3715 return op1 - op2; 3716 } 3717 } 3718 3719 3720 template <typename T> 3721 T Simulator::FPMul(T op1, T op2) { 3722 // NaNs should be handled elsewhere. 3723 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2)); 3724 3725 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) { 3726 // inf * 0.0 returns the default NaN. 3727 FPProcessException(); 3728 return FPDefaultNaN<T>(); 3729 } else { 3730 // Other cases should be handled by standard arithmetic. 3731 return op1 * op2; 3732 } 3733 } 3734 3735 3736 template <typename T> 3737 T Simulator::FPMulx(T op1, T op2) { 3738 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) { 3739 // inf * 0.0 returns +/-2.0. 
T two = 2.0;
    // The sign of the result is the product of the operand signs.
    return copysign(1.0, op1) * copysign(1.0, op2) * two;
  }
  return FPMul(op1, op2);
}


// Fused multiply-add of op1 * op2 with the addend a, computed via
// FusedMultiplyAdd(op1, op2, a).
//
// Before the arithmetic is performed:
//  - NaN operands are resolved by FPProcessNaNs3;
//  - inf * 0.0 and inf - inf raise an FP exception and yield the default NaN
//    (this also overrides a quiet NaN propagated from a);
//  - exact-zero and rounded-zero results have their sign fixed up by hand.
template <typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  T result = FPProcessNaNs3(a, op1, op2);

  // Signs are captured as +/-1.0 so that signed zeros are distinguished.
  T sign_a = copysign(1.0, a);
  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
  bool isinf_prod = IsInf(op1) || IsInf(op2);
  bool operation_generates_nan =
      (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
      (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
      (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf

  if (IsNaN(result)) {
    // Generated NaNs override quiet NaNs propagated from a.
    if (operation_generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    } else {
      return result;
    }
  }

  // If the operation would produce a NaN, return the default NaN.
  if (operation_generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Work around broken fma implementations for exact zero results: The sign of
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
  }

  result = FusedMultiplyAdd(op1, op2, a);
  VIXL_ASSERT(!IsNaN(result));

  // Work around broken fma implementations for rounded zero results: If a is
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return copysign(0.0, sign_prod);
  }

  return result;
}


template <typename T>
T Simulator::FPDiv(T op1, T op2) {
  // NaNs should be handled elsewhere.
  VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));

  if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
    // inf / inf and 0.0 / 0.0 return the default NaN.
3801 FPProcessException(); 3802 return FPDefaultNaN<T>(); 3803 } else { 3804 if (op2 == 0.0) { 3805 FPProcessException(); 3806 if (!IsNaN(op1)) { 3807 double op1_sign = copysign(1.0, op1); 3808 double op2_sign = copysign(1.0, op2); 3809 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity); 3810 } 3811 } 3812 3813 // Other cases should be handled by standard arithmetic. 3814 return op1 / op2; 3815 } 3816 } 3817 3818 3819 template <typename T> 3820 T Simulator::FPSqrt(T op) { 3821 if (IsNaN(op)) { 3822 return FPProcessNaN(op); 3823 } else if (op < T(0.0)) { 3824 FPProcessException(); 3825 return FPDefaultNaN<T>(); 3826 } else { 3827 return sqrt(op); 3828 } 3829 } 3830 3831 3832 template <typename T> 3833 T Simulator::FPMax(T a, T b) { 3834 T result = FPProcessNaNs(a, b); 3835 if (IsNaN(result)) return result; 3836 3837 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3838 // a and b are zero, and the sign differs: return +0.0. 3839 return 0.0; 3840 } else { 3841 return (a > b) ? a : b; 3842 } 3843 } 3844 3845 3846 template <typename T> 3847 T Simulator::FPMaxNM(T a, T b) { 3848 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3849 a = kFP64NegativeInfinity; 3850 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3851 b = kFP64NegativeInfinity; 3852 } 3853 3854 T result = FPProcessNaNs(a, b); 3855 return IsNaN(result) ? result : FPMax(a, b); 3856 } 3857 3858 3859 template <typename T> 3860 T Simulator::FPMin(T a, T b) { 3861 T result = FPProcessNaNs(a, b); 3862 if (IsNaN(result)) return result; 3863 3864 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) { 3865 // a and b are zero, and the sign differs: return -0.0. 3866 return -0.0; 3867 } else { 3868 return (a < b) ? 
a : b; 3869 } 3870 } 3871 3872 3873 template <typename T> 3874 T Simulator::FPMinNM(T a, T b) { 3875 if (IsQuietNaN(a) && !IsQuietNaN(b)) { 3876 a = kFP64PositiveInfinity; 3877 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) { 3878 b = kFP64PositiveInfinity; 3879 } 3880 3881 T result = FPProcessNaNs(a, b); 3882 return IsNaN(result) ? result : FPMin(a, b); 3883 } 3884 3885 3886 template <typename T> 3887 T Simulator::FPRecipStepFused(T op1, T op2) { 3888 const T two = 2.0; 3889 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) { 3890 return two; 3891 } else if (IsInf(op1) || IsInf(op2)) { 3892 // Return +inf if signs match, otherwise -inf. 3893 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3894 : kFP64NegativeInfinity; 3895 } else { 3896 return FusedMultiplyAdd(op1, op2, two); 3897 } 3898 } 3899 3900 template <typename T> 3901 bool IsNormal(T value) { 3902 return std::isnormal(value); 3903 } 3904 3905 template <> 3906 bool IsNormal(SimFloat16 value) { 3907 uint16_t rawbits = Float16ToRawbits(value); 3908 uint16_t exp_mask = 0x7c00; 3909 // Check that the exponent is neither all zeroes or all ones. 3910 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0); 3911 } 3912 3913 3914 template <typename T> 3915 T Simulator::FPRSqrtStepFused(T op1, T op2) { 3916 const T one_point_five = 1.5; 3917 const T two = 2.0; 3918 3919 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) { 3920 return one_point_five; 3921 } else if (IsInf(op1) || IsInf(op2)) { 3922 // Return +inf if signs match, otherwise -inf. 3923 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity 3924 : kFP64NegativeInfinity; 3925 } else { 3926 // The multiply-add-halve operation must be fully fused, so avoid interim 3927 // rounding by checking which operand can be losslessly divided by two 3928 // before doing the multiply-add. 
if (IsNormal(op1 / two)) {
      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
    } else if (IsNormal(op2 / two)) {
      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
    } else {
      // Neither operand is normal after halving: the result is dominated by
      // the addition term, so just return that.
      return one_point_five;
    }
  }
}

// Convert a double to a 32-bit integer, rounding toward zero and keeping the
// low 32 bits when the value is out of range. N, C and V are cleared and the
// Z flag reports whether the conversion was exact (see below).
int32_t Simulator::FPToFixedJS(double value) {
  // The Z-flag is set when the conversion from double precision floating-point
  // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
  // outside the bounds of a 32-bit integer, or isn't an exact integer then the
  // Z-flag is unset.
  int Z = 1;
  int32_t result;

  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    // +/- zero and infinity all return zero, however -0 and +/- Infinity also
    // unset the Z-flag.
    result = 0.0;
    // Only +0.0 keeps Z set: infinities fail the first test and -0.0 has its
    // sign bit set.
    if ((value != 0.0) || std::signbit(value)) {
      Z = 0;
    }
  } else if (std::isnan(value)) {
    // NaN values unset the Z-flag and set the result to 0.
    FPProcessNaN(value);
    result = 0;
    Z = 0;
  } else {
    // All other values are converted to an integer representation, rounded
    // toward zero.
    double int_result = std::floor(value);
    double error = value - int_result;

    // floor() rounds toward -infinity, so step negative inexact values back
    // up by one to get round-toward-zero.
    if ((error != 0.0) && (int_result < 0.0)) {
      int_result++;
    }

    // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
    // write a one-liner with std::round, but the behaviour on ties is incorrect
    // for our purposes.
double mod_const = static_cast<double>(UINT64_C(1) << 32);
    // Fractional part of int_result / 2^32; exactly 0.5 marks the tie case.
    double mod_error =
        (int_result / mod_const) - std::floor(int_result / mod_const);
    double constrained;
    if (mod_error == 0.5) {
      constrained = INT32_MIN;
    } else {
      constrained = int_result - mod_const * round(int_result / mod_const);
    }

    VIXL_ASSERT(std::floor(constrained) == constrained);
    VIXL_ASSERT(constrained >= INT32_MIN);
    VIXL_ASSERT(constrained <= INT32_MAX);

    // Take the bottom 32 bits of the result as a 32-bit integer.
    result = static_cast<int32_t>(constrained);

    if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
        (error != 0.0)) {
      // If the integer result is out of range or the conversion isn't exact,
      // take exception and unset the Z-flag.
      FPProcessException();
      Z = 0;
    }
  }

  // Only Z carries information; the other condition flags are cleared.
  ReadNzcv().SetN(0);
  ReadNzcv().SetZ(Z);
  ReadNzcv().SetC(0);
  ReadNzcv().SetV(0);

  return result;
}


// Round value to an integral double according to round_mode. Zeros and
// infinities are returned unchanged and NaNs go through FPProcessNaN.
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    return value;
  } else if (IsNaN(value)) {
    return FPProcessNaN(value);
  }

  // Start from floor(value); error is the non-negative distance from it. Each
  // rounding mode then decides whether to step up by one.
  double int_result = std::floor(value);
  double error = value - int_result;
  switch (round_mode) {
    case FPTieAway: {
      // Take care of correctly handling the range ]-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 < value) && (value < 0.0)) {
        int_result = -0.0;

      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is positive, round up.
        int_result++;
      }
      break;
    }
    case FPTieEven: {
      // Take care of correctly handling the range [-0.5, -0.0], which must
      // yield -0.0.
4037 if ((-0.5 <= value) && (value < 0.0)) { 4038 int_result = -0.0; 4039 4040 // If the error is greater than 0.5, or is equal to 0.5 and the integer 4041 // result is odd, round up. 4042 } else if ((error > 0.5) || 4043 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) { 4044 int_result++; 4045 } 4046 break; 4047 } 4048 case FPZero: { 4049 // If value>0 then we take floor(value) 4050 // otherwise, ceil(value). 4051 if (value < 0) { 4052 int_result = ceil(value); 4053 } 4054 break; 4055 } 4056 case FPNegativeInfinity: { 4057 // We always use floor(value). 4058 break; 4059 } 4060 case FPPositiveInfinity: { 4061 // Take care of correctly handling the range ]-1.0, -0.0], which must 4062 // yield -0.0. 4063 if ((-1.0 < value) && (value < 0.0)) { 4064 int_result = -0.0; 4065 4066 // If the error is non-zero, round up. 4067 } else if (error > 0.0) { 4068 int_result++; 4069 } 4070 break; 4071 } 4072 default: 4073 VIXL_UNIMPLEMENTED(); 4074 } 4075 return int_result; 4076 } 4077 4078 4079 int16_t Simulator::FPToInt16(double value, FPRounding rmode) { 4080 value = FPRoundInt(value, rmode); 4081 if (value >= kHMaxInt) { 4082 return kHMaxInt; 4083 } else if (value < kHMinInt) { 4084 return kHMinInt; 4085 } 4086 return IsNaN(value) ? 0 : static_cast<int16_t>(value); 4087 } 4088 4089 4090 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 4091 value = FPRoundInt(value, rmode); 4092 if (value >= kWMaxInt) { 4093 return kWMaxInt; 4094 } else if (value < kWMinInt) { 4095 return kWMinInt; 4096 } 4097 return IsNaN(value) ? 0 : static_cast<int32_t>(value); 4098 } 4099 4100 4101 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 4102 value = FPRoundInt(value, rmode); 4103 if (value >= kXMaxInt) { 4104 return kXMaxInt; 4105 } else if (value < kXMinInt) { 4106 return kXMinInt; 4107 } 4108 return IsNaN(value) ? 
0 : static_cast<int64_t>(value); 4109 } 4110 4111 4112 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) { 4113 value = FPRoundInt(value, rmode); 4114 if (value >= kHMaxUInt) { 4115 return kHMaxUInt; 4116 } else if (value < 0.0) { 4117 return 0; 4118 } 4119 return IsNaN(value) ? 0 : static_cast<uint16_t>(value); 4120 } 4121 4122 4123 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 4124 value = FPRoundInt(value, rmode); 4125 if (value >= kWMaxUInt) { 4126 return kWMaxUInt; 4127 } else if (value < 0.0) { 4128 return 0; 4129 } 4130 return IsNaN(value) ? 0 : static_cast<uint32_t>(value); 4131 } 4132 4133 4134 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 4135 value = FPRoundInt(value, rmode); 4136 if (value >= kXMaxUInt) { 4137 return kXMaxUInt; 4138 } else if (value < 0.0) { 4139 return 0; 4140 } 4141 return IsNaN(value) ? 0 : static_cast<uint64_t>(value); 4142 } 4143 4144 4145 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \ 4146 template <typename T> \ 4147 LogicVRegister Simulator::FN(VectorFormat vform, \ 4148 LogicVRegister dst, \ 4149 const LogicVRegister& src1, \ 4150 const LogicVRegister& src2) { \ 4151 dst.ClearForWrite(vform); \ 4152 for (int i = 0; i < LaneCountFromFormat(vform); i++) { \ 4153 T op1 = src1.Float<T>(i); \ 4154 T op2 = src2.Float<T>(i); \ 4155 T result; \ 4156 if (PROCNAN) { \ 4157 result = FPProcessNaNs(op1, op2); \ 4158 if (!IsNaN(result)) { \ 4159 result = OP(op1, op2); \ 4160 } \ 4161 } else { \ 4162 result = OP(op1, op2); \ 4163 } \ 4164 dst.SetFloat(i, result); \ 4165 } \ 4166 return dst; \ 4167 } \ 4168 \ 4169 LogicVRegister Simulator::FN(VectorFormat vform, \ 4170 LogicVRegister dst, \ 4171 const LogicVRegister& src1, \ 4172 const LogicVRegister& src2) { \ 4173 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { \ 4174 FN<SimFloat16>(vform, dst, src1, src2); \ 4175 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { \ 4176 FN<float>(vform, dst, src1, src2); \ 4177 } else { \ 
4178 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \ 4179 FN<double>(vform, dst, src1, src2); \ 4180 } \ 4181 return dst; \ 4182 } 4183 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP) 4184 #undef DEFINE_NEON_FP_VECTOR_OP 4185 4186 4187 LogicVRegister Simulator::fnmul(VectorFormat vform, 4188 LogicVRegister dst, 4189 const LogicVRegister& src1, 4190 const LogicVRegister& src2) { 4191 SimVRegister temp; 4192 LogicVRegister product = fmul(vform, temp, src1, src2); 4193 return fneg(vform, dst, product); 4194 } 4195 4196 4197 template <typename T> 4198 LogicVRegister Simulator::frecps(VectorFormat vform, 4199 LogicVRegister dst, 4200 const LogicVRegister& src1, 4201 const LogicVRegister& src2) { 4202 dst.ClearForWrite(vform); 4203 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4204 T op1 = -src1.Float<T>(i); 4205 T op2 = src2.Float<T>(i); 4206 T result = FPProcessNaNs(op1, op2); 4207 dst.SetFloat(i, IsNaN(result) ? result : FPRecipStepFused(op1, op2)); 4208 } 4209 return dst; 4210 } 4211 4212 4213 LogicVRegister Simulator::frecps(VectorFormat vform, 4214 LogicVRegister dst, 4215 const LogicVRegister& src1, 4216 const LogicVRegister& src2) { 4217 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4218 frecps<SimFloat16>(vform, dst, src1, src2); 4219 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4220 frecps<float>(vform, dst, src1, src2); 4221 } else { 4222 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4223 frecps<double>(vform, dst, src1, src2); 4224 } 4225 return dst; 4226 } 4227 4228 4229 template <typename T> 4230 LogicVRegister Simulator::frsqrts(VectorFormat vform, 4231 LogicVRegister dst, 4232 const LogicVRegister& src1, 4233 const LogicVRegister& src2) { 4234 dst.ClearForWrite(vform); 4235 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4236 T op1 = -src1.Float<T>(i); 4237 T op2 = src2.Float<T>(i); 4238 T result = FPProcessNaNs(op1, op2); 4239 dst.SetFloat(i, IsNaN(result) ? 
result : FPRSqrtStepFused(op1, op2));
  }
  return dst;
}


// Select the frsqrts<T> instantiation that matches the lane size of vform
// (half, single, or double precision).
LogicVRegister Simulator::frsqrts(VectorFormat vform,
                                  LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
    frsqrts<SimFloat16>(vform, dst, src1, src2);
  } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
    frsqrts<float>(vform, dst, src1, src2);
  } else {
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
    frsqrts<double>(vform, dst, src1, src2);
  }
  return dst;
}


// Per-lane floating-point compare for element type T. A lane is set to
// MaxUintFromFormat(vform) when the comparison selected by cond holds and to 0
// otherwise; lanes where FPProcessNaNs reports a NaN always compare false.
template <typename T>
LogicVRegister Simulator::fcmp(VectorFormat vform,
                               LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2,
                               Condition cond) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    bool result = false;
    T op1 = src1.Float<T>(i);
    T op2 = src2.Float<T>(i);
    T nan_result = FPProcessNaNs(op1, op2);
    if (!IsNaN(nan_result)) {
      switch (cond) {
        case eq:
          result = (op1 == op2);
          break;
        case ge:
          result = (op1 >= op2);
          break;
        case gt:
          result = (op1 > op2);
          break;
        case le:
          result = (op1 <= op2);
          break;
        case lt:
          result = (op1 < op2);
          break;
        default:
          VIXL_UNREACHABLE();
          break;
      }
    }
    dst.SetUint(vform, i, result ?
MaxUintFromFormat(vform) : 0); 4296 } 4297 return dst; 4298 } 4299 4300 4301 LogicVRegister Simulator::fcmp(VectorFormat vform, 4302 LogicVRegister dst, 4303 const LogicVRegister& src1, 4304 const LogicVRegister& src2, 4305 Condition cond) { 4306 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4307 fcmp<SimFloat16>(vform, dst, src1, src2, cond); 4308 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4309 fcmp<float>(vform, dst, src1, src2, cond); 4310 } else { 4311 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4312 fcmp<double>(vform, dst, src1, src2, cond); 4313 } 4314 return dst; 4315 } 4316 4317 4318 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, 4319 LogicVRegister dst, 4320 const LogicVRegister& src, 4321 Condition cond) { 4322 SimVRegister temp; 4323 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4324 LogicVRegister zero_reg = 4325 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0))); 4326 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond); 4327 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4328 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0)); 4329 fcmp<float>(vform, dst, src, zero_reg, cond); 4330 } else { 4331 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4332 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0)); 4333 fcmp<double>(vform, dst, src, zero_reg, cond); 4334 } 4335 return dst; 4336 } 4337 4338 4339 LogicVRegister Simulator::fabscmp(VectorFormat vform, 4340 LogicVRegister dst, 4341 const LogicVRegister& src1, 4342 const LogicVRegister& src2, 4343 Condition cond) { 4344 SimVRegister temp1, temp2; 4345 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4346 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1); 4347 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2); 4348 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond); 4349 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4350 
LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 4351 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 4352 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 4353 } else { 4354 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4355 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 4356 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); 4357 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 4358 } 4359 return dst; 4360 } 4361 4362 4363 template <typename T> 4364 LogicVRegister Simulator::fmla(VectorFormat vform, 4365 LogicVRegister dst, 4366 const LogicVRegister& src1, 4367 const LogicVRegister& src2) { 4368 dst.ClearForWrite(vform); 4369 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4370 T op1 = src1.Float<T>(i); 4371 T op2 = src2.Float<T>(i); 4372 T acc = dst.Float<T>(i); 4373 T result = FPMulAdd(acc, op1, op2); 4374 dst.SetFloat(i, result); 4375 } 4376 return dst; 4377 } 4378 4379 4380 LogicVRegister Simulator::fmla(VectorFormat vform, 4381 LogicVRegister dst, 4382 const LogicVRegister& src1, 4383 const LogicVRegister& src2) { 4384 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4385 fmla<SimFloat16>(vform, dst, src1, src2); 4386 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4387 fmla<float>(vform, dst, src1, src2); 4388 } else { 4389 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4390 fmla<double>(vform, dst, src1, src2); 4391 } 4392 return dst; 4393 } 4394 4395 4396 template <typename T> 4397 LogicVRegister Simulator::fmls(VectorFormat vform, 4398 LogicVRegister dst, 4399 const LogicVRegister& src1, 4400 const LogicVRegister& src2) { 4401 dst.ClearForWrite(vform); 4402 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4403 T op1 = -src1.Float<T>(i); 4404 T op2 = src2.Float<T>(i); 4405 T acc = dst.Float<T>(i); 4406 T result = FPMulAdd(acc, op1, op2); 4407 dst.SetFloat(i, result); 4408 } 4409 return dst; 4410 } 4411 4412 4413 LogicVRegister 
Simulator::fmls(VectorFormat vform, 4414 LogicVRegister dst, 4415 const LogicVRegister& src1, 4416 const LogicVRegister& src2) { 4417 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4418 fmls<SimFloat16>(vform, dst, src1, src2); 4419 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4420 fmls<float>(vform, dst, src1, src2); 4421 } else { 4422 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4423 fmls<double>(vform, dst, src1, src2); 4424 } 4425 return dst; 4426 } 4427 4428 4429 template <typename T> 4430 LogicVRegister Simulator::fneg(VectorFormat vform, 4431 LogicVRegister dst, 4432 const LogicVRegister& src) { 4433 dst.ClearForWrite(vform); 4434 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4435 T op = src.Float<T>(i); 4436 op = -op; 4437 dst.SetFloat(i, op); 4438 } 4439 return dst; 4440 } 4441 4442 4443 LogicVRegister Simulator::fneg(VectorFormat vform, 4444 LogicVRegister dst, 4445 const LogicVRegister& src) { 4446 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4447 fneg<SimFloat16>(vform, dst, src); 4448 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4449 fneg<float>(vform, dst, src); 4450 } else { 4451 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4452 fneg<double>(vform, dst, src); 4453 } 4454 return dst; 4455 } 4456 4457 4458 template <typename T> 4459 LogicVRegister Simulator::fabs_(VectorFormat vform, 4460 LogicVRegister dst, 4461 const LogicVRegister& src) { 4462 dst.ClearForWrite(vform); 4463 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4464 T op = src.Float<T>(i); 4465 if (copysign(1.0, op) < 0.0) { 4466 op = -op; 4467 } 4468 dst.SetFloat(i, op); 4469 } 4470 return dst; 4471 } 4472 4473 4474 LogicVRegister Simulator::fabs_(VectorFormat vform, 4475 LogicVRegister dst, 4476 const LogicVRegister& src) { 4477 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4478 fabs_<SimFloat16>(vform, dst, src); 4479 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4480 
fabs_<float>(vform, dst, src); 4481 } else { 4482 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4483 fabs_<double>(vform, dst, src); 4484 } 4485 return dst; 4486 } 4487 4488 4489 LogicVRegister Simulator::fabd(VectorFormat vform, 4490 LogicVRegister dst, 4491 const LogicVRegister& src1, 4492 const LogicVRegister& src2) { 4493 SimVRegister temp; 4494 fsub(vform, temp, src1, src2); 4495 fabs_(vform, dst, temp); 4496 return dst; 4497 } 4498 4499 4500 LogicVRegister Simulator::fsqrt(VectorFormat vform, 4501 LogicVRegister dst, 4502 const LogicVRegister& src) { 4503 dst.ClearForWrite(vform); 4504 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4505 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4506 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i)); 4507 dst.SetFloat(i, result); 4508 } 4509 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4510 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4511 float result = FPSqrt(src.Float<float>(i)); 4512 dst.SetFloat(i, result); 4513 } 4514 } else { 4515 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4516 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4517 double result = FPSqrt(src.Float<double>(i)); 4518 dst.SetFloat(i, result); 4519 } 4520 } 4521 return dst; 4522 } 4523 4524 4525 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \ 4526 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4527 LogicVRegister dst, \ 4528 const LogicVRegister& src1, \ 4529 const LogicVRegister& src2) { \ 4530 SimVRegister temp1, temp2; \ 4531 uzp1(vform, temp1, src1, src2); \ 4532 uzp2(vform, temp2, src1, src2); \ 4533 FN(vform, dst, temp1, temp2); \ 4534 return dst; \ 4535 } \ 4536 \ 4537 LogicVRegister Simulator::FNP(VectorFormat vform, \ 4538 LogicVRegister dst, \ 4539 const LogicVRegister& src) { \ 4540 if (vform == kFormatH) { \ 4541 SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))), \ 4542 SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \ 4543 
dst.SetUint(vform, 0, Float16ToRawbits(result)); \ 4544 } else if (vform == kFormatS) { \ 4545 float result = OP(src.Float<float>(0), src.Float<float>(1)); \ 4546 dst.SetFloat(0, result); \ 4547 } else { \ 4548 VIXL_ASSERT(vform == kFormatD); \ 4549 double result = OP(src.Float<double>(0), src.Float<double>(1)); \ 4550 dst.SetFloat(0, result); \ 4551 } \ 4552 dst.ClearForWrite(vform); \ 4553 return dst; \ 4554 } 4555 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP) 4556 #undef DEFINE_NEON_FP_PAIR_OP 4557 4558 template <typename T> 4559 LogicVRegister Simulator::fminmaxv(VectorFormat vform, 4560 LogicVRegister dst, 4561 const LogicVRegister& src, 4562 typename TFPMinMaxOp<T>::type Op) { 4563 VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) || 4564 (vform == kFormat4S)); 4565 USE(vform); 4566 T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1)); 4567 T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3)); 4568 if (vform == kFormat8H) { 4569 T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5)); 4570 T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7)); 4571 result1 = (this->*Op)(result1, result3); 4572 result2 = (this->*Op)(result2, result4); 4573 } 4574 T result = (this->*Op)(result1, result2); 4575 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 4576 dst.SetFloat<T>(0, result); 4577 return dst; 4578 } 4579 4580 4581 LogicVRegister Simulator::fmaxv(VectorFormat vform, 4582 LogicVRegister dst, 4583 const LogicVRegister& src) { 4584 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4585 return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>); 4586 } else { 4587 return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>); 4588 } 4589 } 4590 4591 4592 LogicVRegister Simulator::fminv(VectorFormat vform, 4593 LogicVRegister dst, 4594 const LogicVRegister& src) { 4595 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4596 return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>); 4597 } else { 4598 
return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>); 4599 } 4600 } 4601 4602 4603 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, 4604 LogicVRegister dst, 4605 const LogicVRegister& src) { 4606 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4607 return fminmaxv<SimFloat16>(vform, 4608 dst, 4609 src, 4610 &Simulator::FPMaxNM<SimFloat16>); 4611 } else { 4612 return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>); 4613 } 4614 } 4615 4616 4617 LogicVRegister Simulator::fminnmv(VectorFormat vform, 4618 LogicVRegister dst, 4619 const LogicVRegister& src) { 4620 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4621 return fminmaxv<SimFloat16>(vform, 4622 dst, 4623 src, 4624 &Simulator::FPMinNM<SimFloat16>); 4625 } else { 4626 return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>); 4627 } 4628 } 4629 4630 4631 LogicVRegister Simulator::fmul(VectorFormat vform, 4632 LogicVRegister dst, 4633 const LogicVRegister& src1, 4634 const LogicVRegister& src2, 4635 int index) { 4636 dst.ClearForWrite(vform); 4637 SimVRegister temp; 4638 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4639 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); 4640 fmul<SimFloat16>(vform, dst, src1, index_reg); 4641 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4642 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4643 fmul<float>(vform, dst, src1, index_reg); 4644 } else { 4645 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4646 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4647 fmul<double>(vform, dst, src1, index_reg); 4648 } 4649 return dst; 4650 } 4651 4652 4653 LogicVRegister Simulator::fmla(VectorFormat vform, 4654 LogicVRegister dst, 4655 const LogicVRegister& src1, 4656 const LogicVRegister& src2, 4657 int index) { 4658 dst.ClearForWrite(vform); 4659 SimVRegister temp; 4660 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4661 LogicVRegister 
index_reg = dup_element(kFormat8H, temp, src2, index); 4662 fmla<SimFloat16>(vform, dst, src1, index_reg); 4663 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4664 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4665 fmla<float>(vform, dst, src1, index_reg); 4666 } else { 4667 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4668 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4669 fmla<double>(vform, dst, src1, index_reg); 4670 } 4671 return dst; 4672 } 4673 4674 4675 LogicVRegister Simulator::fmls(VectorFormat vform, 4676 LogicVRegister dst, 4677 const LogicVRegister& src1, 4678 const LogicVRegister& src2, 4679 int index) { 4680 dst.ClearForWrite(vform); 4681 SimVRegister temp; 4682 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4683 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); 4684 fmls<SimFloat16>(vform, dst, src1, index_reg); 4685 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4686 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4687 fmls<float>(vform, dst, src1, index_reg); 4688 } else { 4689 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4690 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4691 fmls<double>(vform, dst, src1, index_reg); 4692 } 4693 return dst; 4694 } 4695 4696 4697 LogicVRegister Simulator::fmulx(VectorFormat vform, 4698 LogicVRegister dst, 4699 const LogicVRegister& src1, 4700 const LogicVRegister& src2, 4701 int index) { 4702 dst.ClearForWrite(vform); 4703 SimVRegister temp; 4704 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4705 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index); 4706 fmulx<SimFloat16>(vform, dst, src1, index_reg); 4707 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4708 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 4709 fmulx<float>(vform, dst, src1, index_reg); 4710 } else { 4711 
VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4712 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 4713 fmulx<double>(vform, dst, src1, index_reg); 4714 } 4715 return dst; 4716 } 4717 4718 4719 LogicVRegister Simulator::frint(VectorFormat vform, 4720 LogicVRegister dst, 4721 const LogicVRegister& src, 4722 FPRounding rounding_mode, 4723 bool inexact_exception) { 4724 dst.ClearForWrite(vform); 4725 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4726 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4727 SimFloat16 input = src.Float<SimFloat16>(i); 4728 SimFloat16 rounded = FPRoundInt(input, rounding_mode); 4729 if (inexact_exception && !IsNaN(input) && (input != rounded)) { 4730 FPProcessException(); 4731 } 4732 dst.SetFloat<SimFloat16>(i, rounded); 4733 } 4734 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4735 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4736 float input = src.Float<float>(i); 4737 float rounded = FPRoundInt(input, rounding_mode); 4738 if (inexact_exception && !IsNaN(input) && (input != rounded)) { 4739 FPProcessException(); 4740 } 4741 dst.SetFloat<float>(i, rounded); 4742 } 4743 } else { 4744 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4745 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4746 double input = src.Float<double>(i); 4747 double rounded = FPRoundInt(input, rounding_mode); 4748 if (inexact_exception && !IsNaN(input) && (input != rounded)) { 4749 FPProcessException(); 4750 } 4751 dst.SetFloat<double>(i, rounded); 4752 } 4753 } 4754 return dst; 4755 } 4756 4757 4758 LogicVRegister Simulator::fcvts(VectorFormat vform, 4759 LogicVRegister dst, 4760 const LogicVRegister& src, 4761 FPRounding rounding_mode, 4762 int fbits) { 4763 dst.ClearForWrite(vform); 4764 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4765 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4766 SimFloat16 op = 4767 static_cast<double>(src.Float<SimFloat16>(i)) * 
std::pow(2.0, fbits); 4768 dst.SetInt(vform, i, FPToInt16(op, rounding_mode)); 4769 } 4770 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4771 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4772 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4773 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 4774 } 4775 } else { 4776 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4777 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4778 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4779 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 4780 } 4781 } 4782 return dst; 4783 } 4784 4785 4786 LogicVRegister Simulator::fcvtu(VectorFormat vform, 4787 LogicVRegister dst, 4788 const LogicVRegister& src, 4789 FPRounding rounding_mode, 4790 int fbits) { 4791 dst.ClearForWrite(vform); 4792 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4793 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4794 SimFloat16 op = 4795 static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits); 4796 dst.SetUint(vform, i, FPToUInt16(op, rounding_mode)); 4797 } 4798 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4799 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4800 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 4801 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 4802 } 4803 } else { 4804 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4805 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4806 double op = src.Float<double>(i) * std::pow(2.0, fbits); 4807 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 4808 } 4809 } 4810 return dst; 4811 } 4812 4813 4814 LogicVRegister Simulator::fcvtl(VectorFormat vform, 4815 LogicVRegister dst, 4816 const LogicVRegister& src) { 4817 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4818 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4819 // TODO: Full support for SimFloat16 in SimRegister(s). 
4820 dst.SetFloat(i, 4821 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)), 4822 ReadDN())); 4823 } 4824 } else { 4825 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4826 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 4827 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN())); 4828 } 4829 } 4830 return dst; 4831 } 4832 4833 4834 LogicVRegister Simulator::fcvtl2(VectorFormat vform, 4835 LogicVRegister dst, 4836 const LogicVRegister& src) { 4837 int lane_count = LaneCountFromFormat(vform); 4838 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 4839 for (int i = 0; i < lane_count; i++) { 4840 // TODO: Full support for SimFloat16 in SimRegister(s). 4841 dst.SetFloat(i, 4842 FPToFloat(RawbitsToFloat16( 4843 src.Float<uint16_t>(i + lane_count)), 4844 ReadDN())); 4845 } 4846 } else { 4847 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 4848 for (int i = 0; i < lane_count; i++) { 4849 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN())); 4850 } 4851 } 4852 return dst; 4853 } 4854 4855 4856 LogicVRegister Simulator::fcvtn(VectorFormat vform, 4857 LogicVRegister dst, 4858 const LogicVRegister& src) { 4859 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4860 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4861 dst.SetFloat(i, 4862 Float16ToRawbits( 4863 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()))); 4864 } 4865 } else { 4866 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4867 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4868 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN())); 4869 } 4870 } 4871 return dst; 4872 } 4873 4874 4875 LogicVRegister Simulator::fcvtn2(VectorFormat vform, 4876 LogicVRegister dst, 4877 const LogicVRegister& src) { 4878 int lane_count = LaneCountFromFormat(vform) / 2; 4879 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 4880 for (int i = lane_count - 1; i >= 0; i--) { 4881 dst.SetFloat(i + lane_count, 4882 Float16ToRawbits( 
4883 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN()))); 4884 } 4885 } else { 4886 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4887 for (int i = lane_count - 1; i >= 0; i--) { 4888 dst.SetFloat(i + lane_count, 4889 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN())); 4890 } 4891 } 4892 return dst; 4893 } 4894 4895 4896 LogicVRegister Simulator::fcvtxn(VectorFormat vform, 4897 LogicVRegister dst, 4898 const LogicVRegister& src) { 4899 dst.ClearForWrite(vform); 4900 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4901 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4902 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN())); 4903 } 4904 return dst; 4905 } 4906 4907 4908 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, 4909 LogicVRegister dst, 4910 const LogicVRegister& src) { 4911 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize); 4912 int lane_count = LaneCountFromFormat(vform) / 2; 4913 for (int i = lane_count - 1; i >= 0; i--) { 4914 dst.SetFloat(i + lane_count, 4915 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN())); 4916 } 4917 return dst; 4918 } 4919 4920 4921 // Based on reference C function recip_sqrt_estimate from ARM ARM. 
4922 double Simulator::recip_sqrt_estimate(double a) { 4923 int q0, q1, s; 4924 double r; 4925 if (a < 0.5) { 4926 q0 = static_cast<int>(a * 512.0); 4927 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 4928 } else { 4929 q1 = static_cast<int>(a * 256.0); 4930 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 4931 } 4932 s = static_cast<int>(256.0 * r + 0.5); 4933 return static_cast<double>(s) / 256.0; 4934 } 4935 4936 4937 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 4938 return ExtractUnsignedBitfield64(start_bit, end_bit, val); 4939 } 4940 4941 4942 template <typename T> 4943 T Simulator::FPRecipSqrtEstimate(T op) { 4944 if (IsNaN(op)) { 4945 return FPProcessNaN(op); 4946 } else if (op == 0.0) { 4947 if (copysign(1.0, op) < 0.0) { 4948 return kFP64NegativeInfinity; 4949 } else { 4950 return kFP64PositiveInfinity; 4951 } 4952 } else if (copysign(1.0, op) < 0.0) { 4953 FPProcessException(); 4954 return FPDefaultNaN<T>(); 4955 } else if (IsInf(op)) { 4956 return 0.0; 4957 } else { 4958 uint64_t fraction; 4959 int exp, result_exp; 4960 4961 if (IsFloat16<T>()) { 4962 exp = Float16Exp(op); 4963 fraction = Float16Mantissa(op); 4964 fraction <<= 42; 4965 } else if (IsFloat32<T>()) { 4966 exp = FloatExp(op); 4967 fraction = FloatMantissa(op); 4968 fraction <<= 29; 4969 } else { 4970 VIXL_ASSERT(IsFloat64<T>()); 4971 exp = DoubleExp(op); 4972 fraction = DoubleMantissa(op); 4973 } 4974 4975 if (exp == 0) { 4976 while (Bits(fraction, 51, 51) == 0) { 4977 fraction = Bits(fraction, 50, 0) << 1; 4978 exp -= 1; 4979 } 4980 fraction = Bits(fraction, 50, 0) << 1; 4981 } 4982 4983 double scaled; 4984 if (Bits(exp, 0, 0) == 0) { 4985 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 4986 } else { 4987 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44); 4988 } 4989 4990 if (IsFloat16<T>()) { 4991 result_exp = (44 - exp) / 2; 4992 } else if (IsFloat32<T>()) { 4993 result_exp = (380 - exp) / 2; 4994 } else { 4995 
VIXL_ASSERT(IsFloat64<T>()); 4996 result_exp = (3068 - exp) / 2; 4997 } 4998 4999 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled)); 5000 5001 if (IsFloat16<T>()) { 5002 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0)); 5003 uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42)); 5004 return Float16Pack(0, exp_bits, est_bits); 5005 } else if (IsFloat32<T>()) { 5006 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 5007 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 5008 return FloatPack(0, exp_bits, est_bits); 5009 } else { 5010 VIXL_ASSERT(IsFloat64<T>()); 5011 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 5012 } 5013 } 5014 } 5015 5016 5017 LogicVRegister Simulator::frsqrte(VectorFormat vform, 5018 LogicVRegister dst, 5019 const LogicVRegister& src) { 5020 dst.ClearForWrite(vform); 5021 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5022 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5023 SimFloat16 input = src.Float<SimFloat16>(i); 5024 dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input)); 5025 } 5026 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5027 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5028 float input = src.Float<float>(i); 5029 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 5030 } 5031 } else { 5032 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5033 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5034 double input = src.Float<double>(i); 5035 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 5036 } 5037 } 5038 return dst; 5039 } 5040 5041 template <typename T> 5042 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 5043 uint32_t sign; 5044 5045 if (IsFloat16<T>()) { 5046 sign = Float16Sign(op); 5047 } else if (IsFloat32<T>()) { 5048 sign = FloatSign(op); 5049 } else { 5050 VIXL_ASSERT(IsFloat64<T>()); 5051 sign = DoubleSign(op); 5052 } 5053 5054 if (IsNaN(op)) { 5055 return 
FPProcessNaN(op); 5056 } else if (IsInf(op)) { 5057 return (sign == 1) ? -0.0 : 0.0; 5058 } else if (op == 0.0) { 5059 FPProcessException(); // FPExc_DivideByZero exception. 5060 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 5061 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) || 5062 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) || 5063 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) { 5064 bool overflow_to_inf = false; 5065 switch (rounding) { 5066 case FPTieEven: 5067 overflow_to_inf = true; 5068 break; 5069 case FPPositiveInfinity: 5070 overflow_to_inf = (sign == 0); 5071 break; 5072 case FPNegativeInfinity: 5073 overflow_to_inf = (sign == 1); 5074 break; 5075 case FPZero: 5076 overflow_to_inf = false; 5077 break; 5078 default: 5079 break; 5080 } 5081 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 5082 if (overflow_to_inf) { 5083 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 5084 } else { 5085 // Return FPMaxNormal(sign). 
5086 if (IsFloat16<T>()) { 5087 return Float16Pack(sign, 0x1f, 0x3ff); 5088 } else if (IsFloat32<T>()) { 5089 return FloatPack(sign, 0xfe, 0x07fffff); 5090 } else { 5091 VIXL_ASSERT(IsFloat64<T>()); 5092 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl); 5093 } 5094 } 5095 } else { 5096 uint64_t fraction; 5097 int exp, result_exp; 5098 uint32_t sign; 5099 5100 if (IsFloat16<T>()) { 5101 sign = Float16Sign(op); 5102 exp = Float16Exp(op); 5103 fraction = Float16Mantissa(op); 5104 fraction <<= 42; 5105 } else if (IsFloat32<T>()) { 5106 sign = FloatSign(op); 5107 exp = FloatExp(op); 5108 fraction = FloatMantissa(op); 5109 fraction <<= 29; 5110 } else { 5111 VIXL_ASSERT(IsFloat64<T>()); 5112 sign = DoubleSign(op); 5113 exp = DoubleExp(op); 5114 fraction = DoubleMantissa(op); 5115 } 5116 5117 if (exp == 0) { 5118 if (Bits(fraction, 51, 51) == 0) { 5119 exp -= 1; 5120 fraction = Bits(fraction, 49, 0) << 2; 5121 } else { 5122 fraction = Bits(fraction, 50, 0) << 1; 5123 } 5124 } 5125 5126 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44); 5127 5128 if (IsFloat16<T>()) { 5129 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30. 5130 } else if (IsFloat32<T>()) { 5131 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254. 5132 } else { 5133 VIXL_ASSERT(IsFloat64<T>()); 5134 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046. 
5135 } 5136 5137 double estimate = recip_estimate(scaled); 5138 5139 fraction = DoubleMantissa(estimate); 5140 if (result_exp == 0) { 5141 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1); 5142 } else if (result_exp == -1) { 5143 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 5144 result_exp = 0; 5145 } 5146 if (IsFloat16<T>()) { 5147 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0)); 5148 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42)); 5149 return Float16Pack(sign, exp_bits, frac_bits); 5150 } else if (IsFloat32<T>()) { 5151 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 5152 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 5153 return FloatPack(sign, exp_bits, frac_bits); 5154 } else { 5155 VIXL_ASSERT(IsFloat64<T>()); 5156 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 5157 } 5158 } 5159 } 5160 5161 5162 LogicVRegister Simulator::frecpe(VectorFormat vform, 5163 LogicVRegister dst, 5164 const LogicVRegister& src, 5165 FPRounding round) { 5166 dst.ClearForWrite(vform); 5167 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5168 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5169 SimFloat16 input = src.Float<SimFloat16>(i); 5170 dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round)); 5171 } 5172 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5173 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5174 float input = src.Float<float>(i); 5175 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 5176 } 5177 } else { 5178 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5179 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5180 double input = src.Float<double>(i); 5181 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 5182 } 5183 } 5184 return dst; 5185 } 5186 5187 5188 LogicVRegister Simulator::ursqrte(VectorFormat vform, 5189 LogicVRegister dst, 5190 const LogicVRegister& src) { 5191 
dst.ClearForWrite(vform); 5192 uint64_t operand; 5193 uint32_t result; 5194 double dp_operand, dp_result; 5195 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5196 operand = src.Uint(vform, i); 5197 if (operand <= 0x3FFFFFFF) { 5198 result = 0xFFFFFFFF; 5199 } else { 5200 dp_operand = operand * std::pow(2.0, -32); 5201 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 5202 result = static_cast<uint32_t>(dp_result); 5203 } 5204 dst.SetUint(vform, i, result); 5205 } 5206 return dst; 5207 } 5208 5209 5210 // Based on reference C function recip_estimate from ARM ARM. 5211 double Simulator::recip_estimate(double a) { 5212 int q, s; 5213 double r; 5214 q = static_cast<int>(a * 512.0); 5215 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 5216 s = static_cast<int>(256.0 * r + 0.5); 5217 return static_cast<double>(s) / 256.0; 5218 } 5219 5220 5221 LogicVRegister Simulator::urecpe(VectorFormat vform, 5222 LogicVRegister dst, 5223 const LogicVRegister& src) { 5224 dst.ClearForWrite(vform); 5225 uint64_t operand; 5226 uint32_t result; 5227 double dp_operand, dp_result; 5228 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5229 operand = src.Uint(vform, i); 5230 if (operand <= 0x7FFFFFFF) { 5231 result = 0xFFFFFFFF; 5232 } else { 5233 dp_operand = operand * std::pow(2.0, -32); 5234 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 5235 result = static_cast<uint32_t>(dp_result); 5236 } 5237 dst.SetUint(vform, i, result); 5238 } 5239 return dst; 5240 } 5241 5242 template <typename T> 5243 LogicVRegister Simulator::frecpx(VectorFormat vform, 5244 LogicVRegister dst, 5245 const LogicVRegister& src) { 5246 dst.ClearForWrite(vform); 5247 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5248 T op = src.Float<T>(i); 5249 T result; 5250 if (IsNaN(op)) { 5251 result = FPProcessNaN(op); 5252 } else { 5253 int exp; 5254 uint32_t sign; 5255 if (IsFloat16<T>()) { 5256 sign = Float16Sign(op); 5257 exp = Float16Exp(op); 5258 exp = (exp == 0) ? 
(0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0)); 5259 result = Float16Pack(sign, exp, 0); 5260 } else if (IsFloat32<T>()) { 5261 sign = FloatSign(op); 5262 exp = FloatExp(op); 5263 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 5264 result = FloatPack(sign, exp, 0); 5265 } else { 5266 VIXL_ASSERT(IsFloat64<T>()); 5267 sign = DoubleSign(op); 5268 exp = DoubleExp(op); 5269 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 5270 result = DoublePack(sign, exp, 0); 5271 } 5272 } 5273 dst.SetFloat(i, result); 5274 } 5275 return dst; 5276 } 5277 5278 5279 LogicVRegister Simulator::frecpx(VectorFormat vform, 5280 LogicVRegister dst, 5281 const LogicVRegister& src) { 5282 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5283 frecpx<SimFloat16>(vform, dst, src); 5284 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5285 frecpx<float>(vform, dst, src); 5286 } else { 5287 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5288 frecpx<double>(vform, dst, src); 5289 } 5290 return dst; 5291 } 5292 5293 LogicVRegister Simulator::scvtf(VectorFormat vform, 5294 LogicVRegister dst, 5295 const LogicVRegister& src, 5296 int fbits, 5297 FPRounding round) { 5298 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5299 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5300 SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round); 5301 dst.SetFloat<SimFloat16>(i, result); 5302 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5303 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 5304 dst.SetFloat<float>(i, result); 5305 } else { 5306 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5307 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 5308 dst.SetFloat<double>(i, result); 5309 } 5310 } 5311 return dst; 5312 } 5313 5314 5315 LogicVRegister Simulator::ucvtf(VectorFormat vform, 5316 LogicVRegister dst, 5317 const LogicVRegister& src, 5318 int fbits, 5319 
FPRounding round) { 5320 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 5321 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) { 5322 SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round); 5323 dst.SetFloat<SimFloat16>(i, result); 5324 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) { 5325 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 5326 dst.SetFloat<float>(i, result); 5327 } else { 5328 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); 5329 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 5330 dst.SetFloat<double>(i, result); 5331 } 5332 } 5333 return dst; 5334 } 5335 5336 5337 } // namespace aarch64 5338 } // namespace vixl 5339 5340 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64 5341