// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#if V8_TARGET_ARCH_ARM64

#include <cmath>
#include "src/arm64/simulator-arm64.h"

namespace v8 {
namespace internal {

#if defined(USE_SIMULATOR)

namespace {

// Rounds (sign, exponent, mantissa) to a double by instantiating FPRound with
// the double exponent/mantissa widths and reinterpreting the resulting bits.
// See FPRound for a description of the arguments.
inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
                              FPRounding round_mode) {
  uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
      sign, exponent, mantissa, round_mode);
  return bit_cast<double>(bits);
}

// Rounds (sign, exponent, mantissa) to a float by instantiating FPRound with
// the float exponent/mantissa widths and reinterpreting the resulting bits.
// See FPRound for a description of the arguments.
inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
                            FPRounding round_mode) {
  uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
      sign, exponent, mantissa, round_mode);
  return bit_cast<float>(bits);
}

// Rounds (sign, exponent, mantissa) to a float16 by instantiating FPRound with
// the half-precision exponent/mantissa widths. The FPRound result is returned
// directly as a float16. See FPRound for a description of the arguments.
inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
                                uint64_t mantissa, FPRounding round_mode) {
  return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
      sign, exponent, mantissa, round_mode);
}

}  // namespace

// Converts the signed fixed-point value `src`, which has `fbits` fractional
// bits, to a double using rounding mode `round`. The magnitude is converted by
// UFixedToDouble and the sign is applied afterwards.
double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToDouble(src, fbits, round);
  } else if (src == INT64_MIN) {
    // INT64_MIN cannot be negated as an int64_t. Passing it unchanged converts
    // it to the uint64_t value 2^63, which is already its magnitude.
    return -UFixedToDouble(src, fbits, round);
  } else {
    return -UFixedToDouble(-src, fbits, round);
  }
}

// Converts the unsigned fixed-point value `src`, which has `fbits` fractional
// bits, to a double using rounding mode `round`.
double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
  // An input of 0 is a special case because the result is effectively
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
  if (src == 0) {
    return 0.0;
  }

  // Calculate the exponent. The highest significant bit will have the value
  // 2^exponent.
  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
  const int64_t exponent = highest_significant_bit - fbits;

  return FPRoundToDouble(0, exponent, src, round);
}

// Converts the signed fixed-point value `src`, which has `fbits` fractional
// bits, to a float using rounding mode `round`. The magnitude is converted by
// UFixedToFloat and the sign is applied afterwards.
float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
  if (src >= 0) {
    return UFixedToFloat(src, fbits, round);
  } else if (src == INT64_MIN) {
    // INT64_MIN cannot be negated as an int64_t. Passing it unchanged converts
    // it to the uint64_t value 2^63, which is already its magnitude.
    return -UFixedToFloat(src, fbits, round);
  } else {
    return -UFixedToFloat(-src, fbits, round);
  }
}

// Converts the unsigned fixed-point value `src`, which has `fbits` fractional
// bits, to a float using rounding mode `round`.
float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
  // An input of 0 is a special case because the result is effectively
  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
  if (src == 0) {
    return 0.0f;
  }

  // Calculate the exponent. The highest significant bit will have the value
  // 2^exponent.
  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
  const int32_t exponent = highest_significant_bit - fbits;

  return FPRoundToFloat(0, exponent, src, round);
}

// Widens a float to a double. NaNs are converted explicitly (see below); all
// other classes go through a plain static_cast.
double Simulator::FPToDouble(float value) {
  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      // When DN() is set the default NaN is returned instead of a converted
      // payload.
      if (DN()) return kFP64DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The mantissa is transferred entirely, except that the top bit is
      //    forced to '1', making the result a quiet NaN. The unused (low-order)
      //    mantissa bits are set to 0.
      uint32_t raw = bit_cast<uint32_t>(value);

      uint64_t sign = raw >> 31;
      uint64_t exponent = (1 << kDoubleExponentBits) - 1;
      // Bits 21..0 are the float mantissa without its top bit; that top bit is
      // forced to '1' below.
      uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);

      // Unused low-order bits remain zero.
      mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);

      // Force a quiet NaN.
      mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));

      return double_pack(sign, exponent, mantissa);
    }

    case FP_ZERO:
    case FP_NORMAL:
    case FP_SUBNORMAL:
    case FP_INFINITE: {
      // All other inputs are preserved in a standard cast, because every value
      // representable using an IEEE-754 float is also representable using an
      // IEEE-754 double.
      return static_cast<double>(value);
    }
  }

  UNREACHABLE();
}

// Widens a float16 (handled here as raw bits) to a float. The sign, exponent
// and mantissa fields are extracted up front, adjusted per class, and the
// result is assembled with float_pack.
float Simulator::FPToFloat(float16 value) {
  uint32_t sign = value >> 15;
  uint32_t exponent =
      unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
                             kFloat16MantissaBits, value);
  uint32_t mantissa =
      unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);

  switch (float16classify(value)) {
    case FP_ZERO:
      return (sign == 0) ? 0.0f : -0.0f;

    case FP_INFINITE:
      return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;

    case FP_SUBNORMAL: {
      // Calculate shift required to put mantissa into the most-significant bits
      // of the destination mantissa.
      // NOTE(review): the literal 10 presumably stands for
      // kFloat16MantissaBits, which was used to extract `mantissa` above -
      // confirm and consider using the named constant.
      int shift = CountLeadingZeros(mantissa << (32 - 10), 32);

      // Shift mantissa and discard implicit '1'.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
      mantissa &= (1 << kFloatMantissaBits) - 1;

      // Adjust the exponent for the shift applied, and rebias.
      exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
      break;
    }

    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      // When DN() is set the default NaN is returned instead of a converted
      // payload.
      if (DN()) return kFP32DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The mantissa is transferred entirely, except that the top bit is
      //    forced to '1', making the result a quiet NaN. The unused (low-order)
      //    mantissa bits are set to 0.
      exponent = (1 << kFloatExponentBits) - 1;

      // Increase bits in mantissa, making low-order bits 0.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
      mantissa |= 1 << (kFloatMantissaBits - 1);  // Force a quiet NaN.
      break;
    }

    case FP_NORMAL: {
      // Increase bits in mantissa, making low-order bits 0.
      mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);

      // Change exponent bias.
      exponent += (kFloatExponentBias - kFloat16ExponentBias);
      break;
    }

    default:
      UNREACHABLE();
  }
  return float_pack(sign, exponent, mantissa);
}

// Narrows a float to a float16. NaN, zero and infinity are handled explicitly;
// normal and subnormal inputs are rounded with FPRoundToFloat16.
float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
  // Only the FPTieEven rounding mode is implemented.
  DCHECK_EQ(round_mode, FPTieEven);
  USE(round_mode);

  int64_t sign = float_sign(value);
  int64_t exponent =
      static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
  uint32_t mantissa = float_mantissa(value);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      // When DN() is set the default NaN is returned instead of a converted
      // payload.
      if (DN()) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The mantissa is transferred as much as possible, except that the top
      //    bit is forced to '1', making the result a quiet NaN.
      // The signed-infinity constant supplies the sign and exponent fields;
      // the mantissa bits are OR-ed in on top of it.
      float16 result =
          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
      result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
      result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN.
      return result;
    }

    case FP_ZERO:
      // Raw float16 encodings of +0 and -0 (sign in bit 15).
      return (sign == 0) ? 0 : 0x8000;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert float-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (1 << kFloatMantissaBits);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  UNREACHABLE();
}

// Narrows a double to a float16. NaN, zero and infinity are handled
// explicitly; normal and subnormal inputs are rounded with FPRoundToFloat16.
float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
  // Only the FPTieEven rounding mode is implemented.
  DCHECK_EQ(round_mode, FPTieEven);
  USE(round_mode);

  int64_t sign = double_sign(value);
  int64_t exponent =
      static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
  uint64_t mantissa = double_mantissa(value);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      // When DN() is set the default NaN is returned instead of a converted
      // payload.
      if (DN()) return kFP16DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The mantissa is transferred as much as possible, except that the top
      //    bit is forced to '1', making the result a quiet NaN.
      // The signed-infinity constant supplies the sign and exponent fields;
      // the mantissa bits are OR-ed in on top of it.
      float16 result =
          (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
      result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
      result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN.
      return result;
    }

    case FP_ZERO:
      // Raw float16 encodings of +0 and -0 (sign in bit 15).
      return (sign == 0) ? 0 : 0x8000;

    case FP_INFINITE:
      return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-half as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.

      // Add the implicit '1' bit to the mantissa.
      mantissa += (UINT64_C(1) << kDoubleMantissaBits);
      return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    }
  }

  UNREACHABLE();
}

// Narrows a double to a float. NaNs are converted explicitly, zeros and
// infinities go through a static_cast, and normal/subnormal inputs are rounded
// with FPRoundToFloat.
float Simulator::FPToFloat(double value, FPRounding round_mode) {
  // Only the FPTieEven and FPRoundOdd rounding modes are accepted.
  DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
  USE(round_mode);

  switch (std::fpclassify(value)) {
    case FP_NAN: {
      if (IsSignallingNaN(value)) {
        FPProcessException();
      }
      // When DN() is set the default NaN is returned instead of a converted
      // payload.
      if (DN()) return kFP32DefaultNaN;

      // Convert NaNs as the processor would:
      //  - The sign is propagated.
      //  - The mantissa is transferred as much as possible, except that the
      //    top bit is forced to '1', making the result a quiet NaN.

      uint64_t raw = bit_cast<uint64_t>(value);

      uint32_t sign = raw >> 63;
      // NOTE(review): the literal 8 presumably stands for kFloatExponentBits -
      // confirm and consider using the named constant.
      uint32_t exponent = (1 << 8) - 1;
      // Take the high-order mantissa bits (bit 50 downwards) that fit in the
      // float mantissa below its top bit; that top bit is forced to '1' next.
      uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
          50, kDoubleMantissaBits - kFloatMantissaBits, raw));
      mantissa |= (1 << (kFloatMantissaBits - 1));  // Force a quiet NaN.

      return float_pack(sign, exponent, mantissa);
    }

    case FP_ZERO:
    case FP_INFINITE: {
      // In a C++ cast, any value representable in the target type will be
      // unchanged. This is always the case for +/-0.0 and infinities.
      return static_cast<float>(value);
    }

    case FP_NORMAL:
    case FP_SUBNORMAL: {
      // Convert double-to-float as the processor would, assuming that FPCR.FZ
      // (flush-to-zero) is not set.
      uint32_t sign = double_sign(value);
      int64_t exponent =
          static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
      uint64_t mantissa = double_mantissa(value);
      if (std::fpclassify(value) == FP_NORMAL) {
        // For normal FP values, add the hidden bit.
        mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
      }
      return FPRoundToFloat(sign, exponent, mantissa, round_mode);
    }
  }

  UNREACHABLE();
}

// LD1 (multiple elements): fills every lane of `dst` from consecutive
// elements in memory, starting at `addr`.
void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.ReadUintFromMem(vform, i, addr);
    addr += LaneSizeInBytesFromFormat(vform);
  }
}

// LD1 (single lane): reads one element at `addr` into lane `index` of `dst`.
void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
                    uint64_t addr) {
  dst.ReadUintFromMem(vform, index, addr);
}

// LD1R: reads the element at `addr` into every lane of `dst`.
void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.ReadUintFromMem(vform, i, addr);
  }
}

// LD2 (multiple structures): memory holds interleaved two-element structures
// starting at `addr1`. Lane i of dst1 receives the first element of structure
// i and lane i of dst2 the second.
void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
                    LogicVRegister dst2, uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr1);
    dst2.ReadUintFromMem(vform, i, addr2);
    // Step both cursors over one whole structure.
    addr1 += 2 * esize;
    addr2 += 2 * esize;
  }
}

// LD2 (single structure): reads two consecutive elements starting at `addr1`
// into lane `index` of dst1 and dst2 respectively.
// NOTE(review): unlike the single-lane ld1 above, this calls ClearForWrite on
// the destinations - confirm that is intended for a lane load.
void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
                    LogicVRegister dst2, int index, uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  dst1.ReadUintFromMem(vform, index, addr1);
  dst2.ReadUintFromMem(vform, index, addr2);
}

// LD2R: reads two consecutive elements starting at `addr`; every lane of dst1
// receives the first and every lane of dst2 the second.
void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
                     LogicVRegister dst2, uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr);
    dst2.ReadUintFromMem(vform, i, addr2);
  }
}

// LD3 (multiple structures): as ld2, but for interleaved three-element
// structures de-interleaved into dst1, dst2 and dst3.
void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
                    LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  uint64_t addr3 = addr2 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr1);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
    // Step all cursors over one whole structure.
    addr1 += 3 * esize;
    addr2 += 3 * esize;
    addr3 += 3 * esize;
  }
}

// LD3 (single structure): reads three consecutive elements starting at
// `addr1` into lane `index` of dst1, dst2 and dst3 respectively.
void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
                    LogicVRegister dst2, LogicVRegister dst3, int index,
                    uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  dst1.ReadUintFromMem(vform, index, addr1);
  dst2.ReadUintFromMem(vform, index, addr2);
  dst3.ReadUintFromMem(vform, index, addr3);
}

// LD3R: reads three consecutive elements starting at `addr` and replicates
// each one across all lanes of dst1, dst2 and dst3 respectively.
void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
                     LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
  }
}

// LD4 (multiple structures): as ld2, but for interleaved four-element
// structures de-interleaved into dst1..dst4.
void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
                    LogicVRegister dst2, LogicVRegister dst3,
                    LogicVRegister dst4, uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr1 + esize;
  uint64_t addr3 = addr2 + esize;
  uint64_t addr4 = addr3 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr1);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
    dst4.ReadUintFromMem(vform, i, addr4);
    // Step all cursors over one whole structure.
    addr1 += 4 * esize;
    addr2 += 4 * esize;
    addr3 += 4 * esize;
    addr4 += 4 * esize;
  }
}

// LD4 (single structure): reads four consecutive elements starting at `addr1`
// into lane `index` of dst1..dst4 respectively.
void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
                    LogicVRegister dst2, LogicVRegister dst3,
                    LogicVRegister dst4, int index, uint64_t addr1) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
  dst1.ReadUintFromMem(vform, index, addr1);
  dst2.ReadUintFromMem(vform, index, addr2);
  dst3.ReadUintFromMem(vform, index, addr3);
  dst4.ReadUintFromMem(vform, index, addr4);
}

// LD4R: reads four consecutive elements starting at `addr` and replicates
// each one across all lanes of dst1..dst4 respectively.
void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
                     LogicVRegister dst2, LogicVRegister dst3,
                     LogicVRegister dst4, uint64_t addr) {
  dst1.ClearForWrite(vform);
  dst2.ClearForWrite(vform);
  dst3.ClearForWrite(vform);
  dst4.ClearForWrite(vform);
  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst1.ReadUintFromMem(vform, i, addr);
    dst2.ReadUintFromMem(vform, i, addr2);
    dst3.ReadUintFromMem(vform, i, addr3);
    dst4.ReadUintFromMem(vform, i, addr4);
  }
}

// ST1 (multiple elements): writes every lane of `src` to consecutive elements
// in memory, starting at `addr`.
void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    src.WriteUintToMem(vform, i, addr);
    addr += LaneSizeInBytesFromFormat(vform);
  }
}

// ST1 (single lane): writes lane `index` of `src` to `addr`.
void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
                    uint64_t addr) {
  src.WriteUintToMem(vform, index, addr);
}

// ST2 (multiple structures): interleaves the lanes of `dst` and `dst2` into
// two-element structures written from `addr`. Despite their names, `dst` and
// `dst2` are the registers being stored.
void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.WriteUintToMem(vform, i, addr);
    dst2.WriteUintToMem(vform, i, addr2);
    // Step both cursors over one whole structure.
    addr += 2 * esize;
    addr2 += 2 * esize;
  }
}

// ST2 (single structure): writes lane `index` of `dst` and `dst2` to two
// consecutive elements starting at `addr`.
void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
                    int index, uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  dst.WriteUintToMem(vform, index, addr);
  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
}

// ST3 (multiple structures): interleaves the lanes of `dst`, `dst2` and `dst3`
// into three-element structures written from `addr`.
void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
                    LogicVRegister dst3, uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  uint64_t addr3 = addr2 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.WriteUintToMem(vform, i, addr);
    dst2.WriteUintToMem(vform, i, addr2);
    dst3.WriteUintToMem(vform, i, addr3);
    // Step all cursors over one whole structure.
    addr += 3 * esize;
    addr2 += 3 * esize;
    addr3 += 3 * esize;
  }
}

// ST3 (single structure): writes lane `index` of `dst`, `dst2` and `dst3` to
// three consecutive elements starting at `addr`.
void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
                    LogicVRegister dst3, int index, uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  dst.WriteUintToMem(vform, index, addr);
  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
}

// ST4 (multiple structures): interleaves the lanes of `dst`..`dst4` into
// four-element structures written from `addr`.
void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
                    LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  uint64_t addr2 = addr + esize;
  uint64_t addr3 = addr2 + esize;
  uint64_t addr4 = addr3 + esize;
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.WriteUintToMem(vform, i, addr);
    dst2.WriteUintToMem(vform, i, addr2);
    dst3.WriteUintToMem(vform, i, addr3);
    dst4.WriteUintToMem(vform, i, addr4);
    // Step all cursors over one whole structure.
    addr += 4 * esize;
    addr2 += 4 * esize;
    addr3 += 4 * esize;
    addr4 += 4 * esize;
  }
}

// ST4 (single structure): writes lane `index` of `dst`..`dst4` to four
// consecutive elements starting at `addr`.
void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
                    LogicVRegister dst3, LogicVRegister dst4, int index,
                    uint64_t addr) {
  int esize = LaneSizeInBytesFromFormat(vform);
  dst.WriteUintToMem(vform, index, addr);
  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
}

// Lane-wise integer compare of src1 against src2 under `cond`. A lane of `dst`
// is set to MaxUintFromFormat(vform) when the comparison holds and to 0
// otherwise. eq/hi/hs compare the unsigned lane values; ge/gt/lt/le compare
// the signed lane values. Any other condition is unreachable.
LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2, Condition cond) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    int64_t sa = src1.Int(vform, i);
    int64_t sb = src2.Int(vform, i);
    uint64_t ua = src1.Uint(vform, i);
    uint64_t ub = src2.Uint(vform, i);
    bool result = false;
    switch (cond) {
      case eq:
        result = (ua == ub);
        break;
      case ge:
        result = (sa >= sb);
        break;
      case gt:
        result = (sa > sb);
        break;
      case hi:
        result = (ua > ub);
        break;
      case hs:
        result = (ua >= ub);
        break;
      case lt:
        result = (sa < sb);
        break;
      case le:
        result = (sa <= sb);
        break;
      default:
        UNREACHABLE();
    }
    dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}

// Lane-wise compare of src1 against an immediate: `imm` is turned into a
// vector operand with dup_immediate and the register form of cmp is used.
LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1, int imm,
                              Condition cond) {
  SimVRegister temp;
  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
  return cmp(vform, dst, src1, imm_reg, cond);
}

// Lane-wise bit test: a lane of `dst` is set to MaxUintFromFormat(vform) when
// the corresponding lanes of src1 and src2 share at least one set bit, and to
// 0 otherwise.
LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    uint64_t ua = src1.Uint(vform, i);
    uint64_t ub = src2.Uint(vform, i);
    dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
  }
  return dst;
}

// Lane-wise addition. The operands are read with UintLeftJustified so that
// unsigned carry-out and signed overflow can be detected at bit 63 for any
// lane size; overflow is reported through SetUnsignedSat / SetSignedSat and
// the sum is shifted back down before being written to the lane.
LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for unsigned saturation.
    uint64_t ua = src1.UintLeftJustified(vform, i);
    uint64_t ub = src2.UintLeftJustified(vform, i);
    uint64_t ur = ua + ub;
    // The 64-bit sum wrapped around, i.e. there was a carry out.
    if (ur < ua) {
      dst.SetUnsignedSat(i, true);
    }

    // Test for signed saturation.
    bool pos_a = (ua >> 63) == 0;
    bool pos_b = (ub >> 63) == 0;
    bool pos_r = (ur >> 63) == 0;
    // If the signs of the operands are the same, but different from the result,
    // there was an overflow.
    if ((pos_a == pos_b) && (pos_a != pos_r)) {
      dst.SetSignedSat(i, pos_a);
    }

    // Undo the left justification before storing the lane.
    dst.SetInt(vform, i, ur >> (64 - lane_size));
  }
  return dst;
}

// Pairwise add: dst = uzp1(src1, src2) + uzp2(src1, src2), lane by lane.
LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  uzp1(vform, temp1, src1, src2);
  uzp2(vform, temp2, src1, src2);
  add(vform, dst, temp1, temp2);
  return dst;
}

// Multiply-accumulate: dst = dst + (src1 * src2), built from mul and add.
LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister temp;
  mul(vform, temp, src1, src2);
  add(vform, dst, dst, temp);
  return dst;
}

// Multiply-subtract: dst = dst - (src1 * src2), built from mul and sub.
LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  SimVRegister temp;
  mul(vform, temp, src1, src2);
  sub(vform, dst, dst, temp);
  return dst;
}

// Lane-wise multiply of the unsigned lane values of src1 and src2; each
// product is written to the corresponding lane of `dst` with SetUint.
LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
  }
  return dst;
}

// By-element variants.
//
// Each function below takes an extra `index` selecting an element of src2. It
// builds the second operand with dup_element(indexform, temp, src2, index) and
// delegates to the vector form of the same operation. `indexform` is
// VectorFormatFillQ(vform) for the same-width operations (mul, mla, mls,
// sqdmulh, sqrdmulh) and VectorFormatHalfWidthDoubleLanes of that for the
// long (widening) operations.

// By-element form of mul.
LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of mla.
LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of mls.
LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of smull.
LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of smull2.
LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of umull.
LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of umull2.
LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of smlal.
LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of smlal2.
LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of umlal.
LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of umlal2.
LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of smlsl.
LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of smlsl2.
LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of umlsl.
LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of umlsl2.
LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmull.
LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmull2.
LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmlal.
LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmlal2.
LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmlsl.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmlsl2.
LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform =
      VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
  return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqdmulh.
LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// By-element form of sqrdmulh.
LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2, int index) {
  SimVRegister temp;
  VectorFormat indexform = VectorFormatFillQ(vform);
  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
}

// Carry-less (polynomial, XOR-accumulating) multiplication of two 8-bit
// operands: for every set bit i of op1, op2 shifted left by i is XOR-ed into
// the 16-bit result.
uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
  uint16_t result = 0;
  // Widened so that the shifted partial products keep their high bits.
  uint16_t extended_op2 = op2;
  for (int i = 0; i < 8; ++i) {
    if ((op1 >> i) & 1) {
      result = result ^ (extended_op2 << i);
    }
  }
  return result;
}

// Lane-wise polynomial multiply: each lane of `dst` is written with
// PolynomialMult of the corresponding lanes of src1 and src2.
LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i,
                PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
  }
  return dst;
}

// Long polynomial multiply: sources are read in the half-width format of
// `vform`, using source lane i for destination lane i.
LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  VectorFormat vform_src = VectorFormatHalfWidth(vform);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(
        vform, i,
        PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
  }
  return dst;
}

// Long polynomial multiply, second part: sources are read in the half-width,
// double-lane format of `vform`, and destination lane i uses source lane
// (lane_count + i), i.e. the lanes following the first lane_count ones.
LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
  dst.ClearForWrite(vform);
  int lane_count = LaneCountFromFormat(vform);
  for (int i = 0; i < lane_count; i++) {
    dst.SetUint(vform, i,
                PolynomialMult(src1.Uint(vform_src, lane_count + i),
                               src2.Uint(vform_src, lane_count + i)));
  }
  return dst;
}

// Lane-wise subtraction (src1 - src2). The operands are read with
// UintLeftJustified so that unsigned borrow and signed overflow can be
// detected at bit 63 for any lane size; overflow is reported through
// SetUnsignedSat / SetSignedSat and the difference is shifted back down before
// being written to the lane.
LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  int lane_size = LaneSizeInBitsFromFormat(vform);
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    // Test for unsigned saturation.
    uint64_t ua = src1.UintLeftJustified(vform, i);
    uint64_t ub = src2.UintLeftJustified(vform, i);
    uint64_t ur = ua - ub;
    // Subtracting a larger value borrows, i.e. the unsigned result underflows.
    if (ub > ua) {
      dst.SetUnsignedSat(i, false);
    }

    // Test for signed saturation.
    bool pos_a = (ua >> 63) == 0;
    bool pos_b = (ub >> 63) == 0;
    bool pos_r = (ur >> 63) == 0;
    // If the signs of the operands are different, and the sign of the first
    // operand doesn't match the result, there was an overflow.
    if ((pos_a != pos_b) && (pos_a != pos_r)) {
      dst.SetSignedSat(i, pos_a);
    }

    // Undo the left justification before storing the lane.
    dst.SetInt(vform, i, ur >> (64 - lane_size));
  }
  return dst;
}

// Lane-wise bitwise AND: dst = src1 & src2.
LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
  }
  return dst;
}

// Lane-wise bitwise OR: dst = src1 | src2.
LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
  }
  return dst;
}

// Lane-wise bitwise OR-NOT: dst = src1 | ~src2.
LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
  }
  return dst;
}

// Lane-wise bitwise exclusive OR: dst = src1 ^ src2.
LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
  }
  return dst;
}

// Lane-wise bit clear: dst = src1 & ~src2.
LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
  }
  return dst;
}

LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src, uint64_t imm) {
uint64_t result[16]; 1045 int laneCount = LaneCountFromFormat(vform); 1046 for (int i = 0; i < laneCount; ++i) { 1047 result[i] = src.Uint(vform, i) & ~imm; 1048 } 1049 dst.SetUintArray(vform, result); 1050 return dst; 1051 } 1052 1053 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst, 1054 const LogicVRegister& src1, 1055 const LogicVRegister& src2) { 1056 dst.ClearForWrite(vform); 1057 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1058 uint64_t operand1 = dst.Uint(vform, i); 1059 uint64_t operand2 = ~src2.Uint(vform, i); 1060 uint64_t operand3 = src1.Uint(vform, i); 1061 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1062 dst.SetUint(vform, i, result); 1063 } 1064 return dst; 1065 } 1066 1067 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst, 1068 const LogicVRegister& src1, 1069 const LogicVRegister& src2) { 1070 dst.ClearForWrite(vform); 1071 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1072 uint64_t operand1 = dst.Uint(vform, i); 1073 uint64_t operand2 = src2.Uint(vform, i); 1074 uint64_t operand3 = src1.Uint(vform, i); 1075 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1076 dst.SetUint(vform, i, result); 1077 } 1078 return dst; 1079 } 1080 1081 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst, 1082 const LogicVRegister& src1, 1083 const LogicVRegister& src2) { 1084 dst.ClearForWrite(vform); 1085 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1086 uint64_t operand1 = src2.Uint(vform, i); 1087 uint64_t operand2 = dst.Uint(vform, i); 1088 uint64_t operand3 = src1.Uint(vform, i); 1089 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2); 1090 dst.SetUint(vform, i, result); 1091 } 1092 return dst; 1093 } 1094 1095 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst, 1096 const LogicVRegister& src1, 1097 const LogicVRegister& src2, bool max) { 1098 dst.ClearForWrite(vform); 1099 for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { 1100 int64_t src1_val = src1.Int(vform, i); 1101 int64_t src2_val = src2.Int(vform, i); 1102 int64_t dst_val; 1103 if (max) { 1104 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1105 } else { 1106 dst_val = (src1_val < src2_val) ? src1_val : src2_val; 1107 } 1108 dst.SetInt(vform, i, dst_val); 1109 } 1110 return dst; 1111 } 1112 1113 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst, 1114 const LogicVRegister& src1, 1115 const LogicVRegister& src2) { 1116 return SMinMax(vform, dst, src1, src2, true); 1117 } 1118 1119 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst, 1120 const LogicVRegister& src1, 1121 const LogicVRegister& src2) { 1122 return SMinMax(vform, dst, src1, src2, false); 1123 } 1124 1125 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst, 1126 const LogicVRegister& src1, 1127 const LogicVRegister& src2, bool max) { 1128 int lanes = LaneCountFromFormat(vform); 1129 int64_t result[kMaxLanesPerVector]; 1130 const LogicVRegister* src = &src1; 1131 for (int j = 0; j < 2; j++) { 1132 for (int i = 0; i < lanes; i += 2) { 1133 int64_t first_val = src->Int(vform, i); 1134 int64_t second_val = src->Int(vform, i + 1); 1135 int64_t dst_val; 1136 if (max) { 1137 dst_val = (first_val > second_val) ? first_val : second_val; 1138 } else { 1139 dst_val = (first_val < second_val) ? 
first_val : second_val; 1140 } 1141 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); 1142 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1143 } 1144 src = &src2; 1145 } 1146 dst.SetIntArray(vform, result); 1147 return dst; 1148 } 1149 1150 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst, 1151 const LogicVRegister& src1, 1152 const LogicVRegister& src2) { 1153 return SMinMaxP(vform, dst, src1, src2, true); 1154 } 1155 1156 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst, 1157 const LogicVRegister& src1, 1158 const LogicVRegister& src2) { 1159 return SMinMaxP(vform, dst, src1, src2, false); 1160 } 1161 1162 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst, 1163 const LogicVRegister& src) { 1164 DCHECK_EQ(vform, kFormatD); 1165 1166 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1); 1167 dst.ClearForWrite(vform); 1168 dst.SetUint(vform, 0, dst_val); 1169 return dst; 1170 } 1171 1172 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst, 1173 const LogicVRegister& src) { 1174 VectorFormat vform_dst = 1175 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform)); 1176 1177 int64_t dst_val = 0; 1178 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1179 dst_val += src.Int(vform, i); 1180 } 1181 1182 dst.ClearForWrite(vform_dst); 1183 dst.SetInt(vform_dst, 0, dst_val); 1184 return dst; 1185 } 1186 1187 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst, 1188 const LogicVRegister& src) { 1189 VectorFormat vform_dst = 1190 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1191 1192 int64_t dst_val = 0; 1193 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1194 dst_val += src.Int(vform, i); 1195 } 1196 1197 dst.ClearForWrite(vform_dst); 1198 dst.SetInt(vform_dst, 0, dst_val); 1199 return dst; 1200 } 1201 1202 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst, 1203 const LogicVRegister& src) { 1204 
VectorFormat vform_dst = 1205 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2); 1206 1207 uint64_t dst_val = 0; 1208 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1209 dst_val += src.Uint(vform, i); 1210 } 1211 1212 dst.ClearForWrite(vform_dst); 1213 dst.SetUint(vform_dst, 0, dst_val); 1214 return dst; 1215 } 1216 1217 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst, 1218 const LogicVRegister& src, bool max) { 1219 int64_t dst_val = max ? INT64_MIN : INT64_MAX; 1220 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1221 int64_t src_val = src.Int(vform, i); 1222 if (max) { 1223 dst_val = (src_val > dst_val) ? src_val : dst_val; 1224 } else { 1225 dst_val = (src_val < dst_val) ? src_val : dst_val; 1226 } 1227 } 1228 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1229 dst.SetInt(vform, 0, dst_val); 1230 return dst; 1231 } 1232 1233 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst, 1234 const LogicVRegister& src) { 1235 SMinMaxV(vform, dst, src, true); 1236 return dst; 1237 } 1238 1239 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst, 1240 const LogicVRegister& src) { 1241 SMinMaxV(vform, dst, src, false); 1242 return dst; 1243 } 1244 1245 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst, 1246 const LogicVRegister& src1, 1247 const LogicVRegister& src2, bool max) { 1248 dst.ClearForWrite(vform); 1249 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1250 uint64_t src1_val = src1.Uint(vform, i); 1251 uint64_t src2_val = src2.Uint(vform, i); 1252 uint64_t dst_val; 1253 if (max) { 1254 dst_val = (src1_val > src2_val) ? src1_val : src2_val; 1255 } else { 1256 dst_val = (src1_val < src2_val) ? 
src1_val : src2_val; 1257 } 1258 dst.SetUint(vform, i, dst_val); 1259 } 1260 return dst; 1261 } 1262 1263 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst, 1264 const LogicVRegister& src1, 1265 const LogicVRegister& src2) { 1266 return UMinMax(vform, dst, src1, src2, true); 1267 } 1268 1269 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst, 1270 const LogicVRegister& src1, 1271 const LogicVRegister& src2) { 1272 return UMinMax(vform, dst, src1, src2, false); 1273 } 1274 1275 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst, 1276 const LogicVRegister& src1, 1277 const LogicVRegister& src2, bool max) { 1278 int lanes = LaneCountFromFormat(vform); 1279 uint64_t result[kMaxLanesPerVector]; 1280 const LogicVRegister* src = &src1; 1281 for (int j = 0; j < 2; j++) { 1282 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) { 1283 uint64_t first_val = src->Uint(vform, i); 1284 uint64_t second_val = src->Uint(vform, i + 1); 1285 uint64_t dst_val; 1286 if (max) { 1287 dst_val = (first_val > second_val) ? first_val : second_val; 1288 } else { 1289 dst_val = (first_val < second_val) ? first_val : second_val; 1290 } 1291 DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector); 1292 result[(i >> 1) + (j * lanes / 2)] = dst_val; 1293 } 1294 src = &src2; 1295 } 1296 dst.SetUintArray(vform, result); 1297 return dst; 1298 } 1299 1300 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst, 1301 const LogicVRegister& src1, 1302 const LogicVRegister& src2) { 1303 return UMinMaxP(vform, dst, src1, src2, true); 1304 } 1305 1306 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst, 1307 const LogicVRegister& src1, 1308 const LogicVRegister& src2) { 1309 return UMinMaxP(vform, dst, src1, src2, false); 1310 } 1311 1312 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst, 1313 const LogicVRegister& src, bool max) { 1314 uint64_t dst_val = max ? 
0 : UINT64_MAX; 1315 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1316 uint64_t src_val = src.Uint(vform, i); 1317 if (max) { 1318 dst_val = (src_val > dst_val) ? src_val : dst_val; 1319 } else { 1320 dst_val = (src_val < dst_val) ? src_val : dst_val; 1321 } 1322 } 1323 dst.ClearForWrite(ScalarFormatFromFormat(vform)); 1324 dst.SetUint(vform, 0, dst_val); 1325 return dst; 1326 } 1327 1328 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst, 1329 const LogicVRegister& src) { 1330 UMinMaxV(vform, dst, src, true); 1331 return dst; 1332 } 1333 1334 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst, 1335 const LogicVRegister& src) { 1336 UMinMaxV(vform, dst, src, false); 1337 return dst; 1338 } 1339 1340 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst, 1341 const LogicVRegister& src, int shift) { 1342 DCHECK_GE(shift, 0); 1343 SimVRegister temp; 1344 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1345 return ushl(vform, dst, src, shiftreg); 1346 } 1347 1348 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst, 1349 const LogicVRegister& src, int shift) { 1350 DCHECK_GE(shift, 0); 1351 SimVRegister temp1, temp2; 1352 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1353 LogicVRegister extendedreg = sxtl(vform, temp2, src); 1354 return sshl(vform, dst, extendedreg, shiftreg); 1355 } 1356 1357 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst, 1358 const LogicVRegister& src, int shift) { 1359 DCHECK_GE(shift, 0); 1360 SimVRegister temp1, temp2; 1361 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1362 LogicVRegister extendedreg = sxtl2(vform, temp2, src); 1363 return sshl(vform, dst, extendedreg, shiftreg); 1364 } 1365 1366 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst, 1367 const LogicVRegister& src) { 1368 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1369 return sshll(vform, dst, src, shift); 
1370 } 1371 1372 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst, 1373 const LogicVRegister& src) { 1374 int shift = LaneSizeInBitsFromFormat(vform) / 2; 1375 return sshll2(vform, dst, src, shift); 1376 } 1377 1378 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst, 1379 const LogicVRegister& src, int shift) { 1380 DCHECK_GE(shift, 0); 1381 SimVRegister temp1, temp2; 1382 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1383 LogicVRegister extendedreg = uxtl(vform, temp2, src); 1384 return ushl(vform, dst, extendedreg, shiftreg); 1385 } 1386 1387 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst, 1388 const LogicVRegister& src, int shift) { 1389 DCHECK_GE(shift, 0); 1390 SimVRegister temp1, temp2; 1391 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift); 1392 LogicVRegister extendedreg = uxtl2(vform, temp2, src); 1393 return ushl(vform, dst, extendedreg, shiftreg); 1394 } 1395 1396 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst, 1397 const LogicVRegister& src, int shift) { 1398 dst.ClearForWrite(vform); 1399 int laneCount = LaneCountFromFormat(vform); 1400 for (int i = 0; i < laneCount; i++) { 1401 uint64_t src_lane = src.Uint(vform, i); 1402 uint64_t dst_lane = dst.Uint(vform, i); 1403 uint64_t shifted = src_lane << shift; 1404 uint64_t mask = MaxUintFromFormat(vform) << shift; 1405 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1406 } 1407 return dst; 1408 } 1409 1410 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst, 1411 const LogicVRegister& src, int shift) { 1412 DCHECK_GE(shift, 0); 1413 SimVRegister temp; 1414 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1415 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform); 1416 } 1417 1418 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst, 1419 const LogicVRegister& src, int shift) { 1420 DCHECK_GE(shift, 0); 1421 SimVRegister temp; 1422 
LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1423 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1424 } 1425 1426 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst, 1427 const LogicVRegister& src, int shift) { 1428 DCHECK_GE(shift, 0); 1429 SimVRegister temp; 1430 LogicVRegister shiftreg = dup_immediate(vform, temp, shift); 1431 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform); 1432 } 1433 1434 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst, 1435 const LogicVRegister& src, int shift) { 1436 dst.ClearForWrite(vform); 1437 int laneCount = LaneCountFromFormat(vform); 1438 DCHECK((shift > 0) && 1439 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform)))); 1440 for (int i = 0; i < laneCount; i++) { 1441 uint64_t src_lane = src.Uint(vform, i); 1442 uint64_t dst_lane = dst.Uint(vform, i); 1443 uint64_t shifted; 1444 uint64_t mask; 1445 if (shift == 64) { 1446 shifted = 0; 1447 mask = 0; 1448 } else { 1449 shifted = src_lane >> shift; 1450 mask = MaxUintFromFormat(vform) >> shift; 1451 } 1452 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted); 1453 } 1454 return dst; 1455 } 1456 1457 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst, 1458 const LogicVRegister& src, int shift) { 1459 DCHECK_GE(shift, 0); 1460 SimVRegister temp; 1461 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1462 return ushl(vform, dst, src, shiftreg); 1463 } 1464 1465 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst, 1466 const LogicVRegister& src, int shift) { 1467 DCHECK_GE(shift, 0); 1468 SimVRegister temp; 1469 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift); 1470 return sshl(vform, dst, src, shiftreg); 1471 } 1472 1473 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst, 1474 const LogicVRegister& src, int shift) { 1475 SimVRegister temp; 1476 LogicVRegister shifted_reg = sshr(vform, temp, src, shift); 1477 return 
add(vform, dst, dst, shifted_reg); 1478 } 1479 1480 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst, 1481 const LogicVRegister& src, int shift) { 1482 SimVRegister temp; 1483 LogicVRegister shifted_reg = ushr(vform, temp, src, shift); 1484 return add(vform, dst, dst, shifted_reg); 1485 } 1486 1487 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst, 1488 const LogicVRegister& src, int shift) { 1489 SimVRegister temp; 1490 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform); 1491 return add(vform, dst, dst, shifted_reg); 1492 } 1493 1494 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst, 1495 const LogicVRegister& src, int shift) { 1496 SimVRegister temp; 1497 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform); 1498 return add(vform, dst, dst, shifted_reg); 1499 } 1500 1501 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst, 1502 const LogicVRegister& src) { 1503 uint64_t result[16]; 1504 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1505 int laneCount = LaneCountFromFormat(vform); 1506 for (int i = 0; i < laneCount; i++) { 1507 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits); 1508 } 1509 1510 dst.SetUintArray(vform, result); 1511 return dst; 1512 } 1513 1514 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst, 1515 const LogicVRegister& src) { 1516 uint64_t result[16]; 1517 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1518 int laneCount = LaneCountFromFormat(vform); 1519 for (int i = 0; i < laneCount; i++) { 1520 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits); 1521 } 1522 1523 dst.SetUintArray(vform, result); 1524 return dst; 1525 } 1526 1527 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst, 1528 const LogicVRegister& src) { 1529 uint64_t result[16]; 1530 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1531 int laneCount = 
LaneCountFromFormat(vform); 1532 for (int i = 0; i < laneCount; i++) { 1533 uint64_t value = src.Uint(vform, i); 1534 result[i] = 0; 1535 for (int j = 0; j < laneSizeInBits; j++) { 1536 result[i] += (value & 1); 1537 value >>= 1; 1538 } 1539 } 1540 1541 dst.SetUintArray(vform, result); 1542 return dst; 1543 } 1544 1545 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst, 1546 const LogicVRegister& src1, 1547 const LogicVRegister& src2) { 1548 dst.ClearForWrite(vform); 1549 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1550 int8_t shift_val = src2.Int(vform, i); 1551 int64_t lj_src_val = src1.IntLeftJustified(vform, i); 1552 1553 // Set signed saturation state. 1554 if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) && 1555 (lj_src_val != 0)) { 1556 dst.SetSignedSat(i, lj_src_val >= 0); 1557 } 1558 1559 // Set unsigned saturation state. 1560 if (lj_src_val < 0) { 1561 dst.SetUnsignedSat(i, false); 1562 } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && 1563 (lj_src_val != 0)) { 1564 dst.SetUnsignedSat(i, true); 1565 } 1566 1567 int64_t src_val = src1.Int(vform, i); 1568 bool src_is_negative = src_val < 0; 1569 if (shift_val > 63) { 1570 dst.SetInt(vform, i, 0); 1571 } else if (shift_val < -63) { 1572 dst.SetRounding(i, src_is_negative); 1573 dst.SetInt(vform, i, src_is_negative ? -1 : 0); 1574 } else { 1575 // Use unsigned types for shifts, as behaviour is undefined for signed 1576 // lhs. 1577 uint64_t usrc_val = static_cast<uint64_t>(src_val); 1578 1579 if (shift_val < 0) { 1580 // Convert to right shift. 1581 shift_val = -shift_val; 1582 1583 // Set rounding state by testing most-significant bit shifted out. 1584 // Rounding only needed on right shifts. 1585 if (((usrc_val >> (shift_val - 1)) & 1) == 1) { 1586 dst.SetRounding(i, true); 1587 } 1588 1589 usrc_val >>= shift_val; 1590 1591 if (src_is_negative) { 1592 // Simulate sign-extension. 
1593 usrc_val |= (~UINT64_C(0) << (64 - shift_val)); 1594 } 1595 } else { 1596 usrc_val <<= shift_val; 1597 } 1598 dst.SetUint(vform, i, usrc_val); 1599 } 1600 } 1601 return dst; 1602 } 1603 1604 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst, 1605 const LogicVRegister& src1, 1606 const LogicVRegister& src2) { 1607 dst.ClearForWrite(vform); 1608 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1609 int8_t shift_val = src2.Int(vform, i); 1610 uint64_t lj_src_val = src1.UintLeftJustified(vform, i); 1611 1612 // Set saturation state. 1613 if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) { 1614 dst.SetUnsignedSat(i, true); 1615 } 1616 1617 uint64_t src_val = src1.Uint(vform, i); 1618 if ((shift_val > 63) || (shift_val < -64)) { 1619 dst.SetUint(vform, i, 0); 1620 } else { 1621 if (shift_val < 0) { 1622 // Set rounding state. Rounding only needed on right shifts. 1623 if (((src_val >> (-shift_val - 1)) & 1) == 1) { 1624 dst.SetRounding(i, true); 1625 } 1626 1627 if (shift_val == -64) { 1628 src_val = 0; 1629 } else { 1630 src_val >>= -shift_val; 1631 } 1632 } else { 1633 src_val <<= shift_val; 1634 } 1635 dst.SetUint(vform, i, src_val); 1636 } 1637 } 1638 return dst; 1639 } 1640 1641 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst, 1642 const LogicVRegister& src) { 1643 dst.ClearForWrite(vform); 1644 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1645 // Test for signed saturation. 1646 int64_t sa = src.Int(vform, i); 1647 if (sa == MinIntFromFormat(vform)) { 1648 dst.SetSignedSat(i, true); 1649 } 1650 dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa); 1651 } 1652 return dst; 1653 } 1654 1655 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst, 1656 const LogicVRegister& src) { 1657 dst.ClearForWrite(vform); 1658 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1659 int64_t sa = dst.IntLeftJustified(vform, i); 1660 uint64_t ub = src.UintLeftJustified(vform, i); 1661 uint64_t ur = sa + ub; 1662 1663 int64_t sr = bit_cast<int64_t>(ur); 1664 if (sr < sa) { // Test for signed positive saturation. 1665 dst.SetInt(vform, i, MaxIntFromFormat(vform)); 1666 } else { 1667 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i)); 1668 } 1669 } 1670 return dst; 1671 } 1672 1673 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst, 1674 const LogicVRegister& src) { 1675 dst.ClearForWrite(vform); 1676 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1677 uint64_t ua = dst.UintLeftJustified(vform, i); 1678 int64_t sb = src.IntLeftJustified(vform, i); 1679 uint64_t ur = ua + sb; 1680 1681 if ((sb > 0) && (ur <= ua)) { 1682 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation. 1683 } else if ((sb < 0) && (ur >= ua)) { 1684 dst.SetUint(vform, i, 0); // Negative saturation. 1685 } else { 1686 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i)); 1687 } 1688 } 1689 return dst; 1690 } 1691 1692 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst, 1693 const LogicVRegister& src) { 1694 dst.ClearForWrite(vform); 1695 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1696 // Test for signed saturation. 1697 int64_t sa = src.Int(vform, i); 1698 if (sa == MinIntFromFormat(vform)) { 1699 dst.SetSignedSat(i, true); 1700 } 1701 if (sa < 0) { 1702 dst.SetInt(vform, i, (sa == INT64_MIN) ? 
sa : -sa); 1703 } else { 1704 dst.SetInt(vform, i, sa); 1705 } 1706 } 1707 return dst; 1708 } 1709 1710 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform, 1711 LogicVRegister dst, bool dstIsSigned, 1712 const LogicVRegister& src, 1713 bool srcIsSigned) { 1714 bool upperhalf = false; 1715 VectorFormat srcform = kFormatUndefined; 1716 int64_t ssrc[8]; 1717 uint64_t usrc[8]; 1718 1719 switch (dstform) { 1720 case kFormat8B: 1721 upperhalf = false; 1722 srcform = kFormat8H; 1723 break; 1724 case kFormat16B: 1725 upperhalf = true; 1726 srcform = kFormat8H; 1727 break; 1728 case kFormat4H: 1729 upperhalf = false; 1730 srcform = kFormat4S; 1731 break; 1732 case kFormat8H: 1733 upperhalf = true; 1734 srcform = kFormat4S; 1735 break; 1736 case kFormat2S: 1737 upperhalf = false; 1738 srcform = kFormat2D; 1739 break; 1740 case kFormat4S: 1741 upperhalf = true; 1742 srcform = kFormat2D; 1743 break; 1744 case kFormatB: 1745 upperhalf = false; 1746 srcform = kFormatH; 1747 break; 1748 case kFormatH: 1749 upperhalf = false; 1750 srcform = kFormatS; 1751 break; 1752 case kFormatS: 1753 upperhalf = false; 1754 srcform = kFormatD; 1755 break; 1756 default: 1757 UNIMPLEMENTED(); 1758 } 1759 1760 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1761 ssrc[i] = src.Int(srcform, i); 1762 usrc[i] = src.Uint(srcform, i); 1763 } 1764 1765 int offset; 1766 if (upperhalf) { 1767 offset = LaneCountFromFormat(dstform) / 2; 1768 } else { 1769 offset = 0; 1770 dst.ClearForWrite(dstform); 1771 } 1772 1773 for (int i = 0; i < LaneCountFromFormat(srcform); i++) { 1774 // Test for signed saturation 1775 if (ssrc[i] > MaxIntFromFormat(dstform)) { 1776 dst.SetSignedSat(offset + i, true); 1777 } else if (ssrc[i] < MinIntFromFormat(dstform)) { 1778 dst.SetSignedSat(offset + i, false); 1779 } 1780 1781 // Test for unsigned saturation 1782 if (srcIsSigned) { 1783 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) { 1784 dst.SetUnsignedSat(offset + i, true); 1785 } else if 
(ssrc[i] < 0) { 1786 dst.SetUnsignedSat(offset + i, false); 1787 } 1788 } else { 1789 if (usrc[i] > MaxUintFromFormat(dstform)) { 1790 dst.SetUnsignedSat(offset + i, true); 1791 } 1792 } 1793 1794 int64_t result; 1795 if (srcIsSigned) { 1796 result = ssrc[i] & MaxUintFromFormat(dstform); 1797 } else { 1798 result = usrc[i] & MaxUintFromFormat(dstform); 1799 } 1800 1801 if (dstIsSigned) { 1802 dst.SetInt(dstform, offset + i, result); 1803 } else { 1804 dst.SetUint(dstform, offset + i, result); 1805 } 1806 } 1807 return dst; 1808 } 1809 1810 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst, 1811 const LogicVRegister& src) { 1812 return ExtractNarrow(vform, dst, true, src, true); 1813 } 1814 1815 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst, 1816 const LogicVRegister& src) { 1817 return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform); 1818 } 1819 1820 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst, 1821 const LogicVRegister& src) { 1822 return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform); 1823 } 1824 1825 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst, 1826 const LogicVRegister& src) { 1827 return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform); 1828 } 1829 1830 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst, 1831 const LogicVRegister& src1, 1832 const LogicVRegister& src2, bool issigned) { 1833 dst.ClearForWrite(vform); 1834 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1835 if (issigned) { 1836 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i); 1837 sr = sr > 0 ? sr : -sr; 1838 dst.SetInt(vform, i, sr); 1839 } else { 1840 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i); 1841 sr = sr > 0 ? 
sr : -sr; 1842 dst.SetUint(vform, i, sr); 1843 } 1844 } 1845 return dst; 1846 } 1847 1848 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst, 1849 const LogicVRegister& src1, 1850 const LogicVRegister& src2) { 1851 SimVRegister temp; 1852 dst.ClearForWrite(vform); 1853 AbsDiff(vform, temp, src1, src2, true); 1854 add(vform, dst, dst, temp); 1855 return dst; 1856 } 1857 1858 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst, 1859 const LogicVRegister& src1, 1860 const LogicVRegister& src2) { 1861 SimVRegister temp; 1862 dst.ClearForWrite(vform); 1863 AbsDiff(vform, temp, src1, src2, false); 1864 add(vform, dst, dst, temp); 1865 return dst; 1866 } 1867 1868 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst, 1869 const LogicVRegister& src) { 1870 dst.ClearForWrite(vform); 1871 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 1872 dst.SetUint(vform, i, ~src.Uint(vform, i)); 1873 } 1874 return dst; 1875 } 1876 1877 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst, 1878 const LogicVRegister& src) { 1879 uint64_t result[16]; 1880 int laneCount = LaneCountFromFormat(vform); 1881 int laneSizeInBits = LaneSizeInBitsFromFormat(vform); 1882 uint64_t reversed_value; 1883 uint64_t value; 1884 for (int i = 0; i < laneCount; i++) { 1885 value = src.Uint(vform, i); 1886 reversed_value = 0; 1887 for (int j = 0; j < laneSizeInBits; j++) { 1888 reversed_value = (reversed_value << 1) | (value & 1); 1889 value >>= 1; 1890 } 1891 result[i] = reversed_value; 1892 } 1893 1894 dst.SetUintArray(vform, result); 1895 return dst; 1896 } 1897 1898 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst, 1899 const LogicVRegister& src, int revSize) { 1900 uint64_t result[16]; 1901 int laneCount = LaneCountFromFormat(vform); 1902 int laneSize = LaneSizeInBytesFromFormat(vform); 1903 int lanesPerLoop = revSize / laneSize; 1904 for (int i = 0; i < laneCount; i += lanesPerLoop) { 1905 for (int j = 0; j 
< lanesPerLoop; j++) { 1906 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j); 1907 } 1908 } 1909 dst.SetUintArray(vform, result); 1910 return dst; 1911 } 1912 1913 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst, 1914 const LogicVRegister& src) { 1915 return rev(vform, dst, src, 2); 1916 } 1917 1918 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst, 1919 const LogicVRegister& src) { 1920 return rev(vform, dst, src, 4); 1921 } 1922 1923 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst, 1924 const LogicVRegister& src) { 1925 return rev(vform, dst, src, 8); 1926 } 1927 1928 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst, 1929 const LogicVRegister& src, bool is_signed, 1930 bool do_accumulate) { 1931 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform); 1932 DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U); 1933 DCHECK_LE(LaneCountFromFormat(vform), 8); 1934 1935 uint64_t result[8]; 1936 int lane_count = LaneCountFromFormat(vform); 1937 for (int i = 0; i < lane_count; i++) { 1938 if (is_signed) { 1939 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) + 1940 src.Int(vformsrc, 2 * i + 1)); 1941 } else { 1942 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1); 1943 } 1944 } 1945 1946 dst.ClearForWrite(vform); 1947 for (int i = 0; i < lane_count; ++i) { 1948 if (do_accumulate) { 1949 result[i] += dst.Uint(vform, i); 1950 } 1951 dst.SetUint(vform, i, result[i]); 1952 } 1953 1954 return dst; 1955 } 1956 1957 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst, 1958 const LogicVRegister& src) { 1959 return addlp(vform, dst, src, true, false); 1960 } 1961 1962 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst, 1963 const LogicVRegister& src) { 1964 return addlp(vform, dst, src, false, false); 1965 } 1966 1967 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst, 1968 const 
// (Continuation of the Simulator::sadalp signature from the previous line;
// kept unchanged.)
                                 LogicVRegister& src) {
  return addlp(vform, dst, src, true, true);
}

// Unsigned pairwise add long with accumulation: addlp with unsigned reads,
// adding the pair sums to the lanes already in dst.
LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src) {
  const bool is_signed = false;
  const bool do_accumulate = true;
  return addlp(vform, dst, src, is_signed, do_accumulate);
}

// Extract: dst receives lanes [index, laneCount) of src1 followed by lanes
// [0, index) of src2. Values are staged in a uint8_t array, so anything wider
// than a byte is truncated; this assumes `vform` is a byte-lane format.
LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
                              const LogicVRegister& src1,
                              const LogicVRegister& src2, int index) {
  uint8_t staged[16];
  const int lane_count = LaneCountFromFormat(vform);
  const int taken_from_src1 = lane_count - index;

  // Upper lanes of src1 go to the bottom of the result...
  for (int lane = 0; lane < taken_from_src1; ++lane) {
    staged[lane] = src1.Uint(vform, lane + index);
  }
  // ...and the low lanes of src2 fill the remainder.
  for (int lane = 0; lane < index; ++lane) {
    staged[taken_from_src1 + lane] = src2.Uint(vform, lane);
  }

  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, staged[lane]);
  }
  return dst;
}

// Broadcasts lane `src_index` of src to every lane of dst. The source lane is
// read before dst is modified.
LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
                                      const LogicVRegister& src,
                                      int src_index) {
  const int lane_count = LaneCountFromFormat(vform);
  const uint64_t replicated = src.Uint(vform, src_index);
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, replicated);
  }
  return dst;
}

// Broadcasts `imm`, masked with MaxUintFromFormat(vform), to every lane of
// dst.
LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
                                        uint64_t imm) {
  const int lane_count = LaneCountFromFormat(vform);
  const uint64_t replicated = imm & MaxUintFromFormat(vform);
  dst.ClearForWrite(vform);
  for (int lane = 0; lane < lane_count; ++lane) {
    dst.SetUint(vform, lane, replicated);
  }
  return dst;
}

// Copies lane `src_index` of src into lane `dst_index` of dst. No other lane
// of dst is written here.
LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
                                      int dst_index, const LogicVRegister& src,
                                      int src_index) {
  const uint64_t element = src.Uint(vform, src_index);
  dst.SetUint(vform, dst_index, element);
  return dst;
}

// Writes `imm`, masked with MaxUintFromFormat(vform), into lane `dst_index`.
// (This definition continues on the next line; kept unchanged.)
LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
                                        int dst_index, uint64_t imm) {
  uint64_t value = imm & MaxUintFromFormat(vform);
2028 dst.SetUint(vform, dst_index, value); 2029 return dst; 2030 } 2031 2032 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst, 2033 uint64_t imm) { 2034 int laneCount = LaneCountFromFormat(vform); 2035 dst.ClearForWrite(vform); 2036 for (int i = 0; i < laneCount; ++i) { 2037 dst.SetUint(vform, i, imm); 2038 } 2039 return dst; 2040 } 2041 2042 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst, 2043 uint64_t imm) { 2044 int laneCount = LaneCountFromFormat(vform); 2045 dst.ClearForWrite(vform); 2046 for (int i = 0; i < laneCount; ++i) { 2047 dst.SetUint(vform, i, ~imm); 2048 } 2049 return dst; 2050 } 2051 2052 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst, 2053 const LogicVRegister& src, uint64_t imm) { 2054 uint64_t result[16]; 2055 int laneCount = LaneCountFromFormat(vform); 2056 for (int i = 0; i < laneCount; ++i) { 2057 result[i] = src.Uint(vform, i) | imm; 2058 } 2059 dst.SetUintArray(vform, result); 2060 return dst; 2061 } 2062 2063 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst, 2064 const LogicVRegister& src) { 2065 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2066 2067 dst.ClearForWrite(vform); 2068 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2069 dst.SetUint(vform, i, src.Uint(vform_half, i)); 2070 } 2071 return dst; 2072 } 2073 2074 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst, 2075 const LogicVRegister& src) { 2076 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2077 2078 dst.ClearForWrite(vform); 2079 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2080 dst.SetInt(vform, i, src.Int(vform_half, i)); 2081 } 2082 return dst; 2083 } 2084 2085 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst, 2086 const LogicVRegister& src) { 2087 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2088 int lane_count = LaneCountFromFormat(vform); 2089 2090 dst.ClearForWrite(vform); 2091 for (int i = 0; i 
< lane_count; i++) { 2092 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i)); 2093 } 2094 return dst; 2095 } 2096 2097 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst, 2098 const LogicVRegister& src) { 2099 VectorFormat vform_half = VectorFormatHalfWidth(vform); 2100 int lane_count = LaneCountFromFormat(vform); 2101 2102 dst.ClearForWrite(vform); 2103 for (int i = 0; i < lane_count; i++) { 2104 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i)); 2105 } 2106 return dst; 2107 } 2108 2109 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst, 2110 const LogicVRegister& src, int shift) { 2111 SimVRegister temp; 2112 VectorFormat vform_src = VectorFormatDoubleWidth(vform); 2113 VectorFormat vform_dst = vform; 2114 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift); 2115 return ExtractNarrow(vform_dst, dst, false, shifted_src, false); 2116 } 2117 2118 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst, 2119 const LogicVRegister& src, int shift) { 2120 SimVRegister temp; 2121 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2122 VectorFormat vformdst = vform; 2123 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift); 2124 return ExtractNarrow(vformdst, dst, false, shifted_src, false); 2125 } 2126 2127 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst, 2128 const LogicVRegister& src, int shift) { 2129 SimVRegister temp; 2130 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2131 VectorFormat vformdst = vform; 2132 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2133 return ExtractNarrow(vformdst, dst, false, shifted_src, false); 2134 } 2135 2136 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst, 2137 const LogicVRegister& src, int shift) { 2138 SimVRegister temp; 2139 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2140 VectorFormat 
vformdst = vform; 2141 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc); 2142 return ExtractNarrow(vformdst, dst, false, shifted_src, false); 2143 } 2144 2145 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst, 2146 const LogicVRegister& ind, 2147 bool zero_out_of_bounds, 2148 const LogicVRegister* tab1, 2149 const LogicVRegister* tab2, 2150 const LogicVRegister* tab3, 2151 const LogicVRegister* tab4) { 2152 DCHECK_NOT_NULL(tab1); 2153 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4}; 2154 uint64_t result[kMaxLanesPerVector]; 2155 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2156 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i); 2157 } 2158 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2159 uint64_t j = ind.Uint(vform, i); 2160 int tab_idx = static_cast<int>(j >> 4); 2161 int j_idx = static_cast<int>(j & 15); 2162 if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) { 2163 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx); 2164 } 2165 } 2166 dst.SetUintArray(vform, result); 2167 return dst; 2168 } 2169 2170 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2171 const LogicVRegister& tab, 2172 const LogicVRegister& ind) { 2173 return Table(vform, dst, ind, true, &tab); 2174 } 2175 2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2177 const LogicVRegister& tab, 2178 const LogicVRegister& tab2, 2179 const LogicVRegister& ind) { 2180 return Table(vform, dst, ind, true, &tab, &tab2); 2181 } 2182 2183 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2184 const LogicVRegister& tab, 2185 const LogicVRegister& tab2, 2186 const LogicVRegister& tab3, 2187 const LogicVRegister& ind) { 2188 return Table(vform, dst, ind, true, &tab, &tab2, &tab3); 2189 } 2190 2191 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst, 2192 const LogicVRegister& tab, 2193 const LogicVRegister& tab2, 2194 const LogicVRegister& tab3, 
2195 const LogicVRegister& tab4, 2196 const LogicVRegister& ind) { 2197 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4); 2198 } 2199 2200 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2201 const LogicVRegister& tab, 2202 const LogicVRegister& ind) { 2203 return Table(vform, dst, ind, false, &tab); 2204 } 2205 2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2207 const LogicVRegister& tab, 2208 const LogicVRegister& tab2, 2209 const LogicVRegister& ind) { 2210 return Table(vform, dst, ind, false, &tab, &tab2); 2211 } 2212 2213 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2214 const LogicVRegister& tab, 2215 const LogicVRegister& tab2, 2216 const LogicVRegister& tab3, 2217 const LogicVRegister& ind) { 2218 return Table(vform, dst, ind, false, &tab, &tab2, &tab3); 2219 } 2220 2221 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst, 2222 const LogicVRegister& tab, 2223 const LogicVRegister& tab2, 2224 const LogicVRegister& tab3, 2225 const LogicVRegister& tab4, 2226 const LogicVRegister& ind) { 2227 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4); 2228 } 2229 2230 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst, 2231 const LogicVRegister& src, int shift) { 2232 return shrn(vform, dst, src, shift).UnsignedSaturate(vform); 2233 } 2234 2235 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst, 2236 const LogicVRegister& src, int shift) { 2237 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform); 2238 } 2239 2240 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst, 2241 const LogicVRegister& src, int shift) { 2242 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform); 2243 } 2244 2245 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst, 2246 const LogicVRegister& src, int shift) { 2247 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform); 
2248 } 2249 2250 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst, 2251 const LogicVRegister& src, int shift) { 2252 SimVRegister temp; 2253 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2254 VectorFormat vformdst = vform; 2255 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2256 return sqxtn(vformdst, dst, shifted_src); 2257 } 2258 2259 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst, 2260 const LogicVRegister& src, int shift) { 2261 SimVRegister temp; 2262 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2263 VectorFormat vformdst = vform; 2264 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2265 return sqxtn(vformdst, dst, shifted_src); 2266 } 2267 2268 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst, 2269 const LogicVRegister& src, int shift) { 2270 SimVRegister temp; 2271 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2272 VectorFormat vformdst = vform; 2273 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2274 return sqxtn(vformdst, dst, shifted_src); 2275 } 2276 2277 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst, 2278 const LogicVRegister& src, int shift) { 2279 SimVRegister temp; 2280 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2281 VectorFormat vformdst = vform; 2282 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2283 return sqxtn(vformdst, dst, shifted_src); 2284 } 2285 2286 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst, 2287 const LogicVRegister& src, int shift) { 2288 SimVRegister temp; 2289 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2290 VectorFormat vformdst = vform; 2291 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2292 return sqxtun(vformdst, dst, shifted_src); 2293 } 2294 2295 LogicVRegister 
Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst, 2296 const LogicVRegister& src, int shift) { 2297 SimVRegister temp; 2298 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2299 VectorFormat vformdst = vform; 2300 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift); 2301 return sqxtun(vformdst, dst, shifted_src); 2302 } 2303 2304 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst, 2305 const LogicVRegister& src, int shift) { 2306 SimVRegister temp; 2307 VectorFormat vformsrc = VectorFormatDoubleWidth(vform); 2308 VectorFormat vformdst = vform; 2309 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2310 return sqxtun(vformdst, dst, shifted_src); 2311 } 2312 2313 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst, 2314 const LogicVRegister& src, int shift) { 2315 SimVRegister temp; 2316 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)); 2317 VectorFormat vformdst = vform; 2318 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc); 2319 return sqxtun(vformdst, dst, shifted_src); 2320 } 2321 2322 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst, 2323 const LogicVRegister& src1, 2324 const LogicVRegister& src2) { 2325 SimVRegister temp1, temp2; 2326 uxtl(vform, temp1, src1); 2327 uxtl(vform, temp2, src2); 2328 add(vform, dst, temp1, temp2); 2329 return dst; 2330 } 2331 2332 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst, 2333 const LogicVRegister& src1, 2334 const LogicVRegister& src2) { 2335 SimVRegister temp1, temp2; 2336 uxtl2(vform, temp1, src1); 2337 uxtl2(vform, temp2, src2); 2338 add(vform, dst, temp1, temp2); 2339 return dst; 2340 } 2341 2342 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst, 2343 const LogicVRegister& src1, 2344 const LogicVRegister& src2) { 2345 SimVRegister temp; 2346 uxtl(vform, temp, src2); 2347 
add(vform, dst, src1, temp); 2348 return dst; 2349 } 2350 2351 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst, 2352 const LogicVRegister& src1, 2353 const LogicVRegister& src2) { 2354 SimVRegister temp; 2355 uxtl2(vform, temp, src2); 2356 add(vform, dst, src1, temp); 2357 return dst; 2358 } 2359 2360 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst, 2361 const LogicVRegister& src1, 2362 const LogicVRegister& src2) { 2363 SimVRegister temp1, temp2; 2364 sxtl(vform, temp1, src1); 2365 sxtl(vform, temp2, src2); 2366 add(vform, dst, temp1, temp2); 2367 return dst; 2368 } 2369 2370 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst, 2371 const LogicVRegister& src1, 2372 const LogicVRegister& src2) { 2373 SimVRegister temp1, temp2; 2374 sxtl2(vform, temp1, src1); 2375 sxtl2(vform, temp2, src2); 2376 add(vform, dst, temp1, temp2); 2377 return dst; 2378 } 2379 2380 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst, 2381 const LogicVRegister& src1, 2382 const LogicVRegister& src2) { 2383 SimVRegister temp; 2384 sxtl(vform, temp, src2); 2385 add(vform, dst, src1, temp); 2386 return dst; 2387 } 2388 2389 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst, 2390 const LogicVRegister& src1, 2391 const LogicVRegister& src2) { 2392 SimVRegister temp; 2393 sxtl2(vform, temp, src2); 2394 add(vform, dst, src1, temp); 2395 return dst; 2396 } 2397 2398 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst, 2399 const LogicVRegister& src1, 2400 const LogicVRegister& src2) { 2401 SimVRegister temp1, temp2; 2402 uxtl(vform, temp1, src1); 2403 uxtl(vform, temp2, src2); 2404 sub(vform, dst, temp1, temp2); 2405 return dst; 2406 } 2407 2408 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst, 2409 const LogicVRegister& src1, 2410 const LogicVRegister& src2) { 2411 SimVRegister temp1, temp2; 2412 uxtl2(vform, temp1, src1); 2413 uxtl2(vform, temp2, 
src2); 2414 sub(vform, dst, temp1, temp2); 2415 return dst; 2416 } 2417 2418 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst, 2419 const LogicVRegister& src1, 2420 const LogicVRegister& src2) { 2421 SimVRegister temp; 2422 uxtl(vform, temp, src2); 2423 sub(vform, dst, src1, temp); 2424 return dst; 2425 } 2426 2427 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst, 2428 const LogicVRegister& src1, 2429 const LogicVRegister& src2) { 2430 SimVRegister temp; 2431 uxtl2(vform, temp, src2); 2432 sub(vform, dst, src1, temp); 2433 return dst; 2434 } 2435 2436 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst, 2437 const LogicVRegister& src1, 2438 const LogicVRegister& src2) { 2439 SimVRegister temp1, temp2; 2440 sxtl(vform, temp1, src1); 2441 sxtl(vform, temp2, src2); 2442 sub(vform, dst, temp1, temp2); 2443 return dst; 2444 } 2445 2446 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst, 2447 const LogicVRegister& src1, 2448 const LogicVRegister& src2) { 2449 SimVRegister temp1, temp2; 2450 sxtl2(vform, temp1, src1); 2451 sxtl2(vform, temp2, src2); 2452 sub(vform, dst, temp1, temp2); 2453 return dst; 2454 } 2455 2456 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst, 2457 const LogicVRegister& src1, 2458 const LogicVRegister& src2) { 2459 SimVRegister temp; 2460 sxtl(vform, temp, src2); 2461 sub(vform, dst, src1, temp); 2462 return dst; 2463 } 2464 2465 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst, 2466 const LogicVRegister& src1, 2467 const LogicVRegister& src2) { 2468 SimVRegister temp; 2469 sxtl2(vform, temp, src2); 2470 sub(vform, dst, src1, temp); 2471 return dst; 2472 } 2473 2474 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst, 2475 const LogicVRegister& src1, 2476 const LogicVRegister& src2) { 2477 SimVRegister temp1, temp2; 2478 uxtl(vform, temp1, src1); 2479 uxtl(vform, temp2, src2); 2480 uaba(vform, dst, 
temp1, temp2); 2481 return dst; 2482 } 2483 2484 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst, 2485 const LogicVRegister& src1, 2486 const LogicVRegister& src2) { 2487 SimVRegister temp1, temp2; 2488 uxtl2(vform, temp1, src1); 2489 uxtl2(vform, temp2, src2); 2490 uaba(vform, dst, temp1, temp2); 2491 return dst; 2492 } 2493 2494 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst, 2495 const LogicVRegister& src1, 2496 const LogicVRegister& src2) { 2497 SimVRegister temp1, temp2; 2498 sxtl(vform, temp1, src1); 2499 sxtl(vform, temp2, src2); 2500 saba(vform, dst, temp1, temp2); 2501 return dst; 2502 } 2503 2504 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst, 2505 const LogicVRegister& src1, 2506 const LogicVRegister& src2) { 2507 SimVRegister temp1, temp2; 2508 sxtl2(vform, temp1, src1); 2509 sxtl2(vform, temp2, src2); 2510 saba(vform, dst, temp1, temp2); 2511 return dst; 2512 } 2513 2514 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst, 2515 const LogicVRegister& src1, 2516 const LogicVRegister& src2) { 2517 SimVRegister temp1, temp2; 2518 uxtl(vform, temp1, src1); 2519 uxtl(vform, temp2, src2); 2520 AbsDiff(vform, dst, temp1, temp2, false); 2521 return dst; 2522 } 2523 2524 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst, 2525 const LogicVRegister& src1, 2526 const LogicVRegister& src2) { 2527 SimVRegister temp1, temp2; 2528 uxtl2(vform, temp1, src1); 2529 uxtl2(vform, temp2, src2); 2530 AbsDiff(vform, dst, temp1, temp2, false); 2531 return dst; 2532 } 2533 2534 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst, 2535 const LogicVRegister& src1, 2536 const LogicVRegister& src2) { 2537 SimVRegister temp1, temp2; 2538 sxtl(vform, temp1, src1); 2539 sxtl(vform, temp2, src2); 2540 AbsDiff(vform, dst, temp1, temp2, true); 2541 return dst; 2542 } 2543 2544 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst, 2545 
// (Continuation of Simulator::sabdl2 from the previous line; kept unchanged.)
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  sxtl2(vform, temp1, src1);
  sxtl2(vform, temp2, src2);
  AbsDiff(vform, dst, temp1, temp2, true);
  return dst;
}

// Unsigned multiply long: operands are widened with uxtl, then multiplied
// lane-wise in `vform`.
LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}

// Unsigned multiply long, second part: operands are widened with uxtl2.
LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  uxtl2(vform, wide1, src1);
  uxtl2(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}

// Signed multiply long: operands are widened with sxtl, then multiplied.
LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  sxtl(vform, wide1, src1);
  sxtl(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}

// Signed multiply long, second part: operands are widened with sxtl2.
LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  sxtl2(vform, wide1, src1);
  sxtl2(vform, wide2, src2);
  mul(vform, dst, wide1, wide2);
  return dst;
}

// Unsigned multiply-subtract long: operands are widened with uxtl and passed
// to mls together with dst.
LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister wide1;
  SimVRegister wide2;
  uxtl(vform, wide1, src1);
  uxtl(vform, wide2, src2);
  mls(vform, dst, wide1, wide2);
  return dst;
}

// Unsigned multiply-subtract long, second part.
// (This definition continues on the next line; kept unchanged.)
LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  SimVRegister temp1, temp2;
  uxtl2(vform, temp1, src1);
  uxtl2(vform, temp2,
src2); 2610 mls(vform, dst, temp1, temp2); 2611 return dst; 2612 } 2613 2614 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst, 2615 const LogicVRegister& src1, 2616 const LogicVRegister& src2) { 2617 SimVRegister temp1, temp2; 2618 sxtl(vform, temp1, src1); 2619 sxtl(vform, temp2, src2); 2620 mls(vform, dst, temp1, temp2); 2621 return dst; 2622 } 2623 2624 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst, 2625 const LogicVRegister& src1, 2626 const LogicVRegister& src2) { 2627 SimVRegister temp1, temp2; 2628 sxtl2(vform, temp1, src1); 2629 sxtl2(vform, temp2, src2); 2630 mls(vform, dst, temp1, temp2); 2631 return dst; 2632 } 2633 2634 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst, 2635 const LogicVRegister& src1, 2636 const LogicVRegister& src2) { 2637 SimVRegister temp1, temp2; 2638 uxtl(vform, temp1, src1); 2639 uxtl(vform, temp2, src2); 2640 mla(vform, dst, temp1, temp2); 2641 return dst; 2642 } 2643 2644 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst, 2645 const LogicVRegister& src1, 2646 const LogicVRegister& src2) { 2647 SimVRegister temp1, temp2; 2648 uxtl2(vform, temp1, src1); 2649 uxtl2(vform, temp2, src2); 2650 mla(vform, dst, temp1, temp2); 2651 return dst; 2652 } 2653 2654 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst, 2655 const LogicVRegister& src1, 2656 const LogicVRegister& src2) { 2657 SimVRegister temp1, temp2; 2658 sxtl(vform, temp1, src1); 2659 sxtl(vform, temp2, src2); 2660 mla(vform, dst, temp1, temp2); 2661 return dst; 2662 } 2663 2664 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst, 2665 const LogicVRegister& src1, 2666 const LogicVRegister& src2) { 2667 SimVRegister temp1, temp2; 2668 sxtl2(vform, temp1, src1); 2669 sxtl2(vform, temp2, src2); 2670 mla(vform, dst, temp1, temp2); 2671 return dst; 2672 } 2673 2674 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst, 2675 const 
// (Continuation of Simulator::sqdmlal from the previous line; kept unchanged.)
                                  LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister temp;
  LogicVRegister product = sqdmull(vform, temp, src1, src2);
  return add(vform, dst, dst, product).SignedSaturate(vform);
}

// Signed saturating doubling multiply-add long, second part: adds the
// sqdmull2 product to dst and applies SignedSaturate to the value returned
// by add.
LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull2(vform, scratch, src1, src2);
  LogicVRegister accumulated = add(vform, dst, dst, doubled_product);
  return accumulated.SignedSaturate(vform);
}

// Signed saturating doubling multiply-subtract long: subtracts the sqdmull
// product from dst and applies SignedSaturate to the value returned by sub.
LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull(vform, scratch, src1, src2);
  LogicVRegister accumulated = sub(vform, dst, dst, doubled_product);
  return accumulated.SignedSaturate(vform);
}

// Signed saturating doubling multiply-subtract long, second part: as
// sqdmlsl, using sqdmull2 for the product.
LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister doubled_product = sqdmull2(vform, scratch, src1, src2);
  LogicVRegister accumulated = sub(vform, dst, dst, doubled_product);
  return accumulated.SignedSaturate(vform);
}

// Signed saturating doubling multiply long: the smull product is added to
// itself (the doubling step) and SignedSaturate is applied to the value
// returned by add.
LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister product = smull(vform, scratch, src1, src2);
  LogicVRegister doubled = add(vform, dst, product, product);
  return doubled.SignedSaturate(vform);
}

// Signed saturating doubling multiply long, second part: as sqdmull, using
// smull2 for the product.
LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2) {
  SimVRegister scratch;
  LogicVRegister product = smull2(vform, scratch, src1, src2);
  LogicVRegister doubled = add(vform, dst, product, product);
  return doubled.SignedSaturate(vform);
}

// Signed saturating (rounding) doubling multiply returning the high half.
// `round` selects whether the rounding constant is added.
// (This definition continues on the next line; kept unchanged.)
LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src1,
                                   const LogicVRegister& src2, bool round) {
  // 2 * INT_32_MIN * INT_32_MIN causes
int64_t to overflow. 2726 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1) 2727 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize. 2728 2729 int esize = LaneSizeInBitsFromFormat(vform); 2730 int round_const = round ? (1 << (esize - 2)) : 0; 2731 int64_t product; 2732 2733 dst.ClearForWrite(vform); 2734 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 2735 product = src1.Int(vform, i) * src2.Int(vform, i); 2736 product += round_const; 2737 product = product >> (esize - 1); 2738 2739 if (product > MaxIntFromFormat(vform)) { 2740 product = MaxIntFromFormat(vform); 2741 } else if (product < MinIntFromFormat(vform)) { 2742 product = MinIntFromFormat(vform); 2743 } 2744 dst.SetInt(vform, i, product); 2745 } 2746 return dst; 2747 } 2748 2749 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst, 2750 const LogicVRegister& src1, 2751 const LogicVRegister& src2) { 2752 return sqrdmulh(vform, dst, src1, src2, false); 2753 } 2754 2755 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst, 2756 const LogicVRegister& src1, 2757 const LogicVRegister& src2) { 2758 SimVRegister temp; 2759 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 2760 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2761 return dst; 2762 } 2763 2764 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst, 2765 const LogicVRegister& src1, 2766 const LogicVRegister& src2) { 2767 SimVRegister temp; 2768 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2); 2769 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2770 return dst; 2771 } 2772 2773 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst, 2774 const LogicVRegister& src1, 2775 const LogicVRegister& src2) { 2776 SimVRegister temp; 2777 add(VectorFormatDoubleWidth(vform), temp, src1, src2); 2778 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform)); 2779 return dst; 2780 } 2781 2782 
// Rounding add returning high narrow, second part: the sum is computed in
// the double-width, half-lane-count format and narrowed with rshrn2, using a
// shift equal to the narrow lane size.
LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_sum;
  add(wide_form, wide_sum, src1, src2);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  rshrn2(vform, dst, wide_sum, narrow_bits);
  return dst;
}

// Subtract returning high narrow: computes src1 - src2 in the double-width
// format and narrows the upper half of each difference with shrn.
LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  shrn(vform, dst, wide_diff, narrow_bits);
  return dst;
}

// Subtract returning high narrow, second part: the difference is computed in
// the double-width, half-lane-count format and narrowed with shrn2.
LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  shrn2(vform, dst, wide_diff, narrow_bits);
  return dst;
}

// Rounding subtract returning high narrow: as subhn, but narrowing with
// rshrn.
LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  const VectorFormat wide_form = VectorFormatDoubleWidth(vform);
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  rshrn(vform, dst, wide_diff, narrow_bits);
  return dst;
}

// Rounding subtract returning high narrow, second part: as subhn2, but
// narrowing with rshrn2.
LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src1,
                                  const LogicVRegister& src2) {
  const VectorFormat wide_form =
      VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
  SimVRegister wide_diff;
  sub(wide_form, wide_diff, src1, src2);
  const int narrow_bits = LaneSizeInBitsFromFormat(vform);
  rshrn2(vform, dst, wide_diff, narrow_bits);
  return dst;
}

// Transpose (primary): interleaves the even-numbered lanes of src1 and src2.
// (This definition continues on the next line; kept unchanged.)
LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2) {
  uint64_t result[16];
  int laneCount = LaneCountFromFormat(vform);
  int pairs = laneCount / 2;
  for (int i = 0; i < pairs; ++i) {
    result[2 * i] =
src1.Uint(vform, 2 * i); 2835 result[(2 * i) + 1] = src2.Uint(vform, 2 * i); 2836 } 2837 2838 dst.SetUintArray(vform, result); 2839 return dst; 2840 } 2841 2842 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst, 2843 const LogicVRegister& src1, 2844 const LogicVRegister& src2) { 2845 uint64_t result[16]; 2846 int laneCount = LaneCountFromFormat(vform); 2847 int pairs = laneCount / 2; 2848 for (int i = 0; i < pairs; ++i) { 2849 result[2 * i] = src1.Uint(vform, (2 * i) + 1); 2850 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1); 2851 } 2852 2853 dst.SetUintArray(vform, result); 2854 return dst; 2855 } 2856 2857 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst, 2858 const LogicVRegister& src1, 2859 const LogicVRegister& src2) { 2860 uint64_t result[16]; 2861 int laneCount = LaneCountFromFormat(vform); 2862 int pairs = laneCount / 2; 2863 for (int i = 0; i < pairs; ++i) { 2864 result[2 * i] = src1.Uint(vform, i); 2865 result[(2 * i) + 1] = src2.Uint(vform, i); 2866 } 2867 2868 dst.SetUintArray(vform, result); 2869 return dst; 2870 } 2871 2872 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst, 2873 const LogicVRegister& src1, 2874 const LogicVRegister& src2) { 2875 uint64_t result[16]; 2876 int laneCount = LaneCountFromFormat(vform); 2877 int pairs = laneCount / 2; 2878 for (int i = 0; i < pairs; ++i) { 2879 result[2 * i] = src1.Uint(vform, pairs + i); 2880 result[(2 * i) + 1] = src2.Uint(vform, pairs + i); 2881 } 2882 2883 dst.SetUintArray(vform, result); 2884 return dst; 2885 } 2886 2887 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst, 2888 const LogicVRegister& src1, 2889 const LogicVRegister& src2) { 2890 uint64_t result[32]; 2891 int laneCount = LaneCountFromFormat(vform); 2892 for (int i = 0; i < laneCount; ++i) { 2893 result[i] = src1.Uint(vform, i); 2894 result[laneCount + i] = src2.Uint(vform, i); 2895 } 2896 2897 dst.ClearForWrite(vform); 2898 for (int i = 0; i < laneCount; 
++i) { 2899 dst.SetUint(vform, i, result[2 * i]); 2900 } 2901 return dst; 2902 } 2903 2904 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst, 2905 const LogicVRegister& src1, 2906 const LogicVRegister& src2) { 2907 uint64_t result[32]; 2908 int laneCount = LaneCountFromFormat(vform); 2909 for (int i = 0; i < laneCount; ++i) { 2910 result[i] = src1.Uint(vform, i); 2911 result[laneCount + i] = src2.Uint(vform, i); 2912 } 2913 2914 dst.ClearForWrite(vform); 2915 for (int i = 0; i < laneCount; ++i) { 2916 dst.SetUint(vform, i, result[(2 * i) + 1]); 2917 } 2918 return dst; 2919 } 2920 2921 template <typename T> 2922 T Simulator::FPAdd(T op1, T op2) { 2923 T result = FPProcessNaNs(op1, op2); 2924 if (std::isnan(result)) return result; 2925 2926 if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) { 2927 // inf + -inf returns the default NaN. 2928 FPProcessException(); 2929 return FPDefaultNaN<T>(); 2930 } else { 2931 // Other cases should be handled by standard arithmetic. 2932 return op1 + op2; 2933 } 2934 } 2935 2936 template <typename T> 2937 T Simulator::FPSub(T op1, T op2) { 2938 // NaNs should be handled elsewhere. 2939 DCHECK(!std::isnan(op1) && !std::isnan(op2)); 2940 2941 if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) { 2942 // inf - inf returns the default NaN. 2943 FPProcessException(); 2944 return FPDefaultNaN<T>(); 2945 } else { 2946 // Other cases should be handled by standard arithmetic. 2947 return op1 - op2; 2948 } 2949 } 2950 2951 template <typename T> 2952 T Simulator::FPMul(T op1, T op2) { 2953 // NaNs should be handled elsewhere. 2954 DCHECK(!std::isnan(op1) && !std::isnan(op2)); 2955 2956 if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) { 2957 // inf * 0.0 returns the default NaN. 2958 FPProcessException(); 2959 return FPDefaultNaN<T>(); 2960 } else { 2961 // Other cases should be handled by standard arithmetic. 
return op1 * op2;
  }
}

// Multiplies op1 by op2 like FPMul, except that inf * 0.0 (in either operand
// order) does not signal an exception: it returns 2.0 carrying the product of
// the operands' signs. NaN operands are not processed here; FPMul DCHECKs
// that there are none.
template <typename T>
T Simulator::FPMulx(T op1, T op2) {
  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
    // inf * 0.0 returns +/-2.0.
    T two = 2.0;
    return copysign(1.0, op1) * copysign(1.0, op2) * two;
  }
  return FPMul(op1, op2);
}

// Multiply-accumulate of op1 * op2 with accumulator a.
//
// Order of checks:
//   1. NaN operands are resolved by FPProcessNaNs3; if that yields a NaN it is
//      returned, unless the operation itself would generate a NaN and a is a
//      quiet NaN, in which case the default NaN is returned instead.
//   2. inf * 0.0 and inf - inf signal an exception and return the default NaN.
//   3. Exact and rounded zero results get their sign fixed up by hand.
// The arithmetic itself is delegated to FusedMultiplyAdd(op1, op2, a)
// (presumably op1 * op2 + a with a single rounding -- defined elsewhere).
template <typename T>
T Simulator::FPMulAdd(T a, T op1, T op2) {
  T result = FPProcessNaNs3(a, op1, op2);

  // Signs are taken with copysign so that -0.0 and -inf count as negative.
  T sign_a = copysign(1.0, a);
  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
  bool operation_generates_nan =
      (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
      (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
      (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf

  if (std::isnan(result)) {
    // Generated NaNs override quiet NaNs propagated from a.
    if (operation_generates_nan && IsQuietNaN(a)) {
      FPProcessException();
      return FPDefaultNaN<T>();
    } else {
      return result;
    }
  }

  // If the operation would produce a NaN, return the default NaN.
  if (operation_generates_nan) {
    FPProcessException();
    return FPDefaultNaN<T>();
  }

  // Work around broken fma implementations for exact zero results: The sign of
  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
    return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
  }

  result = FusedMultiplyAdd(op1, op2, a);
  DCHECK(!std::isnan(result));

  // Work around broken fma implementations for rounded zero results: If a is
  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
  if ((a == 0.0) && (result == 0.0)) {
    return copysign(0.0, sign_prod);
  }

  return result;
}

// Divides op1 by op2.
//   - inf / inf and 0.0 / 0.0 signal an exception and return the default NaN.
//   - Any other division by zero signals an exception and returns an infinity
//     whose sign is the product of the operands' signs.
// NaN operands must have been handled by the caller.
template <typename T>
T Simulator::FPDiv(T op1, T op2) {
  // NaNs should be handled elsewhere.
  DCHECK(!std::isnan(op1) && !std::isnan(op2));

  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
    // inf / inf and 0.0 / 0.0 return the default NaN.
    FPProcessException();
    return FPDefaultNaN<T>();
  } else {
    if (op2 == 0.0) {
      // Division by zero: signal, then build the signed infinity by hand.
      FPProcessException();
      if (!std::isnan(op1)) {
        double op1_sign = copysign(1.0, op1);
        double op2_sign = copysign(1.0, op2);
        return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
      }
    }

    // Other cases should be handled by standard arithmetic.
    return op1 / op2;
  }
}

// Square root of op. NaN inputs go through FPProcessNaN; inputs that compare
// less than zero signal an exception and return the default NaN. -0.0 does not
// compare less than zero, so it is passed to sqrt like any other value.
template <typename T>
T Simulator::FPSqrt(T op) {
  if (std::isnan(op)) {
    return FPProcessNaN(op);
  } else if (op < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  } else {
    return sqrt(op);
  }
}

// Maximum of a and b. If FPProcessNaNs yields a NaN, that NaN is returned.
// Zeros of differing sign compare equal, so that case is resolved explicitly
// in favour of +0.0.
template <typename T>
T Simulator::FPMax(T a, T b) {
  T result = FPProcessNaNs(a, b);
  if (std::isnan(result)) return result;

  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
    // a and b are zero, and the sign differs: return +0.0.
    return 0.0;
  } else {
    return (a > b) ? a : b;
  }
}

// Like FPMax, but when exactly one operand is a quiet NaN that operand is
// first replaced by -infinity, so the other (numeric) operand is selected.
template <typename T>
T Simulator::FPMaxNM(T a, T b) {
  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
    a = kFP64NegativeInfinity;
  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
    b = kFP64NegativeInfinity;
  }

  T result = FPProcessNaNs(a, b);
  return std::isnan(result) ? result : FPMax(a, b);
}

// Minimum of a and b. If FPProcessNaNs yields a NaN, that NaN is returned.
// Zeros of differing sign compare equal, so that case is resolved explicitly
// in favour of -0.0.
template <typename T>
T Simulator::FPMin(T a, T b) {
  T result = FPProcessNaNs(a, b);
  if (std::isnan(result)) return result;

  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
    // a and b are zero, and the sign differs: return -0.0.
    return -0.0;
  } else {
    return (a < b) ? a : b;
  }
}

// Like FPMin, but when exactly one operand is a quiet NaN that operand is
// first replaced by +infinity, so the other (numeric) operand is selected.
template <typename T>
T Simulator::FPMinNM(T a, T b) {
  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
    a = kFP64PositiveInfinity;
  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
    b = kFP64PositiveInfinity;
  }

  T result = FPProcessNaNs(a, b);
  return std::isnan(result) ? result : FPMin(a, b);
}

// Reciprocal step helper: FusedMultiplyAdd(op1, op2, 2.0), with the special
// cases handled by hand. inf * 0.0 (either order) returns 2.0; any other
// infinite operand returns an infinity whose sign depends on whether the
// operands' signs match. No NaN handling is done here.
// NOTE(review): frecps in this file passes op1 already negated.
template <typename T>
T Simulator::FPRecipStepFused(T op1, T op2) {
  const T two = 2.0;
  if ((std::isinf(op1) && (op2 == 0.0)) ||
      ((op1 == 0.0) && (std::isinf(op2)))) {
    return two;
  } else if (std::isinf(op1) || std::isinf(op2)) {
    // Return +inf if signs match, otherwise -inf.
    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
                                          : kFP64NegativeInfinity;
  } else {
    return FusedMultiplyAdd(op1, op2, two);
  }
}

// Reciprocal square-root step helper: the halved multiply-add is evaluated as
// FusedMultiplyAdd(op1 / 2, op2, 1.5) or FusedMultiplyAdd(op1, op2 / 2, 1.5).
// inf * 0.0 (either order) returns 1.5; any other infinite operand returns an
// infinity whose sign depends on whether the operands' signs match. No NaN
// handling is done here.
template <typename T>
T Simulator::FPRSqrtStepFused(T op1, T op2) {
  const T one_point_five = 1.5;
  const T two = 2.0;

  if ((std::isinf(op1) && (op2 == 0.0)) ||
      ((op1 == 0.0) && (std::isinf(op2)))) {
    return one_point_five;
  } else if (std::isinf(op1) || std::isinf(op2)) {
    // Return +inf if signs match, otherwise -inf.
    return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
                                          : kFP64NegativeInfinity;
  } else {
    // The multiply-add-halve operation must be fully fused, so avoid interim
    // rounding by checking which operand can be losslessly divided by two
    // before doing the multiply-add.
    if (std::isnormal(op1 / two)) {
      return FusedMultiplyAdd(op1 / two, op2, one_point_five);
    } else if (std::isnormal(op2 / two)) {
      return FusedMultiplyAdd(op1, op2 / two, one_point_five);
    } else {
      // Neither operand is normal after halving: the result is dominated by
      // the addition term, so just return that.
      return one_point_five;
    }
  }
}

// Rounds value to an integral double according to round_mode.
// Zeros and infinities are returned unchanged and NaNs go through
// FPProcessNaN. Otherwise the result starts as floor(value) and each rounding
// mode adjusts it using error = value - floor(value), which lies in [0, 1).
// Negative inputs that round to zero are special-cased so that the result is
// -0.0 rather than +0.0.
double Simulator::FPRoundInt(double value, FPRounding round_mode) {
  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
      (value == kFP64NegativeInfinity)) {
    return value;
  } else if (std::isnan(value)) {
    return FPProcessNaN(value);
  }

  double int_result = std::floor(value);
  double error = value - int_result;
  switch (round_mode) {
    case FPTieAway: {
      // Take care of correctly handling the range ]-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 < value) && (value < 0.0)) {
        int_result = -0.0;

      } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is positive, round up.
        int_result++;
      }
      break;
    }
    case FPTieEven: {
      // Take care of correctly handling the range [-0.5, -0.0], which must
      // yield -0.0.
      if ((-0.5 <= value) && (value < 0.0)) {
        int_result = -0.0;

        // If the error is greater than 0.5, or is equal to 0.5 and the integer
        // result is odd, round up.
      } else if ((error > 0.5) ||
                 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
        int_result++;
      }
      break;
    }
    case FPZero: {
      // If value>0 then we take floor(value)
      // otherwise, ceil(value).
      if (value < 0) {
        int_result = ceil(value);
      }
      break;
    }
    case FPNegativeInfinity: {
      // We always use floor(value).
3199 break; 3200 } 3201 case FPPositiveInfinity: { 3202 // Take care of correctly handling the range ]-1.0, -0.0], which must 3203 // yield -0.0. 3204 if ((-1.0 < value) && (value < 0.0)) { 3205 int_result = -0.0; 3206 3207 // If the error is non-zero, round up. 3208 } else if (error > 0.0) { 3209 int_result++; 3210 } 3211 break; 3212 } 3213 default: 3214 UNIMPLEMENTED(); 3215 } 3216 return int_result; 3217 } 3218 3219 int32_t Simulator::FPToInt32(double value, FPRounding rmode) { 3220 value = FPRoundInt(value, rmode); 3221 if (value >= kWMaxInt) { 3222 return kWMaxInt; 3223 } else if (value < kWMinInt) { 3224 return kWMinInt; 3225 } 3226 return std::isnan(value) ? 0 : static_cast<int32_t>(value); 3227 } 3228 3229 int64_t Simulator::FPToInt64(double value, FPRounding rmode) { 3230 value = FPRoundInt(value, rmode); 3231 if (value >= kXMaxInt) { 3232 return kXMaxInt; 3233 } else if (value < kXMinInt) { 3234 return kXMinInt; 3235 } 3236 return std::isnan(value) ? 0 : static_cast<int64_t>(value); 3237 } 3238 3239 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) { 3240 value = FPRoundInt(value, rmode); 3241 if (value >= kWMaxUInt) { 3242 return kWMaxUInt; 3243 } else if (value < 0.0) { 3244 return 0; 3245 } 3246 return std::isnan(value) ? 0 : static_cast<uint32_t>(value); 3247 } 3248 3249 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) { 3250 value = FPRoundInt(value, rmode); 3251 if (value >= kXMaxUInt) { 3252 return kXMaxUInt; 3253 } else if (value < 0.0) { 3254 return 0; 3255 } 3256 return std::isnan(value) ? 
0 : static_cast<uint64_t>(value);
}

// Generates two member functions per entry of NEON_FP3SAME_LIST (the list
// itself is defined outside this section):
//   - FN<T>: applies OP lane by lane to src1 and src2, writing dst. When
//     PROCNAN is true the operands first go through FPProcessNaNs and OP is
//     only evaluated if that did not produce a NaN; otherwise OP is always
//     evaluated.
//   - FN: dispatches to FN<float> or FN<double> based on the lane size.
// Do not put '//' comments inside the macro body: the trailing backslash
// would splice the following line into the comment.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
  template <typename T>                                                \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    dst.ClearForWrite(vform);                                          \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {             \
      T op1 = src1.Float<T>(i);                                        \
      T op2 = src2.Float<T>(i);                                        \
      T result;                                                        \
      if (PROCNAN) {                                                   \
        result = FPProcessNaNs(op1, op2);                              \
        if (!std::isnan(result)) {                                     \
          result = OP(op1, op2);                                       \
        }                                                              \
      } else {                                                         \
        result = OP(op1, op2);                                         \
      }                                                                \
      dst.SetFloat(i, result);                                         \
    }                                                                  \
    return dst;                                                        \
  }                                                                    \
                                                                       \
  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
                               const LogicVRegister& src1,             \
                               const LogicVRegister& src2) {           \
    if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {               \
      FN<float>(vform, dst, src1, src2);                               \
    } else {                                                           \
      DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);          \
      FN<double>(vform, dst, src1, src2);                              \
    }                                                                  \
    return dst;                                                        \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP

// Negated multiply: the lane-wise product of src1 and src2 is computed into a
// scratch register by fmul and then negated into dst by fneg.
LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src1,
                                const LogicVRegister& src2) {
  SimVRegister temp;
  LogicVRegister product = fmul(vform, temp, src1, src2);
  return fneg(vform, dst, product);
}

// Lane-wise reciprocal step for lanes of type T. Each src1 lane is negated as
// it is read; if FPProcessNaNs yields a NaN for the lane, that NaN is stored,
// otherwise the lane result comes from FPRecipStepFused.
template <typename T>
LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
                                 const LogicVRegister& src1,
                                 const LogicVRegister& src2) {
  dst.ClearForWrite(vform);
  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    T op1 = -src1.Float<T>(i);
    T op2 = src2.Float<T>(i);
    T result = FPProcessNaNs(op1, op2);
    dst.SetFloat(i, std::isnan(result) ?
result : FPRecipStepFused(op1, op2)); 3314 } 3315 return dst; 3316 } 3317 3318 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst, 3319 const LogicVRegister& src1, 3320 const LogicVRegister& src2) { 3321 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3322 frecps<float>(vform, dst, src1, src2); 3323 } else { 3324 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3325 frecps<double>(vform, dst, src1, src2); 3326 } 3327 return dst; 3328 } 3329 3330 template <typename T> 3331 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, 3332 const LogicVRegister& src1, 3333 const LogicVRegister& src2) { 3334 dst.ClearForWrite(vform); 3335 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3336 T op1 = -src1.Float<T>(i); 3337 T op2 = src2.Float<T>(i); 3338 T result = FPProcessNaNs(op1, op2); 3339 dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2)); 3340 } 3341 return dst; 3342 } 3343 3344 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst, 3345 const LogicVRegister& src1, 3346 const LogicVRegister& src2) { 3347 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3348 frsqrts<float>(vform, dst, src1, src2); 3349 } else { 3350 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3351 frsqrts<double>(vform, dst, src1, src2); 3352 } 3353 return dst; 3354 } 3355 3356 template <typename T> 3357 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, 3358 const LogicVRegister& src1, 3359 const LogicVRegister& src2, Condition cond) { 3360 dst.ClearForWrite(vform); 3361 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3362 bool result = false; 3363 T op1 = src1.Float<T>(i); 3364 T op2 = src2.Float<T>(i); 3365 T nan_result = FPProcessNaNs(op1, op2); 3366 if (!std::isnan(nan_result)) { 3367 switch (cond) { 3368 case eq: 3369 result = (op1 == op2); 3370 break; 3371 case ge: 3372 result = (op1 >= op2); 3373 break; 3374 case gt: 3375 result = (op1 > op2); 3376 break; 
3377 case le: 3378 result = (op1 <= op2); 3379 break; 3380 case lt: 3381 result = (op1 < op2); 3382 break; 3383 default: 3384 UNREACHABLE(); 3385 } 3386 } 3387 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0); 3388 } 3389 return dst; 3390 } 3391 3392 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst, 3393 const LogicVRegister& src1, 3394 const LogicVRegister& src2, Condition cond) { 3395 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3396 fcmp<float>(vform, dst, src1, src2, cond); 3397 } else { 3398 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3399 fcmp<double>(vform, dst, src1, src2, cond); 3400 } 3401 return dst; 3402 } 3403 3404 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst, 3405 const LogicVRegister& src, Condition cond) { 3406 SimVRegister temp; 3407 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3408 LogicVRegister zero_reg = 3409 dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f)); 3410 fcmp<float>(vform, dst, src, zero_reg, cond); 3411 } else { 3412 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3413 LogicVRegister zero_reg = 3414 dup_immediate(vform, temp, bit_cast<uint64_t>(0.0)); 3415 fcmp<double>(vform, dst, src, zero_reg, cond); 3416 } 3417 return dst; 3418 } 3419 3420 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst, 3421 const LogicVRegister& src1, 3422 const LogicVRegister& src2, Condition cond) { 3423 SimVRegister temp1, temp2; 3424 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3425 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1); 3426 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2); 3427 fcmp<float>(vform, dst, abs_src1, abs_src2, cond); 3428 } else { 3429 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3430 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1); 3431 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2); 3432 fcmp<double>(vform, dst, abs_src1, abs_src2, cond); 3433 } 
3434 return dst; 3435 } 3436 3437 template <typename T> 3438 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, 3439 const LogicVRegister& src1, 3440 const LogicVRegister& src2) { 3441 dst.ClearForWrite(vform); 3442 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3443 T op1 = src1.Float<T>(i); 3444 T op2 = src2.Float<T>(i); 3445 T acc = dst.Float<T>(i); 3446 T result = FPMulAdd(acc, op1, op2); 3447 dst.SetFloat(i, result); 3448 } 3449 return dst; 3450 } 3451 3452 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst, 3453 const LogicVRegister& src1, 3454 const LogicVRegister& src2) { 3455 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3456 fmla<float>(vform, dst, src1, src2); 3457 } else { 3458 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3459 fmla<double>(vform, dst, src1, src2); 3460 } 3461 return dst; 3462 } 3463 3464 template <typename T> 3465 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, 3466 const LogicVRegister& src1, 3467 const LogicVRegister& src2) { 3468 dst.ClearForWrite(vform); 3469 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3470 T op1 = -src1.Float<T>(i); 3471 T op2 = src2.Float<T>(i); 3472 T acc = dst.Float<T>(i); 3473 T result = FPMulAdd(acc, op1, op2); 3474 dst.SetFloat(i, result); 3475 } 3476 return dst; 3477 } 3478 3479 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst, 3480 const LogicVRegister& src1, 3481 const LogicVRegister& src2) { 3482 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3483 fmls<float>(vform, dst, src1, src2); 3484 } else { 3485 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3486 fmls<double>(vform, dst, src1, src2); 3487 } 3488 return dst; 3489 } 3490 3491 template <typename T> 3492 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, 3493 const LogicVRegister& src) { 3494 dst.ClearForWrite(vform); 3495 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3496 T op = 
src.Float<T>(i); 3497 op = -op; 3498 dst.SetFloat(i, op); 3499 } 3500 return dst; 3501 } 3502 3503 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst, 3504 const LogicVRegister& src) { 3505 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3506 fneg<float>(vform, dst, src); 3507 } else { 3508 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3509 fneg<double>(vform, dst, src); 3510 } 3511 return dst; 3512 } 3513 3514 template <typename T> 3515 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, 3516 const LogicVRegister& src) { 3517 dst.ClearForWrite(vform); 3518 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3519 T op = src.Float<T>(i); 3520 if (copysign(1.0, op) < 0.0) { 3521 op = -op; 3522 } 3523 dst.SetFloat(i, op); 3524 } 3525 return dst; 3526 } 3527 3528 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst, 3529 const LogicVRegister& src) { 3530 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3531 fabs_<float>(vform, dst, src); 3532 } else { 3533 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3534 fabs_<double>(vform, dst, src); 3535 } 3536 return dst; 3537 } 3538 3539 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst, 3540 const LogicVRegister& src1, 3541 const LogicVRegister& src2) { 3542 SimVRegister temp; 3543 fsub(vform, temp, src1, src2); 3544 fabs_(vform, dst, temp); 3545 return dst; 3546 } 3547 3548 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst, 3549 const LogicVRegister& src) { 3550 dst.ClearForWrite(vform); 3551 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3552 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3553 float result = FPSqrt(src.Float<float>(i)); 3554 dst.SetFloat(i, result); 3555 } 3556 } else { 3557 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3558 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3559 double result = FPSqrt(src.Float<double>(i)); 3560 dst.SetFloat(i, result); 
}
  }
  return dst;
}

// Generates two overloads per entry of NEON_FPPAIRWISE_LIST (the list itself
// is defined outside this section):
//   - FNP(vform, dst, src1, src2): vector pairwise form. uzp1 and uzp2 split
//     the concatenation of src1 and src2 into its even-indexed and odd-indexed
//     elements, and FN is then applied lane-wise to those two vectors.
//   - FNP(vform, dst, src): scalar pairwise form. OP is applied to lanes 0 and
//     1 of src and the result is written to lane 0 of dst; vform must be
//     kFormatS or kFormatD.
// Do not put '//' comments inside the macro body: the trailing backslash
// would splice the following line into the comment.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src1,             \
                                const LogicVRegister& src2) {           \
    SimVRegister temp1, temp2;                                          \
    uzp1(vform, temp1, src1, src2);                                     \
    uzp2(vform, temp2, src1, src2);                                     \
    FN(vform, dst, temp1, temp2);                                       \
    return dst;                                                         \
  }                                                                     \
                                                                        \
  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
                                const LogicVRegister& src) {            \
    if (vform == kFormatS) {                                            \
      float result = OP(src.Float<float>(0), src.Float<float>(1));      \
      dst.SetFloat(0, result);                                          \
    } else {                                                            \
      DCHECK_EQ(vform, kFormatD);                                       \
      double result = OP(src.Float<double>(0), src.Float<double>(1));   \
      dst.SetFloat(0, result);                                          \
    }                                                                   \
    dst.ClearForWrite(vform);                                           \
    return dst;                                                         \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP

// Reduces the four single-precision lanes of src to one value using the
// member function Op, and writes it to lane 0 of dst as a scalar S value.
// The reduction is done as Op(Op(lane0, lane1), Op(lane2, lane3)).
// Only kFormat4S is supported (checked by DCHECK).
LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
                                   const LogicVRegister& src, FPMinMaxOp Op) {
  DCHECK_EQ(vform, kFormat4S);
  // vform is otherwise unused when DCHECKs are compiled out.
  USE(vform);
  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
  float result = (this->*Op)(result1, result2);
  dst.ClearForWrite(kFormatS);
  dst.SetFloat<float>(0, result);
  return dst;
}

// Across-lanes maximum, using FPMax.
LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMax);
}

// Across-lanes minimum, using FPMin.
LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
                                const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMin);
}

// Across-lanes maximum, using FPMaxNM.
LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
}

// Across-lanes minimum, using FPMinNM.
LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
                                  const LogicVRegister& src) {
  return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
}

// Multiply by element: lane `index` of src2 is duplicated into a scratch
// register by dup_element and the lane-wise fmul<T> is applied to src1 and
// that scratch register.
LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, int index) {
  dst.ClearForWrite(vform);
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    fmul<float>(vform, dst, src1, index_reg);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    fmul<double>(vform, dst, src1, index_reg);
  }
  return dst;
}

// Multiply-accumulate by element: lane `index` of src2 is duplicated into a
// scratch register by dup_element and the lane-wise fmla<T> is applied to
// src1 and that scratch register, accumulating into dst.
LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, int index) {
  dst.ClearForWrite(vform);
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    fmla<float>(vform, dst, src1, index_reg);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
    fmla<double>(vform, dst, src1, index_reg);
  }
  return dst;
}

// Multiply-subtract by element: lane `index` of src2 is duplicated into a
// scratch register by dup_element and the lane-wise fmls<T> is applied to
// src1 and that scratch register, accumulating into dst.
LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
                               const LogicVRegister& src1,
                               const LogicVRegister& src2, int index) {
  dst.ClearForWrite(vform);
  SimVRegister temp;
  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
    LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
    fmls<float>(vform, dst, src1, index_reg);
  } else {
    DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
    LogicVRegister index_reg = dup_element(kFormat2D, temp, src2,
index); 3668 fmls<double>(vform, dst, src1, index_reg); 3669 } 3670 return dst; 3671 } 3672 3673 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst, 3674 const LogicVRegister& src1, 3675 const LogicVRegister& src2, int index) { 3676 dst.ClearForWrite(vform); 3677 SimVRegister temp; 3678 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3679 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index); 3680 fmulx<float>(vform, dst, src1, index_reg); 3681 3682 } else { 3683 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3684 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index); 3685 fmulx<double>(vform, dst, src1, index_reg); 3686 } 3687 return dst; 3688 } 3689 3690 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst, 3691 const LogicVRegister& src, 3692 FPRounding rounding_mode, 3693 bool inexact_exception) { 3694 dst.ClearForWrite(vform); 3695 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3696 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3697 float input = src.Float<float>(i); 3698 float rounded = FPRoundInt(input, rounding_mode); 3699 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 3700 FPProcessException(); 3701 } 3702 dst.SetFloat<float>(i, rounded); 3703 } 3704 } else { 3705 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3706 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3707 double input = src.Float<double>(i); 3708 double rounded = FPRoundInt(input, rounding_mode); 3709 if (inexact_exception && !std::isnan(input) && (input != rounded)) { 3710 FPProcessException(); 3711 } 3712 dst.SetFloat<double>(i, rounded); 3713 } 3714 } 3715 return dst; 3716 } 3717 3718 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst, 3719 const LogicVRegister& src, 3720 FPRounding rounding_mode, int fbits) { 3721 dst.ClearForWrite(vform); 3722 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3723 for (int i = 0; i < 
LaneCountFromFormat(vform); i++) { 3724 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 3725 dst.SetInt(vform, i, FPToInt32(op, rounding_mode)); 3726 } 3727 } else { 3728 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3729 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3730 double op = src.Float<double>(i) * std::pow(2.0, fbits); 3731 dst.SetInt(vform, i, FPToInt64(op, rounding_mode)); 3732 } 3733 } 3734 return dst; 3735 } 3736 3737 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst, 3738 const LogicVRegister& src, 3739 FPRounding rounding_mode, int fbits) { 3740 dst.ClearForWrite(vform); 3741 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3742 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3743 float op = src.Float<float>(i) * std::pow(2.0f, fbits); 3744 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode)); 3745 } 3746 } else { 3747 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3748 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3749 double op = src.Float<double>(i) * std::pow(2.0, fbits); 3750 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode)); 3751 } 3752 } 3753 return dst; 3754 } 3755 3756 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst, 3757 const LogicVRegister& src) { 3758 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3759 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 3760 dst.SetFloat(i, FPToFloat(src.Float<float16>(i))); 3761 } 3762 } else { 3763 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3764 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) { 3765 dst.SetFloat(i, FPToDouble(src.Float<float>(i))); 3766 } 3767 } 3768 return dst; 3769 } 3770 3771 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst, 3772 const LogicVRegister& src) { 3773 int lane_count = LaneCountFromFormat(vform); 3774 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3775 for (int i = 0; i < lane_count; i++) { 3776 
dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count))); 3777 } 3778 } else { 3779 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3780 for (int i = 0; i < lane_count; i++) { 3781 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count))); 3782 } 3783 } 3784 return dst; 3785 } 3786 3787 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst, 3788 const LogicVRegister& src) { 3789 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { 3790 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3791 dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven)); 3792 } 3793 } else { 3794 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); 3795 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3796 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven)); 3797 } 3798 } 3799 return dst; 3800 } 3801 3802 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst, 3803 const LogicVRegister& src) { 3804 int lane_count = LaneCountFromFormat(vform) / 2; 3805 if (LaneSizeInBytesFromFormat(vform) == kHRegSize) { 3806 for (int i = lane_count - 1; i >= 0; i--) { 3807 dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven)); 3808 } 3809 } else { 3810 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); 3811 for (int i = lane_count - 1; i >= 0; i--) { 3812 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven)); 3813 } 3814 } 3815 return dst; 3816 } 3817 3818 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst, 3819 const LogicVRegister& src) { 3820 dst.ClearForWrite(vform); 3821 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); 3822 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3823 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd)); 3824 } 3825 return dst; 3826 } 3827 3828 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst, 3829 const LogicVRegister& src) { 3830 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize); 3831 
int lane_count = LaneCountFromFormat(vform) / 2; 3832 for (int i = lane_count - 1; i >= 0; i--) { 3833 dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd)); 3834 } 3835 return dst; 3836 } 3837 3838 // Based on reference C function recip_sqrt_estimate from ARM ARM. 3839 double Simulator::recip_sqrt_estimate(double a) { 3840 int q0, q1, s; 3841 double r; 3842 if (a < 0.5) { 3843 q0 = static_cast<int>(a * 512.0); 3844 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0); 3845 } else { 3846 q1 = static_cast<int>(a * 256.0); 3847 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0); 3848 } 3849 s = static_cast<int>(256.0 * r + 0.5); 3850 return static_cast<double>(s) / 256.0; 3851 } 3852 3853 namespace { 3854 3855 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) { 3856 return unsigned_bitextract_64(start_bit, end_bit, val); 3857 } 3858 3859 } // anonymous namespace 3860 3861 template <typename T> 3862 T Simulator::FPRecipSqrtEstimate(T op) { 3863 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, 3864 "T must be a float or double"); 3865 3866 if (std::isnan(op)) { 3867 return FPProcessNaN(op); 3868 } else if (op == 0.0) { 3869 if (copysign(1.0, op) < 0.0) { 3870 return kFP64NegativeInfinity; 3871 } else { 3872 return kFP64PositiveInfinity; 3873 } 3874 } else if (copysign(1.0, op) < 0.0) { 3875 FPProcessException(); 3876 return FPDefaultNaN<T>(); 3877 } else if (std::isinf(op)) { 3878 return 0.0; 3879 } else { 3880 uint64_t fraction; 3881 int32_t exp, result_exp; 3882 3883 if (sizeof(T) == sizeof(float)) { 3884 exp = static_cast<int32_t>(float_exp(op)); 3885 fraction = float_mantissa(op); 3886 fraction <<= 29; 3887 } else { 3888 exp = static_cast<int32_t>(double_exp(op)); 3889 fraction = double_mantissa(op); 3890 } 3891 3892 if (exp == 0) { 3893 while (Bits(fraction, 51, 51) == 0) { 3894 fraction = Bits(fraction, 50, 0) << 1; 3895 exp -= 1; 3896 } 3897 fraction = Bits(fraction, 50, 0) << 1; 3898 } 3899 
3900 double scaled; 3901 if (Bits(exp, 0, 0) == 0) { 3902 scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); 3903 } else { 3904 scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44); 3905 } 3906 3907 if (sizeof(T) == sizeof(float)) { 3908 result_exp = (380 - exp) / 2; 3909 } else { 3910 result_exp = (3068 - exp) / 2; 3911 } 3912 3913 uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled)); 3914 3915 if (sizeof(T) == sizeof(float)) { 3916 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 3917 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29)); 3918 return float_pack(0, exp_bits, est_bits); 3919 } else { 3920 return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0)); 3921 } 3922 } 3923 } 3924 3925 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst, 3926 const LogicVRegister& src) { 3927 dst.ClearForWrite(vform); 3928 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 3929 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3930 float input = src.Float<float>(i); 3931 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input)); 3932 } 3933 } else { 3934 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 3935 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 3936 double input = src.Float<double>(i); 3937 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input)); 3938 } 3939 } 3940 return dst; 3941 } 3942 3943 template <typename T> 3944 T Simulator::FPRecipEstimate(T op, FPRounding rounding) { 3945 static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value, 3946 "T must be a float or double"); 3947 uint32_t sign; 3948 3949 if (sizeof(T) == sizeof(float)) { 3950 sign = float_sign(op); 3951 } else { 3952 sign = double_sign(op); 3953 } 3954 3955 if (std::isnan(op)) { 3956 return FPProcessNaN(op); 3957 } else if (std::isinf(op)) { 3958 return (sign == 1) ? -0.0 : 0.0; 3959 } else if (op == 0.0) { 3960 FPProcessException(); // FPExc_DivideByZero exception. 
3961 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 3962 } else if (((sizeof(T) == sizeof(float)) && 3963 (std::fabs(op) < std::pow(2.0, -128.0))) || 3964 ((sizeof(T) == sizeof(double)) && 3965 (std::fabs(op) < std::pow(2.0, -1024.0)))) { 3966 bool overflow_to_inf = false; 3967 switch (rounding) { 3968 case FPTieEven: 3969 overflow_to_inf = true; 3970 break; 3971 case FPPositiveInfinity: 3972 overflow_to_inf = (sign == 0); 3973 break; 3974 case FPNegativeInfinity: 3975 overflow_to_inf = (sign == 1); 3976 break; 3977 case FPZero: 3978 overflow_to_inf = false; 3979 break; 3980 default: 3981 break; 3982 } 3983 FPProcessException(); // FPExc_Overflow and FPExc_Inexact. 3984 if (overflow_to_inf) { 3985 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity; 3986 } else { 3987 // Return FPMaxNormal(sign). 3988 if (sizeof(T) == sizeof(float)) { 3989 return float_pack(sign, 0xFE, 0x07FFFFF); 3990 } else { 3991 return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl); 3992 } 3993 } 3994 } else { 3995 uint64_t fraction; 3996 int32_t exp, result_exp; 3997 uint32_t sign; 3998 3999 if (sizeof(T) == sizeof(float)) { 4000 sign = float_sign(op); 4001 exp = static_cast<int32_t>(float_exp(op)); 4002 fraction = float_mantissa(op); 4003 fraction <<= 29; 4004 } else { 4005 sign = double_sign(op); 4006 exp = static_cast<int32_t>(double_exp(op)); 4007 fraction = double_mantissa(op); 4008 } 4009 4010 if (exp == 0) { 4011 if (Bits(fraction, 51, 51) == 0) { 4012 exp -= 1; 4013 fraction = Bits(fraction, 49, 0) << 2; 4014 } else { 4015 fraction = Bits(fraction, 50, 0) << 1; 4016 } 4017 } 4018 4019 double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44); 4020 4021 if (sizeof(T) == sizeof(float)) { 4022 result_exp = 253 - exp; 4023 } else { 4024 result_exp = 2045 - exp; 4025 } 4026 4027 double estimate = recip_estimate(scaled); 4028 4029 fraction = double_mantissa(estimate); 4030 if (result_exp == 0) { 4031 fraction = (UINT64_C(1) << 51) | Bits(fraction, 
51, 1); 4032 } else if (result_exp == -1) { 4033 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2); 4034 result_exp = 0; 4035 } 4036 if (sizeof(T) == sizeof(float)) { 4037 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0)); 4038 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29)); 4039 return float_pack(sign, exp_bits, frac_bits); 4040 } else { 4041 return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0)); 4042 } 4043 } 4044 } 4045 4046 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst, 4047 const LogicVRegister& src, FPRounding round) { 4048 dst.ClearForWrite(vform); 4049 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4050 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4051 float input = src.Float<float>(i); 4052 dst.SetFloat(i, FPRecipEstimate<float>(input, round)); 4053 } 4054 } else { 4055 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4056 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4057 double input = src.Float<double>(i); 4058 dst.SetFloat(i, FPRecipEstimate<double>(input, round)); 4059 } 4060 } 4061 return dst; 4062 } 4063 4064 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst, 4065 const LogicVRegister& src) { 4066 dst.ClearForWrite(vform); 4067 uint64_t operand; 4068 uint32_t result; 4069 double dp_operand, dp_result; 4070 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4071 operand = src.Uint(vform, i); 4072 if (operand <= 0x3FFFFFFF) { 4073 result = 0xFFFFFFFF; 4074 } else { 4075 dp_operand = operand * std::pow(2.0, -32); 4076 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31); 4077 result = static_cast<uint32_t>(dp_result); 4078 } 4079 dst.SetUint(vform, i, result); 4080 } 4081 return dst; 4082 } 4083 4084 // Based on reference C function recip_estimate from ARM ARM. 
4085 double Simulator::recip_estimate(double a) { 4086 int q, s; 4087 double r; 4088 q = static_cast<int>(a * 512.0); 4089 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0); 4090 s = static_cast<int>(256.0 * r + 0.5); 4091 return static_cast<double>(s) / 256.0; 4092 } 4093 4094 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst, 4095 const LogicVRegister& src) { 4096 dst.ClearForWrite(vform); 4097 uint64_t operand; 4098 uint32_t result; 4099 double dp_operand, dp_result; 4100 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4101 operand = src.Uint(vform, i); 4102 if (operand <= 0x7FFFFFFF) { 4103 result = 0xFFFFFFFF; 4104 } else { 4105 dp_operand = operand * std::pow(2.0, -32); 4106 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31); 4107 result = static_cast<uint32_t>(dp_result); 4108 } 4109 dst.SetUint(vform, i, result); 4110 } 4111 return dst; 4112 } 4113 4114 template <typename T> 4115 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, 4116 const LogicVRegister& src) { 4117 dst.ClearForWrite(vform); 4118 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4119 T op = src.Float<T>(i); 4120 T result; 4121 if (std::isnan(op)) { 4122 result = FPProcessNaN(op); 4123 } else { 4124 int exp; 4125 uint32_t sign; 4126 if (sizeof(T) == sizeof(float)) { 4127 sign = float_sign(op); 4128 exp = static_cast<int>(float_exp(op)); 4129 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0)); 4130 result = float_pack(sign, exp, 0); 4131 } else { 4132 sign = double_sign(op); 4133 exp = static_cast<int>(double_exp(op)); 4134 exp = (exp == 0) ? 
(0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0)); 4135 result = double_pack(sign, exp, 0); 4136 } 4137 } 4138 dst.SetFloat(i, result); 4139 } 4140 return dst; 4141 } 4142 4143 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst, 4144 const LogicVRegister& src) { 4145 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4146 frecpx<float>(vform, dst, src); 4147 } else { 4148 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4149 frecpx<double>(vform, dst, src); 4150 } 4151 return dst; 4152 } 4153 4154 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst, 4155 const LogicVRegister& src, int fbits, 4156 FPRounding round) { 4157 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4158 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4159 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round); 4160 dst.SetFloat<float>(i, result); 4161 } else { 4162 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4163 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round); 4164 dst.SetFloat<double>(i, result); 4165 } 4166 } 4167 return dst; 4168 } 4169 4170 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst, 4171 const LogicVRegister& src, int fbits, 4172 FPRounding round) { 4173 for (int i = 0; i < LaneCountFromFormat(vform); i++) { 4174 if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { 4175 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round); 4176 dst.SetFloat<float>(i, result); 4177 } else { 4178 DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); 4179 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round); 4180 dst.SetFloat<double>(i, result); 4181 } 4182 } 4183 return dst; 4184 } 4185 4186 #endif // USE_SIMULATOR 4187 4188 } // namespace internal 4189 } // namespace v8 4190 4191 #endif // V8_TARGET_ARCH_ARM64 4192