Home | History | Annotate | Download | only in arm64
      1 // Copyright 2016 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #if V8_TARGET_ARCH_ARM64
      6 
      7 #include <cmath>
      8 #include "src/arm64/simulator-arm64.h"
      9 
     10 namespace v8 {
     11 namespace internal {
     12 
     13 #if defined(USE_SIMULATOR)
     14 
     15 namespace {
     16 
     17 // See FPRound for a description of this function.
     18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
     19                               FPRounding round_mode) {
     20   uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
     21       sign, exponent, mantissa, round_mode);
     22   return bit_cast<double>(bits);
     23 }
     24 
     25 // See FPRound for a description of this function.
     26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
     27                             FPRounding round_mode) {
     28   uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
     29       sign, exponent, mantissa, round_mode);
     30   return bit_cast<float>(bits);
     31 }
     32 
     33 // See FPRound for a description of this function.
     34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
     35                                 uint64_t mantissa, FPRounding round_mode) {
     36   return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
     37       sign, exponent, mantissa, round_mode);
     38 }
     39 
     40 }  // namespace
     41 
     42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
     43   if (src >= 0) {
     44     return UFixedToDouble(src, fbits, round);
     45   } else if (src == INT64_MIN) {
     46     return -UFixedToDouble(src, fbits, round);
     47   } else {
     48     return -UFixedToDouble(-src, fbits, round);
     49   }
     50 }
     51 
     52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
     53   // An input of 0 is a special case because the result is effectively
     54   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
     55   if (src == 0) {
     56     return 0.0;
     57   }
     58 
     59   // Calculate the exponent. The highest significant bit will have the value
     60   // 2^exponent.
     61   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
     62   const int64_t exponent = highest_significant_bit - fbits;
     63 
     64   return FPRoundToDouble(0, exponent, src, round);
     65 }
     66 
     67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
     68   if (src >= 0) {
     69     return UFixedToFloat(src, fbits, round);
     70   } else if (src == INT64_MIN) {
     71     return -UFixedToFloat(src, fbits, round);
     72   } else {
     73     return -UFixedToFloat(-src, fbits, round);
     74   }
     75 }
     76 
     77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
     78   // An input of 0 is a special case because the result is effectively
     79   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
     80   if (src == 0) {
     81     return 0.0f;
     82   }
     83 
     84   // Calculate the exponent. The highest significant bit will have the value
     85   // 2^exponent.
     86   const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
     87   const int32_t exponent = highest_significant_bit - fbits;
     88 
     89   return FPRoundToFloat(0, exponent, src, round);
     90 }
     91 
     92 double Simulator::FPToDouble(float value) {
     93   switch (std::fpclassify(value)) {
     94     case FP_NAN: {
     95       if (IsSignallingNaN(value)) {
     96         FPProcessException();
     97       }
     98       if (DN()) return kFP64DefaultNaN;
     99 
    100       // Convert NaNs as the processor would:
    101       //  - The sign is propagated.
    102       //  - The mantissa is transferred entirely, except that the top bit is
    103       //    forced to '1', making the result a quiet NaN. The unused (low-order)
    104       //    mantissa bits are set to 0.
    105       uint32_t raw = bit_cast<uint32_t>(value);
    106 
    107       uint64_t sign = raw >> 31;
    108       uint64_t exponent = (1 << kDoubleExponentBits) - 1;
    109       uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
    110 
    111       // Unused low-order bits remain zero.
    112       mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
    113 
    114       // Force a quiet NaN.
    115       mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
    116 
    117       return double_pack(sign, exponent, mantissa);
    118     }
    119 
    120     case FP_ZERO:
    121     case FP_NORMAL:
    122     case FP_SUBNORMAL:
    123     case FP_INFINITE: {
    124       // All other inputs are preserved in a standard cast, because every value
    125       // representable using an IEEE-754 float is also representable using an
    126       // IEEE-754 double.
    127       return static_cast<double>(value);
    128     }
    129   }
    130 
    131   UNREACHABLE();
    132 }
    133 
    134 float Simulator::FPToFloat(float16 value) {
    135   uint32_t sign = value >> 15;
    136   uint32_t exponent =
    137       unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
    138                              kFloat16MantissaBits, value);
    139   uint32_t mantissa =
    140       unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
    141 
    142   switch (float16classify(value)) {
    143     case FP_ZERO:
    144       return (sign == 0) ? 0.0f : -0.0f;
    145 
    146     case FP_INFINITE:
    147       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
    148 
    149     case FP_SUBNORMAL: {
    150       // Calculate shift required to put mantissa into the most-significant bits
    151       // of the destination mantissa.
    152       int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
    153 
    154       // Shift mantissa and discard implicit '1'.
    155       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
    156       mantissa &= (1 << kFloatMantissaBits) - 1;
    157 
    158       // Adjust the exponent for the shift applied, and rebias.
    159       exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
    160       break;
    161     }
    162 
    163     case FP_NAN: {
    164       if (IsSignallingNaN(value)) {
    165         FPProcessException();
    166       }
    167       if (DN()) return kFP32DefaultNaN;
    168 
    169       // Convert NaNs as the processor would:
    170       //  - The sign is propagated.
    171       //  - The mantissa is transferred entirely, except that the top bit is
    172       //    forced to '1', making the result a quiet NaN. The unused (low-order)
    173       //    mantissa bits are set to 0.
    174       exponent = (1 << kFloatExponentBits) - 1;
    175 
    176       // Increase bits in mantissa, making low-order bits 0.
    177       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    178       mantissa |= 1 << (kFloatMantissaBits - 1);  // Force a quiet NaN.
    179       break;
    180     }
    181 
    182     case FP_NORMAL: {
    183       // Increase bits in mantissa, making low-order bits 0.
    184       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    185 
    186       // Change exponent bias.
    187       exponent += (kFloatExponentBias - kFloat16ExponentBias);
    188       break;
    189     }
    190 
    191     default:
    192       UNREACHABLE();
    193   }
    194   return float_pack(sign, exponent, mantissa);
    195 }
    196 
    197 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
    198   // Only the FPTieEven rounding mode is implemented.
    199   DCHECK_EQ(round_mode, FPTieEven);
    200   USE(round_mode);
    201 
    202   int64_t sign = float_sign(value);
    203   int64_t exponent =
    204       static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
    205   uint32_t mantissa = float_mantissa(value);
    206 
    207   switch (std::fpclassify(value)) {
    208     case FP_NAN: {
    209       if (IsSignallingNaN(value)) {
    210         FPProcessException();
    211       }
    212       if (DN()) return kFP16DefaultNaN;
    213 
    214       // Convert NaNs as the processor would:
    215       //  - The sign is propagated.
    216       //  - The mantissa is transferred as much as possible, except that the top
    217       //    bit is forced to '1', making the result a quiet NaN.
    218       float16 result =
    219           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    220       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
    221       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
    222       return result;
    223     }
    224 
    225     case FP_ZERO:
    226       return (sign == 0) ? 0 : 0x8000;
    227 
    228     case FP_INFINITE:
    229       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    230 
    231     case FP_NORMAL:
    232     case FP_SUBNORMAL: {
    233       // Convert float-to-half as the processor would, assuming that FPCR.FZ
    234       // (flush-to-zero) is not set.
    235 
    236       // Add the implicit '1' bit to the mantissa.
    237       mantissa += (1 << kFloatMantissaBits);
    238       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    239     }
    240   }
    241 
    242   UNREACHABLE();
    243 }
    244 
    245 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
    246   // Only the FPTieEven rounding mode is implemented.
    247   DCHECK_EQ(round_mode, FPTieEven);
    248   USE(round_mode);
    249 
    250   int64_t sign = double_sign(value);
    251   int64_t exponent =
    252       static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
    253   uint64_t mantissa = double_mantissa(value);
    254 
    255   switch (std::fpclassify(value)) {
    256     case FP_NAN: {
    257       if (IsSignallingNaN(value)) {
    258         FPProcessException();
    259       }
    260       if (DN()) return kFP16DefaultNaN;
    261 
    262       // Convert NaNs as the processor would:
    263       //  - The sign is propagated.
    264       //  - The mantissa is transferred as much as possible, except that the top
    265       //    bit is forced to '1', making the result a quiet NaN.
    266       float16 result =
    267           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    268       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
    269       result |= (1 << (kFloat16MantissaBits - 1));  // Force a quiet NaN;
    270       return result;
    271     }
    272 
    273     case FP_ZERO:
    274       return (sign == 0) ? 0 : 0x8000;
    275 
    276     case FP_INFINITE:
    277       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    278 
    279     case FP_NORMAL:
    280     case FP_SUBNORMAL: {
    281       // Convert double-to-half as the processor would, assuming that FPCR.FZ
    282       // (flush-to-zero) is not set.
    283 
    284       // Add the implicit '1' bit to the mantissa.
    285       mantissa += (UINT64_C(1) << kDoubleMantissaBits);
    286       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    287     }
    288   }
    289 
    290   UNREACHABLE();
    291 }
    292 
    293 float Simulator::FPToFloat(double value, FPRounding round_mode) {
    294   // Only the FPTieEven rounding mode is implemented.
    295   DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
    296   USE(round_mode);
    297 
    298   switch (std::fpclassify(value)) {
    299     case FP_NAN: {
    300       if (IsSignallingNaN(value)) {
    301         FPProcessException();
    302       }
    303       if (DN()) return kFP32DefaultNaN;
    304 
    305       // Convert NaNs as the processor would:
    306       //  - The sign is propagated.
    307       //  - The mantissa is transferred as much as possible, except that the
    308       //    top bit is forced to '1', making the result a quiet NaN.
    309 
    310       uint64_t raw = bit_cast<uint64_t>(value);
    311 
    312       uint32_t sign = raw >> 63;
    313       uint32_t exponent = (1 << 8) - 1;
    314       uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
    315           50, kDoubleMantissaBits - kFloatMantissaBits, raw));
    316       mantissa |= (1 << (kFloatMantissaBits - 1));  // Force a quiet NaN.
    317 
    318       return float_pack(sign, exponent, mantissa);
    319     }
    320 
    321     case FP_ZERO:
    322     case FP_INFINITE: {
    323       // In a C++ cast, any value representable in the target type will be
    324       // unchanged. This is always the case for +/-0.0 and infinities.
    325       return static_cast<float>(value);
    326     }
    327 
    328     case FP_NORMAL:
    329     case FP_SUBNORMAL: {
    330       // Convert double-to-float as the processor would, assuming that FPCR.FZ
    331       // (flush-to-zero) is not set.
    332       uint32_t sign = double_sign(value);
    333       int64_t exponent =
    334           static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
    335       uint64_t mantissa = double_mantissa(value);
    336       if (std::fpclassify(value) == FP_NORMAL) {
    337         // For normal FP values, add the hidden bit.
    338         mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
    339       }
    340       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
    341     }
    342   }
    343 
    344   UNREACHABLE();
    345 }
    346 
    347 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
    348   dst.ClearForWrite(vform);
    349   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    350     dst.ReadUintFromMem(vform, i, addr);
    351     addr += LaneSizeInBytesFromFormat(vform);
    352   }
    353 }
    354 
    355 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
    356                     uint64_t addr) {
    357   dst.ReadUintFromMem(vform, index, addr);
    358 }
    359 
    360 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
    361   dst.ClearForWrite(vform);
    362   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    363     dst.ReadUintFromMem(vform, i, addr);
    364   }
    365 }
    366 
    367 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
    368                     LogicVRegister dst2, uint64_t addr1) {
    369   dst1.ClearForWrite(vform);
    370   dst2.ClearForWrite(vform);
    371   int esize = LaneSizeInBytesFromFormat(vform);
    372   uint64_t addr2 = addr1 + esize;
    373   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    374     dst1.ReadUintFromMem(vform, i, addr1);
    375     dst2.ReadUintFromMem(vform, i, addr2);
    376     addr1 += 2 * esize;
    377     addr2 += 2 * esize;
    378   }
    379 }
    380 
    381 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
    382                     LogicVRegister dst2, int index, uint64_t addr1) {
    383   dst1.ClearForWrite(vform);
    384   dst2.ClearForWrite(vform);
    385   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    386   dst1.ReadUintFromMem(vform, index, addr1);
    387   dst2.ReadUintFromMem(vform, index, addr2);
    388 }
    389 
    390 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
    391                      LogicVRegister dst2, uint64_t addr) {
    392   dst1.ClearForWrite(vform);
    393   dst2.ClearForWrite(vform);
    394   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    395   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    396     dst1.ReadUintFromMem(vform, i, addr);
    397     dst2.ReadUintFromMem(vform, i, addr2);
    398   }
    399 }
    400 
    401 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
    402                     LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
    403   dst1.ClearForWrite(vform);
    404   dst2.ClearForWrite(vform);
    405   dst3.ClearForWrite(vform);
    406   int esize = LaneSizeInBytesFromFormat(vform);
    407   uint64_t addr2 = addr1 + esize;
    408   uint64_t addr3 = addr2 + esize;
    409   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    410     dst1.ReadUintFromMem(vform, i, addr1);
    411     dst2.ReadUintFromMem(vform, i, addr2);
    412     dst3.ReadUintFromMem(vform, i, addr3);
    413     addr1 += 3 * esize;
    414     addr2 += 3 * esize;
    415     addr3 += 3 * esize;
    416   }
    417 }
    418 
    419 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
    420                     LogicVRegister dst2, LogicVRegister dst3, int index,
    421                     uint64_t addr1) {
    422   dst1.ClearForWrite(vform);
    423   dst2.ClearForWrite(vform);
    424   dst3.ClearForWrite(vform);
    425   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    426   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    427   dst1.ReadUintFromMem(vform, index, addr1);
    428   dst2.ReadUintFromMem(vform, index, addr2);
    429   dst3.ReadUintFromMem(vform, index, addr3);
    430 }
    431 
    432 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
    433                      LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
    434   dst1.ClearForWrite(vform);
    435   dst2.ClearForWrite(vform);
    436   dst3.ClearForWrite(vform);
    437   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    438   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    439   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    440     dst1.ReadUintFromMem(vform, i, addr);
    441     dst2.ReadUintFromMem(vform, i, addr2);
    442     dst3.ReadUintFromMem(vform, i, addr3);
    443   }
    444 }
    445 
    446 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
    447                     LogicVRegister dst2, LogicVRegister dst3,
    448                     LogicVRegister dst4, uint64_t addr1) {
    449   dst1.ClearForWrite(vform);
    450   dst2.ClearForWrite(vform);
    451   dst3.ClearForWrite(vform);
    452   dst4.ClearForWrite(vform);
    453   int esize = LaneSizeInBytesFromFormat(vform);
    454   uint64_t addr2 = addr1 + esize;
    455   uint64_t addr3 = addr2 + esize;
    456   uint64_t addr4 = addr3 + esize;
    457   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    458     dst1.ReadUintFromMem(vform, i, addr1);
    459     dst2.ReadUintFromMem(vform, i, addr2);
    460     dst3.ReadUintFromMem(vform, i, addr3);
    461     dst4.ReadUintFromMem(vform, i, addr4);
    462     addr1 += 4 * esize;
    463     addr2 += 4 * esize;
    464     addr3 += 4 * esize;
    465     addr4 += 4 * esize;
    466   }
    467 }
    468 
    469 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
    470                     LogicVRegister dst2, LogicVRegister dst3,
    471                     LogicVRegister dst4, int index, uint64_t addr1) {
    472   dst1.ClearForWrite(vform);
    473   dst2.ClearForWrite(vform);
    474   dst3.ClearForWrite(vform);
    475   dst4.ClearForWrite(vform);
    476   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    477   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    478   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    479   dst1.ReadUintFromMem(vform, index, addr1);
    480   dst2.ReadUintFromMem(vform, index, addr2);
    481   dst3.ReadUintFromMem(vform, index, addr3);
    482   dst4.ReadUintFromMem(vform, index, addr4);
    483 }
    484 
    485 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
    486                      LogicVRegister dst2, LogicVRegister dst3,
    487                      LogicVRegister dst4, uint64_t addr) {
    488   dst1.ClearForWrite(vform);
    489   dst2.ClearForWrite(vform);
    490   dst3.ClearForWrite(vform);
    491   dst4.ClearForWrite(vform);
    492   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    493   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    494   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    495   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    496     dst1.ReadUintFromMem(vform, i, addr);
    497     dst2.ReadUintFromMem(vform, i, addr2);
    498     dst3.ReadUintFromMem(vform, i, addr3);
    499     dst4.ReadUintFromMem(vform, i, addr4);
    500   }
    501 }
    502 
    503 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
    504   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    505     src.WriteUintToMem(vform, i, addr);
    506     addr += LaneSizeInBytesFromFormat(vform);
    507   }
    508 }
    509 
    510 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
    511                     uint64_t addr) {
    512   src.WriteUintToMem(vform, index, addr);
    513 }
    514 
    515 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
    516                     uint64_t addr) {
    517   int esize = LaneSizeInBytesFromFormat(vform);
    518   uint64_t addr2 = addr + esize;
    519   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    520     dst.WriteUintToMem(vform, i, addr);
    521     dst2.WriteUintToMem(vform, i, addr2);
    522     addr += 2 * esize;
    523     addr2 += 2 * esize;
    524   }
    525 }
    526 
    527 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
    528                     int index, uint64_t addr) {
    529   int esize = LaneSizeInBytesFromFormat(vform);
    530   dst.WriteUintToMem(vform, index, addr);
    531   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    532 }
    533 
    534 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
    535                     LogicVRegister dst3, uint64_t addr) {
    536   int esize = LaneSizeInBytesFromFormat(vform);
    537   uint64_t addr2 = addr + esize;
    538   uint64_t addr3 = addr2 + esize;
    539   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    540     dst.WriteUintToMem(vform, i, addr);
    541     dst2.WriteUintToMem(vform, i, addr2);
    542     dst3.WriteUintToMem(vform, i, addr3);
    543     addr += 3 * esize;
    544     addr2 += 3 * esize;
    545     addr3 += 3 * esize;
    546   }
    547 }
    548 
    549 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
    550                     LogicVRegister dst3, int index, uint64_t addr) {
    551   int esize = LaneSizeInBytesFromFormat(vform);
    552   dst.WriteUintToMem(vform, index, addr);
    553   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    554   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    555 }
    556 
    557 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
    558                     LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
    559   int esize = LaneSizeInBytesFromFormat(vform);
    560   uint64_t addr2 = addr + esize;
    561   uint64_t addr3 = addr2 + esize;
    562   uint64_t addr4 = addr3 + esize;
    563   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    564     dst.WriteUintToMem(vform, i, addr);
    565     dst2.WriteUintToMem(vform, i, addr2);
    566     dst3.WriteUintToMem(vform, i, addr3);
    567     dst4.WriteUintToMem(vform, i, addr4);
    568     addr += 4 * esize;
    569     addr2 += 4 * esize;
    570     addr3 += 4 * esize;
    571     addr4 += 4 * esize;
    572   }
    573 }
    574 
    575 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
    576                     LogicVRegister dst3, LogicVRegister dst4, int index,
    577                     uint64_t addr) {
    578   int esize = LaneSizeInBytesFromFormat(vform);
    579   dst.WriteUintToMem(vform, index, addr);
    580   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    581   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    582   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
    583 }
    584 
    585 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
    586                               const LogicVRegister& src1,
    587                               const LogicVRegister& src2, Condition cond) {
    588   dst.ClearForWrite(vform);
    589   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    590     int64_t sa = src1.Int(vform, i);
    591     int64_t sb = src2.Int(vform, i);
    592     uint64_t ua = src1.Uint(vform, i);
    593     uint64_t ub = src2.Uint(vform, i);
    594     bool result = false;
    595     switch (cond) {
    596       case eq:
    597         result = (ua == ub);
    598         break;
    599       case ge:
    600         result = (sa >= sb);
    601         break;
    602       case gt:
    603         result = (sa > sb);
    604         break;
    605       case hi:
    606         result = (ua > ub);
    607         break;
    608       case hs:
    609         result = (ua >= ub);
    610         break;
    611       case lt:
    612         result = (sa < sb);
    613         break;
    614       case le:
    615         result = (sa <= sb);
    616         break;
    617       default:
    618         UNREACHABLE();
    619     }
    620     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
    621   }
    622   return dst;
    623 }
    624 
    625 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
    626                               const LogicVRegister& src1, int imm,
    627                               Condition cond) {
    628   SimVRegister temp;
    629   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
    630   return cmp(vform, dst, src1, imm_reg, cond);
    631 }
    632 
    633 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
    634                                  const LogicVRegister& src1,
    635                                  const LogicVRegister& src2) {
    636   dst.ClearForWrite(vform);
    637   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    638     uint64_t ua = src1.Uint(vform, i);
    639     uint64_t ub = src2.Uint(vform, i);
    640     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
    641   }
    642   return dst;
    643 }
    644 
    645 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
    646                               const LogicVRegister& src1,
    647                               const LogicVRegister& src2) {
    648   int lane_size = LaneSizeInBitsFromFormat(vform);
    649   dst.ClearForWrite(vform);
    650   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    651     // Test for unsigned saturation.
    652     uint64_t ua = src1.UintLeftJustified(vform, i);
    653     uint64_t ub = src2.UintLeftJustified(vform, i);
    654     uint64_t ur = ua + ub;
    655     if (ur < ua) {
    656       dst.SetUnsignedSat(i, true);
    657     }
    658 
    659     // Test for signed saturation.
    660     bool pos_a = (ua >> 63) == 0;
    661     bool pos_b = (ub >> 63) == 0;
    662     bool pos_r = (ur >> 63) == 0;
    663     // If the signs of the operands are the same, but different from the result,
    664     // there was an overflow.
    665     if ((pos_a == pos_b) && (pos_a != pos_r)) {
    666       dst.SetSignedSat(i, pos_a);
    667     }
    668 
    669     dst.SetInt(vform, i, ur >> (64 - lane_size));
    670   }
    671   return dst;
    672 }
    673 
    674 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
    675                                const LogicVRegister& src1,
    676                                const LogicVRegister& src2) {
    677   SimVRegister temp1, temp2;
    678   uzp1(vform, temp1, src1, src2);
    679   uzp2(vform, temp2, src1, src2);
    680   add(vform, dst, temp1, temp2);
    681   return dst;
    682 }
    683 
    684 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
    685                               const LogicVRegister& src1,
    686                               const LogicVRegister& src2) {
    687   SimVRegister temp;
    688   mul(vform, temp, src1, src2);
    689   add(vform, dst, dst, temp);
    690   return dst;
    691 }
    692 
    693 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
    694                               const LogicVRegister& src1,
    695                               const LogicVRegister& src2) {
    696   SimVRegister temp;
    697   mul(vform, temp, src1, src2);
    698   sub(vform, dst, dst, temp);
    699   return dst;
    700 }
    701 
    702 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
    703                               const LogicVRegister& src1,
    704                               const LogicVRegister& src2) {
    705   dst.ClearForWrite(vform);
    706   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    707     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
    708   }
    709   return dst;
    710 }
    711 
    712 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
    713                               const LogicVRegister& src1,
    714                               const LogicVRegister& src2, int index) {
    715   SimVRegister temp;
    716   VectorFormat indexform = VectorFormatFillQ(vform);
    717   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
    718 }
    719 
    720 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
    721                               const LogicVRegister& src1,
    722                               const LogicVRegister& src2, int index) {
    723   SimVRegister temp;
    724   VectorFormat indexform = VectorFormatFillQ(vform);
    725   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
    726 }
    727 
    728 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
    729                               const LogicVRegister& src1,
    730                               const LogicVRegister& src2, int index) {
    731   SimVRegister temp;
    732   VectorFormat indexform = VectorFormatFillQ(vform);
    733   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
    734 }
    735 
    736 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
    737                                 const LogicVRegister& src1,
    738                                 const LogicVRegister& src2, int index) {
    739   SimVRegister temp;
    740   VectorFormat indexform =
    741       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    742   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    743 }
    744 
    745 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
    746                                  const LogicVRegister& src1,
    747                                  const LogicVRegister& src2, int index) {
    748   SimVRegister temp;
    749   VectorFormat indexform =
    750       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    751   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    752 }
    753 
    754 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
    755                                 const LogicVRegister& src1,
    756                                 const LogicVRegister& src2, int index) {
    757   SimVRegister temp;
    758   VectorFormat indexform =
    759       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    760   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    761 }
    762 
    763 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
    764                                  const LogicVRegister& src1,
    765                                  const LogicVRegister& src2, int index) {
    766   SimVRegister temp;
    767   VectorFormat indexform =
    768       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    769   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    770 }
    771 
    772 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
    773                                 const LogicVRegister& src1,
    774                                 const LogicVRegister& src2, int index) {
    775   SimVRegister temp;
    776   VectorFormat indexform =
    777       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    778   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    779 }
    780 
    781 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
    782                                  const LogicVRegister& src1,
    783                                  const LogicVRegister& src2, int index) {
    784   SimVRegister temp;
    785   VectorFormat indexform =
    786       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    787   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    788 }
    789 
    790 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
    791                                 const LogicVRegister& src1,
    792                                 const LogicVRegister& src2, int index) {
    793   SimVRegister temp;
    794   VectorFormat indexform =
    795       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    796   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    797 }
    798 
    799 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
    800                                  const LogicVRegister& src1,
    801                                  const LogicVRegister& src2, int index) {
    802   SimVRegister temp;
    803   VectorFormat indexform =
    804       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    805   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    806 }
    807 
    808 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
    809                                 const LogicVRegister& src1,
    810                                 const LogicVRegister& src2, int index) {
    811   SimVRegister temp;
    812   VectorFormat indexform =
    813       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    814   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    815 }
    816 
    817 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
    818                                  const LogicVRegister& src1,
    819                                  const LogicVRegister& src2, int index) {
    820   SimVRegister temp;
    821   VectorFormat indexform =
    822       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    823   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    824 }
    825 
    826 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
    827                                 const LogicVRegister& src1,
    828                                 const LogicVRegister& src2, int index) {
    829   SimVRegister temp;
    830   VectorFormat indexform =
    831       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    832   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    833 }
    834 
    835 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
    836                                  const LogicVRegister& src1,
    837                                  const LogicVRegister& src2, int index) {
    838   SimVRegister temp;
    839   VectorFormat indexform =
    840       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    841   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    842 }
    843 
    844 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
    845                                   const LogicVRegister& src1,
    846                                   const LogicVRegister& src2, int index) {
    847   SimVRegister temp;
    848   VectorFormat indexform =
    849       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    850   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    851 }
    852 
    853 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
    854                                    const LogicVRegister& src1,
    855                                    const LogicVRegister& src2, int index) {
    856   SimVRegister temp;
    857   VectorFormat indexform =
    858       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    859   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    860 }
    861 
    862 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
    863                                   const LogicVRegister& src1,
    864                                   const LogicVRegister& src2, int index) {
    865   SimVRegister temp;
    866   VectorFormat indexform =
    867       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    868   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    869 }
    870 
    871 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
    872                                    const LogicVRegister& src1,
    873                                    const LogicVRegister& src2, int index) {
    874   SimVRegister temp;
    875   VectorFormat indexform =
    876       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    877   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    878 }
    879 
    880 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
    881                                   const LogicVRegister& src1,
    882                                   const LogicVRegister& src2, int index) {
    883   SimVRegister temp;
    884   VectorFormat indexform =
    885       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    886   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    887 }
    888 
    889 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
    890                                    const LogicVRegister& src1,
    891                                    const LogicVRegister& src2, int index) {
    892   SimVRegister temp;
    893   VectorFormat indexform =
    894       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    895   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    896 }
    897 
    898 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
    899                                   const LogicVRegister& src1,
    900                                   const LogicVRegister& src2, int index) {
    901   SimVRegister temp;
    902   VectorFormat indexform = VectorFormatFillQ(vform);
    903   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
    904 }
    905 
    906 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
    907                                    const LogicVRegister& src1,
    908                                    const LogicVRegister& src2, int index) {
    909   SimVRegister temp;
    910   VectorFormat indexform = VectorFormatFillQ(vform);
    911   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
    912 }
    913 
    914 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
    915   uint16_t result = 0;
    916   uint16_t extended_op2 = op2;
    917   for (int i = 0; i < 8; ++i) {
    918     if ((op1 >> i) & 1) {
    919       result = result ^ (extended_op2 << i);
    920     }
    921   }
    922   return result;
    923 }
    924 
    925 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
    926                                const LogicVRegister& src1,
    927                                const LogicVRegister& src2) {
    928   dst.ClearForWrite(vform);
    929   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    930     dst.SetUint(vform, i,
    931                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
    932   }
    933   return dst;
    934 }
    935 
    936 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
    937                                 const LogicVRegister& src1,
    938                                 const LogicVRegister& src2) {
    939   VectorFormat vform_src = VectorFormatHalfWidth(vform);
    940   dst.ClearForWrite(vform);
    941   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    942     dst.SetUint(
    943         vform, i,
    944         PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
    945   }
    946   return dst;
    947 }
    948 
    949 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
    950                                  const LogicVRegister& src1,
    951                                  const LogicVRegister& src2) {
    952   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
    953   dst.ClearForWrite(vform);
    954   int lane_count = LaneCountFromFormat(vform);
    955   for (int i = 0; i < lane_count; i++) {
    956     dst.SetUint(vform, i,
    957                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
    958                                src2.Uint(vform_src, lane_count + i)));
    959   }
    960   return dst;
    961 }
    962 
    963 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
    964                               const LogicVRegister& src1,
    965                               const LogicVRegister& src2) {
    966   int lane_size = LaneSizeInBitsFromFormat(vform);
    967   dst.ClearForWrite(vform);
    968   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    969     // Test for unsigned saturation.
    970     uint64_t ua = src1.UintLeftJustified(vform, i);
    971     uint64_t ub = src2.UintLeftJustified(vform, i);
    972     uint64_t ur = ua - ub;
    973     if (ub > ua) {
    974       dst.SetUnsignedSat(i, false);
    975     }
    976 
    977     // Test for signed saturation.
    978     bool pos_a = (ua >> 63) == 0;
    979     bool pos_b = (ub >> 63) == 0;
    980     bool pos_r = (ur >> 63) == 0;
    981     // If the signs of the operands are different, and the sign of the first
    982     // operand doesn't match the result, there was an overflow.
    983     if ((pos_a != pos_b) && (pos_a != pos_r)) {
    984       dst.SetSignedSat(i, pos_a);
    985     }
    986 
    987     dst.SetInt(vform, i, ur >> (64 - lane_size));
    988   }
    989   return dst;
    990 }
    991 
    992 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
    993                                const LogicVRegister& src1,
    994                                const LogicVRegister& src2) {
    995   dst.ClearForWrite(vform);
    996   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    997     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
    998   }
    999   return dst;
   1000 }
   1001 
   1002 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
   1003                               const LogicVRegister& src1,
   1004                               const LogicVRegister& src2) {
   1005   dst.ClearForWrite(vform);
   1006   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1007     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
   1008   }
   1009   return dst;
   1010 }
   1011 
   1012 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
   1013                               const LogicVRegister& src1,
   1014                               const LogicVRegister& src2) {
   1015   dst.ClearForWrite(vform);
   1016   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1017     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
   1018   }
   1019   return dst;
   1020 }
   1021 
   1022 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
   1023                               const LogicVRegister& src1,
   1024                               const LogicVRegister& src2) {
   1025   dst.ClearForWrite(vform);
   1026   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1027     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
   1028   }
   1029   return dst;
   1030 }
   1031 
   1032 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
   1033                               const LogicVRegister& src1,
   1034                               const LogicVRegister& src2) {
   1035   dst.ClearForWrite(vform);
   1036   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1037     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
   1038   }
   1039   return dst;
   1040 }
   1041 
   1042 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
   1043                               const LogicVRegister& src, uint64_t imm) {
   1044   uint64_t result[16];
   1045   int laneCount = LaneCountFromFormat(vform);
   1046   for (int i = 0; i < laneCount; ++i) {
   1047     result[i] = src.Uint(vform, i) & ~imm;
   1048   }
   1049   dst.SetUintArray(vform, result);
   1050   return dst;
   1051 }
   1052 
   1053 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
   1054                               const LogicVRegister& src1,
   1055                               const LogicVRegister& src2) {
   1056   dst.ClearForWrite(vform);
   1057   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1058     uint64_t operand1 = dst.Uint(vform, i);
   1059     uint64_t operand2 = ~src2.Uint(vform, i);
   1060     uint64_t operand3 = src1.Uint(vform, i);
   1061     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1062     dst.SetUint(vform, i, result);
   1063   }
   1064   return dst;
   1065 }
   1066 
   1067 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
   1068                               const LogicVRegister& src1,
   1069                               const LogicVRegister& src2) {
   1070   dst.ClearForWrite(vform);
   1071   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1072     uint64_t operand1 = dst.Uint(vform, i);
   1073     uint64_t operand2 = src2.Uint(vform, i);
   1074     uint64_t operand3 = src1.Uint(vform, i);
   1075     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1076     dst.SetUint(vform, i, result);
   1077   }
   1078   return dst;
   1079 }
   1080 
   1081 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
   1082                               const LogicVRegister& src1,
   1083                               const LogicVRegister& src2) {
   1084   dst.ClearForWrite(vform);
   1085   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1086     uint64_t operand1 = src2.Uint(vform, i);
   1087     uint64_t operand2 = dst.Uint(vform, i);
   1088     uint64_t operand3 = src1.Uint(vform, i);
   1089     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1090     dst.SetUint(vform, i, result);
   1091   }
   1092   return dst;
   1093 }
   1094 
   1095 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
   1096                                   const LogicVRegister& src1,
   1097                                   const LogicVRegister& src2, bool max) {
   1098   dst.ClearForWrite(vform);
   1099   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1100     int64_t src1_val = src1.Int(vform, i);
   1101     int64_t src2_val = src2.Int(vform, i);
   1102     int64_t dst_val;
   1103     if (max) {
   1104       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1105     } else {
   1106       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1107     }
   1108     dst.SetInt(vform, i, dst_val);
   1109   }
   1110   return dst;
   1111 }
   1112 
   1113 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
   1114                                const LogicVRegister& src1,
   1115                                const LogicVRegister& src2) {
   1116   return SMinMax(vform, dst, src1, src2, true);
   1117 }
   1118 
   1119 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
   1120                                const LogicVRegister& src1,
   1121                                const LogicVRegister& src2) {
   1122   return SMinMax(vform, dst, src1, src2, false);
   1123 }
   1124 
   1125 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
   1126                                    const LogicVRegister& src1,
   1127                                    const LogicVRegister& src2, bool max) {
   1128   int lanes = LaneCountFromFormat(vform);
   1129   int64_t result[kMaxLanesPerVector];
   1130   const LogicVRegister* src = &src1;
   1131   for (int j = 0; j < 2; j++) {
   1132     for (int i = 0; i < lanes; i += 2) {
   1133       int64_t first_val = src->Int(vform, i);
   1134       int64_t second_val = src->Int(vform, i + 1);
   1135       int64_t dst_val;
   1136       if (max) {
   1137         dst_val = (first_val > second_val) ? first_val : second_val;
   1138       } else {
   1139         dst_val = (first_val < second_val) ? first_val : second_val;
   1140       }
   1141       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
   1142       result[(i >> 1) + (j * lanes / 2)] = dst_val;
   1143     }
   1144     src = &src2;
   1145   }
   1146   dst.SetIntArray(vform, result);
   1147   return dst;
   1148 }
   1149 
   1150 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
   1151                                 const LogicVRegister& src1,
   1152                                 const LogicVRegister& src2) {
   1153   return SMinMaxP(vform, dst, src1, src2, true);
   1154 }
   1155 
   1156 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
   1157                                 const LogicVRegister& src1,
   1158                                 const LogicVRegister& src2) {
   1159   return SMinMaxP(vform, dst, src1, src2, false);
   1160 }
   1161 
   1162 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
   1163                                const LogicVRegister& src) {
   1164   DCHECK_EQ(vform, kFormatD);
   1165 
   1166   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
   1167   dst.ClearForWrite(vform);
   1168   dst.SetUint(vform, 0, dst_val);
   1169   return dst;
   1170 }
   1171 
   1172 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
   1173                                const LogicVRegister& src) {
   1174   VectorFormat vform_dst =
   1175       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
   1176 
   1177   int64_t dst_val = 0;
   1178   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1179     dst_val += src.Int(vform, i);
   1180   }
   1181 
   1182   dst.ClearForWrite(vform_dst);
   1183   dst.SetInt(vform_dst, 0, dst_val);
   1184   return dst;
   1185 }
   1186 
   1187 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
   1188                                  const LogicVRegister& src) {
   1189   VectorFormat vform_dst =
   1190       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1191 
   1192   int64_t dst_val = 0;
   1193   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1194     dst_val += src.Int(vform, i);
   1195   }
   1196 
   1197   dst.ClearForWrite(vform_dst);
   1198   dst.SetInt(vform_dst, 0, dst_val);
   1199   return dst;
   1200 }
   1201 
   1202 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
   1203                                  const LogicVRegister& src) {
   1204   VectorFormat vform_dst =
   1205       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1206 
   1207   uint64_t dst_val = 0;
   1208   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1209     dst_val += src.Uint(vform, i);
   1210   }
   1211 
   1212   dst.ClearForWrite(vform_dst);
   1213   dst.SetUint(vform_dst, 0, dst_val);
   1214   return dst;
   1215 }
   1216 
   1217 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
   1218                                    const LogicVRegister& src, bool max) {
   1219   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
   1220   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1221     int64_t src_val = src.Int(vform, i);
   1222     if (max) {
   1223       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1224     } else {
   1225       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1226     }
   1227   }
   1228   dst.ClearForWrite(ScalarFormatFromFormat(vform));
   1229   dst.SetInt(vform, 0, dst_val);
   1230   return dst;
   1231 }
   1232 
   1233 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
   1234                                 const LogicVRegister& src) {
   1235   SMinMaxV(vform, dst, src, true);
   1236   return dst;
   1237 }
   1238 
   1239 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
   1240                                 const LogicVRegister& src) {
   1241   SMinMaxV(vform, dst, src, false);
   1242   return dst;
   1243 }
   1244 
   1245 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
   1246                                   const LogicVRegister& src1,
   1247                                   const LogicVRegister& src2, bool max) {
   1248   dst.ClearForWrite(vform);
   1249   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1250     uint64_t src1_val = src1.Uint(vform, i);
   1251     uint64_t src2_val = src2.Uint(vform, i);
   1252     uint64_t dst_val;
   1253     if (max) {
   1254       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1255     } else {
   1256       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1257     }
   1258     dst.SetUint(vform, i, dst_val);
   1259   }
   1260   return dst;
   1261 }
   1262 
   1263 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
   1264                                const LogicVRegister& src1,
   1265                                const LogicVRegister& src2) {
   1266   return UMinMax(vform, dst, src1, src2, true);
   1267 }
   1268 
   1269 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
   1270                                const LogicVRegister& src1,
   1271                                const LogicVRegister& src2) {
   1272   return UMinMax(vform, dst, src1, src2, false);
   1273 }
   1274 
   1275 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
   1276                                    const LogicVRegister& src1,
   1277                                    const LogicVRegister& src2, bool max) {
   1278   int lanes = LaneCountFromFormat(vform);
   1279   uint64_t result[kMaxLanesPerVector];
   1280   const LogicVRegister* src = &src1;
   1281   for (int j = 0; j < 2; j++) {
   1282     for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
   1283       uint64_t first_val = src->Uint(vform, i);
   1284       uint64_t second_val = src->Uint(vform, i + 1);
   1285       uint64_t dst_val;
   1286       if (max) {
   1287         dst_val = (first_val > second_val) ? first_val : second_val;
   1288       } else {
   1289         dst_val = (first_val < second_val) ? first_val : second_val;
   1290       }
   1291       DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
   1292       result[(i >> 1) + (j * lanes / 2)] = dst_val;
   1293     }
   1294     src = &src2;
   1295   }
   1296   dst.SetUintArray(vform, result);
   1297   return dst;
   1298 }
   1299 
   1300 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
   1301                                 const LogicVRegister& src1,
   1302                                 const LogicVRegister& src2) {
   1303   return UMinMaxP(vform, dst, src1, src2, true);
   1304 }
   1305 
   1306 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
   1307                                 const LogicVRegister& src1,
   1308                                 const LogicVRegister& src2) {
   1309   return UMinMaxP(vform, dst, src1, src2, false);
   1310 }
   1311 
   1312 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
   1313                                    const LogicVRegister& src, bool max) {
   1314   uint64_t dst_val = max ? 0 : UINT64_MAX;
   1315   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1316     uint64_t src_val = src.Uint(vform, i);
   1317     if (max) {
   1318       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1319     } else {
   1320       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1321     }
   1322   }
   1323   dst.ClearForWrite(ScalarFormatFromFormat(vform));
   1324   dst.SetUint(vform, 0, dst_val);
   1325   return dst;
   1326 }
   1327 
   1328 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
   1329                                 const LogicVRegister& src) {
   1330   UMinMaxV(vform, dst, src, true);
   1331   return dst;
   1332 }
   1333 
   1334 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
   1335                                 const LogicVRegister& src) {
   1336   UMinMaxV(vform, dst, src, false);
   1337   return dst;
   1338 }
   1339 
   1340 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
   1341                               const LogicVRegister& src, int shift) {
   1342   DCHECK_GE(shift, 0);
   1343   SimVRegister temp;
   1344   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1345   return ushl(vform, dst, src, shiftreg);
   1346 }
   1347 
   1348 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
   1349                                 const LogicVRegister& src, int shift) {
   1350   DCHECK_GE(shift, 0);
   1351   SimVRegister temp1, temp2;
   1352   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1353   LogicVRegister extendedreg = sxtl(vform, temp2, src);
   1354   return sshl(vform, dst, extendedreg, shiftreg);
   1355 }
   1356 
   1357 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
   1358                                  const LogicVRegister& src, int shift) {
   1359   DCHECK_GE(shift, 0);
   1360   SimVRegister temp1, temp2;
   1361   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1362   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
   1363   return sshl(vform, dst, extendedreg, shiftreg);
   1364 }
   1365 
   1366 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
   1367                                const LogicVRegister& src) {
   1368   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1369   return sshll(vform, dst, src, shift);
   1370 }
   1371 
   1372 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
   1373                                 const LogicVRegister& src) {
   1374   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1375   return sshll2(vform, dst, src, shift);
   1376 }
   1377 
   1378 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
   1379                                 const LogicVRegister& src, int shift) {
   1380   DCHECK_GE(shift, 0);
   1381   SimVRegister temp1, temp2;
   1382   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1383   LogicVRegister extendedreg = uxtl(vform, temp2, src);
   1384   return ushl(vform, dst, extendedreg, shiftreg);
   1385 }
   1386 
   1387 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
   1388                                  const LogicVRegister& src, int shift) {
   1389   DCHECK_GE(shift, 0);
   1390   SimVRegister temp1, temp2;
   1391   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1392   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
   1393   return ushl(vform, dst, extendedreg, shiftreg);
   1394 }
   1395 
   1396 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
   1397                               const LogicVRegister& src, int shift) {
   1398   dst.ClearForWrite(vform);
   1399   int laneCount = LaneCountFromFormat(vform);
   1400   for (int i = 0; i < laneCount; i++) {
   1401     uint64_t src_lane = src.Uint(vform, i);
   1402     uint64_t dst_lane = dst.Uint(vform, i);
   1403     uint64_t shifted = src_lane << shift;
   1404     uint64_t mask = MaxUintFromFormat(vform) << shift;
   1405     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1406   }
   1407   return dst;
   1408 }
   1409 
   1410 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
   1411                                 const LogicVRegister& src, int shift) {
   1412   DCHECK_GE(shift, 0);
   1413   SimVRegister temp;
   1414   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1415   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
   1416 }
   1417 
   1418 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
   1419                                 const LogicVRegister& src, int shift) {
   1420   DCHECK_GE(shift, 0);
   1421   SimVRegister temp;
   1422   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1423   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1424 }
   1425 
   1426 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
   1427                                  const LogicVRegister& src, int shift) {
   1428   DCHECK_GE(shift, 0);
   1429   SimVRegister temp;
   1430   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1431   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1432 }
   1433 
   1434 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
   1435                               const LogicVRegister& src, int shift) {
   1436   dst.ClearForWrite(vform);
   1437   int laneCount = LaneCountFromFormat(vform);
   1438   DCHECK((shift > 0) &&
   1439          (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
   1440   for (int i = 0; i < laneCount; i++) {
   1441     uint64_t src_lane = src.Uint(vform, i);
   1442     uint64_t dst_lane = dst.Uint(vform, i);
   1443     uint64_t shifted;
   1444     uint64_t mask;
   1445     if (shift == 64) {
   1446       shifted = 0;
   1447       mask = 0;
   1448     } else {
   1449       shifted = src_lane >> shift;
   1450       mask = MaxUintFromFormat(vform) >> shift;
   1451     }
   1452     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1453   }
   1454   return dst;
   1455 }
   1456 
   1457 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
   1458                                const LogicVRegister& src, int shift) {
   1459   DCHECK_GE(shift, 0);
   1460   SimVRegister temp;
   1461   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1462   return ushl(vform, dst, src, shiftreg);
   1463 }
   1464 
   1465 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
   1466                                const LogicVRegister& src, int shift) {
   1467   DCHECK_GE(shift, 0);
   1468   SimVRegister temp;
   1469   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1470   return sshl(vform, dst, src, shiftreg);
   1471 }
   1472 
   1473 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
   1474                                const LogicVRegister& src, int shift) {
   1475   SimVRegister temp;
   1476   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
   1477   return add(vform, dst, dst, shifted_reg);
   1478 }
   1479 
   1480 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
   1481                                const LogicVRegister& src, int shift) {
   1482   SimVRegister temp;
   1483   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
   1484   return add(vform, dst, dst, shifted_reg);
   1485 }
   1486 
   1487 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
   1488                                 const LogicVRegister& src, int shift) {
   1489   SimVRegister temp;
   1490   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
   1491   return add(vform, dst, dst, shifted_reg);
   1492 }
   1493 
   1494 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
   1495                                 const LogicVRegister& src, int shift) {
   1496   SimVRegister temp;
   1497   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
   1498   return add(vform, dst, dst, shifted_reg);
   1499 }
   1500 
   1501 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
   1502                               const LogicVRegister& src) {
   1503   uint64_t result[16];
   1504   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1505   int laneCount = LaneCountFromFormat(vform);
   1506   for (int i = 0; i < laneCount; i++) {
   1507     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
   1508   }
   1509 
   1510   dst.SetUintArray(vform, result);
   1511   return dst;
   1512 }
   1513 
   1514 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
   1515                               const LogicVRegister& src) {
   1516   uint64_t result[16];
   1517   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1518   int laneCount = LaneCountFromFormat(vform);
   1519   for (int i = 0; i < laneCount; i++) {
   1520     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
   1521   }
   1522 
   1523   dst.SetUintArray(vform, result);
   1524   return dst;
   1525 }
   1526 
   1527 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
   1528                               const LogicVRegister& src) {
   1529   uint64_t result[16];
   1530   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1531   int laneCount = LaneCountFromFormat(vform);
   1532   for (int i = 0; i < laneCount; i++) {
   1533     uint64_t value = src.Uint(vform, i);
   1534     result[i] = 0;
   1535     for (int j = 0; j < laneSizeInBits; j++) {
   1536       result[i] += (value & 1);
   1537       value >>= 1;
   1538     }
   1539   }
   1540 
   1541   dst.SetUintArray(vform, result);
   1542   return dst;
   1543 }
   1544 
   1545 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
   1546                                const LogicVRegister& src1,
   1547                                const LogicVRegister& src2) {
   1548   dst.ClearForWrite(vform);
   1549   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1550     int8_t shift_val = src2.Int(vform, i);
   1551     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
   1552 
   1553     // Set signed saturation state.
   1554     if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
   1555         (lj_src_val != 0)) {
   1556       dst.SetSignedSat(i, lj_src_val >= 0);
   1557     }
   1558 
   1559     // Set unsigned saturation state.
   1560     if (lj_src_val < 0) {
   1561       dst.SetUnsignedSat(i, false);
   1562     } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
   1563                (lj_src_val != 0)) {
   1564       dst.SetUnsignedSat(i, true);
   1565     }
   1566 
   1567     int64_t src_val = src1.Int(vform, i);
   1568     bool src_is_negative = src_val < 0;
   1569     if (shift_val > 63) {
   1570       dst.SetInt(vform, i, 0);
   1571     } else if (shift_val < -63) {
   1572       dst.SetRounding(i, src_is_negative);
   1573       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
   1574     } else {
   1575       // Use unsigned types for shifts, as behaviour is undefined for signed
   1576       // lhs.
   1577       uint64_t usrc_val = static_cast<uint64_t>(src_val);
   1578 
   1579       if (shift_val < 0) {
   1580         // Convert to right shift.
   1581         shift_val = -shift_val;
   1582 
   1583         // Set rounding state by testing most-significant bit shifted out.
   1584         // Rounding only needed on right shifts.
   1585         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
   1586           dst.SetRounding(i, true);
   1587         }
   1588 
   1589         usrc_val >>= shift_val;
   1590 
   1591         if (src_is_negative) {
   1592           // Simulate sign-extension.
   1593           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
   1594         }
   1595       } else {
   1596         usrc_val <<= shift_val;
   1597       }
   1598       dst.SetUint(vform, i, usrc_val);
   1599     }
   1600   }
   1601   return dst;
   1602 }
   1603 
   1604 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
   1605                                const LogicVRegister& src1,
   1606                                const LogicVRegister& src2) {
   1607   dst.ClearForWrite(vform);
   1608   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1609     int8_t shift_val = src2.Int(vform, i);
   1610     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
   1611 
   1612     // Set saturation state.
   1613     if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
   1614       dst.SetUnsignedSat(i, true);
   1615     }
   1616 
   1617     uint64_t src_val = src1.Uint(vform, i);
   1618     if ((shift_val > 63) || (shift_val < -64)) {
   1619       dst.SetUint(vform, i, 0);
   1620     } else {
   1621       if (shift_val < 0) {
   1622         // Set rounding state. Rounding only needed on right shifts.
   1623         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
   1624           dst.SetRounding(i, true);
   1625         }
   1626 
   1627         if (shift_val == -64) {
   1628           src_val = 0;
   1629         } else {
   1630           src_val >>= -shift_val;
   1631         }
   1632       } else {
   1633         src_val <<= shift_val;
   1634       }
   1635       dst.SetUint(vform, i, src_val);
   1636     }
   1637   }
   1638   return dst;
   1639 }
   1640 
   1641 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
   1642                               const LogicVRegister& src) {
   1643   dst.ClearForWrite(vform);
   1644   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1645     // Test for signed saturation.
   1646     int64_t sa = src.Int(vform, i);
   1647     if (sa == MinIntFromFormat(vform)) {
   1648       dst.SetSignedSat(i, true);
   1649     }
   1650     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
   1651   }
   1652   return dst;
   1653 }
   1654 
   1655 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
   1656                                  const LogicVRegister& src) {
   1657   dst.ClearForWrite(vform);
   1658   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1659     int64_t sa = dst.IntLeftJustified(vform, i);
   1660     uint64_t ub = src.UintLeftJustified(vform, i);
   1661     uint64_t ur = sa + ub;
   1662 
   1663     int64_t sr = bit_cast<int64_t>(ur);
   1664     if (sr < sa) {  // Test for signed positive saturation.
   1665       dst.SetInt(vform, i, MaxIntFromFormat(vform));
   1666     } else {
   1667       dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
   1668     }
   1669   }
   1670   return dst;
   1671 }
   1672 
   1673 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
   1674                                  const LogicVRegister& src) {
   1675   dst.ClearForWrite(vform);
   1676   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1677     uint64_t ua = dst.UintLeftJustified(vform, i);
   1678     int64_t sb = src.IntLeftJustified(vform, i);
   1679     uint64_t ur = ua + sb;
   1680 
   1681     if ((sb > 0) && (ur <= ua)) {
   1682       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
   1683     } else if ((sb < 0) && (ur >= ua)) {
   1684       dst.SetUint(vform, i, 0);  // Negative saturation.
   1685     } else {
   1686       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
   1687     }
   1688   }
   1689   return dst;
   1690 }
   1691 
   1692 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
   1693                               const LogicVRegister& src) {
   1694   dst.ClearForWrite(vform);
   1695   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1696     // Test for signed saturation.
   1697     int64_t sa = src.Int(vform, i);
   1698     if (sa == MinIntFromFormat(vform)) {
   1699       dst.SetSignedSat(i, true);
   1700     }
   1701     if (sa < 0) {
   1702       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
   1703     } else {
   1704       dst.SetInt(vform, i, sa);
   1705     }
   1706   }
   1707   return dst;
   1708 }
   1709 
   1710 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
   1711                                         LogicVRegister dst, bool dstIsSigned,
   1712                                         const LogicVRegister& src,
   1713                                         bool srcIsSigned) {
   1714   bool upperhalf = false;
   1715   VectorFormat srcform = kFormatUndefined;
   1716   int64_t ssrc[8];
   1717   uint64_t usrc[8];
   1718 
   1719   switch (dstform) {
   1720     case kFormat8B:
   1721       upperhalf = false;
   1722       srcform = kFormat8H;
   1723       break;
   1724     case kFormat16B:
   1725       upperhalf = true;
   1726       srcform = kFormat8H;
   1727       break;
   1728     case kFormat4H:
   1729       upperhalf = false;
   1730       srcform = kFormat4S;
   1731       break;
   1732     case kFormat8H:
   1733       upperhalf = true;
   1734       srcform = kFormat4S;
   1735       break;
   1736     case kFormat2S:
   1737       upperhalf = false;
   1738       srcform = kFormat2D;
   1739       break;
   1740     case kFormat4S:
   1741       upperhalf = true;
   1742       srcform = kFormat2D;
   1743       break;
   1744     case kFormatB:
   1745       upperhalf = false;
   1746       srcform = kFormatH;
   1747       break;
   1748     case kFormatH:
   1749       upperhalf = false;
   1750       srcform = kFormatS;
   1751       break;
   1752     case kFormatS:
   1753       upperhalf = false;
   1754       srcform = kFormatD;
   1755       break;
   1756     default:
   1757       UNIMPLEMENTED();
   1758   }
   1759 
   1760   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   1761     ssrc[i] = src.Int(srcform, i);
   1762     usrc[i] = src.Uint(srcform, i);
   1763   }
   1764 
   1765   int offset;
   1766   if (upperhalf) {
   1767     offset = LaneCountFromFormat(dstform) / 2;
   1768   } else {
   1769     offset = 0;
   1770     dst.ClearForWrite(dstform);
   1771   }
   1772 
   1773   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   1774     // Test for signed saturation
   1775     if (ssrc[i] > MaxIntFromFormat(dstform)) {
   1776       dst.SetSignedSat(offset + i, true);
   1777     } else if (ssrc[i] < MinIntFromFormat(dstform)) {
   1778       dst.SetSignedSat(offset + i, false);
   1779     }
   1780 
   1781     // Test for unsigned saturation
   1782     if (srcIsSigned) {
   1783       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
   1784         dst.SetUnsignedSat(offset + i, true);
   1785       } else if (ssrc[i] < 0) {
   1786         dst.SetUnsignedSat(offset + i, false);
   1787       }
   1788     } else {
   1789       if (usrc[i] > MaxUintFromFormat(dstform)) {
   1790         dst.SetUnsignedSat(offset + i, true);
   1791       }
   1792     }
   1793 
   1794     int64_t result;
   1795     if (srcIsSigned) {
   1796       result = ssrc[i] & MaxUintFromFormat(dstform);
   1797     } else {
   1798       result = usrc[i] & MaxUintFromFormat(dstform);
   1799     }
   1800 
   1801     if (dstIsSigned) {
   1802       dst.SetInt(dstform, offset + i, result);
   1803     } else {
   1804       dst.SetUint(dstform, offset + i, result);
   1805     }
   1806   }
   1807   return dst;
   1808 }
   1809 
   1810 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
   1811                               const LogicVRegister& src) {
   1812   return ExtractNarrow(vform, dst, true, src, true);
   1813 }
   1814 
   1815 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
   1816                                 const LogicVRegister& src) {
   1817   return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
   1818 }
   1819 
   1820 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
   1821                                  const LogicVRegister& src) {
   1822   return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
   1823 }
   1824 
   1825 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
   1826                                 const LogicVRegister& src) {
   1827   return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
   1828 }
   1829 
   1830 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
   1831                                   const LogicVRegister& src1,
   1832                                   const LogicVRegister& src2, bool issigned) {
   1833   dst.ClearForWrite(vform);
   1834   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1835     if (issigned) {
   1836       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
   1837       sr = sr > 0 ? sr : -sr;
   1838       dst.SetInt(vform, i, sr);
   1839     } else {
   1840       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
   1841       sr = sr > 0 ? sr : -sr;
   1842       dst.SetUint(vform, i, sr);
   1843     }
   1844   }
   1845   return dst;
   1846 }
   1847 
   1848 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
   1849                                const LogicVRegister& src1,
   1850                                const LogicVRegister& src2) {
   1851   SimVRegister temp;
   1852   dst.ClearForWrite(vform);
   1853   AbsDiff(vform, temp, src1, src2, true);
   1854   add(vform, dst, dst, temp);
   1855   return dst;
   1856 }
   1857 
   1858 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
   1859                                const LogicVRegister& src1,
   1860                                const LogicVRegister& src2) {
   1861   SimVRegister temp;
   1862   dst.ClearForWrite(vform);
   1863   AbsDiff(vform, temp, src1, src2, false);
   1864   add(vform, dst, dst, temp);
   1865   return dst;
   1866 }
   1867 
   1868 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
   1869                                const LogicVRegister& src) {
   1870   dst.ClearForWrite(vform);
   1871   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1872     dst.SetUint(vform, i, ~src.Uint(vform, i));
   1873   }
   1874   return dst;
   1875 }
   1876 
   1877 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
   1878                                const LogicVRegister& src) {
   1879   uint64_t result[16];
   1880   int laneCount = LaneCountFromFormat(vform);
   1881   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1882   uint64_t reversed_value;
   1883   uint64_t value;
   1884   for (int i = 0; i < laneCount; i++) {
   1885     value = src.Uint(vform, i);
   1886     reversed_value = 0;
   1887     for (int j = 0; j < laneSizeInBits; j++) {
   1888       reversed_value = (reversed_value << 1) | (value & 1);
   1889       value >>= 1;
   1890     }
   1891     result[i] = reversed_value;
   1892   }
   1893 
   1894   dst.SetUintArray(vform, result);
   1895   return dst;
   1896 }
   1897 
   1898 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
   1899                               const LogicVRegister& src, int revSize) {
   1900   uint64_t result[16];
   1901   int laneCount = LaneCountFromFormat(vform);
   1902   int laneSize = LaneSizeInBytesFromFormat(vform);
   1903   int lanesPerLoop = revSize / laneSize;
   1904   for (int i = 0; i < laneCount; i += lanesPerLoop) {
   1905     for (int j = 0; j < lanesPerLoop; j++) {
   1906       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
   1907     }
   1908   }
   1909   dst.SetUintArray(vform, result);
   1910   return dst;
   1911 }
   1912 
   1913 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
   1914                                 const LogicVRegister& src) {
   1915   return rev(vform, dst, src, 2);
   1916 }
   1917 
   1918 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
   1919                                 const LogicVRegister& src) {
   1920   return rev(vform, dst, src, 4);
   1921 }
   1922 
   1923 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
   1924                                 const LogicVRegister& src) {
   1925   return rev(vform, dst, src, 8);
   1926 }
   1927 
   1928 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
   1929                                 const LogicVRegister& src, bool is_signed,
   1930                                 bool do_accumulate) {
   1931   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
   1932   DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
   1933   DCHECK_LE(LaneCountFromFormat(vform), 8);
   1934 
   1935   uint64_t result[8];
   1936   int lane_count = LaneCountFromFormat(vform);
   1937   for (int i = 0; i < lane_count; i++) {
   1938     if (is_signed) {
   1939       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
   1940                                         src.Int(vformsrc, 2 * i + 1));
   1941     } else {
   1942       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
   1943     }
   1944   }
   1945 
   1946   dst.ClearForWrite(vform);
   1947   for (int i = 0; i < lane_count; ++i) {
   1948     if (do_accumulate) {
   1949       result[i] += dst.Uint(vform, i);
   1950     }
   1951     dst.SetUint(vform, i, result[i]);
   1952   }
   1953 
   1954   return dst;
   1955 }
   1956 
   1957 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
   1958                                  const LogicVRegister& src) {
   1959   return addlp(vform, dst, src, true, false);
   1960 }
   1961 
   1962 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
   1963                                  const LogicVRegister& src) {
   1964   return addlp(vform, dst, src, false, false);
   1965 }
   1966 
   1967 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
   1968                                  const LogicVRegister& src) {
   1969   return addlp(vform, dst, src, true, true);
   1970 }
   1971 
   1972 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
   1973                                  const LogicVRegister& src) {
   1974   return addlp(vform, dst, src, false, true);
   1975 }
   1976 
   1977 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
   1978                               const LogicVRegister& src1,
   1979                               const LogicVRegister& src2, int index) {
   1980   uint8_t result[16];
   1981   int laneCount = LaneCountFromFormat(vform);
   1982   for (int i = 0; i < laneCount - index; ++i) {
   1983     result[i] = src1.Uint(vform, i + index);
   1984   }
   1985   for (int i = 0; i < index; ++i) {
   1986     result[laneCount - index + i] = src2.Uint(vform, i);
   1987   }
   1988   dst.ClearForWrite(vform);
   1989   for (int i = 0; i < laneCount; ++i) {
   1990     dst.SetUint(vform, i, result[i]);
   1991   }
   1992   return dst;
   1993 }
   1994 
   1995 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
   1996                                       const LogicVRegister& src,
   1997                                       int src_index) {
   1998   int laneCount = LaneCountFromFormat(vform);
   1999   uint64_t value = src.Uint(vform, src_index);
   2000   dst.ClearForWrite(vform);
   2001   for (int i = 0; i < laneCount; ++i) {
   2002     dst.SetUint(vform, i, value);
   2003   }
   2004   return dst;
   2005 }
   2006 
   2007 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
   2008                                         uint64_t imm) {
   2009   int laneCount = LaneCountFromFormat(vform);
   2010   uint64_t value = imm & MaxUintFromFormat(vform);
   2011   dst.ClearForWrite(vform);
   2012   for (int i = 0; i < laneCount; ++i) {
   2013     dst.SetUint(vform, i, value);
   2014   }
   2015   return dst;
   2016 }
   2017 
   2018 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
   2019                                       int dst_index, const LogicVRegister& src,
   2020                                       int src_index) {
   2021   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
   2022   return dst;
   2023 }
   2024 
   2025 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
   2026                                         int dst_index, uint64_t imm) {
   2027   uint64_t value = imm & MaxUintFromFormat(vform);
   2028   dst.SetUint(vform, dst_index, value);
   2029   return dst;
   2030 }
   2031 
   2032 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
   2033                                uint64_t imm) {
   2034   int laneCount = LaneCountFromFormat(vform);
   2035   dst.ClearForWrite(vform);
   2036   for (int i = 0; i < laneCount; ++i) {
   2037     dst.SetUint(vform, i, imm);
   2038   }
   2039   return dst;
   2040 }
   2041 
   2042 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
   2043                                uint64_t imm) {
   2044   int laneCount = LaneCountFromFormat(vform);
   2045   dst.ClearForWrite(vform);
   2046   for (int i = 0; i < laneCount; ++i) {
   2047     dst.SetUint(vform, i, ~imm);
   2048   }
   2049   return dst;
   2050 }
   2051 
   2052 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
   2053                               const LogicVRegister& src, uint64_t imm) {
   2054   uint64_t result[16];
   2055   int laneCount = LaneCountFromFormat(vform);
   2056   for (int i = 0; i < laneCount; ++i) {
   2057     result[i] = src.Uint(vform, i) | imm;
   2058   }
   2059   dst.SetUintArray(vform, result);
   2060   return dst;
   2061 }
   2062 
   2063 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
   2064                                const LogicVRegister& src) {
   2065   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2066 
   2067   dst.ClearForWrite(vform);
   2068   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2069     dst.SetUint(vform, i, src.Uint(vform_half, i));
   2070   }
   2071   return dst;
   2072 }
   2073 
   2074 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
   2075                                const LogicVRegister& src) {
   2076   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2077 
   2078   dst.ClearForWrite(vform);
   2079   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2080     dst.SetInt(vform, i, src.Int(vform_half, i));
   2081   }
   2082   return dst;
   2083 }
   2084 
   2085 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
   2086                                 const LogicVRegister& src) {
   2087   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2088   int lane_count = LaneCountFromFormat(vform);
   2089 
   2090   dst.ClearForWrite(vform);
   2091   for (int i = 0; i < lane_count; i++) {
   2092     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
   2093   }
   2094   return dst;
   2095 }
   2096 
   2097 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
   2098                                 const LogicVRegister& src) {
   2099   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2100   int lane_count = LaneCountFromFormat(vform);
   2101 
   2102   dst.ClearForWrite(vform);
   2103   for (int i = 0; i < lane_count; i++) {
   2104     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
   2105   }
   2106   return dst;
   2107 }
   2108 
   2109 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
   2110                                const LogicVRegister& src, int shift) {
   2111   SimVRegister temp;
   2112   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
   2113   VectorFormat vform_dst = vform;
   2114   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
   2115   return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
   2116 }
   2117 
   2118 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
   2119                                 const LogicVRegister& src, int shift) {
   2120   SimVRegister temp;
   2121   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2122   VectorFormat vformdst = vform;
   2123   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
   2124   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
   2125 }
   2126 
   2127 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
   2128                                 const LogicVRegister& src, int shift) {
   2129   SimVRegister temp;
   2130   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2131   VectorFormat vformdst = vform;
   2132   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2133   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
   2134 }
   2135 
   2136 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
   2137                                  const LogicVRegister& src, int shift) {
   2138   SimVRegister temp;
   2139   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2140   VectorFormat vformdst = vform;
   2141   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2142   return ExtractNarrow(vformdst, dst, false, shifted_src, false);
   2143 }
   2144 
   2145 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
   2146                                 const LogicVRegister& ind,
   2147                                 bool zero_out_of_bounds,
   2148                                 const LogicVRegister* tab1,
   2149                                 const LogicVRegister* tab2,
   2150                                 const LogicVRegister* tab3,
   2151                                 const LogicVRegister* tab4) {
   2152   DCHECK_NOT_NULL(tab1);
   2153   const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
   2154   uint64_t result[kMaxLanesPerVector];
   2155   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2156     result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
   2157   }
   2158   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2159     uint64_t j = ind.Uint(vform, i);
   2160     int tab_idx = static_cast<int>(j >> 4);
   2161     int j_idx = static_cast<int>(j & 15);
   2162     if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
   2163       result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
   2164     }
   2165   }
   2166   dst.SetUintArray(vform, result);
   2167   return dst;
   2168 }
   2169 
   2170 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
   2171                               const LogicVRegister& tab,
   2172                               const LogicVRegister& ind) {
   2173   return Table(vform, dst, ind, true, &tab);
   2174 }
   2175 
   2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
   2177                               const LogicVRegister& tab,
   2178                               const LogicVRegister& tab2,
   2179                               const LogicVRegister& ind) {
   2180   return Table(vform, dst, ind, true, &tab, &tab2);
   2181 }
   2182 
   2183 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
   2184                               const LogicVRegister& tab,
   2185                               const LogicVRegister& tab2,
   2186                               const LogicVRegister& tab3,
   2187                               const LogicVRegister& ind) {
   2188   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
   2189 }
   2190 
   2191 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
   2192                               const LogicVRegister& tab,
   2193                               const LogicVRegister& tab2,
   2194                               const LogicVRegister& tab3,
   2195                               const LogicVRegister& tab4,
   2196                               const LogicVRegister& ind) {
   2197   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
   2198 }
   2199 
   2200 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
   2201                               const LogicVRegister& tab,
   2202                               const LogicVRegister& ind) {
   2203   return Table(vform, dst, ind, false, &tab);
   2204 }
   2205 
   2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
   2207                               const LogicVRegister& tab,
   2208                               const LogicVRegister& tab2,
   2209                               const LogicVRegister& ind) {
   2210   return Table(vform, dst, ind, false, &tab, &tab2);
   2211 }
   2212 
   2213 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
   2214                               const LogicVRegister& tab,
   2215                               const LogicVRegister& tab2,
   2216                               const LogicVRegister& tab3,
   2217                               const LogicVRegister& ind) {
   2218   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
   2219 }
   2220 
   2221 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
   2222                               const LogicVRegister& tab,
   2223                               const LogicVRegister& tab2,
   2224                               const LogicVRegister& tab3,
   2225                               const LogicVRegister& tab4,
   2226                               const LogicVRegister& ind) {
   2227   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
   2228 }
   2229 
   2230 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
   2231                                  const LogicVRegister& src, int shift) {
   2232   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2233 }
   2234 
   2235 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
   2236                                   const LogicVRegister& src, int shift) {
   2237   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2238 }
   2239 
   2240 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
   2241                                   const LogicVRegister& src, int shift) {
   2242   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2243 }
   2244 
   2245 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
   2246                                    const LogicVRegister& src, int shift) {
   2247   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2248 }
   2249 
   2250 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
   2251                                  const LogicVRegister& src, int shift) {
   2252   SimVRegister temp;
   2253   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2254   VectorFormat vformdst = vform;
   2255   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2256   return sqxtn(vformdst, dst, shifted_src);
   2257 }
   2258 
   2259 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
   2260                                   const LogicVRegister& src, int shift) {
   2261   SimVRegister temp;
   2262   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2263   VectorFormat vformdst = vform;
   2264   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2265   return sqxtn(vformdst, dst, shifted_src);
   2266 }
   2267 
   2268 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
   2269                                   const LogicVRegister& src, int shift) {
   2270   SimVRegister temp;
   2271   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2272   VectorFormat vformdst = vform;
   2273   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2274   return sqxtn(vformdst, dst, shifted_src);
   2275 }
   2276 
   2277 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
   2278                                    const LogicVRegister& src, int shift) {
   2279   SimVRegister temp;
   2280   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2281   VectorFormat vformdst = vform;
   2282   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2283   return sqxtn(vformdst, dst, shifted_src);
   2284 }
   2285 
   2286 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
   2287                                   const LogicVRegister& src, int shift) {
   2288   SimVRegister temp;
   2289   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2290   VectorFormat vformdst = vform;
   2291   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2292   return sqxtun(vformdst, dst, shifted_src);
   2293 }
   2294 
   2295 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
   2296                                    const LogicVRegister& src, int shift) {
   2297   SimVRegister temp;
   2298   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2299   VectorFormat vformdst = vform;
   2300   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2301   return sqxtun(vformdst, dst, shifted_src);
   2302 }
   2303 
   2304 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
   2305                                    const LogicVRegister& src, int shift) {
   2306   SimVRegister temp;
   2307   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2308   VectorFormat vformdst = vform;
   2309   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2310   return sqxtun(vformdst, dst, shifted_src);
   2311 }
   2312 
   2313 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
   2314                                     const LogicVRegister& src, int shift) {
   2315   SimVRegister temp;
   2316   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2317   VectorFormat vformdst = vform;
   2318   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2319   return sqxtun(vformdst, dst, shifted_src);
   2320 }
   2321 
   2322 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
   2323                                 const LogicVRegister& src1,
   2324                                 const LogicVRegister& src2) {
   2325   SimVRegister temp1, temp2;
   2326   uxtl(vform, temp1, src1);
   2327   uxtl(vform, temp2, src2);
   2328   add(vform, dst, temp1, temp2);
   2329   return dst;
   2330 }
   2331 
   2332 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
   2333                                  const LogicVRegister& src1,
   2334                                  const LogicVRegister& src2) {
   2335   SimVRegister temp1, temp2;
   2336   uxtl2(vform, temp1, src1);
   2337   uxtl2(vform, temp2, src2);
   2338   add(vform, dst, temp1, temp2);
   2339   return dst;
   2340 }
   2341 
   2342 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
   2343                                 const LogicVRegister& src1,
   2344                                 const LogicVRegister& src2) {
   2345   SimVRegister temp;
   2346   uxtl(vform, temp, src2);
   2347   add(vform, dst, src1, temp);
   2348   return dst;
   2349 }
   2350 
   2351 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
   2352                                  const LogicVRegister& src1,
   2353                                  const LogicVRegister& src2) {
   2354   SimVRegister temp;
   2355   uxtl2(vform, temp, src2);
   2356   add(vform, dst, src1, temp);
   2357   return dst;
   2358 }
   2359 
   2360 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
   2361                                 const LogicVRegister& src1,
   2362                                 const LogicVRegister& src2) {
   2363   SimVRegister temp1, temp2;
   2364   sxtl(vform, temp1, src1);
   2365   sxtl(vform, temp2, src2);
   2366   add(vform, dst, temp1, temp2);
   2367   return dst;
   2368 }
   2369 
   2370 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
   2371                                  const LogicVRegister& src1,
   2372                                  const LogicVRegister& src2) {
   2373   SimVRegister temp1, temp2;
   2374   sxtl2(vform, temp1, src1);
   2375   sxtl2(vform, temp2, src2);
   2376   add(vform, dst, temp1, temp2);
   2377   return dst;
   2378 }
   2379 
   2380 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
   2381                                 const LogicVRegister& src1,
   2382                                 const LogicVRegister& src2) {
   2383   SimVRegister temp;
   2384   sxtl(vform, temp, src2);
   2385   add(vform, dst, src1, temp);
   2386   return dst;
   2387 }
   2388 
   2389 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
   2390                                  const LogicVRegister& src1,
   2391                                  const LogicVRegister& src2) {
   2392   SimVRegister temp;
   2393   sxtl2(vform, temp, src2);
   2394   add(vform, dst, src1, temp);
   2395   return dst;
   2396 }
   2397 
   2398 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
   2399                                 const LogicVRegister& src1,
   2400                                 const LogicVRegister& src2) {
   2401   SimVRegister temp1, temp2;
   2402   uxtl(vform, temp1, src1);
   2403   uxtl(vform, temp2, src2);
   2404   sub(vform, dst, temp1, temp2);
   2405   return dst;
   2406 }
   2407 
   2408 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
   2409                                  const LogicVRegister& src1,
   2410                                  const LogicVRegister& src2) {
   2411   SimVRegister temp1, temp2;
   2412   uxtl2(vform, temp1, src1);
   2413   uxtl2(vform, temp2, src2);
   2414   sub(vform, dst, temp1, temp2);
   2415   return dst;
   2416 }
   2417 
   2418 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
   2419                                 const LogicVRegister& src1,
   2420                                 const LogicVRegister& src2) {
   2421   SimVRegister temp;
   2422   uxtl(vform, temp, src2);
   2423   sub(vform, dst, src1, temp);
   2424   return dst;
   2425 }
   2426 
   2427 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
   2428                                  const LogicVRegister& src1,
   2429                                  const LogicVRegister& src2) {
   2430   SimVRegister temp;
   2431   uxtl2(vform, temp, src2);
   2432   sub(vform, dst, src1, temp);
   2433   return dst;
   2434 }
   2435 
   2436 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
   2437                                 const LogicVRegister& src1,
   2438                                 const LogicVRegister& src2) {
   2439   SimVRegister temp1, temp2;
   2440   sxtl(vform, temp1, src1);
   2441   sxtl(vform, temp2, src2);
   2442   sub(vform, dst, temp1, temp2);
   2443   return dst;
   2444 }
   2445 
   2446 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
   2447                                  const LogicVRegister& src1,
   2448                                  const LogicVRegister& src2) {
   2449   SimVRegister temp1, temp2;
   2450   sxtl2(vform, temp1, src1);
   2451   sxtl2(vform, temp2, src2);
   2452   sub(vform, dst, temp1, temp2);
   2453   return dst;
   2454 }
   2455 
   2456 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
   2457                                 const LogicVRegister& src1,
   2458                                 const LogicVRegister& src2) {
   2459   SimVRegister temp;
   2460   sxtl(vform, temp, src2);
   2461   sub(vform, dst, src1, temp);
   2462   return dst;
   2463 }
   2464 
   2465 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
   2466                                  const LogicVRegister& src1,
   2467                                  const LogicVRegister& src2) {
   2468   SimVRegister temp;
   2469   sxtl2(vform, temp, src2);
   2470   sub(vform, dst, src1, temp);
   2471   return dst;
   2472 }
   2473 
   2474 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
   2475                                 const LogicVRegister& src1,
   2476                                 const LogicVRegister& src2) {
   2477   SimVRegister temp1, temp2;
   2478   uxtl(vform, temp1, src1);
   2479   uxtl(vform, temp2, src2);
   2480   uaba(vform, dst, temp1, temp2);
   2481   return dst;
   2482 }
   2483 
   2484 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
   2485                                  const LogicVRegister& src1,
   2486                                  const LogicVRegister& src2) {
   2487   SimVRegister temp1, temp2;
   2488   uxtl2(vform, temp1, src1);
   2489   uxtl2(vform, temp2, src2);
   2490   uaba(vform, dst, temp1, temp2);
   2491   return dst;
   2492 }
   2493 
   2494 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
   2495                                 const LogicVRegister& src1,
   2496                                 const LogicVRegister& src2) {
   2497   SimVRegister temp1, temp2;
   2498   sxtl(vform, temp1, src1);
   2499   sxtl(vform, temp2, src2);
   2500   saba(vform, dst, temp1, temp2);
   2501   return dst;
   2502 }
   2503 
   2504 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
   2505                                  const LogicVRegister& src1,
   2506                                  const LogicVRegister& src2) {
   2507   SimVRegister temp1, temp2;
   2508   sxtl2(vform, temp1, src1);
   2509   sxtl2(vform, temp2, src2);
   2510   saba(vform, dst, temp1, temp2);
   2511   return dst;
   2512 }
   2513 
   2514 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
   2515                                 const LogicVRegister& src1,
   2516                                 const LogicVRegister& src2) {
   2517   SimVRegister temp1, temp2;
   2518   uxtl(vform, temp1, src1);
   2519   uxtl(vform, temp2, src2);
   2520   AbsDiff(vform, dst, temp1, temp2, false);
   2521   return dst;
   2522 }
   2523 
   2524 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
   2525                                  const LogicVRegister& src1,
   2526                                  const LogicVRegister& src2) {
   2527   SimVRegister temp1, temp2;
   2528   uxtl2(vform, temp1, src1);
   2529   uxtl2(vform, temp2, src2);
   2530   AbsDiff(vform, dst, temp1, temp2, false);
   2531   return dst;
   2532 }
   2533 
   2534 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
   2535                                 const LogicVRegister& src1,
   2536                                 const LogicVRegister& src2) {
   2537   SimVRegister temp1, temp2;
   2538   sxtl(vform, temp1, src1);
   2539   sxtl(vform, temp2, src2);
   2540   AbsDiff(vform, dst, temp1, temp2, true);
   2541   return dst;
   2542 }
   2543 
   2544 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
   2545                                  const LogicVRegister& src1,
   2546                                  const LogicVRegister& src2) {
   2547   SimVRegister temp1, temp2;
   2548   sxtl2(vform, temp1, src1);
   2549   sxtl2(vform, temp2, src2);
   2550   AbsDiff(vform, dst, temp1, temp2, true);
   2551   return dst;
   2552 }
   2553 
   2554 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
   2555                                 const LogicVRegister& src1,
   2556                                 const LogicVRegister& src2) {
   2557   SimVRegister temp1, temp2;
   2558   uxtl(vform, temp1, src1);
   2559   uxtl(vform, temp2, src2);
   2560   mul(vform, dst, temp1, temp2);
   2561   return dst;
   2562 }
   2563 
   2564 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
   2565                                  const LogicVRegister& src1,
   2566                                  const LogicVRegister& src2) {
   2567   SimVRegister temp1, temp2;
   2568   uxtl2(vform, temp1, src1);
   2569   uxtl2(vform, temp2, src2);
   2570   mul(vform, dst, temp1, temp2);
   2571   return dst;
   2572 }
   2573 
   2574 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
   2575                                 const LogicVRegister& src1,
   2576                                 const LogicVRegister& src2) {
   2577   SimVRegister temp1, temp2;
   2578   sxtl(vform, temp1, src1);
   2579   sxtl(vform, temp2, src2);
   2580   mul(vform, dst, temp1, temp2);
   2581   return dst;
   2582 }
   2583 
   2584 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
   2585                                  const LogicVRegister& src1,
   2586                                  const LogicVRegister& src2) {
   2587   SimVRegister temp1, temp2;
   2588   sxtl2(vform, temp1, src1);
   2589   sxtl2(vform, temp2, src2);
   2590   mul(vform, dst, temp1, temp2);
   2591   return dst;
   2592 }
   2593 
   2594 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
   2595                                 const LogicVRegister& src1,
   2596                                 const LogicVRegister& src2) {
   2597   SimVRegister temp1, temp2;
   2598   uxtl(vform, temp1, src1);
   2599   uxtl(vform, temp2, src2);
   2600   mls(vform, dst, temp1, temp2);
   2601   return dst;
   2602 }
   2603 
   2604 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
   2605                                  const LogicVRegister& src1,
   2606                                  const LogicVRegister& src2) {
   2607   SimVRegister temp1, temp2;
   2608   uxtl2(vform, temp1, src1);
   2609   uxtl2(vform, temp2, src2);
   2610   mls(vform, dst, temp1, temp2);
   2611   return dst;
   2612 }
   2613 
   2614 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
   2615                                 const LogicVRegister& src1,
   2616                                 const LogicVRegister& src2) {
   2617   SimVRegister temp1, temp2;
   2618   sxtl(vform, temp1, src1);
   2619   sxtl(vform, temp2, src2);
   2620   mls(vform, dst, temp1, temp2);
   2621   return dst;
   2622 }
   2623 
   2624 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
   2625                                  const LogicVRegister& src1,
   2626                                  const LogicVRegister& src2) {
   2627   SimVRegister temp1, temp2;
   2628   sxtl2(vform, temp1, src1);
   2629   sxtl2(vform, temp2, src2);
   2630   mls(vform, dst, temp1, temp2);
   2631   return dst;
   2632 }
   2633 
   2634 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
   2635                                 const LogicVRegister& src1,
   2636                                 const LogicVRegister& src2) {
   2637   SimVRegister temp1, temp2;
   2638   uxtl(vform, temp1, src1);
   2639   uxtl(vform, temp2, src2);
   2640   mla(vform, dst, temp1, temp2);
   2641   return dst;
   2642 }
   2643 
   2644 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
   2645                                  const LogicVRegister& src1,
   2646                                  const LogicVRegister& src2) {
   2647   SimVRegister temp1, temp2;
   2648   uxtl2(vform, temp1, src1);
   2649   uxtl2(vform, temp2, src2);
   2650   mla(vform, dst, temp1, temp2);
   2651   return dst;
   2652 }
   2653 
   2654 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
   2655                                 const LogicVRegister& src1,
   2656                                 const LogicVRegister& src2) {
   2657   SimVRegister temp1, temp2;
   2658   sxtl(vform, temp1, src1);
   2659   sxtl(vform, temp2, src2);
   2660   mla(vform, dst, temp1, temp2);
   2661   return dst;
   2662 }
   2663 
   2664 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
   2665                                  const LogicVRegister& src1,
   2666                                  const LogicVRegister& src2) {
   2667   SimVRegister temp1, temp2;
   2668   sxtl2(vform, temp1, src1);
   2669   sxtl2(vform, temp2, src2);
   2670   mla(vform, dst, temp1, temp2);
   2671   return dst;
   2672 }
   2673 
   2674 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
   2675                                   const LogicVRegister& src1,
   2676                                   const LogicVRegister& src2) {
   2677   SimVRegister temp;
   2678   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   2679   return add(vform, dst, dst, product).SignedSaturate(vform);
   2680 }
   2681 
   2682 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
   2683                                    const LogicVRegister& src1,
   2684                                    const LogicVRegister& src2) {
   2685   SimVRegister temp;
   2686   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   2687   return add(vform, dst, dst, product).SignedSaturate(vform);
   2688 }
   2689 
   2690 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
   2691                                   const LogicVRegister& src1,
   2692                                   const LogicVRegister& src2) {
   2693   SimVRegister temp;
   2694   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   2695   return sub(vform, dst, dst, product).SignedSaturate(vform);
   2696 }
   2697 
   2698 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
   2699                                    const LogicVRegister& src1,
   2700                                    const LogicVRegister& src2) {
   2701   SimVRegister temp;
   2702   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   2703   return sub(vform, dst, dst, product).SignedSaturate(vform);
   2704 }
   2705 
   2706 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
   2707                                   const LogicVRegister& src1,
   2708                                   const LogicVRegister& src2) {
   2709   SimVRegister temp;
   2710   LogicVRegister product = smull(vform, temp, src1, src2);
   2711   return add(vform, dst, product, product).SignedSaturate(vform);
   2712 }
   2713 
   2714 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
   2715                                    const LogicVRegister& src1,
   2716                                    const LogicVRegister& src2) {
   2717   SimVRegister temp;
   2718   LogicVRegister product = smull2(vform, temp, src1, src2);
   2719   return add(vform, dst, product, product).SignedSaturate(vform);
   2720 }
   2721 
   2722 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
   2723                                    const LogicVRegister& src1,
   2724                                    const LogicVRegister& src2, bool round) {
   2725   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
   2726   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
   2727   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
   2728 
   2729   int esize = LaneSizeInBitsFromFormat(vform);
   2730   int round_const = round ? (1 << (esize - 2)) : 0;
   2731   int64_t product;
   2732 
   2733   dst.ClearForWrite(vform);
   2734   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2735     product = src1.Int(vform, i) * src2.Int(vform, i);
   2736     product += round_const;
   2737     product = product >> (esize - 1);
   2738 
   2739     if (product > MaxIntFromFormat(vform)) {
   2740       product = MaxIntFromFormat(vform);
   2741     } else if (product < MinIntFromFormat(vform)) {
   2742       product = MinIntFromFormat(vform);
   2743     }
   2744     dst.SetInt(vform, i, product);
   2745   }
   2746   return dst;
   2747 }
   2748 
   2749 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
   2750                                   const LogicVRegister& src1,
   2751                                   const LogicVRegister& src2) {
   2752   return sqrdmulh(vform, dst, src1, src2, false);
   2753 }
   2754 
   2755 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
   2756                                 const LogicVRegister& src1,
   2757                                 const LogicVRegister& src2) {
   2758   SimVRegister temp;
   2759   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   2760   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2761   return dst;
   2762 }
   2763 
   2764 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
   2765                                  const LogicVRegister& src1,
   2766                                  const LogicVRegister& src2) {
   2767   SimVRegister temp;
   2768   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   2769   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2770   return dst;
   2771 }
   2772 
   2773 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
   2774                                  const LogicVRegister& src1,
   2775                                  const LogicVRegister& src2) {
   2776   SimVRegister temp;
   2777   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   2778   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2779   return dst;
   2780 }
   2781 
   2782 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
   2783                                   const LogicVRegister& src1,
   2784                                   const LogicVRegister& src2) {
   2785   SimVRegister temp;
   2786   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   2787   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2788   return dst;
   2789 }
   2790 
   2791 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
   2792                                 const LogicVRegister& src1,
   2793                                 const LogicVRegister& src2) {
   2794   SimVRegister temp;
   2795   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   2796   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2797   return dst;
   2798 }
   2799 
   2800 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
   2801                                  const LogicVRegister& src1,
   2802                                  const LogicVRegister& src2) {
   2803   SimVRegister temp;
   2804   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   2805   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2806   return dst;
   2807 }
   2808 
   2809 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
   2810                                  const LogicVRegister& src1,
   2811                                  const LogicVRegister& src2) {
   2812   SimVRegister temp;
   2813   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   2814   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2815   return dst;
   2816 }
   2817 
   2818 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
   2819                                   const LogicVRegister& src1,
   2820                                   const LogicVRegister& src2) {
   2821   SimVRegister temp;
   2822   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   2823   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   2824   return dst;
   2825 }
   2826 
   2827 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
   2828                                const LogicVRegister& src1,
   2829                                const LogicVRegister& src2) {
   2830   uint64_t result[16];
   2831   int laneCount = LaneCountFromFormat(vform);
   2832   int pairs = laneCount / 2;
   2833   for (int i = 0; i < pairs; ++i) {
   2834     result[2 * i] = src1.Uint(vform, 2 * i);
   2835     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
   2836   }
   2837 
   2838   dst.SetUintArray(vform, result);
   2839   return dst;
   2840 }
   2841 
   2842 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
   2843                                const LogicVRegister& src1,
   2844                                const LogicVRegister& src2) {
   2845   uint64_t result[16];
   2846   int laneCount = LaneCountFromFormat(vform);
   2847   int pairs = laneCount / 2;
   2848   for (int i = 0; i < pairs; ++i) {
   2849     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
   2850     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
   2851   }
   2852 
   2853   dst.SetUintArray(vform, result);
   2854   return dst;
   2855 }
   2856 
   2857 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
   2858                                const LogicVRegister& src1,
   2859                                const LogicVRegister& src2) {
   2860   uint64_t result[16];
   2861   int laneCount = LaneCountFromFormat(vform);
   2862   int pairs = laneCount / 2;
   2863   for (int i = 0; i < pairs; ++i) {
   2864     result[2 * i] = src1.Uint(vform, i);
   2865     result[(2 * i) + 1] = src2.Uint(vform, i);
   2866   }
   2867 
   2868   dst.SetUintArray(vform, result);
   2869   return dst;
   2870 }
   2871 
   2872 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
   2873                                const LogicVRegister& src1,
   2874                                const LogicVRegister& src2) {
   2875   uint64_t result[16];
   2876   int laneCount = LaneCountFromFormat(vform);
   2877   int pairs = laneCount / 2;
   2878   for (int i = 0; i < pairs; ++i) {
   2879     result[2 * i] = src1.Uint(vform, pairs + i);
   2880     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
   2881   }
   2882 
   2883   dst.SetUintArray(vform, result);
   2884   return dst;
   2885 }
   2886 
   2887 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
   2888                                const LogicVRegister& src1,
   2889                                const LogicVRegister& src2) {
   2890   uint64_t result[32];
   2891   int laneCount = LaneCountFromFormat(vform);
   2892   for (int i = 0; i < laneCount; ++i) {
   2893     result[i] = src1.Uint(vform, i);
   2894     result[laneCount + i] = src2.Uint(vform, i);
   2895   }
   2896 
   2897   dst.ClearForWrite(vform);
   2898   for (int i = 0; i < laneCount; ++i) {
   2899     dst.SetUint(vform, i, result[2 * i]);
   2900   }
   2901   return dst;
   2902 }
   2903 
   2904 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
   2905                                const LogicVRegister& src1,
   2906                                const LogicVRegister& src2) {
   2907   uint64_t result[32];
   2908   int laneCount = LaneCountFromFormat(vform);
   2909   for (int i = 0; i < laneCount; ++i) {
   2910     result[i] = src1.Uint(vform, i);
   2911     result[laneCount + i] = src2.Uint(vform, i);
   2912   }
   2913 
   2914   dst.ClearForWrite(vform);
   2915   for (int i = 0; i < laneCount; ++i) {
   2916     dst.SetUint(vform, i, result[(2 * i) + 1]);
   2917   }
   2918   return dst;
   2919 }
   2920 
   2921 template <typename T>
   2922 T Simulator::FPAdd(T op1, T op2) {
   2923   T result = FPProcessNaNs(op1, op2);
   2924   if (std::isnan(result)) return result;
   2925 
   2926   if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
   2927     // inf + -inf returns the default NaN.
   2928     FPProcessException();
   2929     return FPDefaultNaN<T>();
   2930   } else {
   2931     // Other cases should be handled by standard arithmetic.
   2932     return op1 + op2;
   2933   }
   2934 }
   2935 
   2936 template <typename T>
   2937 T Simulator::FPSub(T op1, T op2) {
   2938   // NaNs should be handled elsewhere.
   2939   DCHECK(!std::isnan(op1) && !std::isnan(op2));
   2940 
   2941   if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
   2942     // inf - inf returns the default NaN.
   2943     FPProcessException();
   2944     return FPDefaultNaN<T>();
   2945   } else {
   2946     // Other cases should be handled by standard arithmetic.
   2947     return op1 - op2;
   2948   }
   2949 }
   2950 
   2951 template <typename T>
   2952 T Simulator::FPMul(T op1, T op2) {
   2953   // NaNs should be handled elsewhere.
   2954   DCHECK(!std::isnan(op1) && !std::isnan(op2));
   2955 
   2956   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   2957     // inf * 0.0 returns the default NaN.
   2958     FPProcessException();
   2959     return FPDefaultNaN<T>();
   2960   } else {
   2961     // Other cases should be handled by standard arithmetic.
   2962     return op1 * op2;
   2963   }
   2964 }
   2965 
   2966 template <typename T>
   2967 T Simulator::FPMulx(T op1, T op2) {
   2968   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   2969     // inf * 0.0 returns +/-2.0.
   2970     T two = 2.0;
   2971     return copysign(1.0, op1) * copysign(1.0, op2) * two;
   2972   }
   2973   return FPMul(op1, op2);
   2974 }
   2975 
   2976 template <typename T>
   2977 T Simulator::FPMulAdd(T a, T op1, T op2) {
   2978   T result = FPProcessNaNs3(a, op1, op2);
   2979 
   2980   T sign_a = copysign(1.0, a);
   2981   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
   2982   bool isinf_prod = std::isinf(op1) || std::isinf(op2);
   2983   bool operation_generates_nan =
   2984       (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
   2985       (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
   2986       (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
   2987 
   2988   if (std::isnan(result)) {
   2989     // Generated NaNs override quiet NaNs propagated from a.
   2990     if (operation_generates_nan && IsQuietNaN(a)) {
   2991       FPProcessException();
   2992       return FPDefaultNaN<T>();
   2993     } else {
   2994       return result;
   2995     }
   2996   }
   2997 
   2998   // If the operation would produce a NaN, return the default NaN.
   2999   if (operation_generates_nan) {
   3000     FPProcessException();
   3001     return FPDefaultNaN<T>();
   3002   }
   3003 
   3004   // Work around broken fma implementations for exact zero results: The sign of
   3005   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
   3006   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
   3007     return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
   3008   }
   3009 
   3010   result = FusedMultiplyAdd(op1, op2, a);
   3011   DCHECK(!std::isnan(result));
   3012 
   3013   // Work around broken fma implementations for rounded zero results: If a is
   3014   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
   3015   if ((a == 0.0) && (result == 0.0)) {
   3016     return copysign(0.0, sign_prod);
   3017   }
   3018 
   3019   return result;
   3020 }
   3021 
   3022 template <typename T>
   3023 T Simulator::FPDiv(T op1, T op2) {
   3024   // NaNs should be handled elsewhere.
   3025   DCHECK(!std::isnan(op1) && !std::isnan(op2));
   3026 
   3027   if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
   3028     // inf / inf and 0.0 / 0.0 return the default NaN.
   3029     FPProcessException();
   3030     return FPDefaultNaN<T>();
   3031   } else {
   3032     if (op2 == 0.0) {
   3033       FPProcessException();
   3034       if (!std::isnan(op1)) {
   3035         double op1_sign = copysign(1.0, op1);
   3036         double op2_sign = copysign(1.0, op2);
   3037         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
   3038       }
   3039     }
   3040 
   3041     // Other cases should be handled by standard arithmetic.
   3042     return op1 / op2;
   3043   }
   3044 }
   3045 
   3046 template <typename T>
   3047 T Simulator::FPSqrt(T op) {
   3048   if (std::isnan(op)) {
   3049     return FPProcessNaN(op);
   3050   } else if (op < 0.0) {
   3051     FPProcessException();
   3052     return FPDefaultNaN<T>();
   3053   } else {
   3054     return sqrt(op);
   3055   }
   3056 }
   3057 
   3058 template <typename T>
   3059 T Simulator::FPMax(T a, T b) {
   3060   T result = FPProcessNaNs(a, b);
   3061   if (std::isnan(result)) return result;
   3062 
   3063   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
   3064     // a and b are zero, and the sign differs: return +0.0.
   3065     return 0.0;
   3066   } else {
   3067     return (a > b) ? a : b;
   3068   }
   3069 }
   3070 
   3071 template <typename T>
   3072 T Simulator::FPMaxNM(T a, T b) {
   3073   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3074     a = kFP64NegativeInfinity;
   3075   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3076     b = kFP64NegativeInfinity;
   3077   }
   3078 
   3079   T result = FPProcessNaNs(a, b);
   3080   return std::isnan(result) ? result : FPMax(a, b);
   3081 }
   3082 
   3083 template <typename T>
   3084 T Simulator::FPMin(T a, T b) {
   3085   T result = FPProcessNaNs(a, b);
   3086   if (std::isnan(result)) return result;
   3087 
   3088   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
   3089     // a and b are zero, and the sign differs: return -0.0.
   3090     return -0.0;
   3091   } else {
   3092     return (a < b) ? a : b;
   3093   }
   3094 }
   3095 
   3096 template <typename T>
   3097 T Simulator::FPMinNM(T a, T b) {
   3098   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3099     a = kFP64PositiveInfinity;
   3100   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3101     b = kFP64PositiveInfinity;
   3102   }
   3103 
   3104   T result = FPProcessNaNs(a, b);
   3105   return std::isnan(result) ? result : FPMin(a, b);
   3106 }
   3107 
   3108 template <typename T>
   3109 T Simulator::FPRecipStepFused(T op1, T op2) {
   3110   const T two = 2.0;
   3111   if ((std::isinf(op1) && (op2 == 0.0)) ||
   3112       ((op1 == 0.0) && (std::isinf(op2)))) {
   3113     return two;
   3114   } else if (std::isinf(op1) || std::isinf(op2)) {
   3115     // Return +inf if signs match, otherwise -inf.
   3116     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3117                                           : kFP64NegativeInfinity;
   3118   } else {
   3119     return FusedMultiplyAdd(op1, op2, two);
   3120   }
   3121 }
   3122 
   3123 template <typename T>
   3124 T Simulator::FPRSqrtStepFused(T op1, T op2) {
   3125   const T one_point_five = 1.5;
   3126   const T two = 2.0;
   3127 
   3128   if ((std::isinf(op1) && (op2 == 0.0)) ||
   3129       ((op1 == 0.0) && (std::isinf(op2)))) {
   3130     return one_point_five;
   3131   } else if (std::isinf(op1) || std::isinf(op2)) {
   3132     // Return +inf if signs match, otherwise -inf.
   3133     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3134                                           : kFP64NegativeInfinity;
   3135   } else {
   3136     // The multiply-add-halve operation must be fully fused, so avoid interim
   3137     // rounding by checking which operand can be losslessly divided by two
   3138     // before doing the multiply-add.
   3139     if (std::isnormal(op1 / two)) {
   3140       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
   3141     } else if (std::isnormal(op2 / two)) {
   3142       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
   3143     } else {
   3144       // Neither operand is normal after halving: the result is dominated by
   3145       // the addition term, so just return that.
   3146       return one_point_five;
   3147     }
   3148   }
   3149 }
   3150 
   3151 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
   3152   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
   3153       (value == kFP64NegativeInfinity)) {
   3154     return value;
   3155   } else if (std::isnan(value)) {
   3156     return FPProcessNaN(value);
   3157   }
   3158 
   3159   double int_result = std::floor(value);
   3160   double error = value - int_result;
   3161   switch (round_mode) {
   3162     case FPTieAway: {
   3163       // Take care of correctly handling the range ]-0.5, -0.0], which must
   3164       // yield -0.0.
   3165       if ((-0.5 < value) && (value < 0.0)) {
   3166         int_result = -0.0;
   3167 
   3168       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
   3169         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3170         // result is positive, round up.
   3171         int_result++;
   3172       }
   3173       break;
   3174     }
   3175     case FPTieEven: {
   3176       // Take care of correctly handling the range [-0.5, -0.0], which must
   3177       // yield -0.0.
   3178       if ((-0.5 <= value) && (value < 0.0)) {
   3179         int_result = -0.0;
   3180 
   3181         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3182         // result is odd, round up.
   3183       } else if ((error > 0.5) ||
   3184                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
   3185         int_result++;
   3186       }
   3187       break;
   3188     }
   3189     case FPZero: {
   3190       // If value>0 then we take floor(value)
   3191       // otherwise, ceil(value).
   3192       if (value < 0) {
   3193         int_result = ceil(value);
   3194       }
   3195       break;
   3196     }
   3197     case FPNegativeInfinity: {
   3198       // We always use floor(value).
   3199       break;
   3200     }
   3201     case FPPositiveInfinity: {
   3202       // Take care of correctly handling the range ]-1.0, -0.0], which must
   3203       // yield -0.0.
   3204       if ((-1.0 < value) && (value < 0.0)) {
   3205         int_result = -0.0;
   3206 
   3207         // If the error is non-zero, round up.
   3208       } else if (error > 0.0) {
   3209         int_result++;
   3210       }
   3211       break;
   3212     }
   3213     default:
   3214       UNIMPLEMENTED();
   3215   }
   3216   return int_result;
   3217 }
   3218 
   3219 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
   3220   value = FPRoundInt(value, rmode);
   3221   if (value >= kWMaxInt) {
   3222     return kWMaxInt;
   3223   } else if (value < kWMinInt) {
   3224     return kWMinInt;
   3225   }
   3226   return std::isnan(value) ? 0 : static_cast<int32_t>(value);
   3227 }
   3228 
   3229 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
   3230   value = FPRoundInt(value, rmode);
   3231   if (value >= kXMaxInt) {
   3232     return kXMaxInt;
   3233   } else if (value < kXMinInt) {
   3234     return kXMinInt;
   3235   }
   3236   return std::isnan(value) ? 0 : static_cast<int64_t>(value);
   3237 }
   3238 
   3239 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
   3240   value = FPRoundInt(value, rmode);
   3241   if (value >= kWMaxUInt) {
   3242     return kWMaxUInt;
   3243   } else if (value < 0.0) {
   3244     return 0;
   3245   }
   3246   return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
   3247 }
   3248 
   3249 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
   3250   value = FPRoundInt(value, rmode);
   3251   if (value >= kXMaxUInt) {
   3252     return kXMaxUInt;
   3253   } else if (value < 0.0) {
   3254     return 0;
   3255   }
   3256   return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
   3257 }
   3258 
   3259 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                      \
   3260   template <typename T>                                                \
   3261   LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
   3262                                const LogicVRegister& src1,             \
   3263                                const LogicVRegister& src2) {           \
   3264     dst.ClearForWrite(vform);                                          \
   3265     for (int i = 0; i < LaneCountFromFormat(vform); i++) {             \
   3266       T op1 = src1.Float<T>(i);                                        \
   3267       T op2 = src2.Float<T>(i);                                        \
   3268       T result;                                                        \
   3269       if (PROCNAN) {                                                   \
   3270         result = FPProcessNaNs(op1, op2);                              \
   3271         if (!std::isnan(result)) {                                     \
   3272           result = OP(op1, op2);                                       \
   3273         }                                                              \
   3274       } else {                                                         \
   3275         result = OP(op1, op2);                                         \
   3276       }                                                                \
   3277       dst.SetFloat(i, result);                                         \
   3278     }                                                                  \
   3279     return dst;                                                        \
   3280   }                                                                    \
   3281                                                                        \
   3282   LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
   3283                                const LogicVRegister& src1,             \
   3284                                const LogicVRegister& src2) {           \
   3285     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {               \
   3286       FN<float>(vform, dst, src1, src2);                               \
   3287     } else {                                                           \
   3288       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);          \
   3289       FN<double>(vform, dst, src1, src2);                              \
   3290     }                                                                  \
   3291     return dst;                                                        \
   3292   }
   3293 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
   3294 #undef DEFINE_NEON_FP_VECTOR_OP
   3295 
   3296 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
   3297                                 const LogicVRegister& src1,
   3298                                 const LogicVRegister& src2) {
   3299   SimVRegister temp;
   3300   LogicVRegister product = fmul(vform, temp, src1, src2);
   3301   return fneg(vform, dst, product);
   3302 }
   3303 
   3304 template <typename T>
   3305 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
   3306                                  const LogicVRegister& src1,
   3307                                  const LogicVRegister& src2) {
   3308   dst.ClearForWrite(vform);
   3309   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3310     T op1 = -src1.Float<T>(i);
   3311     T op2 = src2.Float<T>(i);
   3312     T result = FPProcessNaNs(op1, op2);
   3313     dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
   3314   }
   3315   return dst;
   3316 }
   3317 
   3318 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
   3319                                  const LogicVRegister& src1,
   3320                                  const LogicVRegister& src2) {
   3321   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3322     frecps<float>(vform, dst, src1, src2);
   3323   } else {
   3324     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3325     frecps<double>(vform, dst, src1, src2);
   3326   }
   3327   return dst;
   3328 }
   3329 
   3330 template <typename T>
   3331 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
   3332                                   const LogicVRegister& src1,
   3333                                   const LogicVRegister& src2) {
   3334   dst.ClearForWrite(vform);
   3335   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3336     T op1 = -src1.Float<T>(i);
   3337     T op2 = src2.Float<T>(i);
   3338     T result = FPProcessNaNs(op1, op2);
   3339     dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
   3340   }
   3341   return dst;
   3342 }
   3343 
   3344 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
   3345                                   const LogicVRegister& src1,
   3346                                   const LogicVRegister& src2) {
   3347   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3348     frsqrts<float>(vform, dst, src1, src2);
   3349   } else {
   3350     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3351     frsqrts<double>(vform, dst, src1, src2);
   3352   }
   3353   return dst;
   3354 }
   3355 
   3356 template <typename T>
   3357 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
   3358                                const LogicVRegister& src1,
   3359                                const LogicVRegister& src2, Condition cond) {
   3360   dst.ClearForWrite(vform);
   3361   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3362     bool result = false;
   3363     T op1 = src1.Float<T>(i);
   3364     T op2 = src2.Float<T>(i);
   3365     T nan_result = FPProcessNaNs(op1, op2);
   3366     if (!std::isnan(nan_result)) {
   3367       switch (cond) {
   3368         case eq:
   3369           result = (op1 == op2);
   3370           break;
   3371         case ge:
   3372           result = (op1 >= op2);
   3373           break;
   3374         case gt:
   3375           result = (op1 > op2);
   3376           break;
   3377         case le:
   3378           result = (op1 <= op2);
   3379           break;
   3380         case lt:
   3381           result = (op1 < op2);
   3382           break;
   3383         default:
   3384           UNREACHABLE();
   3385       }
   3386     }
   3387     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
   3388   }
   3389   return dst;
   3390 }
   3391 
   3392 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
   3393                                const LogicVRegister& src1,
   3394                                const LogicVRegister& src2, Condition cond) {
   3395   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3396     fcmp<float>(vform, dst, src1, src2, cond);
   3397   } else {
   3398     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3399     fcmp<double>(vform, dst, src1, src2, cond);
   3400   }
   3401   return dst;
   3402 }
   3403 
   3404 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
   3405                                     const LogicVRegister& src, Condition cond) {
   3406   SimVRegister temp;
   3407   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3408     LogicVRegister zero_reg =
   3409         dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
   3410     fcmp<float>(vform, dst, src, zero_reg, cond);
   3411   } else {
   3412     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3413     LogicVRegister zero_reg =
   3414         dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
   3415     fcmp<double>(vform, dst, src, zero_reg, cond);
   3416   }
   3417   return dst;
   3418 }
   3419 
   3420 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
   3421                                   const LogicVRegister& src1,
   3422                                   const LogicVRegister& src2, Condition cond) {
   3423   SimVRegister temp1, temp2;
   3424   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3425     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
   3426     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
   3427     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
   3428   } else {
   3429     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3430     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
   3431     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
   3432     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
   3433   }
   3434   return dst;
   3435 }
   3436 
   3437 template <typename T>
   3438 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
   3439                                const LogicVRegister& src1,
   3440                                const LogicVRegister& src2) {
   3441   dst.ClearForWrite(vform);
   3442   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3443     T op1 = src1.Float<T>(i);
   3444     T op2 = src2.Float<T>(i);
   3445     T acc = dst.Float<T>(i);
   3446     T result = FPMulAdd(acc, op1, op2);
   3447     dst.SetFloat(i, result);
   3448   }
   3449   return dst;
   3450 }
   3451 
   3452 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
   3453                                const LogicVRegister& src1,
   3454                                const LogicVRegister& src2) {
   3455   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3456     fmla<float>(vform, dst, src1, src2);
   3457   } else {
   3458     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3459     fmla<double>(vform, dst, src1, src2);
   3460   }
   3461   return dst;
   3462 }
   3463 
   3464 template <typename T>
   3465 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
   3466                                const LogicVRegister& src1,
   3467                                const LogicVRegister& src2) {
   3468   dst.ClearForWrite(vform);
   3469   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3470     T op1 = -src1.Float<T>(i);
   3471     T op2 = src2.Float<T>(i);
   3472     T acc = dst.Float<T>(i);
   3473     T result = FPMulAdd(acc, op1, op2);
   3474     dst.SetFloat(i, result);
   3475   }
   3476   return dst;
   3477 }
   3478 
   3479 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
   3480                                const LogicVRegister& src1,
   3481                                const LogicVRegister& src2) {
   3482   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3483     fmls<float>(vform, dst, src1, src2);
   3484   } else {
   3485     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3486     fmls<double>(vform, dst, src1, src2);
   3487   }
   3488   return dst;
   3489 }
   3490 
   3491 template <typename T>
   3492 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
   3493                                const LogicVRegister& src) {
   3494   dst.ClearForWrite(vform);
   3495   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3496     T op = src.Float<T>(i);
   3497     op = -op;
   3498     dst.SetFloat(i, op);
   3499   }
   3500   return dst;
   3501 }
   3502 
   3503 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
   3504                                const LogicVRegister& src) {
   3505   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3506     fneg<float>(vform, dst, src);
   3507   } else {
   3508     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3509     fneg<double>(vform, dst, src);
   3510   }
   3511   return dst;
   3512 }
   3513 
   3514 template <typename T>
   3515 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
   3516                                 const LogicVRegister& src) {
   3517   dst.ClearForWrite(vform);
   3518   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3519     T op = src.Float<T>(i);
   3520     if (copysign(1.0, op) < 0.0) {
   3521       op = -op;
   3522     }
   3523     dst.SetFloat(i, op);
   3524   }
   3525   return dst;
   3526 }
   3527 
   3528 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
   3529                                 const LogicVRegister& src) {
   3530   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3531     fabs_<float>(vform, dst, src);
   3532   } else {
   3533     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3534     fabs_<double>(vform, dst, src);
   3535   }
   3536   return dst;
   3537 }
   3538 
   3539 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
   3540                                const LogicVRegister& src1,
   3541                                const LogicVRegister& src2) {
   3542   SimVRegister temp;
   3543   fsub(vform, temp, src1, src2);
   3544   fabs_(vform, dst, temp);
   3545   return dst;
   3546 }
   3547 
   3548 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
   3549                                 const LogicVRegister& src) {
   3550   dst.ClearForWrite(vform);
   3551   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3552     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3553       float result = FPSqrt(src.Float<float>(i));
   3554       dst.SetFloat(i, result);
   3555     }
   3556   } else {
   3557     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3558     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3559       double result = FPSqrt(src.Float<double>(i));
   3560       dst.SetFloat(i, result);
   3561     }
   3562   }
   3563   return dst;
   3564 }
   3565 
   3566 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                             \
   3567   LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
   3568                                 const LogicVRegister& src1,             \
   3569                                 const LogicVRegister& src2) {           \
   3570     SimVRegister temp1, temp2;                                          \
   3571     uzp1(vform, temp1, src1, src2);                                     \
   3572     uzp2(vform, temp2, src1, src2);                                     \
   3573     FN(vform, dst, temp1, temp2);                                       \
   3574     return dst;                                                         \
   3575   }                                                                     \
   3576                                                                         \
   3577   LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
   3578                                 const LogicVRegister& src) {            \
   3579     if (vform == kFormatS) {                                            \
   3580       float result = OP(src.Float<float>(0), src.Float<float>(1));      \
   3581       dst.SetFloat(0, result);                                          \
   3582     } else {                                                            \
   3583       DCHECK_EQ(vform, kFormatD);                                       \
   3584       double result = OP(src.Float<double>(0), src.Float<double>(1));   \
   3585       dst.SetFloat(0, result);                                          \
   3586     }                                                                   \
   3587     dst.ClearForWrite(vform);                                           \
   3588     return dst;                                                         \
   3589   }
   3590 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
   3591 #undef DEFINE_NEON_FP_PAIR_OP
   3592 
   3593 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
   3594                                    const LogicVRegister& src, FPMinMaxOp Op) {
   3595   DCHECK_EQ(vform, kFormat4S);
   3596   USE(vform);
   3597   float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
   3598   float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
   3599   float result = (this->*Op)(result1, result2);
   3600   dst.ClearForWrite(kFormatS);
   3601   dst.SetFloat<float>(0, result);
   3602   return dst;
   3603 }
   3604 
   3605 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
   3606                                 const LogicVRegister& src) {
   3607   return FMinMaxV(vform, dst, src, &Simulator::FPMax);
   3608 }
   3609 
   3610 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
   3611                                 const LogicVRegister& src) {
   3612   return FMinMaxV(vform, dst, src, &Simulator::FPMin);
   3613 }
   3614 
   3615 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
   3616                                   const LogicVRegister& src) {
   3617   return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
   3618 }
   3619 
   3620 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
   3621                                   const LogicVRegister& src) {
   3622   return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
   3623 }
   3624 
   3625 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
   3626                                const LogicVRegister& src1,
   3627                                const LogicVRegister& src2, int index) {
   3628   dst.ClearForWrite(vform);
   3629   SimVRegister temp;
   3630   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3631     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   3632     fmul<float>(vform, dst, src1, index_reg);
   3633   } else {
   3634     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3635     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   3636     fmul<double>(vform, dst, src1, index_reg);
   3637   }
   3638   return dst;
   3639 }
   3640 
   3641 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
   3642                                const LogicVRegister& src1,
   3643                                const LogicVRegister& src2, int index) {
   3644   dst.ClearForWrite(vform);
   3645   SimVRegister temp;
   3646   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3647     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   3648     fmla<float>(vform, dst, src1, index_reg);
   3649   } else {
   3650     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3651     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   3652     fmla<double>(vform, dst, src1, index_reg);
   3653   }
   3654   return dst;
   3655 }
   3656 
   3657 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
   3658                                const LogicVRegister& src1,
   3659                                const LogicVRegister& src2, int index) {
   3660   dst.ClearForWrite(vform);
   3661   SimVRegister temp;
   3662   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3663     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   3664     fmls<float>(vform, dst, src1, index_reg);
   3665   } else {
   3666     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3667     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   3668     fmls<double>(vform, dst, src1, index_reg);
   3669   }
   3670   return dst;
   3671 }
   3672 
   3673 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
   3674                                 const LogicVRegister& src1,
   3675                                 const LogicVRegister& src2, int index) {
   3676   dst.ClearForWrite(vform);
   3677   SimVRegister temp;
   3678   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3679     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   3680     fmulx<float>(vform, dst, src1, index_reg);
   3681 
   3682   } else {
   3683     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3684     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   3685     fmulx<double>(vform, dst, src1, index_reg);
   3686   }
   3687   return dst;
   3688 }
   3689 
   3690 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
   3691                                 const LogicVRegister& src,
   3692                                 FPRounding rounding_mode,
   3693                                 bool inexact_exception) {
   3694   dst.ClearForWrite(vform);
   3695   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3696     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3697       float input = src.Float<float>(i);
   3698       float rounded = FPRoundInt(input, rounding_mode);
   3699       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   3700         FPProcessException();
   3701       }
   3702       dst.SetFloat<float>(i, rounded);
   3703     }
   3704   } else {
   3705     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3706     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3707       double input = src.Float<double>(i);
   3708       double rounded = FPRoundInt(input, rounding_mode);
   3709       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   3710         FPProcessException();
   3711       }
   3712       dst.SetFloat<double>(i, rounded);
   3713     }
   3714   }
   3715   return dst;
   3716 }
   3717 
   3718 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
   3719                                 const LogicVRegister& src,
   3720                                 FPRounding rounding_mode, int fbits) {
   3721   dst.ClearForWrite(vform);
   3722   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3723     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3724       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   3725       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
   3726     }
   3727   } else {
   3728     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3729     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3730       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   3731       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
   3732     }
   3733   }
   3734   return dst;
   3735 }
   3736 
   3737 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
   3738                                 const LogicVRegister& src,
   3739                                 FPRounding rounding_mode, int fbits) {
   3740   dst.ClearForWrite(vform);
   3741   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3742     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3743       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   3744       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
   3745     }
   3746   } else {
   3747     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3748     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3749       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   3750       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
   3751     }
   3752   }
   3753   return dst;
   3754 }
   3755 
   3756 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
   3757                                 const LogicVRegister& src) {
   3758   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3759     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   3760       dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
   3761     }
   3762   } else {
   3763     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3764     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   3765       dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
   3766     }
   3767   }
   3768   return dst;
   3769 }
   3770 
   3771 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
   3772                                  const LogicVRegister& src) {
   3773   int lane_count = LaneCountFromFormat(vform);
   3774   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3775     for (int i = 0; i < lane_count; i++) {
   3776       dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
   3777     }
   3778   } else {
   3779     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3780     for (int i = 0; i < lane_count; i++) {
   3781       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
   3782     }
   3783   }
   3784   return dst;
   3785 }
   3786 
   3787 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
   3788                                 const LogicVRegister& src) {
   3789   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
   3790     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3791       dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
   3792     }
   3793   } else {
   3794     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
   3795     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3796       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
   3797     }
   3798   }
   3799   return dst;
   3800 }
   3801 
   3802 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
   3803                                  const LogicVRegister& src) {
   3804   int lane_count = LaneCountFromFormat(vform) / 2;
   3805   if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
   3806     for (int i = lane_count - 1; i >= 0; i--) {
   3807       dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
   3808     }
   3809   } else {
   3810     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
   3811     for (int i = lane_count - 1; i >= 0; i--) {
   3812       dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
   3813     }
   3814   }
   3815   return dst;
   3816 }
   3817 
   3818 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
   3819                                  const LogicVRegister& src) {
   3820   dst.ClearForWrite(vform);
   3821   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
   3822   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3823     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
   3824   }
   3825   return dst;
   3826 }
   3827 
   3828 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
   3829                                   const LogicVRegister& src) {
   3830   DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
   3831   int lane_count = LaneCountFromFormat(vform) / 2;
   3832   for (int i = lane_count - 1; i >= 0; i--) {
   3833     dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
   3834   }
   3835   return dst;
   3836 }
   3837 
   3838 // Based on reference C function recip_sqrt_estimate from ARM ARM.
   3839 double Simulator::recip_sqrt_estimate(double a) {
   3840   int q0, q1, s;
   3841   double r;
   3842   if (a < 0.5) {
   3843     q0 = static_cast<int>(a * 512.0);
   3844     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
   3845   } else {
   3846     q1 = static_cast<int>(a * 256.0);
   3847     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
   3848   }
   3849   s = static_cast<int>(256.0 * r + 0.5);
   3850   return static_cast<double>(s) / 256.0;
   3851 }
   3852 
   3853 namespace {
   3854 
   3855 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
   3856   return unsigned_bitextract_64(start_bit, end_bit, val);
   3857 }
   3858 
   3859 }  // anonymous namespace
   3860 
   3861 template <typename T>
   3862 T Simulator::FPRecipSqrtEstimate(T op) {
   3863   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
   3864                 "T must be a float or double");
   3865 
   3866   if (std::isnan(op)) {
   3867     return FPProcessNaN(op);
   3868   } else if (op == 0.0) {
   3869     if (copysign(1.0, op) < 0.0) {
   3870       return kFP64NegativeInfinity;
   3871     } else {
   3872       return kFP64PositiveInfinity;
   3873     }
   3874   } else if (copysign(1.0, op) < 0.0) {
   3875     FPProcessException();
   3876     return FPDefaultNaN<T>();
   3877   } else if (std::isinf(op)) {
   3878     return 0.0;
   3879   } else {
   3880     uint64_t fraction;
   3881     int32_t exp, result_exp;
   3882 
   3883     if (sizeof(T) == sizeof(float)) {
   3884       exp = static_cast<int32_t>(float_exp(op));
   3885       fraction = float_mantissa(op);
   3886       fraction <<= 29;
   3887     } else {
   3888       exp = static_cast<int32_t>(double_exp(op));
   3889       fraction = double_mantissa(op);
   3890     }
   3891 
   3892     if (exp == 0) {
   3893       while (Bits(fraction, 51, 51) == 0) {
   3894         fraction = Bits(fraction, 50, 0) << 1;
   3895         exp -= 1;
   3896       }
   3897       fraction = Bits(fraction, 50, 0) << 1;
   3898     }
   3899 
   3900     double scaled;
   3901     if (Bits(exp, 0, 0) == 0) {
   3902       scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
   3903     } else {
   3904       scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
   3905     }
   3906 
   3907     if (sizeof(T) == sizeof(float)) {
   3908       result_exp = (380 - exp) / 2;
   3909     } else {
   3910       result_exp = (3068 - exp) / 2;
   3911     }
   3912 
   3913     uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
   3914 
   3915     if (sizeof(T) == sizeof(float)) {
   3916       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   3917       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
   3918       return float_pack(0, exp_bits, est_bits);
   3919     } else {
   3920       return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
   3921     }
   3922   }
   3923 }
   3924 
   3925 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
   3926                                   const LogicVRegister& src) {
   3927   dst.ClearForWrite(vform);
   3928   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   3929     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3930       float input = src.Float<float>(i);
   3931       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
   3932     }
   3933   } else {
   3934     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   3935     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3936       double input = src.Float<double>(i);
   3937       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
   3938     }
   3939   }
   3940   return dst;
   3941 }
   3942 
   3943 template <typename T>
   3944 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
   3945   static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
   3946                 "T must be a float or double");
   3947   uint32_t sign;
   3948 
   3949   if (sizeof(T) == sizeof(float)) {
   3950     sign = float_sign(op);
   3951   } else {
   3952     sign = double_sign(op);
   3953   }
   3954 
   3955   if (std::isnan(op)) {
   3956     return FPProcessNaN(op);
   3957   } else if (std::isinf(op)) {
   3958     return (sign == 1) ? -0.0 : 0.0;
   3959   } else if (op == 0.0) {
   3960     FPProcessException();  // FPExc_DivideByZero exception.
   3961     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   3962   } else if (((sizeof(T) == sizeof(float)) &&
   3963               (std::fabs(op) < std::pow(2.0, -128.0))) ||
   3964              ((sizeof(T) == sizeof(double)) &&
   3965               (std::fabs(op) < std::pow(2.0, -1024.0)))) {
   3966     bool overflow_to_inf = false;
   3967     switch (rounding) {
   3968       case FPTieEven:
   3969         overflow_to_inf = true;
   3970         break;
   3971       case FPPositiveInfinity:
   3972         overflow_to_inf = (sign == 0);
   3973         break;
   3974       case FPNegativeInfinity:
   3975         overflow_to_inf = (sign == 1);
   3976         break;
   3977       case FPZero:
   3978         overflow_to_inf = false;
   3979         break;
   3980       default:
   3981         break;
   3982     }
   3983     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
   3984     if (overflow_to_inf) {
   3985       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   3986     } else {
   3987       // Return FPMaxNormal(sign).
   3988       if (sizeof(T) == sizeof(float)) {
   3989         return float_pack(sign, 0xFE, 0x07FFFFF);
   3990       } else {
   3991         return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
   3992       }
   3993     }
   3994   } else {
   3995     uint64_t fraction;
   3996     int32_t exp, result_exp;
   3997     uint32_t sign;
   3998 
   3999     if (sizeof(T) == sizeof(float)) {
   4000       sign = float_sign(op);
   4001       exp = static_cast<int32_t>(float_exp(op));
   4002       fraction = float_mantissa(op);
   4003       fraction <<= 29;
   4004     } else {
   4005       sign = double_sign(op);
   4006       exp = static_cast<int32_t>(double_exp(op));
   4007       fraction = double_mantissa(op);
   4008     }
   4009 
   4010     if (exp == 0) {
   4011       if (Bits(fraction, 51, 51) == 0) {
   4012         exp -= 1;
   4013         fraction = Bits(fraction, 49, 0) << 2;
   4014       } else {
   4015         fraction = Bits(fraction, 50, 0) << 1;
   4016       }
   4017     }
   4018 
   4019     double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
   4020 
   4021     if (sizeof(T) == sizeof(float)) {
   4022       result_exp = 253 - exp;
   4023     } else {
   4024       result_exp = 2045 - exp;
   4025     }
   4026 
   4027     double estimate = recip_estimate(scaled);
   4028 
   4029     fraction = double_mantissa(estimate);
   4030     if (result_exp == 0) {
   4031       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
   4032     } else if (result_exp == -1) {
   4033       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
   4034       result_exp = 0;
   4035     }
   4036     if (sizeof(T) == sizeof(float)) {
   4037       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   4038       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
   4039       return float_pack(sign, exp_bits, frac_bits);
   4040     } else {
   4041       return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
   4042     }
   4043   }
   4044 }
   4045 
   4046 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
   4047                                  const LogicVRegister& src, FPRounding round) {
   4048   dst.ClearForWrite(vform);
   4049   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   4050     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4051       float input = src.Float<float>(i);
   4052       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
   4053     }
   4054   } else {
   4055     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   4056     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4057       double input = src.Float<double>(i);
   4058       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
   4059     }
   4060   }
   4061   return dst;
   4062 }
   4063 
   4064 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
   4065                                   const LogicVRegister& src) {
   4066   dst.ClearForWrite(vform);
   4067   uint64_t operand;
   4068   uint32_t result;
   4069   double dp_operand, dp_result;
   4070   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4071     operand = src.Uint(vform, i);
   4072     if (operand <= 0x3FFFFFFF) {
   4073       result = 0xFFFFFFFF;
   4074     } else {
   4075       dp_operand = operand * std::pow(2.0, -32);
   4076       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
   4077       result = static_cast<uint32_t>(dp_result);
   4078     }
   4079     dst.SetUint(vform, i, result);
   4080   }
   4081   return dst;
   4082 }
   4083 
   4084 // Based on reference C function recip_estimate from ARM ARM.
   4085 double Simulator::recip_estimate(double a) {
   4086   int q, s;
   4087   double r;
   4088   q = static_cast<int>(a * 512.0);
   4089   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
   4090   s = static_cast<int>(256.0 * r + 0.5);
   4091   return static_cast<double>(s) / 256.0;
   4092 }
   4093 
   4094 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
   4095                                  const LogicVRegister& src) {
   4096   dst.ClearForWrite(vform);
   4097   uint64_t operand;
   4098   uint32_t result;
   4099   double dp_operand, dp_result;
   4100   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4101     operand = src.Uint(vform, i);
   4102     if (operand <= 0x7FFFFFFF) {
   4103       result = 0xFFFFFFFF;
   4104     } else {
   4105       dp_operand = operand * std::pow(2.0, -32);
   4106       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
   4107       result = static_cast<uint32_t>(dp_result);
   4108     }
   4109     dst.SetUint(vform, i, result);
   4110   }
   4111   return dst;
   4112 }
   4113 
   4114 template <typename T>
   4115 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
   4116                                  const LogicVRegister& src) {
   4117   dst.ClearForWrite(vform);
   4118   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4119     T op = src.Float<T>(i);
   4120     T result;
   4121     if (std::isnan(op)) {
   4122       result = FPProcessNaN(op);
   4123     } else {
   4124       int exp;
   4125       uint32_t sign;
   4126       if (sizeof(T) == sizeof(float)) {
   4127         sign = float_sign(op);
   4128         exp = static_cast<int>(float_exp(op));
   4129         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
   4130         result = float_pack(sign, exp, 0);
   4131       } else {
   4132         sign = double_sign(op);
   4133         exp = static_cast<int>(double_exp(op));
   4134         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
   4135         result = double_pack(sign, exp, 0);
   4136       }
   4137     }
   4138     dst.SetFloat(i, result);
   4139   }
   4140   return dst;
   4141 }
   4142 
   4143 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
   4144                                  const LogicVRegister& src) {
   4145   if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   4146     frecpx<float>(vform, dst, src);
   4147   } else {
   4148     DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   4149     frecpx<double>(vform, dst, src);
   4150   }
   4151   return dst;
   4152 }
   4153 
   4154 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
   4155                                 const LogicVRegister& src, int fbits,
   4156                                 FPRounding round) {
   4157   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4158     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   4159       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
   4160       dst.SetFloat<float>(i, result);
   4161     } else {
   4162       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   4163       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
   4164       dst.SetFloat<double>(i, result);
   4165     }
   4166   }
   4167   return dst;
   4168 }
   4169 
   4170 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
   4171                                 const LogicVRegister& src, int fbits,
   4172                                 FPRounding round) {
   4173   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4174     if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
   4175       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
   4176       dst.SetFloat<float>(i, result);
   4177     } else {
   4178       DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
   4179       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
   4180       dst.SetFloat<double>(i, result);
   4181     }
   4182   }
   4183   return dst;
   4184 }
   4185 
   4186 #endif  // USE_SIMULATOR
   4187 
   4188 }  // namespace internal
   4189 }  // namespace v8
   4190 
   4191 #endif  // V8_TARGET_ARCH_ARM64
   4192