Home | History | Annotate | Download | only in a64
      1 // Copyright 2015, ARM Limited
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifdef VIXL_INCLUDE_SIMULATOR
     28 
     29 #include <cmath>
     30 #include "vixl/a64/simulator-a64.h"
     31 
     32 namespace vixl {
     33 
     34 template<> double Simulator::FPDefaultNaN<double>() {
     35   return kFP64DefaultNaN;
     36 }
     37 
     38 
     39 template<> float Simulator::FPDefaultNaN<float>() {
     40   return kFP32DefaultNaN;
     41 }
     42 
     43 // See FPRound for a description of this function.
     44 static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
     45                                      uint64_t mantissa, FPRounding round_mode) {
     46   int64_t bits =
     47       FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
     48                                                                  exponent,
     49                                                                  mantissa,
     50                                                                  round_mode);
     51   return rawbits_to_double(bits);
     52 }
     53 
     54 
     55 // See FPRound for a description of this function.
     56 static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
     57                                    uint64_t mantissa, FPRounding round_mode) {
     58   int32_t bits =
     59       FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
     60                                                                exponent,
     61                                                                mantissa,
     62                                                                round_mode);
     63   return rawbits_to_float(bits);
     64 }
     65 
     66 
     67 // See FPRound for a description of this function.
     68 static inline float16 FPRoundToFloat16(int64_t sign,
     69                                        int64_t exponent,
     70                                        uint64_t mantissa,
     71                                        FPRounding round_mode) {
     72   return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
     73       sign, exponent, mantissa, round_mode);
     74 }
     75 
     76 
     77 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
     78   if (src >= 0) {
     79     return UFixedToDouble(src, fbits, round);
     80   } else {
     81     // This works for all negative values, including INT64_MIN.
     82     return -UFixedToDouble(-src, fbits, round);
     83   }
     84 }
     85 
     86 
     87 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
     88   // An input of 0 is a special case because the result is effectively
     89   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
     90   if (src == 0) {
     91     return 0.0;
     92   }
     93 
     94   // Calculate the exponent. The highest significant bit will have the value
     95   // 2^exponent.
     96   const int highest_significant_bit = 63 - CountLeadingZeros(src);
     97   const int64_t exponent = highest_significant_bit - fbits;
     98 
     99   return FPRoundToDouble(0, exponent, src, round);
    100 }
    101 
    102 
    103 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
    104   if (src >= 0) {
    105     return UFixedToFloat(src, fbits, round);
    106   } else {
    107     // This works for all negative values, including INT64_MIN.
    108     return -UFixedToFloat(-src, fbits, round);
    109   }
    110 }
    111 
    112 
    113 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
    114   // An input of 0 is a special case because the result is effectively
    115   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
    116   if (src == 0) {
    117     return 0.0f;
    118   }
    119 
    120   // Calculate the exponent. The highest significant bit will have the value
    121   // 2^exponent.
    122   const int highest_significant_bit = 63 - CountLeadingZeros(src);
    123   const int32_t exponent = highest_significant_bit - fbits;
    124 
    125   return FPRoundToFloat(0, exponent, src, round);
    126 }
    127 
    128 
    129 double Simulator::FPToDouble(float value) {
    130   switch (std::fpclassify(value)) {
    131     case FP_NAN: {
    132       if (IsSignallingNaN(value)) {
    133         FPProcessException();
    134       }
    135       if (DN()) return kFP64DefaultNaN;
    136 
    137       // Convert NaNs as the processor would:
    138       //  - The sign is propagated.
    139       //  - The payload (mantissa) is transferred entirely, except that the top
    140       //    bit is forced to '1', making the result a quiet NaN. The unused
    141       //    (low-order) payload bits are set to 0.
    142       uint32_t raw = float_to_rawbits(value);
    143 
    144       uint64_t sign = raw >> 31;
    145       uint64_t exponent = (1 << 11) - 1;
    146       uint64_t payload = unsigned_bitextract_64(21, 0, raw);
    147       payload <<= (52 - 23);  // The unused low-order bits should be 0.
    148       payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
    149 
    150       return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
    151     }
    152 
    153     case FP_ZERO:
    154     case FP_NORMAL:
    155     case FP_SUBNORMAL:
    156     case FP_INFINITE: {
    157       // All other inputs are preserved in a standard cast, because every value
    158       // representable using an IEEE-754 float is also representable using an
    159       // IEEE-754 double.
    160       return static_cast<double>(value);
    161     }
    162   }
    163 
    164   VIXL_UNREACHABLE();
    165   return static_cast<double>(value);
    166 }
    167 
    168 
    169 float Simulator::FPToFloat(float16 value) {
    170   uint32_t sign = value >> 15;
    171   uint32_t exponent = unsigned_bitextract_32(
    172       kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
    173       value);
    174   uint32_t mantissa = unsigned_bitextract_32(
    175       kFloat16MantissaBits - 1, 0, value);
    176 
    177   switch (float16classify(value)) {
    178     case FP_ZERO:
    179       return (sign == 0) ? 0.0f : -0.0f;
    180 
    181     case FP_INFINITE:
    182       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
    183 
    184     case FP_SUBNORMAL: {
    185       // Calculate shift required to put mantissa into the most-significant bits
    186       // of the destination mantissa.
    187       int shift = CountLeadingZeros(mantissa << (32 - 10));
    188 
    189       // Shift mantissa and discard implicit '1'.
    190       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
    191       mantissa &= (1 << kFloatMantissaBits) - 1;
    192 
    193       // Adjust the exponent for the shift applied, and rebias.
    194       exponent = exponent - shift + (-15 + 127);
    195       break;
    196     }
    197 
    198     case FP_NAN:
    199       if (IsSignallingNaN(value)) {
    200         FPProcessException();
    201       }
    202       if (DN()) return kFP32DefaultNaN;
    203 
    204       // Convert NaNs as the processor would:
    205       //  - The sign is propagated.
    206       //  - The payload (mantissa) is transferred entirely, except that the top
    207       //    bit is forced to '1', making the result a quiet NaN. The unused
    208       //    (low-order) payload bits are set to 0.
    209       exponent = (1 << kFloatExponentBits) - 1;
    210 
    211       // Increase bits in mantissa, making low-order bits 0.
    212       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    213       mantissa |= 1 << 22;  // Force a quiet NaN.
    214       break;
    215 
    216     case FP_NORMAL:
    217       // Increase bits in mantissa, making low-order bits 0.
    218       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    219 
    220       // Change exponent bias.
    221       exponent += (-15 + 127);
    222       break;
    223 
    224     default: VIXL_UNREACHABLE();
    225   }
    226   return rawbits_to_float((sign << 31) |
    227                           (exponent << kFloatMantissaBits) |
    228                           mantissa);
    229 }
    230 
    231 
    232 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
    233   // Only the FPTieEven rounding mode is implemented.
    234   VIXL_ASSERT(round_mode == FPTieEven);
    235   USE(round_mode);
    236 
    237   uint32_t raw = float_to_rawbits(value);
    238   int32_t sign = raw >> 31;
    239   int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
    240   uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
    241 
    242   switch (std::fpclassify(value)) {
    243     case FP_NAN: {
    244       if (IsSignallingNaN(value)) {
    245         FPProcessException();
    246       }
    247       if (DN()) return kFP16DefaultNaN;
    248 
    249       // Convert NaNs as the processor would:
    250       //  - The sign is propagated.
    251       //  - The payload (mantissa) is transferred as much as possible, except
    252       //    that the top bit is forced to '1', making the result a quiet NaN.
    253       float16 result = (sign == 0) ? kFP16PositiveInfinity
    254                                    : kFP16NegativeInfinity;
    255       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
    256       result |= (1 << 9);  // Force a quiet NaN;
    257       return result;
    258     }
    259 
    260     case FP_ZERO:
    261       return (sign == 0) ? 0 : 0x8000;
    262 
    263     case FP_INFINITE:
    264       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    265 
    266     case FP_NORMAL:
    267     case FP_SUBNORMAL: {
    268       // Convert float-to-half as the processor would, assuming that FPCR.FZ
    269       // (flush-to-zero) is not set.
    270 
    271       // Add the implicit '1' bit to the mantissa.
    272       mantissa += (1 << 23);
    273       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    274     }
    275   }
    276 
    277   VIXL_UNREACHABLE();
    278   return 0;
    279 }
    280 
    281 
    282 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
    283   // Only the FPTieEven rounding mode is implemented.
    284   VIXL_ASSERT(round_mode == FPTieEven);
    285   USE(round_mode);
    286 
    287   uint64_t raw = double_to_rawbits(value);
    288   int32_t sign = raw >> 63;
    289   int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
    290   uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
    291 
    292   switch (std::fpclassify(value)) {
    293     case FP_NAN: {
    294       if (IsSignallingNaN(value)) {
    295         FPProcessException();
    296       }
    297       if (DN()) return kFP16DefaultNaN;
    298 
    299       // Convert NaNs as the processor would:
    300       //  - The sign is propagated.
    301       //  - The payload (mantissa) is transferred as much as possible, except
    302       //    that the top bit is forced to '1', making the result a quiet NaN.
    303       float16 result = (sign == 0) ? kFP16PositiveInfinity
    304                                    : kFP16NegativeInfinity;
    305       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
    306       result |= (1 << 9);  // Force a quiet NaN;
    307       return result;
    308     }
    309 
    310     case FP_ZERO:
    311       return (sign == 0) ? 0 : 0x8000;
    312 
    313     case FP_INFINITE:
    314       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    315 
    316     case FP_NORMAL:
    317     case FP_SUBNORMAL: {
    318       // Convert double-to-half as the processor would, assuming that FPCR.FZ
    319       // (flush-to-zero) is not set.
    320 
    321       // Add the implicit '1' bit to the mantissa.
    322       mantissa += (UINT64_C(1) << 52);
    323       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    324     }
    325   }
    326 
    327   VIXL_UNREACHABLE();
    328   return 0;
    329 }
    330 
    331 
    332 float Simulator::FPToFloat(double value, FPRounding round_mode) {
    333   // Only the FPTieEven rounding mode is implemented.
    334   VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
    335   USE(round_mode);
    336 
    337   switch (std::fpclassify(value)) {
    338     case FP_NAN: {
    339       if (IsSignallingNaN(value)) {
    340         FPProcessException();
    341       }
    342       if (DN()) return kFP32DefaultNaN;
    343 
    344       // Convert NaNs as the processor would:
    345       //  - The sign is propagated.
    346       //  - The payload (mantissa) is transferred as much as possible, except
    347       //    that the top bit is forced to '1', making the result a quiet NaN.
    348       uint64_t raw = double_to_rawbits(value);
    349 
    350       uint32_t sign = raw >> 63;
    351       uint32_t exponent = (1 << 8) - 1;
    352       uint32_t payload =
    353           static_cast<uint32_t>(unsigned_bitextract_64(50, 52 - 23, raw));
    354       payload |= (1 << 22);   // Force a quiet NaN.
    355 
    356       return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
    357     }
    358 
    359     case FP_ZERO:
    360     case FP_INFINITE: {
    361       // In a C++ cast, any value representable in the target type will be
    362       // unchanged. This is always the case for +/-0.0 and infinities.
    363       return static_cast<float>(value);
    364     }
    365 
    366     case FP_NORMAL:
    367     case FP_SUBNORMAL: {
    368       // Convert double-to-float as the processor would, assuming that FPCR.FZ
    369       // (flush-to-zero) is not set.
    370       uint64_t raw = double_to_rawbits(value);
    371       // Extract the IEEE-754 double components.
    372       uint32_t sign = raw >> 63;
    373       // Extract the exponent and remove the IEEE-754 encoding bias.
    374       int32_t exponent =
    375           static_cast<int32_t>(unsigned_bitextract_64(62, 52, raw)) - 1023;
    376       // Extract the mantissa and add the implicit '1' bit.
    377       uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
    378       if (std::fpclassify(value) == FP_NORMAL) {
    379         mantissa |= (UINT64_C(1) << 52);
    380       }
    381       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
    382     }
    383   }
    384 
    385   VIXL_UNREACHABLE();
    386   return value;
    387 }
    388 
    389 
    390 void Simulator::ld1(VectorFormat vform,
    391                     LogicVRegister dst,
    392                     uint64_t addr) {
    393   dst.ClearForWrite(vform);
    394   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    395     dst.ReadUintFromMem(vform, i, addr);
    396     addr += LaneSizeInBytesFromFormat(vform);
    397   }
    398 }
    399 
    400 
    401 void Simulator::ld1(VectorFormat vform,
    402                     LogicVRegister dst,
    403                     int index,
    404                     uint64_t addr) {
    405   dst.ReadUintFromMem(vform, index, addr);
    406 }
    407 
    408 
    409 void Simulator::ld1r(VectorFormat vform,
    410                      LogicVRegister dst,
    411                      uint64_t addr) {
    412   dst.ClearForWrite(vform);
    413   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    414     dst.ReadUintFromMem(vform, i, addr);
    415   }
    416 }
    417 
    418 
    419 void Simulator::ld2(VectorFormat vform,
    420                     LogicVRegister dst1,
    421                     LogicVRegister dst2,
    422                     uint64_t addr1) {
    423   dst1.ClearForWrite(vform);
    424   dst2.ClearForWrite(vform);
    425   int esize = LaneSizeInBytesFromFormat(vform);
    426   uint64_t addr2 = addr1 + esize;
    427   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    428     dst1.ReadUintFromMem(vform, i, addr1);
    429     dst2.ReadUintFromMem(vform, i, addr2);
    430     addr1 += 2 * esize;
    431     addr2 += 2 * esize;
    432   }
    433 }
    434 
    435 
    436 void Simulator::ld2(VectorFormat vform,
    437                     LogicVRegister dst1,
    438                     LogicVRegister dst2,
    439                     int index,
    440                     uint64_t addr1) {
    441   dst1.ClearForWrite(vform);
    442   dst2.ClearForWrite(vform);
    443   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    444   dst1.ReadUintFromMem(vform, index, addr1);
    445   dst2.ReadUintFromMem(vform, index, addr2);
    446 }
    447 
    448 
    449 void Simulator::ld2r(VectorFormat vform,
    450                      LogicVRegister dst1,
    451                      LogicVRegister dst2,
    452                      uint64_t addr) {
    453   dst1.ClearForWrite(vform);
    454   dst2.ClearForWrite(vform);
    455   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    456   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    457     dst1.ReadUintFromMem(vform, i, addr);
    458     dst2.ReadUintFromMem(vform, i, addr2);
    459   }
    460 }
    461 
    462 
    463 void Simulator::ld3(VectorFormat vform,
    464                     LogicVRegister dst1,
    465                     LogicVRegister dst2,
    466                     LogicVRegister dst3,
    467                     uint64_t addr1) {
    468   dst1.ClearForWrite(vform);
    469   dst2.ClearForWrite(vform);
    470   dst3.ClearForWrite(vform);
    471   int esize = LaneSizeInBytesFromFormat(vform);
    472   uint64_t addr2 = addr1 + esize;
    473   uint64_t addr3 = addr2 + esize;
    474   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    475     dst1.ReadUintFromMem(vform, i, addr1);
    476     dst2.ReadUintFromMem(vform, i, addr2);
    477     dst3.ReadUintFromMem(vform, i, addr3);
    478     addr1 += 3 * esize;
    479     addr2 += 3 * esize;
    480     addr3 += 3 * esize;
    481   }
    482 }
    483 
    484 
    485 void Simulator::ld3(VectorFormat vform,
    486                     LogicVRegister dst1,
    487                     LogicVRegister dst2,
    488                     LogicVRegister dst3,
    489                     int index,
    490                     uint64_t addr1) {
    491   dst1.ClearForWrite(vform);
    492   dst2.ClearForWrite(vform);
    493   dst3.ClearForWrite(vform);
    494   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    495   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    496   dst1.ReadUintFromMem(vform, index, addr1);
    497   dst2.ReadUintFromMem(vform, index, addr2);
    498   dst3.ReadUintFromMem(vform, index, addr3);
    499 }
    500 
    501 
    502 void Simulator::ld3r(VectorFormat vform,
    503                      LogicVRegister dst1,
    504                      LogicVRegister dst2,
    505                      LogicVRegister dst3,
    506                      uint64_t addr) {
    507   dst1.ClearForWrite(vform);
    508   dst2.ClearForWrite(vform);
    509   dst3.ClearForWrite(vform);
    510   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    511   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    512   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    513     dst1.ReadUintFromMem(vform, i, addr);
    514     dst2.ReadUintFromMem(vform, i, addr2);
    515     dst3.ReadUintFromMem(vform, i, addr3);
    516   }
    517 }
    518 
    519 
    520 void Simulator::ld4(VectorFormat vform,
    521                     LogicVRegister dst1,
    522                     LogicVRegister dst2,
    523                     LogicVRegister dst3,
    524                     LogicVRegister dst4,
    525                     uint64_t addr1) {
    526   dst1.ClearForWrite(vform);
    527   dst2.ClearForWrite(vform);
    528   dst3.ClearForWrite(vform);
    529   dst4.ClearForWrite(vform);
    530   int esize = LaneSizeInBytesFromFormat(vform);
    531   uint64_t addr2 = addr1 + esize;
    532   uint64_t addr3 = addr2 + esize;
    533   uint64_t addr4 = addr3 + esize;
    534   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    535     dst1.ReadUintFromMem(vform, i, addr1);
    536     dst2.ReadUintFromMem(vform, i, addr2);
    537     dst3.ReadUintFromMem(vform, i, addr3);
    538     dst4.ReadUintFromMem(vform, i, addr4);
    539     addr1 += 4 * esize;
    540     addr2 += 4 * esize;
    541     addr3 += 4 * esize;
    542     addr4 += 4 * esize;
    543   }
    544 }
    545 
    546 
    547 void Simulator::ld4(VectorFormat vform,
    548                     LogicVRegister dst1,
    549                     LogicVRegister dst2,
    550                     LogicVRegister dst3,
    551                     LogicVRegister dst4,
    552                     int index,
    553                     uint64_t addr1) {
    554   dst1.ClearForWrite(vform);
    555   dst2.ClearForWrite(vform);
    556   dst3.ClearForWrite(vform);
    557   dst4.ClearForWrite(vform);
    558   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    559   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    560   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    561   dst1.ReadUintFromMem(vform, index, addr1);
    562   dst2.ReadUintFromMem(vform, index, addr2);
    563   dst3.ReadUintFromMem(vform, index, addr3);
    564   dst4.ReadUintFromMem(vform, index, addr4);
    565 }
    566 
    567 
    568 void Simulator::ld4r(VectorFormat vform,
    569                      LogicVRegister dst1,
    570                      LogicVRegister dst2,
    571                      LogicVRegister dst3,
    572                      LogicVRegister dst4,
    573                      uint64_t addr) {
    574   dst1.ClearForWrite(vform);
    575   dst2.ClearForWrite(vform);
    576   dst3.ClearForWrite(vform);
    577   dst4.ClearForWrite(vform);
    578   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    579   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    580   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    581   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    582     dst1.ReadUintFromMem(vform, i, addr);
    583     dst2.ReadUintFromMem(vform, i, addr2);
    584     dst3.ReadUintFromMem(vform, i, addr3);
    585     dst4.ReadUintFromMem(vform, i, addr4);
    586   }
    587 }
    588 
    589 
    590 void Simulator::st1(VectorFormat vform,
    591                     LogicVRegister src,
    592                     uint64_t addr) {
    593   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    594     src.WriteUintToMem(vform, i, addr);
    595     addr += LaneSizeInBytesFromFormat(vform);
    596   }
    597 }
    598 
    599 
    600 void Simulator::st1(VectorFormat vform,
    601                     LogicVRegister src,
    602                     int index,
    603                     uint64_t addr) {
    604   src.WriteUintToMem(vform, index, addr);
    605 }
    606 
    607 
    608 void Simulator::st2(VectorFormat vform,
    609                     LogicVRegister dst,
    610                     LogicVRegister dst2,
    611                     uint64_t addr) {
    612   int esize = LaneSizeInBytesFromFormat(vform);
    613   uint64_t addr2 = addr + esize;
    614   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    615     dst.WriteUintToMem(vform, i, addr);
    616     dst2.WriteUintToMem(vform, i, addr2);
    617     addr += 2 * esize;
    618     addr2 += 2 * esize;
    619   }
    620 }
    621 
    622 
    623 void Simulator::st2(VectorFormat vform,
    624                     LogicVRegister dst,
    625                     LogicVRegister dst2,
    626                     int index,
    627                     uint64_t addr) {
    628   int esize = LaneSizeInBytesFromFormat(vform);
    629   dst.WriteUintToMem(vform, index, addr);
    630   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    631 }
    632 
    633 
    634 void Simulator::st3(VectorFormat vform,
    635                     LogicVRegister dst,
    636                     LogicVRegister dst2,
    637                     LogicVRegister dst3,
    638                     uint64_t addr) {
    639   int esize = LaneSizeInBytesFromFormat(vform);
    640   uint64_t addr2 = addr + esize;
    641   uint64_t addr3 = addr2 + esize;
    642   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    643     dst.WriteUintToMem(vform, i, addr);
    644     dst2.WriteUintToMem(vform, i, addr2);
    645     dst3.WriteUintToMem(vform, i, addr3);
    646     addr += 3 * esize;
    647     addr2 += 3 * esize;
    648     addr3 += 3 * esize;
    649   }
    650 }
    651 
    652 
    653 void Simulator::st3(VectorFormat vform,
    654                     LogicVRegister dst,
    655                     LogicVRegister dst2,
    656                     LogicVRegister dst3,
    657                     int index,
    658                     uint64_t addr) {
    659   int esize = LaneSizeInBytesFromFormat(vform);
    660   dst.WriteUintToMem(vform, index, addr);
    661   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    662   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    663 }
    664 
    665 
    666 void Simulator::st4(VectorFormat vform,
    667                     LogicVRegister dst,
    668                     LogicVRegister dst2,
    669                     LogicVRegister dst3,
    670                     LogicVRegister dst4,
    671                     uint64_t addr) {
    672   int esize = LaneSizeInBytesFromFormat(vform);
    673   uint64_t addr2 = addr + esize;
    674   uint64_t addr3 = addr2 + esize;
    675   uint64_t addr4 = addr3 + esize;
    676   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    677     dst.WriteUintToMem(vform, i, addr);
    678     dst2.WriteUintToMem(vform, i, addr2);
    679     dst3.WriteUintToMem(vform, i, addr3);
    680     dst4.WriteUintToMem(vform, i, addr4);
    681     addr += 4 * esize;
    682     addr2 += 4 * esize;
    683     addr3 += 4 * esize;
    684     addr4 += 4 * esize;
    685   }
    686 }
    687 
    688 
    689 void Simulator::st4(VectorFormat vform,
    690                     LogicVRegister dst,
    691                     LogicVRegister dst2,
    692                     LogicVRegister dst3,
    693                     LogicVRegister dst4,
    694                     int index,
    695                     uint64_t addr) {
    696   int esize = LaneSizeInBytesFromFormat(vform);
    697   dst.WriteUintToMem(vform, index, addr);
    698   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    699   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    700   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
    701 }
    702 
    703 
    704 LogicVRegister Simulator::cmp(VectorFormat vform,
    705                               LogicVRegister dst,
    706                               const LogicVRegister& src1,
    707                               const LogicVRegister& src2,
    708                               Condition cond) {
    709   dst.ClearForWrite(vform);
    710   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    711     int64_t  sa = src1.Int(vform, i);
    712     int64_t  sb = src2.Int(vform, i);
    713     uint64_t ua = src1.Uint(vform, i);
    714     uint64_t ub = src2.Uint(vform, i);
    715     bool result = false;
    716     switch (cond) {
    717       case eq: result = (ua == ub); break;
    718       case ge: result = (sa >= sb); break;
    719       case gt: result = (sa > sb) ; break;
    720       case hi: result = (ua > ub) ; break;
    721       case hs: result = (ua >= ub); break;
    722       case lt: result = (sa < sb) ; break;
    723       case le: result = (sa <= sb); break;
    724       default: VIXL_UNREACHABLE(); break;
    725     }
    726     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
    727   }
    728   return dst;
    729 }
    730 
    731 
    732 LogicVRegister Simulator::cmp(VectorFormat vform,
    733                               LogicVRegister dst,
    734                               const LogicVRegister& src1,
    735                               int imm,
    736                               Condition cond) {
    737   SimVRegister temp;
    738   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
    739   return cmp(vform, dst, src1, imm_reg, cond);
    740 }
    741 
    742 
    743 LogicVRegister Simulator::cmptst(VectorFormat vform,
    744                                  LogicVRegister dst,
    745                                  const LogicVRegister& src1,
    746                                  const LogicVRegister& src2) {
    747   dst.ClearForWrite(vform);
    748   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    749     uint64_t ua = src1.Uint(vform, i);
    750     uint64_t ub = src2.Uint(vform, i);
    751     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
    752   }
    753   return dst;
    754 }
    755 
    756 
    757 LogicVRegister Simulator::add(VectorFormat vform,
    758                               LogicVRegister dst,
    759                               const LogicVRegister& src1,
    760                               const LogicVRegister& src2) {
    761   dst.ClearForWrite(vform);
    762   // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
    763   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    764     // Test for unsigned saturation.
    765     uint64_t ua = src1.UintLeftJustified(vform, i);
    766     uint64_t ub = src2.UintLeftJustified(vform, i);
    767     uint64_t ur = ua + ub;
    768     if (ur < ua) {
    769       dst.SetUnsignedSat(i, true);
    770     }
    771 
    772     // Test for signed saturation.
    773     int64_t sa = src1.IntLeftJustified(vform, i);
    774     int64_t sb = src2.IntLeftJustified(vform, i);
    775     int64_t sr = sa + sb;
    776     // If the signs of the operands are the same, but different from the result,
    777     // there was an overflow.
    778     if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
    779       dst.SetSignedSat(i, sa >= 0);
    780     }
    781 
    782     dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
    783   }
    784   return dst;
    785 }
    786 
    787 
    788 LogicVRegister Simulator::addp(VectorFormat vform,
    789                                LogicVRegister dst,
    790                                const LogicVRegister& src1,
    791                                const LogicVRegister& src2) {
    792   SimVRegister temp1, temp2;
    793   uzp1(vform, temp1, src1, src2);
    794   uzp2(vform, temp2, src1, src2);
    795   add(vform, dst, temp1, temp2);
    796   return dst;
    797 }
    798 
    799 
    800 LogicVRegister Simulator::mla(VectorFormat vform,
    801                               LogicVRegister dst,
    802                               const LogicVRegister& src1,
    803                               const LogicVRegister& src2) {
    804   SimVRegister temp;
    805   mul(vform, temp, src1, src2);
    806   add(vform, dst, dst, temp);
    807   return dst;
    808 }
    809 
    810 
    811 LogicVRegister Simulator::mls(VectorFormat vform,
    812                               LogicVRegister dst,
    813                               const LogicVRegister& src1,
    814                               const LogicVRegister& src2) {
    815   SimVRegister temp;
    816   mul(vform, temp, src1, src2);
    817   sub(vform, dst, dst, temp);
    818   return dst;
    819 }
    820 
    821 
    822 LogicVRegister Simulator::mul(VectorFormat vform,
    823                               LogicVRegister dst,
    824                               const LogicVRegister& src1,
    825                               const LogicVRegister& src2) {
    826   dst.ClearForWrite(vform);
    827   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    828     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
    829   }
    830   return dst;
    831 }
    832 
    833 
    834 LogicVRegister Simulator::mul(VectorFormat vform,
    835                               LogicVRegister dst,
    836                               const LogicVRegister& src1,
    837                               const LogicVRegister& src2,
    838                               int index) {
    839   SimVRegister temp;
    840   VectorFormat indexform = VectorFormatFillQ(vform);
    841   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
    842 }
    843 
    844 
    845 LogicVRegister Simulator::mla(VectorFormat vform,
    846                               LogicVRegister dst,
    847                               const LogicVRegister& src1,
    848                               const LogicVRegister& src2,
    849                               int index) {
    850   SimVRegister temp;
    851   VectorFormat indexform = VectorFormatFillQ(vform);
    852   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
    853 }
    854 
    855 
    856 LogicVRegister Simulator::mls(VectorFormat vform,
    857                               LogicVRegister dst,
    858                               const LogicVRegister& src1,
    859                               const LogicVRegister& src2,
    860                               int index) {
    861   SimVRegister temp;
    862   VectorFormat indexform = VectorFormatFillQ(vform);
    863   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
    864 }
    865 
    866 
    867 LogicVRegister Simulator::smull(VectorFormat vform,
    868                                 LogicVRegister dst,
    869                                 const LogicVRegister& src1,
    870                                 const LogicVRegister& src2,
    871                                 int index) {
    872   SimVRegister temp;
    873   VectorFormat indexform =
    874                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    875   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    876 }
    877 
    878 
    879 LogicVRegister Simulator::smull2(VectorFormat vform,
    880                                 LogicVRegister dst,
    881                                 const LogicVRegister& src1,
    882                                 const LogicVRegister& src2,
    883                                 int index) {
    884   SimVRegister temp;
    885   VectorFormat indexform =
    886                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    887   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    888 }
    889 
    890 
    891 LogicVRegister Simulator::umull(VectorFormat vform,
    892                                 LogicVRegister dst,
    893                                 const LogicVRegister& src1,
    894                                 const LogicVRegister& src2,
    895                                 int index) {
    896   SimVRegister temp;
    897   VectorFormat indexform =
    898                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    899   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    900 }
    901 
    902 
    903 LogicVRegister Simulator::umull2(VectorFormat vform,
    904                                 LogicVRegister dst,
    905                                 const LogicVRegister& src1,
    906                                 const LogicVRegister& src2,
    907                                 int index) {
    908   SimVRegister temp;
    909   VectorFormat indexform =
    910                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    911   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    912 }
    913 
    914 
    915 LogicVRegister Simulator::smlal(VectorFormat vform,
    916                                 LogicVRegister dst,
    917                                 const LogicVRegister& src1,
    918                                 const LogicVRegister& src2,
    919                                 int index) {
    920   SimVRegister temp;
    921   VectorFormat indexform =
    922                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    923   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    924 }
    925 
    926 
    927 LogicVRegister Simulator::smlal2(VectorFormat vform,
    928                                 LogicVRegister dst,
    929                                 const LogicVRegister& src1,
    930                                 const LogicVRegister& src2,
    931                                 int index) {
    932   SimVRegister temp;
    933   VectorFormat indexform =
    934                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    935   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    936 }
    937 
    938 
    939 LogicVRegister Simulator::umlal(VectorFormat vform,
    940                                 LogicVRegister dst,
    941                                 const LogicVRegister& src1,
    942                                 const LogicVRegister& src2,
    943                                 int index) {
    944   SimVRegister temp;
    945   VectorFormat indexform =
    946                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    947   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    948 }
    949 
    950 
    951 LogicVRegister Simulator::umlal2(VectorFormat vform,
    952                                 LogicVRegister dst,
    953                                 const LogicVRegister& src1,
    954                                 const LogicVRegister& src2,
    955                                 int index) {
    956   SimVRegister temp;
    957   VectorFormat indexform =
    958                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    959   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    960 }
    961 
    962 
    963 LogicVRegister Simulator::smlsl(VectorFormat vform,
    964                                 LogicVRegister dst,
    965                                 const LogicVRegister& src1,
    966                                 const LogicVRegister& src2,
    967                                 int index) {
    968   SimVRegister temp;
    969   VectorFormat indexform =
    970                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    971   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    972 }
    973 
    974 
    975 LogicVRegister Simulator::smlsl2(VectorFormat vform,
    976                                 LogicVRegister dst,
    977                                 const LogicVRegister& src1,
    978                                 const LogicVRegister& src2,
    979                                 int index) {
    980   SimVRegister temp;
    981   VectorFormat indexform =
    982                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    983   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    984 }
    985 
    986 
    987 LogicVRegister Simulator::umlsl(VectorFormat vform,
    988                                 LogicVRegister dst,
    989                                 const LogicVRegister& src1,
    990                                 const LogicVRegister& src2,
    991                                 int index) {
    992   SimVRegister temp;
    993   VectorFormat indexform =
    994                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    995   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    996 }
    997 
    998 
    999 LogicVRegister Simulator::umlsl2(VectorFormat vform,
   1000                                 LogicVRegister dst,
   1001                                 const LogicVRegister& src1,
   1002                                 const LogicVRegister& src2,
   1003                                 int index) {
   1004   SimVRegister temp;
   1005   VectorFormat indexform =
   1006                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1007   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1008 }
   1009 
   1010 
   1011 LogicVRegister Simulator::sqdmull(VectorFormat vform,
   1012                                   LogicVRegister dst,
   1013                                   const LogicVRegister& src1,
   1014                                   const LogicVRegister& src2,
   1015                                   int index) {
   1016   SimVRegister temp;
   1017   VectorFormat indexform =
   1018       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1019   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1020 }
   1021 
   1022 
   1023 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
   1024                                   LogicVRegister dst,
   1025                                   const LogicVRegister& src1,
   1026                                   const LogicVRegister& src2,
   1027                                   int index) {
   1028   SimVRegister temp;
   1029   VectorFormat indexform =
   1030       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1031   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1032 }
   1033 
   1034 
   1035 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
   1036                                   LogicVRegister dst,
   1037                                   const LogicVRegister& src1,
   1038                                   const LogicVRegister& src2,
   1039                                   int index) {
   1040   SimVRegister temp;
   1041   VectorFormat indexform =
   1042       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1043   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1044 }
   1045 
   1046 
   1047 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
   1048                                   LogicVRegister dst,
   1049                                   const LogicVRegister& src1,
   1050                                   const LogicVRegister& src2,
   1051                                   int index) {
   1052   SimVRegister temp;
   1053   VectorFormat indexform =
   1054       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1055   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1056 }
   1057 
   1058 
   1059 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
   1060                                   LogicVRegister dst,
   1061                                   const LogicVRegister& src1,
   1062                                   const LogicVRegister& src2,
   1063                                   int index) {
   1064   SimVRegister temp;
   1065   VectorFormat indexform =
   1066       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1067   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1068 }
   1069 
   1070 
   1071 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
   1072                                   LogicVRegister dst,
   1073                                   const LogicVRegister& src1,
   1074                                   const LogicVRegister& src2,
   1075                                   int index) {
   1076   SimVRegister temp;
   1077   VectorFormat indexform =
   1078       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1079   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1080 }
   1081 
   1082 
   1083 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
   1084                                   LogicVRegister dst,
   1085                                   const LogicVRegister& src1,
   1086                                   const LogicVRegister& src2,
   1087                                   int index) {
   1088   SimVRegister temp;
   1089   VectorFormat indexform = VectorFormatFillQ(vform);
   1090   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1091 }
   1092 
   1093 
   1094 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
   1095                                   LogicVRegister dst,
   1096                                   const LogicVRegister& src1,
   1097                                   const LogicVRegister& src2,
   1098                                   int index) {
   1099   SimVRegister temp;
   1100   VectorFormat indexform = VectorFormatFillQ(vform);
   1101   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1102 }
   1103 
   1104 
   1105 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
   1106   uint16_t result = 0;
   1107   uint16_t extended_op2 = op2;
   1108   for (int i = 0; i < 8; ++i) {
   1109     if ((op1 >> i) & 1) {
   1110       result = result ^ (extended_op2 << i);
   1111     }
   1112   }
   1113   return result;
   1114 }
   1115 
   1116 
   1117 LogicVRegister Simulator::pmul(VectorFormat vform,
   1118                                LogicVRegister dst,
   1119                                const LogicVRegister& src1,
   1120                                const LogicVRegister& src2) {
   1121   dst.ClearForWrite(vform);
   1122   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1123     dst.SetUint(vform, i,
   1124                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
   1125   }
   1126   return dst;
   1127 }
   1128 
   1129 
   1130 LogicVRegister Simulator::pmull(VectorFormat vform,
   1131                                LogicVRegister dst,
   1132                                const LogicVRegister& src1,
   1133                                const LogicVRegister& src2) {
   1134   VectorFormat vform_src = VectorFormatHalfWidth(vform);
   1135   dst.ClearForWrite(vform);
   1136   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1137     dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
   1138                                          src2.Uint(vform_src, i)));
   1139   }
   1140   return dst;
   1141 }
   1142 
   1143 
   1144 LogicVRegister Simulator::pmull2(VectorFormat vform,
   1145                                 LogicVRegister dst,
   1146                                 const LogicVRegister& src1,
   1147                                 const LogicVRegister& src2) {
   1148   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
   1149   dst.ClearForWrite(vform);
   1150   int lane_count = LaneCountFromFormat(vform);
   1151   for (int i = 0; i < lane_count; i++) {
   1152     dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
   1153                                          src2.Uint(vform_src, lane_count + i)));
   1154   }
   1155   return dst;
   1156 }
   1157 
   1158 
   1159 LogicVRegister Simulator::sub(VectorFormat vform,
   1160                               LogicVRegister dst,
   1161                               const LogicVRegister& src1,
   1162                               const LogicVRegister& src2) {
   1163   dst.ClearForWrite(vform);
   1164   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1165     // Test for unsigned saturation.
   1166     if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
   1167       dst.SetUnsignedSat(i, false);
   1168     }
   1169 
   1170     // Test for signed saturation.
   1171     int64_t sa = src1.IntLeftJustified(vform, i);
   1172     int64_t sb = src2.IntLeftJustified(vform, i);
   1173     int64_t sr = sa - sb;
   1174     // If the signs of the operands are different, and the sign of the first
   1175     // operand doesn't match the result, there was an overflow.
   1176     if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
   1177       dst.SetSignedSat(i, sr < 0);
   1178     }
   1179 
   1180     dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
   1181   }
   1182   return dst;
   1183 }
   1184 
   1185 
   1186 LogicVRegister Simulator::and_(VectorFormat vform,
   1187                                LogicVRegister dst,
   1188                                const LogicVRegister& src1,
   1189                                const LogicVRegister& src2) {
   1190   dst.ClearForWrite(vform);
   1191   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1192     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
   1193   }
   1194   return dst;
   1195 }
   1196 
   1197 
   1198 LogicVRegister Simulator::orr(VectorFormat vform,
   1199                               LogicVRegister dst,
   1200                               const LogicVRegister& src1,
   1201                               const LogicVRegister& src2) {
   1202   dst.ClearForWrite(vform);
   1203   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1204     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
   1205   }
   1206   return dst;
   1207 }
   1208 
   1209 
   1210 LogicVRegister Simulator::orn(VectorFormat vform,
   1211                               LogicVRegister dst,
   1212                               const LogicVRegister& src1,
   1213                               const LogicVRegister& src2) {
   1214   dst.ClearForWrite(vform);
   1215   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1216     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
   1217   }
   1218   return dst;
   1219 }
   1220 
   1221 
   1222 LogicVRegister Simulator::eor(VectorFormat vform,
   1223                               LogicVRegister dst,
   1224                               const LogicVRegister& src1,
   1225                               const LogicVRegister& src2) {
   1226   dst.ClearForWrite(vform);
   1227   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1228     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
   1229   }
   1230   return dst;
   1231 }
   1232 
   1233 
   1234 LogicVRegister Simulator::bic(VectorFormat vform,
   1235                               LogicVRegister dst,
   1236                               const LogicVRegister& src1,
   1237                               const LogicVRegister& src2) {
   1238   dst.ClearForWrite(vform);
   1239   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1240     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
   1241   }
   1242   return dst;
   1243 }
   1244 
   1245 
   1246 LogicVRegister Simulator::bic(VectorFormat vform,
   1247                               LogicVRegister dst,
   1248                               const LogicVRegister& src,
   1249                               uint64_t imm) {
   1250   uint64_t result[16];
   1251   int laneCount = LaneCountFromFormat(vform);
   1252   for (int i = 0; i < laneCount; ++i) {
   1253     result[i] = src.Uint(vform, i) & ~imm;
   1254   }
   1255   dst.ClearForWrite(vform);
   1256   for (int i = 0; i < laneCount; ++i) {
   1257     dst.SetUint(vform, i, result[i]);
   1258   }
   1259   return dst;
   1260 }
   1261 
   1262 
   1263 LogicVRegister Simulator::bif(VectorFormat vform,
   1264                               LogicVRegister dst,
   1265                               const LogicVRegister& src1,
   1266                               const LogicVRegister& src2) {
   1267   dst.ClearForWrite(vform);
   1268   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1269     uint64_t operand1 = dst.Uint(vform, i);
   1270     uint64_t operand2 = ~src2.Uint(vform, i);
   1271     uint64_t operand3 = src1.Uint(vform, i);
   1272     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1273     dst.SetUint(vform, i, result);
   1274   }
   1275   return dst;
   1276 }
   1277 
   1278 
   1279 LogicVRegister Simulator::bit(VectorFormat vform,
   1280                               LogicVRegister dst,
   1281                               const LogicVRegister& src1,
   1282                               const LogicVRegister& src2) {
   1283   dst.ClearForWrite(vform);
   1284   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1285     uint64_t operand1 = dst.Uint(vform, i);
   1286     uint64_t operand2 = src2.Uint(vform, i);
   1287     uint64_t operand3 = src1.Uint(vform, i);
   1288     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1289     dst.SetUint(vform, i, result);
   1290   }
   1291   return dst;
   1292 }
   1293 
   1294 
   1295 LogicVRegister Simulator::bsl(VectorFormat vform,
   1296                               LogicVRegister dst,
   1297                               const LogicVRegister& src1,
   1298                               const LogicVRegister& src2) {
   1299   dst.ClearForWrite(vform);
   1300   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1301     uint64_t operand1 = src2.Uint(vform, i);
   1302     uint64_t operand2 = dst.Uint(vform, i);
   1303     uint64_t operand3 = src1.Uint(vform, i);
   1304     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1305     dst.SetUint(vform, i, result);
   1306   }
   1307   return dst;
   1308 }
   1309 
   1310 
   1311 LogicVRegister Simulator::sminmax(VectorFormat vform,
   1312                                   LogicVRegister dst,
   1313                                   const LogicVRegister& src1,
   1314                                   const LogicVRegister& src2,
   1315                                   bool max) {
   1316   dst.ClearForWrite(vform);
   1317   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1318     int64_t src1_val = src1.Int(vform, i);
   1319     int64_t src2_val = src2.Int(vform, i);
   1320     int64_t dst_val;
   1321     if (max == true) {
   1322       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1323     } else {
   1324       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1325     }
   1326     dst.SetInt(vform, i, dst_val);
   1327   }
   1328   return dst;
   1329 }
   1330 
   1331 
   1332 LogicVRegister Simulator::smax(VectorFormat vform,
   1333                                LogicVRegister dst,
   1334                                const LogicVRegister& src1,
   1335                                const LogicVRegister& src2) {
   1336   return sminmax(vform, dst, src1, src2, true);
   1337 }
   1338 
   1339 
   1340 LogicVRegister Simulator::smin(VectorFormat vform,
   1341                                LogicVRegister dst,
   1342                                const LogicVRegister& src1,
   1343                                const LogicVRegister& src2) {
   1344   return sminmax(vform, dst, src1, src2, false);
   1345 }
   1346 
   1347 
   1348 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
   1349                                    LogicVRegister dst,
   1350                                    int dst_index,
   1351                                    const LogicVRegister& src,
   1352                                    bool max) {
   1353   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
   1354     int64_t src1_val = src.Int(vform, i);
   1355     int64_t src2_val = src.Int(vform, i + 1);
   1356     int64_t dst_val;
   1357     if (max == true) {
   1358       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1359     } else {
   1360       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1361     }
   1362     dst.SetInt(vform, dst_index + (i >> 1), dst_val);
   1363   }
   1364   return dst;
   1365 }
   1366 
   1367 
   1368 LogicVRegister Simulator::smaxp(VectorFormat vform,
   1369                                 LogicVRegister dst,
   1370                                 const LogicVRegister& src1,
   1371                                 const LogicVRegister& src2) {
   1372   dst.ClearForWrite(vform);
   1373   sminmaxp(vform, dst, 0, src1, true);
   1374   sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
   1375   return dst;
   1376 }
   1377 
   1378 
   1379 LogicVRegister Simulator::sminp(VectorFormat vform,
   1380                                 LogicVRegister dst,
   1381                                 const LogicVRegister& src1,
   1382                                 const LogicVRegister& src2) {
   1383   dst.ClearForWrite(vform);
   1384   sminmaxp(vform, dst, 0, src1, false);
   1385   sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
   1386   return dst;
   1387 }
   1388 
   1389 
   1390 LogicVRegister Simulator::addp(VectorFormat vform,
   1391                                LogicVRegister dst,
   1392                                const LogicVRegister& src) {
   1393   VIXL_ASSERT(vform == kFormatD);
   1394 
   1395   int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
   1396   dst.ClearForWrite(vform);
   1397   dst.SetInt(vform, 0, dst_val);
   1398   return dst;
   1399 }
   1400 
   1401 
   1402 LogicVRegister Simulator::addv(VectorFormat vform,
   1403                                LogicVRegister dst,
   1404                                const LogicVRegister& src) {
   1405   VectorFormat vform_dst
   1406     = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
   1407 
   1408 
   1409   int64_t dst_val = 0;
   1410   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1411     dst_val += src.Int(vform, i);
   1412   }
   1413 
   1414   dst.ClearForWrite(vform_dst);
   1415   dst.SetInt(vform_dst, 0, dst_val);
   1416   return dst;
   1417 }
   1418 
   1419 
   1420 LogicVRegister Simulator::saddlv(VectorFormat vform,
   1421                                  LogicVRegister dst,
   1422                                  const LogicVRegister& src) {
   1423   VectorFormat vform_dst
   1424     = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1425 
   1426   int64_t dst_val = 0;
   1427   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1428     dst_val += src.Int(vform, i);
   1429   }
   1430 
   1431   dst.ClearForWrite(vform_dst);
   1432   dst.SetInt(vform_dst, 0, dst_val);
   1433   return dst;
   1434 }
   1435 
   1436 
   1437 LogicVRegister Simulator::uaddlv(VectorFormat vform,
   1438                                  LogicVRegister dst,
   1439                                  const LogicVRegister& src) {
   1440   VectorFormat vform_dst
   1441     = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1442 
   1443   uint64_t dst_val = 0;
   1444   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1445     dst_val += src.Uint(vform, i);
   1446   }
   1447 
   1448   dst.ClearForWrite(vform_dst);
   1449   dst.SetUint(vform_dst, 0, dst_val);
   1450   return dst;
   1451 }
   1452 
   1453 
   1454 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
   1455                                    LogicVRegister dst,
   1456                                    const LogicVRegister& src,
   1457                                    bool max) {
   1458   dst.ClearForWrite(vform);
   1459   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
   1460   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1461     dst.SetInt(vform, i, 0);
   1462     int64_t src_val = src.Int(vform, i);
   1463     if (max == true) {
   1464       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1465     } else {
   1466       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1467     }
   1468   }
   1469   dst.SetInt(vform, 0, dst_val);
   1470   return dst;
   1471 }
   1472 
   1473 
   1474 LogicVRegister Simulator::smaxv(VectorFormat vform,
   1475                                 LogicVRegister dst,
   1476                                 const LogicVRegister& src) {
   1477   sminmaxv(vform, dst, src, true);
   1478   return dst;
   1479 }
   1480 
   1481 
   1482 LogicVRegister Simulator::sminv(VectorFormat vform,
   1483                                 LogicVRegister dst,
   1484                                 const LogicVRegister& src) {
   1485   sminmaxv(vform, dst, src, false);
   1486   return dst;
   1487 }
   1488 
   1489 
   1490 LogicVRegister Simulator::uminmax(VectorFormat vform,
   1491                                   LogicVRegister dst,
   1492                                   const LogicVRegister& src1,
   1493                                   const LogicVRegister& src2,
   1494                                   bool max) {
   1495   dst.ClearForWrite(vform);
   1496   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1497     uint64_t src1_val = src1.Uint(vform, i);
   1498     uint64_t src2_val = src2.Uint(vform, i);
   1499     uint64_t dst_val;
   1500     if (max == true) {
   1501       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1502     } else {
   1503       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1504     }
   1505     dst.SetUint(vform, i, dst_val);
   1506   }
   1507   return dst;
   1508 }
   1509 
   1510 
   1511 LogicVRegister Simulator::umax(VectorFormat vform,
   1512                                LogicVRegister dst,
   1513                                const LogicVRegister& src1,
   1514                                const LogicVRegister& src2) {
   1515   return uminmax(vform, dst, src1, src2, true);
   1516 }
   1517 
   1518 
   1519 LogicVRegister Simulator::umin(VectorFormat vform,
   1520                                LogicVRegister dst,
   1521                                const LogicVRegister& src1,
   1522                                const LogicVRegister& src2) {
   1523   return uminmax(vform, dst, src1, src2, false);
   1524 }
   1525 
   1526 
   1527 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
   1528                                    LogicVRegister dst,
   1529                                    int dst_index,
   1530                                    const LogicVRegister& src,
   1531                                    bool max) {
   1532   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
   1533     uint64_t src1_val = src.Uint(vform, i);
   1534     uint64_t src2_val = src.Uint(vform, i + 1);
   1535     uint64_t dst_val;
   1536     if (max == true) {
   1537       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1538     } else {
   1539       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1540     }
   1541     dst.SetUint(vform, dst_index + (i >> 1), dst_val);
   1542   }
   1543   return dst;
   1544 }
   1545 
   1546 
   1547 LogicVRegister Simulator::umaxp(VectorFormat vform,
   1548                                 LogicVRegister dst,
   1549                                 const LogicVRegister& src1,
   1550                                 const LogicVRegister& src2) {
   1551   dst.ClearForWrite(vform);
   1552   uminmaxp(vform, dst, 0, src1, true);
   1553   uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
   1554   return dst;
   1555 }
   1556 
   1557 
   1558 LogicVRegister Simulator::uminp(VectorFormat vform,
   1559                                 LogicVRegister dst,
   1560                                 const LogicVRegister& src1,
   1561                                 const LogicVRegister& src2) {
   1562   dst.ClearForWrite(vform);
   1563   uminmaxp(vform, dst, 0, src1, false);
   1564   uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
   1565   return dst;
   1566 }
   1567 
   1568 
   1569 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
   1570                                    LogicVRegister dst,
   1571                                    const LogicVRegister& src,
   1572                                    bool max) {
   1573   dst.ClearForWrite(vform);
   1574   uint64_t dst_val = max ? 0 : UINT64_MAX;
   1575   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1576     dst.SetUint(vform, i, 0);
   1577     uint64_t src_val = src.Uint(vform, i);
   1578     if (max == true) {
   1579       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1580     } else {
   1581       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1582     }
   1583   }
   1584   dst.SetUint(vform, 0, dst_val);
   1585   return dst;
   1586 }
   1587 
   1588 
   1589 LogicVRegister Simulator::umaxv(VectorFormat vform,
   1590                                 LogicVRegister dst,
   1591                                 const LogicVRegister& src) {
   1592   uminmaxv(vform, dst, src, true);
   1593   return dst;
   1594 }
   1595 
   1596 
   1597 LogicVRegister Simulator::uminv(VectorFormat vform,
   1598                                 LogicVRegister dst,
   1599                                 const LogicVRegister& src) {
   1600   uminmaxv(vform, dst, src, false);
   1601   return dst;
   1602 }
   1603 
   1604 
   1605 LogicVRegister Simulator::shl(VectorFormat vform,
   1606                               LogicVRegister dst,
   1607                               const LogicVRegister& src,
   1608                               int shift) {
   1609   VIXL_ASSERT(shift >= 0);
   1610   SimVRegister temp;
   1611   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1612   return ushl(vform, dst, src, shiftreg);
   1613 }
   1614 
   1615 
   1616 LogicVRegister Simulator::sshll(VectorFormat vform,
   1617                                 LogicVRegister dst,
   1618                                 const LogicVRegister& src,
   1619                                 int shift) {
   1620   VIXL_ASSERT(shift >= 0);
   1621   SimVRegister temp1, temp2;
   1622   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1623   LogicVRegister extendedreg = sxtl(vform, temp2, src);
   1624   return sshl(vform, dst, extendedreg, shiftreg);
   1625 }
   1626 
   1627 
   1628 LogicVRegister Simulator::sshll2(VectorFormat vform,
   1629                                  LogicVRegister dst,
   1630                                  const LogicVRegister& src,
   1631                                  int shift) {
   1632   VIXL_ASSERT(shift >= 0);
   1633   SimVRegister temp1, temp2;
   1634   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1635   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
   1636   return sshl(vform, dst, extendedreg, shiftreg);
   1637 }
   1638 
   1639 
   1640 LogicVRegister Simulator::shll(VectorFormat vform,
   1641                                LogicVRegister dst,
   1642                                const LogicVRegister& src) {
   1643   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1644   return sshll(vform, dst, src, shift);
   1645 }
   1646 
   1647 
   1648 LogicVRegister Simulator::shll2(VectorFormat vform,
   1649                                 LogicVRegister dst,
   1650                                 const LogicVRegister& src) {
   1651   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1652   return sshll2(vform, dst, src, shift);
   1653 }
   1654 
   1655 
   1656 LogicVRegister Simulator::ushll(VectorFormat vform,
   1657                                 LogicVRegister dst,
   1658                                 const LogicVRegister& src,
   1659                                 int shift) {
   1660   VIXL_ASSERT(shift >= 0);
   1661   SimVRegister temp1, temp2;
   1662   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1663   LogicVRegister extendedreg = uxtl(vform, temp2, src);
   1664   return ushl(vform, dst, extendedreg, shiftreg);
   1665 }
   1666 
   1667 
   1668 LogicVRegister Simulator::ushll2(VectorFormat vform,
   1669                                  LogicVRegister dst,
   1670                                  const LogicVRegister& src,
   1671                                  int shift) {
   1672   VIXL_ASSERT(shift >= 0);
   1673   SimVRegister temp1, temp2;
   1674   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1675   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
   1676   return ushl(vform, dst, extendedreg, shiftreg);
   1677 }
   1678 
   1679 
   1680 LogicVRegister Simulator::sli(VectorFormat vform,
   1681                               LogicVRegister dst,
   1682                               const LogicVRegister& src,
   1683                               int shift) {
   1684   dst.ClearForWrite(vform);
   1685   int laneCount = LaneCountFromFormat(vform);
   1686   for (int i = 0; i < laneCount; i++) {
   1687     uint64_t src_lane = src.Uint(vform, i);
   1688     uint64_t dst_lane = dst.Uint(vform, i);
   1689     uint64_t shifted = src_lane << shift;
   1690     uint64_t mask = MaxUintFromFormat(vform) << shift;
   1691     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1692   }
   1693   return dst;
   1694 }
   1695 
   1696 
   1697 LogicVRegister Simulator::sqshl(VectorFormat vform,
   1698                                 LogicVRegister dst,
   1699                                 const LogicVRegister& src,
   1700                                 int shift) {
   1701   VIXL_ASSERT(shift >= 0);
   1702   SimVRegister temp;
   1703   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1704   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
   1705 }
   1706 
   1707 
   1708 LogicVRegister Simulator::uqshl(VectorFormat vform,
   1709                                 LogicVRegister dst,
   1710                                 const LogicVRegister& src,
   1711                                 int shift) {
   1712   VIXL_ASSERT(shift >= 0);
   1713   SimVRegister temp;
   1714   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1715   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1716 }
   1717 
   1718 
   1719 LogicVRegister Simulator::sqshlu(VectorFormat vform,
   1720                                  LogicVRegister dst,
   1721                                  const LogicVRegister& src,
   1722                                  int shift) {
   1723   VIXL_ASSERT(shift >= 0);
   1724   SimVRegister temp;
   1725   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1726   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1727 }
   1728 
   1729 
   1730 LogicVRegister Simulator::sri(VectorFormat vform,
   1731                               LogicVRegister dst,
   1732                               const LogicVRegister& src,
   1733                               int shift) {
   1734   dst.ClearForWrite(vform);
   1735   int laneCount = LaneCountFromFormat(vform);
   1736   VIXL_ASSERT((shift > 0) &&
   1737               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
   1738   for (int i = 0; i < laneCount; i++) {
   1739     uint64_t src_lane = src.Uint(vform, i);
   1740     uint64_t dst_lane = dst.Uint(vform, i);
   1741     uint64_t shifted;
   1742     uint64_t mask;
   1743     if (shift == 64) {
   1744       shifted = 0;
   1745       mask = 0;
   1746     } else {
   1747       shifted = src_lane >> shift;
   1748       mask = MaxUintFromFormat(vform) >> shift;
   1749     }
   1750     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1751   }
   1752   return dst;
   1753 }
   1754 
   1755 
   1756 LogicVRegister Simulator::ushr(VectorFormat vform,
   1757                                LogicVRegister dst,
   1758                                const LogicVRegister& src,
   1759                                int shift) {
   1760   VIXL_ASSERT(shift >= 0);
   1761   SimVRegister temp;
   1762   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1763   return ushl(vform, dst, src, shiftreg);
   1764 }
   1765 
   1766 
   1767 LogicVRegister Simulator::sshr(VectorFormat vform,
   1768                                LogicVRegister dst,
   1769                                const LogicVRegister& src,
   1770                                int shift) {
   1771   VIXL_ASSERT(shift >= 0);
   1772   SimVRegister temp;
   1773   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1774   return sshl(vform, dst, src, shiftreg);
   1775 }
   1776 
   1777 
   1778 LogicVRegister Simulator::ssra(VectorFormat vform,
   1779                                LogicVRegister dst,
   1780                                const LogicVRegister& src,
   1781                                int shift) {
   1782   SimVRegister temp;
   1783   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
   1784   return add(vform, dst, dst, shifted_reg);
   1785 }
   1786 
   1787 
   1788 LogicVRegister Simulator::usra(VectorFormat vform,
   1789                                LogicVRegister dst,
   1790                                const LogicVRegister& src,
   1791                                int shift) {
   1792   SimVRegister temp;
   1793   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
   1794   return add(vform, dst, dst, shifted_reg);
   1795 }
   1796 
   1797 
   1798 LogicVRegister Simulator::srsra(VectorFormat vform,
   1799                                 LogicVRegister dst,
   1800                                 const LogicVRegister& src,
   1801                                 int shift) {
   1802   SimVRegister temp;
   1803   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
   1804   return add(vform, dst, dst, shifted_reg);
   1805 }
   1806 
   1807 
   1808 LogicVRegister Simulator::ursra(VectorFormat vform,
   1809                                 LogicVRegister dst,
   1810                                 const LogicVRegister& src,
   1811                                 int shift) {
   1812   SimVRegister temp;
   1813   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
   1814   return add(vform, dst, dst, shifted_reg);
   1815 }
   1816 
   1817 
   1818 LogicVRegister Simulator::cls(VectorFormat vform,
   1819                               LogicVRegister dst,
   1820                               const LogicVRegister& src) {
   1821   uint64_t result[16];
   1822   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   1823   int laneCount = LaneCountFromFormat(vform);
   1824   for (int i = 0; i < laneCount; i++) {
   1825     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
   1826   }
   1827 
   1828   dst.ClearForWrite(vform);
   1829   for (int i = 0; i < laneCount; ++i) {
   1830     dst.SetUint(vform, i, result[i]);
   1831   }
   1832   return dst;
   1833 }
   1834 
   1835 
   1836 LogicVRegister Simulator::clz(VectorFormat vform,
   1837                               LogicVRegister dst,
   1838                               const LogicVRegister& src) {
   1839   uint64_t result[16];
   1840   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   1841   int laneCount = LaneCountFromFormat(vform);
   1842   for (int i = 0; i < laneCount; i++) {
   1843     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
   1844   }
   1845 
   1846   dst.ClearForWrite(vform);
   1847   for (int i = 0; i < laneCount; ++i) {
   1848     dst.SetUint(vform, i, result[i]);
   1849   }
   1850   return dst;
   1851 }
   1852 
   1853 
   1854 LogicVRegister Simulator::cnt(VectorFormat vform,
   1855                               LogicVRegister dst,
   1856                               const LogicVRegister& src) {
   1857   uint64_t result[16];
   1858   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   1859   int laneCount = LaneCountFromFormat(vform);
   1860   for (int i = 0; i < laneCount; i++) {
   1861     uint64_t value = src.Uint(vform, i);
   1862     result[i] = 0;
   1863     for (int j = 0; j < laneSizeInBits; j++) {
   1864       result[i] += (value & 1);
   1865       value >>= 1;
   1866     }
   1867   }
   1868 
   1869   dst.ClearForWrite(vform);
   1870   for (int i = 0; i < laneCount; ++i) {
   1871     dst.SetUint(vform, i, result[i]);
   1872   }
   1873   return dst;
   1874 }
   1875 
   1876 
   1877 LogicVRegister Simulator::sshl(VectorFormat vform,
   1878                                LogicVRegister dst,
   1879                                const LogicVRegister& src1,
   1880                                const LogicVRegister& src2) {
   1881   dst.ClearForWrite(vform);
   1882   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1883     int8_t shift_val = src2.Int(vform, i);
   1884     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
   1885 
   1886     // Set signed saturation state.
   1887     if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
   1888         (lj_src_val != 0)) {
   1889       dst.SetSignedSat(i, lj_src_val >= 0);
   1890     }
   1891 
   1892     // Set unsigned saturation state.
   1893     if (lj_src_val < 0) {
   1894       dst.SetUnsignedSat(i, false);
   1895     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
   1896                (lj_src_val != 0)) {
   1897       dst.SetUnsignedSat(i, true);
   1898     }
   1899 
   1900     int64_t src_val = src1.Int(vform, i);
   1901     if (shift_val > 63) {
   1902       dst.SetInt(vform, i, 0);
   1903     } else if (shift_val < -63) {
   1904       dst.SetRounding(i, src_val < 0);
   1905       dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
   1906     } else {
   1907       if (shift_val < 0) {
   1908         // Set rounding state. Rounding only needed on right shifts.
   1909         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
   1910           dst.SetRounding(i, true);
   1911         }
   1912         src_val >>= -shift_val;
   1913       } else {
   1914         src_val <<= shift_val;
   1915       }
   1916       dst.SetInt(vform, i, src_val);
   1917     }
   1918   }
   1919   return dst;
   1920 }
   1921 
   1922 
   1923 LogicVRegister Simulator::ushl(VectorFormat vform,
   1924                                LogicVRegister dst,
   1925                                const LogicVRegister& src1,
   1926                                const LogicVRegister& src2) {
   1927   dst.ClearForWrite(vform);
   1928   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1929     int8_t shift_val = src2.Int(vform, i);
   1930     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
   1931 
   1932     // Set saturation state.
   1933     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
   1934       dst.SetUnsignedSat(i, true);
   1935     }
   1936 
   1937     uint64_t src_val = src1.Uint(vform, i);
   1938     if ((shift_val > 63) || (shift_val < -64)) {
   1939       dst.SetUint(vform, i, 0);
   1940     } else {
   1941       if (shift_val < 0) {
   1942         // Set rounding state. Rounding only needed on right shifts.
   1943         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
   1944           dst.SetRounding(i, true);
   1945         }
   1946 
   1947         if (shift_val == -64) {
   1948           src_val = 0;
   1949         } else {
   1950           src_val >>= -shift_val;
   1951         }
   1952       } else {
   1953         src_val <<= shift_val;
   1954       }
   1955       dst.SetUint(vform, i, src_val);
   1956     }
   1957   }
   1958   return dst;
   1959 }
   1960 
   1961 
   1962 LogicVRegister Simulator::neg(VectorFormat vform,
   1963                               LogicVRegister dst,
   1964                               const LogicVRegister& src) {
   1965   dst.ClearForWrite(vform);
   1966   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1967     // Test for signed saturation.
   1968     int64_t sa = src.Int(vform, i);
   1969     if (sa == MinIntFromFormat(vform)) {
   1970       dst.SetSignedSat(i, true);
   1971     }
   1972     dst.SetInt(vform, i, -sa);
   1973   }
   1974   return dst;
   1975 }
   1976 
   1977 
   1978 LogicVRegister Simulator::suqadd(VectorFormat vform,
   1979                                  LogicVRegister dst,
   1980                                  const LogicVRegister& src) {
   1981   dst.ClearForWrite(vform);
   1982   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1983     int64_t  sa = dst.IntLeftJustified(vform, i);
   1984     uint64_t ub = src.UintLeftJustified(vform, i);
   1985     int64_t  sr = sa + ub;
   1986 
   1987     if (sr < sa) {  // Test for signed positive saturation.
   1988       dst.SetInt(vform, i, MaxIntFromFormat(vform));
   1989     } else {
   1990       dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
   1991     }
   1992   }
   1993   return dst;
   1994 }
   1995 
   1996 
   1997 LogicVRegister Simulator::usqadd(VectorFormat vform,
   1998                                  LogicVRegister dst,
   1999                                  const LogicVRegister& src) {
   2000   dst.ClearForWrite(vform);
   2001   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2002     uint64_t  ua = dst.UintLeftJustified(vform, i);
   2003     int64_t   sb = src.IntLeftJustified(vform, i);
   2004     uint64_t  ur = ua + sb;
   2005 
   2006     if ((sb > 0) && (ur <= ua)) {
   2007       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
   2008     } else if ((sb < 0) && (ur >= ua)) {
   2009       dst.SetUint(vform, i, 0);                         // Negative saturation.
   2010     } else {
   2011       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
   2012     }
   2013   }
   2014   return dst;
   2015 }
   2016 
   2017 
   2018 LogicVRegister Simulator::abs(VectorFormat vform,
   2019                               LogicVRegister dst,
   2020                               const LogicVRegister& src) {
   2021   dst.ClearForWrite(vform);
   2022   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2023     // Test for signed saturation.
   2024     int64_t sa = src.Int(vform, i);
   2025     if (sa == MinIntFromFormat(vform)) {
   2026       dst.SetSignedSat(i, true);
   2027     }
   2028     if (sa < 0) {
   2029       dst.SetInt(vform, i, -sa);
   2030     } else {
   2031       dst.SetInt(vform, i, sa);
   2032     }
   2033   }
   2034   return dst;
   2035 }
   2036 
   2037 
   2038 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
   2039                                         LogicVRegister dst,
   2040                                         bool dstIsSigned,
   2041                                         const LogicVRegister& src,
   2042                                         bool srcIsSigned) {
   2043   bool upperhalf = false;
   2044   VectorFormat srcform = kFormatUndefined;
   2045   int64_t  ssrc[8];
   2046   uint64_t usrc[8];
   2047 
   2048   switch (dstform) {
   2049     case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
   2050     case kFormat16B: upperhalf = true;  srcform = kFormat8H; break;
   2051     case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
   2052     case kFormat8H : upperhalf = true;  srcform = kFormat4S; break;
   2053     case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
   2054     case kFormat4S : upperhalf = true;  srcform = kFormat2D; break;
   2055     case kFormatB  : upperhalf = false; srcform = kFormatH;  break;
   2056     case kFormatH  : upperhalf = false; srcform = kFormatS;  break;
   2057     case kFormatS  : upperhalf = false; srcform = kFormatD;  break;
   2058     default:VIXL_UNIMPLEMENTED();
   2059   }
   2060 
   2061   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   2062     ssrc[i] = src.Int(srcform, i);
   2063     usrc[i] = src.Uint(srcform, i);
   2064   }
   2065 
   2066   int offset;
   2067   if (upperhalf) {
   2068     offset = LaneCountFromFormat(dstform) / 2;
   2069   } else {
   2070     offset = 0;
   2071     dst.ClearForWrite(dstform);
   2072   }
   2073 
   2074   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   2075     // Test for signed saturation
   2076     if (ssrc[i] > MaxIntFromFormat(dstform)) {
   2077       dst.SetSignedSat(offset + i, true);
   2078     } else if (ssrc[i] < MinIntFromFormat(dstform)) {
   2079       dst.SetSignedSat(offset + i, false);
   2080     }
   2081 
   2082     // Test for unsigned saturation
   2083     if (srcIsSigned) {
   2084       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
   2085         dst.SetUnsignedSat(offset + i, true);
   2086       } else if (ssrc[i] < 0) {
   2087         dst.SetUnsignedSat(offset + i, false);
   2088       }
   2089     } else {
   2090       if (usrc[i] > MaxUintFromFormat(dstform)) {
   2091         dst.SetUnsignedSat(offset + i, true);
   2092       }
   2093     }
   2094 
   2095     int64_t result;
   2096     if (srcIsSigned) {
   2097       result = ssrc[i] & MaxUintFromFormat(dstform);
   2098     } else {
   2099       result = usrc[i] & MaxUintFromFormat(dstform);
   2100     }
   2101 
   2102     if (dstIsSigned) {
   2103       dst.SetInt(dstform, offset + i, result);
   2104     } else {
   2105       dst.SetUint(dstform, offset + i, result);
   2106     }
   2107   }
   2108   return dst;
   2109 }
   2110 
   2111 
   2112 LogicVRegister Simulator::xtn(VectorFormat vform,
   2113                               LogicVRegister dst,
   2114                               const LogicVRegister& src) {
   2115   return extractnarrow(vform, dst, true, src, true);
   2116 }
   2117 
   2118 
   2119 LogicVRegister Simulator::sqxtn(VectorFormat vform,
   2120                                 LogicVRegister dst,
   2121                                 const LogicVRegister& src) {
   2122   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
   2123 }
   2124 
   2125 
   2126 LogicVRegister Simulator::sqxtun(VectorFormat vform,
   2127                                  LogicVRegister dst,
   2128                                  const LogicVRegister& src) {
   2129   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
   2130 }
   2131 
   2132 
   2133 LogicVRegister Simulator::uqxtn(VectorFormat vform,
   2134                                 LogicVRegister dst,
   2135                                 const LogicVRegister& src) {
   2136   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
   2137 }
   2138 
   2139 
   2140 LogicVRegister Simulator::absdiff(VectorFormat vform,
   2141                                   LogicVRegister dst,
   2142                                   const LogicVRegister& src1,
   2143                                   const LogicVRegister& src2,
   2144                                   bool issigned) {
   2145   dst.ClearForWrite(vform);
   2146   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2147     if (issigned) {
   2148       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
   2149       sr = sr > 0 ? sr : -sr;
   2150       dst.SetInt(vform, i, sr);
   2151     } else {
   2152       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
   2153       sr = sr > 0 ? sr : -sr;
   2154       dst.SetUint(vform, i, sr);
   2155     }
   2156   }
   2157   return dst;
   2158 }
   2159 
   2160 
   2161 LogicVRegister Simulator::saba(VectorFormat vform,
   2162                                LogicVRegister dst,
   2163                                const LogicVRegister& src1,
   2164                                const LogicVRegister& src2) {
   2165   SimVRegister temp;
   2166   dst.ClearForWrite(vform);
   2167   absdiff(vform, temp, src1, src2, true);
   2168   add(vform, dst, dst, temp);
   2169   return dst;
   2170 }
   2171 
   2172 
   2173 LogicVRegister Simulator::uaba(VectorFormat vform,
   2174                                LogicVRegister dst,
   2175                                const LogicVRegister& src1,
   2176                                const LogicVRegister& src2) {
   2177   SimVRegister temp;
   2178   dst.ClearForWrite(vform);
   2179   absdiff(vform, temp, src1, src2, false);
   2180   add(vform, dst, dst, temp);
   2181   return dst;
   2182 }
   2183 
   2184 
   2185 LogicVRegister Simulator::not_(VectorFormat vform,
   2186                                LogicVRegister dst,
   2187                                const LogicVRegister& src) {
   2188   dst.ClearForWrite(vform);
   2189   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2190     dst.SetUint(vform, i, ~src.Uint(vform, i));
   2191   }
   2192   return dst;
   2193 }
   2194 
   2195 
   2196 LogicVRegister Simulator::rbit(VectorFormat vform,
   2197                                LogicVRegister dst,
   2198                                const LogicVRegister& src) {
   2199   uint64_t result[16];
   2200   int laneCount = LaneCountFromFormat(vform);
   2201   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   2202   uint64_t reversed_value;
   2203   uint64_t value;
   2204   for (int i = 0; i < laneCount; i++) {
   2205     value = src.Uint(vform, i);
   2206     reversed_value = 0;
   2207     for (int j = 0; j < laneSizeInBits; j++) {
   2208       reversed_value = (reversed_value << 1) | (value & 1);
   2209       value >>= 1;
   2210     }
   2211     result[i] = reversed_value;
   2212   }
   2213 
   2214   dst.ClearForWrite(vform);
   2215   for (int i = 0; i < laneCount; ++i) {
   2216     dst.SetUint(vform, i, result[i]);
   2217   }
   2218   return dst;
   2219 }
   2220 
   2221 
   2222 LogicVRegister Simulator::rev(VectorFormat vform,
   2223                               LogicVRegister dst,
   2224                               const LogicVRegister& src,
   2225                               int revSize) {
   2226   uint64_t result[16];
   2227   int laneCount = LaneCountFromFormat(vform);
   2228   int laneSize = LaneSizeInBytesFromFormat(vform);
   2229   int lanesPerLoop =  revSize / laneSize;
   2230   for (int i = 0; i < laneCount; i += lanesPerLoop) {
   2231     for (int j = 0; j < lanesPerLoop; j++) {
   2232       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
   2233     }
   2234   }
   2235   dst.ClearForWrite(vform);
   2236   for (int i = 0; i < laneCount; ++i) {
   2237     dst.SetUint(vform, i, result[i]);
   2238   }
   2239   return dst;
   2240 }
   2241 
   2242 
   2243 LogicVRegister Simulator::rev16(VectorFormat vform,
   2244                                 LogicVRegister dst,
   2245                                 const LogicVRegister& src) {
   2246   return rev(vform, dst, src, 2);
   2247 }
   2248 
   2249 
   2250 LogicVRegister Simulator::rev32(VectorFormat vform,
   2251                                 LogicVRegister dst,
   2252                                 const LogicVRegister& src) {
   2253   return rev(vform, dst, src, 4);
   2254 }
   2255 
   2256 
   2257 LogicVRegister Simulator::rev64(VectorFormat vform,
   2258                                 LogicVRegister dst,
   2259                                 const LogicVRegister& src) {
   2260   return rev(vform, dst, src, 8);
   2261 }
   2262 
   2263 
   2264 LogicVRegister Simulator::addlp(VectorFormat vform,
   2265                                  LogicVRegister dst,
   2266                                  const LogicVRegister& src,
   2267                                  bool is_signed,
   2268                                  bool do_accumulate) {
   2269   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
   2270 
   2271   int64_t  sr[16];
   2272   uint64_t ur[16];
   2273 
   2274   int laneCount = LaneCountFromFormat(vform);
   2275   for (int i = 0; i < laneCount; ++i) {
   2276     if (is_signed) {
   2277       sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
   2278     } else {
   2279       ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
   2280     }
   2281   }
   2282 
   2283   dst.ClearForWrite(vform);
   2284   for (int i = 0; i < laneCount; ++i) {
   2285     if (do_accumulate) {
   2286       if (is_signed) {
   2287         dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
   2288       } else {
   2289         dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
   2290       }
   2291     } else {
   2292       if (is_signed) {
   2293         dst.SetInt(vform, i, sr[i]);
   2294       } else {
   2295         dst.SetUint(vform, i, ur[i]);
   2296       }
   2297     }
   2298   }
   2299 
   2300   return dst;
   2301 }
   2302 
   2303 
   2304 LogicVRegister Simulator::saddlp(VectorFormat vform,
   2305                                  LogicVRegister dst,
   2306                                  const LogicVRegister& src) {
   2307   return addlp(vform, dst, src, true, false);
   2308 }
   2309 
   2310 
   2311 LogicVRegister Simulator::uaddlp(VectorFormat vform,
   2312                                  LogicVRegister dst,
   2313                                  const LogicVRegister& src) {
   2314   return addlp(vform, dst, src, false, false);
   2315 }
   2316 
   2317 
   2318 LogicVRegister Simulator::sadalp(VectorFormat vform,
   2319                                  LogicVRegister dst,
   2320                                  const LogicVRegister& src) {
   2321   return addlp(vform, dst, src, true, true);
   2322 }
   2323 
   2324 
   2325 LogicVRegister Simulator::uadalp(VectorFormat vform,
   2326                                  LogicVRegister dst,
   2327                                  const LogicVRegister& src) {
   2328   return addlp(vform, dst, src, false, true);
   2329 }
   2330 
   2331 
   2332 LogicVRegister Simulator::ext(VectorFormat vform,
   2333                               LogicVRegister dst,
   2334                               const LogicVRegister& src1,
   2335                               const LogicVRegister& src2,
   2336                               int index) {
   2337   uint8_t result[16];
   2338   int laneCount = LaneCountFromFormat(vform);
   2339   for (int i = 0; i < laneCount - index; ++i) {
   2340     result[i] = src1.Uint(vform, i + index);
   2341   }
   2342   for (int i = 0; i < index; ++i) {
   2343     result[laneCount - index + i] = src2.Uint(vform, i);
   2344   }
   2345   dst.ClearForWrite(vform);
   2346   for (int i = 0; i < laneCount; ++i) {
   2347     dst.SetUint(vform, i, result[i]);
   2348   }
   2349   return dst;
   2350 }
   2351 
   2352 
   2353 LogicVRegister Simulator::dup_element(VectorFormat vform,
   2354                                       LogicVRegister dst,
   2355                                       const LogicVRegister& src,
   2356                                       int src_index) {
   2357   int laneCount = LaneCountFromFormat(vform);
   2358   uint64_t value = src.Uint(vform, src_index);
   2359   dst.ClearForWrite(vform);
   2360   for (int i = 0; i < laneCount; ++i) {
   2361     dst.SetUint(vform, i, value);
   2362   }
   2363   return dst;
   2364 }
   2365 
   2366 
   2367 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
   2368                                         LogicVRegister dst,
   2369                                         uint64_t imm) {
   2370   int laneCount = LaneCountFromFormat(vform);
   2371   uint64_t value = imm & MaxUintFromFormat(vform);
   2372   dst.ClearForWrite(vform);
   2373   for (int i = 0; i < laneCount; ++i) {
   2374     dst.SetUint(vform, i, value);
   2375   }
   2376   return dst;
   2377 }
   2378 
   2379 
   2380 LogicVRegister Simulator::ins_element(VectorFormat vform,
   2381                                       LogicVRegister dst,
   2382                                       int dst_index,
   2383                                       const LogicVRegister& src,
   2384                                       int src_index) {
   2385   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
   2386   return dst;
   2387 }
   2388 
   2389 
   2390 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
   2391                                         LogicVRegister dst,
   2392                                         int dst_index,
   2393                                         uint64_t imm) {
   2394   uint64_t value = imm & MaxUintFromFormat(vform);
   2395   dst.SetUint(vform, dst_index, value);
   2396   return dst;
   2397 }
   2398 
   2399 
   2400 LogicVRegister Simulator::movi(VectorFormat vform,
   2401                                LogicVRegister dst,
   2402                                uint64_t imm) {
   2403   int laneCount = LaneCountFromFormat(vform);
   2404   dst.ClearForWrite(vform);
   2405   for (int i = 0; i < laneCount; ++i) {
   2406     dst.SetUint(vform, i, imm);
   2407   }
   2408   return dst;
   2409 }
   2410 
   2411 
   2412 LogicVRegister Simulator::mvni(VectorFormat vform,
   2413                                LogicVRegister dst,
   2414                                uint64_t imm) {
   2415   int laneCount = LaneCountFromFormat(vform);
   2416   dst.ClearForWrite(vform);
   2417   for (int i = 0; i < laneCount; ++i) {
   2418     dst.SetUint(vform, i, ~imm);
   2419   }
   2420   return dst;
   2421 }
   2422 
   2423 
   2424 LogicVRegister Simulator::orr(VectorFormat vform,
   2425                               LogicVRegister dst,
   2426                               const LogicVRegister& src,
   2427                               uint64_t imm) {
   2428   uint64_t result[16];
   2429   int laneCount = LaneCountFromFormat(vform);
   2430   for (int i = 0; i < laneCount; ++i) {
   2431     result[i] = src.Uint(vform, i) | imm;
   2432   }
   2433   dst.ClearForWrite(vform);
   2434   for (int i = 0; i < laneCount; ++i) {
   2435     dst.SetUint(vform, i, result[i]);
   2436   }
   2437   return dst;
   2438 }
   2439 
   2440 
   2441 LogicVRegister Simulator::uxtl(VectorFormat vform,
   2442                                LogicVRegister dst,
   2443                                const LogicVRegister& src) {
   2444   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2445 
   2446   dst.ClearForWrite(vform);
   2447   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2448     dst.SetUint(vform, i, src.Uint(vform_half, i));
   2449   }
   2450   return dst;
   2451 }
   2452 
   2453 
   2454 LogicVRegister Simulator::sxtl(VectorFormat vform,
   2455                                LogicVRegister dst,
   2456                                const LogicVRegister& src) {
   2457   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2458 
   2459   dst.ClearForWrite(vform);
   2460   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2461     dst.SetInt(vform, i, src.Int(vform_half, i));
   2462   }
   2463   return dst;
   2464 }
   2465 
   2466 
   2467 LogicVRegister Simulator::uxtl2(VectorFormat vform,
   2468                                 LogicVRegister dst,
   2469                                 const LogicVRegister& src) {
   2470   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2471   int lane_count = LaneCountFromFormat(vform);
   2472 
   2473   dst.ClearForWrite(vform);
   2474   for (int i = 0; i < lane_count; i++) {
   2475     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
   2476   }
   2477   return dst;
   2478 }
   2479 
   2480 
   2481 LogicVRegister Simulator::sxtl2(VectorFormat vform,
   2482                                 LogicVRegister dst,
   2483                                 const LogicVRegister& src) {
   2484   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2485   int lane_count = LaneCountFromFormat(vform);
   2486 
   2487   dst.ClearForWrite(vform);
   2488   for (int i = 0; i < lane_count; i++) {
   2489     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
   2490   }
   2491   return dst;
   2492 }
   2493 
   2494 
   2495 LogicVRegister Simulator::shrn(VectorFormat vform,
   2496                                LogicVRegister dst,
   2497                                const LogicVRegister& src,
   2498                                int shift) {
   2499   SimVRegister temp;
   2500   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
   2501   VectorFormat vform_dst = vform;
   2502   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
   2503   return extractnarrow(vform_dst, dst, false, shifted_src, false);
   2504 }
   2505 
   2506 
   2507 LogicVRegister Simulator::shrn2(VectorFormat vform,
   2508                                 LogicVRegister dst,
   2509                                 const LogicVRegister& src,
   2510                                 int shift) {
   2511   SimVRegister temp;
   2512   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2513   VectorFormat vformdst = vform;
   2514   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
   2515   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2516 }
   2517 
   2518 
   2519 LogicVRegister Simulator::rshrn(VectorFormat vform,
   2520                                 LogicVRegister dst,
   2521                                 const LogicVRegister& src,
   2522                                 int shift) {
   2523   SimVRegister temp;
   2524   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2525   VectorFormat vformdst = vform;
   2526   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2527   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2528 }
   2529 
   2530 
   2531 LogicVRegister Simulator::rshrn2(VectorFormat vform,
   2532                                  LogicVRegister dst,
   2533                                  const LogicVRegister& src,
   2534                                  int shift) {
   2535   SimVRegister temp;
   2536   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2537   VectorFormat vformdst = vform;
   2538   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2539   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2540 }
   2541 
   2542 
   2543 LogicVRegister Simulator::tbl(VectorFormat vform,
   2544                               LogicVRegister dst,
   2545                               const LogicVRegister& tab,
   2546                               const LogicVRegister& ind) {
   2547     movi(vform, dst, 0);
   2548     return tbx(vform, dst, tab, ind);
   2549 }
   2550 
   2551 
   2552 LogicVRegister Simulator::tbl(VectorFormat vform,
   2553                               LogicVRegister dst,
   2554                               const LogicVRegister& tab,
   2555                               const LogicVRegister& tab2,
   2556                               const LogicVRegister& ind) {
   2557     movi(vform, dst, 0);
   2558     return tbx(vform, dst, tab, tab2, ind);
   2559 }
   2560 
   2561 
   2562 LogicVRegister Simulator::tbl(VectorFormat vform,
   2563                               LogicVRegister dst,
   2564                               const LogicVRegister& tab,
   2565                               const LogicVRegister& tab2,
   2566                               const LogicVRegister& tab3,
   2567                               const LogicVRegister& ind) {
   2568     movi(vform, dst, 0);
   2569     return tbx(vform, dst, tab, tab2, tab3, ind);
   2570 }
   2571 
   2572 
   2573 LogicVRegister Simulator::tbl(VectorFormat vform,
   2574                               LogicVRegister dst,
   2575                               const LogicVRegister& tab,
   2576                               const LogicVRegister& tab2,
   2577                               const LogicVRegister& tab3,
   2578                               const LogicVRegister& tab4,
   2579                               const LogicVRegister& ind) {
   2580     movi(vform, dst, 0);
   2581     return tbx(vform, dst, tab, tab2, tab3, tab4, ind);
   2582 }
   2583 
   2584 
   2585 LogicVRegister Simulator::tbx(VectorFormat vform,
   2586                               LogicVRegister dst,
   2587                               const LogicVRegister& tab,
   2588                               const LogicVRegister& ind) {
   2589   dst.ClearForWrite(vform);
   2590   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2591     uint64_t j = ind.Uint(vform, i);
   2592     switch (j >> 4) {
   2593       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2594     }
   2595   }
   2596   return dst;
   2597 }
   2598 
   2599 
   2600 LogicVRegister Simulator::tbx(VectorFormat vform,
   2601                               LogicVRegister dst,
   2602                               const LogicVRegister& tab,
   2603                               const LogicVRegister& tab2,
   2604                               const LogicVRegister& ind) {
   2605   dst.ClearForWrite(vform);
   2606   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2607     uint64_t j = ind.Uint(vform, i);
   2608     switch (j >> 4) {
   2609       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2610       case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
   2611     }
   2612   }
   2613   return dst;
   2614 }
   2615 
   2616 
   2617 LogicVRegister Simulator::tbx(VectorFormat vform,
   2618                               LogicVRegister dst,
   2619                               const LogicVRegister& tab,
   2620                               const LogicVRegister& tab2,
   2621                               const LogicVRegister& tab3,
   2622                               const LogicVRegister& ind) {
   2623   dst.ClearForWrite(vform);
   2624   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2625     uint64_t j = ind.Uint(vform, i);
   2626     switch (j >> 4) {
   2627       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2628       case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
   2629       case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
   2630     }
   2631   }
   2632   return dst;
   2633 }
   2634 
   2635 
   2636 LogicVRegister Simulator::tbx(VectorFormat vform,
   2637                               LogicVRegister dst,
   2638                               const LogicVRegister& tab,
   2639                               const LogicVRegister& tab2,
   2640                               const LogicVRegister& tab3,
   2641                               const LogicVRegister& tab4,
   2642                               const LogicVRegister& ind) {
   2643   dst.ClearForWrite(vform);
   2644   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2645     uint64_t j = ind.Uint(vform, i);
   2646     switch (j >> 4) {
   2647       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2648       case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
   2649       case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
   2650       case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
   2651     }
   2652   }
   2653   return dst;
   2654 }
   2655 
   2656 
   2657 LogicVRegister Simulator::uqshrn(VectorFormat vform,
   2658                                  LogicVRegister dst,
   2659                                  const LogicVRegister& src,
   2660                                  int shift) {
   2661   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2662 }
   2663 
   2664 
   2665 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
   2666                                   LogicVRegister dst,
   2667                                   const LogicVRegister& src,
   2668                                   int shift) {
   2669   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2670 }
   2671 
   2672 
   2673 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
   2674                                   LogicVRegister dst,
   2675                                   const LogicVRegister& src,
   2676                                   int shift) {
   2677   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2678 }
   2679 
   2680 
   2681 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
   2682                                    LogicVRegister dst,
   2683                                    const LogicVRegister& src,
   2684                                    int shift) {
   2685   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2686 }
   2687 
   2688 
   2689 LogicVRegister Simulator::sqshrn(VectorFormat vform,
   2690                                  LogicVRegister dst,
   2691                                  const LogicVRegister& src,
   2692                                  int shift) {
   2693   SimVRegister temp;
   2694   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2695   VectorFormat vformdst = vform;
   2696   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2697   return sqxtn(vformdst, dst, shifted_src);
   2698 }
   2699 
   2700 
   2701 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
   2702                                   LogicVRegister dst,
   2703                                   const LogicVRegister& src,
   2704                                   int shift) {
   2705   SimVRegister temp;
   2706   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2707   VectorFormat vformdst = vform;
   2708   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2709   return sqxtn(vformdst, dst, shifted_src);
   2710 }
   2711 
   2712 
   2713 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
   2714                                   LogicVRegister dst,
   2715                                   const LogicVRegister& src,
   2716                                   int shift) {
   2717   SimVRegister temp;
   2718   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2719   VectorFormat vformdst = vform;
   2720   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2721   return sqxtn(vformdst, dst, shifted_src);
   2722 }
   2723 
   2724 
   2725 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
   2726                                    LogicVRegister dst,
   2727                                    const LogicVRegister& src,
   2728                                    int shift) {
   2729   SimVRegister temp;
   2730   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2731   VectorFormat vformdst = vform;
   2732   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2733   return sqxtn(vformdst, dst, shifted_src);
   2734 }
   2735 
   2736 
   2737 LogicVRegister Simulator::sqshrun(VectorFormat vform,
   2738                                   LogicVRegister dst,
   2739                                   const LogicVRegister& src,
   2740                                   int shift) {
   2741   SimVRegister temp;
   2742   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2743   VectorFormat vformdst = vform;
   2744   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2745   return sqxtun(vformdst, dst, shifted_src);
   2746 }
   2747 
   2748 
   2749 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
   2750                                    LogicVRegister dst,
   2751                                    const LogicVRegister& src,
   2752                                    int shift) {
   2753   SimVRegister temp;
   2754   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2755   VectorFormat vformdst = vform;
   2756   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2757   return sqxtun(vformdst, dst, shifted_src);
   2758 }
   2759 
   2760 
   2761 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
   2762                                    LogicVRegister dst,
   2763                                    const LogicVRegister& src,
   2764                                    int shift) {
   2765   SimVRegister temp;
   2766   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2767   VectorFormat vformdst = vform;
   2768   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2769   return sqxtun(vformdst, dst, shifted_src);
   2770 }
   2771 
   2772 
   2773 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
   2774                                     LogicVRegister dst,
   2775                                     const LogicVRegister& src,
   2776                                     int shift) {
   2777   SimVRegister temp;
   2778   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2779   VectorFormat vformdst = vform;
   2780   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2781   return sqxtun(vformdst, dst, shifted_src);
   2782 }
   2783 
   2784 
   2785 LogicVRegister Simulator::uaddl(VectorFormat vform,
   2786                                 LogicVRegister dst,
   2787                                 const LogicVRegister& src1,
   2788                                 const LogicVRegister& src2) {
   2789   SimVRegister temp1, temp2;
   2790   uxtl(vform, temp1, src1);
   2791   uxtl(vform, temp2, src2);
   2792   add(vform, dst, temp1, temp2);
   2793   return dst;
   2794 }
   2795 
   2796 
   2797 LogicVRegister Simulator::uaddl2(VectorFormat vform,
   2798                                  LogicVRegister dst,
   2799                                  const LogicVRegister& src1,
   2800                                  const LogicVRegister& src2) {
   2801   SimVRegister temp1, temp2;
   2802   uxtl2(vform, temp1, src1);
   2803   uxtl2(vform, temp2, src2);
   2804   add(vform, dst, temp1, temp2);
   2805   return dst;
   2806 }
   2807 
   2808 
   2809 LogicVRegister Simulator::uaddw(VectorFormat vform,
   2810                                 LogicVRegister dst,
   2811                                 const LogicVRegister& src1,
   2812                                 const LogicVRegister& src2) {
   2813   SimVRegister temp;
   2814   uxtl(vform, temp, src2);
   2815   add(vform, dst, src1, temp);
   2816   return dst;
   2817 }
   2818 
   2819 
   2820 LogicVRegister Simulator::uaddw2(VectorFormat vform,
   2821                                  LogicVRegister dst,
   2822                                  const LogicVRegister& src1,
   2823                                  const LogicVRegister& src2) {
   2824   SimVRegister temp;
   2825   uxtl2(vform, temp, src2);
   2826   add(vform, dst, src1, temp);
   2827   return dst;
   2828 }
   2829 
   2830 
   2831 LogicVRegister Simulator::saddl(VectorFormat vform,
   2832                                 LogicVRegister dst,
   2833                                 const LogicVRegister& src1,
   2834                                 const LogicVRegister& src2) {
   2835   SimVRegister temp1, temp2;
   2836   sxtl(vform, temp1, src1);
   2837   sxtl(vform, temp2, src2);
   2838   add(vform, dst, temp1, temp2);
   2839   return dst;
   2840 }
   2841 
   2842 
   2843 LogicVRegister Simulator::saddl2(VectorFormat vform,
   2844                                  LogicVRegister dst,
   2845                                  const LogicVRegister& src1,
   2846                                  const LogicVRegister& src2) {
   2847   SimVRegister temp1, temp2;
   2848   sxtl2(vform, temp1, src1);
   2849   sxtl2(vform, temp2, src2);
   2850   add(vform, dst, temp1, temp2);
   2851   return dst;
   2852 }
   2853 
   2854 
   2855 LogicVRegister Simulator::saddw(VectorFormat vform,
   2856                                 LogicVRegister dst,
   2857                                 const LogicVRegister& src1,
   2858                                 const LogicVRegister& src2) {
   2859   SimVRegister temp;
   2860   sxtl(vform, temp, src2);
   2861   add(vform, dst, src1, temp);
   2862   return dst;
   2863 }
   2864 
   2865 
   2866 LogicVRegister Simulator::saddw2(VectorFormat vform,
   2867                                  LogicVRegister dst,
   2868                                  const LogicVRegister& src1,
   2869                                  const LogicVRegister& src2) {
   2870   SimVRegister temp;
   2871   sxtl2(vform, temp, src2);
   2872   add(vform, dst, src1, temp);
   2873   return dst;
   2874 }
   2875 
   2876 
   2877 LogicVRegister Simulator::usubl(VectorFormat vform,
   2878                                 LogicVRegister dst,
   2879                                 const LogicVRegister& src1,
   2880                                 const LogicVRegister& src2) {
   2881   SimVRegister temp1, temp2;
   2882   uxtl(vform, temp1, src1);
   2883   uxtl(vform, temp2, src2);
   2884   sub(vform, dst, temp1, temp2);
   2885   return dst;
   2886 }
   2887 
   2888 
   2889 LogicVRegister Simulator::usubl2(VectorFormat vform,
   2890                                  LogicVRegister dst,
   2891                                  const LogicVRegister& src1,
   2892                                  const LogicVRegister& src2) {
   2893   SimVRegister temp1, temp2;
   2894   uxtl2(vform, temp1, src1);
   2895   uxtl2(vform, temp2, src2);
   2896   sub(vform, dst, temp1, temp2);
   2897   return dst;
   2898 }
   2899 
   2900 
   2901 LogicVRegister Simulator::usubw(VectorFormat vform,
   2902                                 LogicVRegister dst,
   2903                                 const LogicVRegister& src1,
   2904                                 const LogicVRegister& src2) {
   2905   SimVRegister temp;
   2906   uxtl(vform, temp, src2);
   2907   sub(vform, dst, src1, temp);
   2908   return dst;
   2909 }
   2910 
   2911 
   2912 LogicVRegister Simulator::usubw2(VectorFormat vform,
   2913                                  LogicVRegister dst,
   2914                                  const LogicVRegister& src1,
   2915                                  const LogicVRegister& src2) {
   2916   SimVRegister temp;
   2917   uxtl2(vform, temp, src2);
   2918   sub(vform, dst, src1, temp);
   2919   return dst;
   2920 }
   2921 
   2922 
   2923 LogicVRegister Simulator::ssubl(VectorFormat vform,
   2924                                 LogicVRegister dst,
   2925                                 const LogicVRegister& src1,
   2926                                 const LogicVRegister& src2) {
   2927   SimVRegister temp1, temp2;
   2928   sxtl(vform, temp1, src1);
   2929   sxtl(vform, temp2, src2);
   2930   sub(vform, dst, temp1, temp2);
   2931   return dst;
   2932 }
   2933 
   2934 
   2935 LogicVRegister Simulator::ssubl2(VectorFormat vform,
   2936                                  LogicVRegister dst,
   2937                                  const LogicVRegister& src1,
   2938                                  const LogicVRegister& src2) {
   2939   SimVRegister temp1, temp2;
   2940   sxtl2(vform, temp1, src1);
   2941   sxtl2(vform, temp2, src2);
   2942   sub(vform, dst, temp1, temp2);
   2943   return dst;
   2944 }
   2945 
   2946 
   2947 LogicVRegister Simulator::ssubw(VectorFormat vform,
   2948                                 LogicVRegister dst,
   2949                                 const LogicVRegister& src1,
   2950                                 const LogicVRegister& src2) {
   2951   SimVRegister temp;
   2952   sxtl(vform, temp, src2);
   2953   sub(vform, dst, src1, temp);
   2954   return dst;
   2955 }
   2956 
   2957 
   2958 LogicVRegister Simulator::ssubw2(VectorFormat vform,
   2959                                  LogicVRegister dst,
   2960                                  const LogicVRegister& src1,
   2961                                  const LogicVRegister& src2) {
   2962   SimVRegister temp;
   2963   sxtl2(vform, temp, src2);
   2964   sub(vform, dst, src1, temp);
   2965   return dst;
   2966 }
   2967 
   2968 
   2969 LogicVRegister Simulator::uabal(VectorFormat vform,
   2970                                 LogicVRegister dst,
   2971                                 const LogicVRegister& src1,
   2972                                 const LogicVRegister& src2) {
   2973   SimVRegister temp1, temp2;
   2974   uxtl(vform, temp1, src1);
   2975   uxtl(vform, temp2, src2);
   2976   uaba(vform, dst, temp1, temp2);
   2977   return dst;
   2978 }
   2979 
   2980 
   2981 LogicVRegister Simulator::uabal2(VectorFormat vform,
   2982                                  LogicVRegister dst,
   2983                                  const LogicVRegister& src1,
   2984                                  const LogicVRegister& src2) {
   2985   SimVRegister temp1, temp2;
   2986   uxtl2(vform, temp1, src1);
   2987   uxtl2(vform, temp2, src2);
   2988   uaba(vform, dst, temp1, temp2);
   2989   return dst;
   2990 }
   2991 
   2992 
   2993 LogicVRegister Simulator::sabal(VectorFormat vform,
   2994                                 LogicVRegister dst,
   2995                                 const LogicVRegister& src1,
   2996                                 const LogicVRegister& src2) {
   2997   SimVRegister temp1, temp2;
   2998   sxtl(vform, temp1, src1);
   2999   sxtl(vform, temp2, src2);
   3000   saba(vform, dst, temp1, temp2);
   3001   return dst;
   3002 }
   3003 
   3004 
   3005 LogicVRegister Simulator::sabal2(VectorFormat vform,
   3006                                  LogicVRegister dst,
   3007                                  const LogicVRegister& src1,
   3008                                  const LogicVRegister& src2) {
   3009   SimVRegister temp1, temp2;
   3010   sxtl2(vform, temp1, src1);
   3011   sxtl2(vform, temp2, src2);
   3012   saba(vform, dst, temp1, temp2);
   3013   return dst;
   3014 }
   3015 
   3016 
   3017 LogicVRegister Simulator::uabdl(VectorFormat vform,
   3018                                 LogicVRegister dst,
   3019                                 const LogicVRegister& src1,
   3020                                 const LogicVRegister& src2) {
   3021   SimVRegister temp1, temp2;
   3022   uxtl(vform, temp1, src1);
   3023   uxtl(vform, temp2, src2);
   3024   absdiff(vform, dst, temp1, temp2, false);
   3025   return dst;
   3026 }
   3027 
   3028 
   3029 LogicVRegister Simulator::uabdl2(VectorFormat vform,
   3030                                  LogicVRegister dst,
   3031                                  const LogicVRegister& src1,
   3032                                  const LogicVRegister& src2) {
   3033   SimVRegister temp1, temp2;
   3034   uxtl2(vform, temp1, src1);
   3035   uxtl2(vform, temp2, src2);
   3036   absdiff(vform, dst, temp1, temp2, false);
   3037   return dst;
   3038 }
   3039 
   3040 
   3041 LogicVRegister Simulator::sabdl(VectorFormat vform,
   3042                                 LogicVRegister dst,
   3043                                 const LogicVRegister& src1,
   3044                                 const LogicVRegister& src2) {
   3045   SimVRegister temp1, temp2;
   3046   sxtl(vform, temp1, src1);
   3047   sxtl(vform, temp2, src2);
   3048   absdiff(vform, dst, temp1, temp2, true);
   3049   return dst;
   3050 }
   3051 
   3052 
   3053 LogicVRegister Simulator::sabdl2(VectorFormat vform,
   3054                                  LogicVRegister dst,
   3055                                  const LogicVRegister& src1,
   3056                                  const LogicVRegister& src2) {
   3057   SimVRegister temp1, temp2;
   3058   sxtl2(vform, temp1, src1);
   3059   sxtl2(vform, temp2, src2);
   3060   absdiff(vform, dst, temp1, temp2, true);
   3061   return dst;
   3062 }
   3063 
   3064 
   3065 LogicVRegister Simulator::umull(VectorFormat vform,
   3066                                 LogicVRegister dst,
   3067                                 const LogicVRegister& src1,
   3068                                 const LogicVRegister& src2) {
   3069   SimVRegister temp1, temp2;
   3070   uxtl(vform, temp1, src1);
   3071   uxtl(vform, temp2, src2);
   3072   mul(vform, dst, temp1, temp2);
   3073   return dst;
   3074 }
   3075 
   3076 
   3077 LogicVRegister Simulator::umull2(VectorFormat vform,
   3078                                  LogicVRegister dst,
   3079                                  const LogicVRegister& src1,
   3080                                  const LogicVRegister& src2) {
   3081   SimVRegister temp1, temp2;
   3082   uxtl2(vform, temp1, src1);
   3083   uxtl2(vform, temp2, src2);
   3084   mul(vform, dst, temp1, temp2);
   3085   return dst;
   3086 }
   3087 
   3088 
   3089 LogicVRegister Simulator::smull(VectorFormat vform,
   3090                                 LogicVRegister dst,
   3091                                 const LogicVRegister& src1,
   3092                                 const LogicVRegister& src2) {
   3093   SimVRegister temp1, temp2;
   3094   sxtl(vform, temp1, src1);
   3095   sxtl(vform, temp2, src2);
   3096   mul(vform, dst, temp1, temp2);
   3097   return dst;
   3098 }
   3099 
   3100 
   3101 LogicVRegister Simulator::smull2(VectorFormat vform,
   3102                                  LogicVRegister dst,
   3103                                  const LogicVRegister& src1,
   3104                                  const LogicVRegister& src2) {
   3105   SimVRegister temp1, temp2;
   3106   sxtl2(vform, temp1, src1);
   3107   sxtl2(vform, temp2, src2);
   3108   mul(vform, dst, temp1, temp2);
   3109   return dst;
   3110 }
   3111 
   3112 
   3113 LogicVRegister Simulator::umlsl(VectorFormat vform,
   3114                                 LogicVRegister dst,
   3115                                 const LogicVRegister& src1,
   3116                                 const LogicVRegister& src2) {
   3117   SimVRegister temp1, temp2;
   3118   uxtl(vform, temp1, src1);
   3119   uxtl(vform, temp2, src2);
   3120   mls(vform, dst, temp1, temp2);
   3121   return dst;
   3122 }
   3123 
   3124 
   3125 LogicVRegister Simulator::umlsl2(VectorFormat vform,
   3126                                  LogicVRegister dst,
   3127                                  const LogicVRegister& src1,
   3128                                  const LogicVRegister& src2) {
   3129   SimVRegister temp1, temp2;
   3130   uxtl2(vform, temp1, src1);
   3131   uxtl2(vform, temp2, src2);
   3132   mls(vform, dst, temp1, temp2);
   3133   return dst;
   3134 }
   3135 
   3136 
   3137 LogicVRegister Simulator::smlsl(VectorFormat vform,
   3138                                 LogicVRegister dst,
   3139                                 const LogicVRegister& src1,
   3140                                 const LogicVRegister& src2) {
   3141   SimVRegister temp1, temp2;
   3142   sxtl(vform, temp1, src1);
   3143   sxtl(vform, temp2, src2);
   3144   mls(vform, dst, temp1, temp2);
   3145   return dst;
   3146 }
   3147 
   3148 
   3149 LogicVRegister Simulator::smlsl2(VectorFormat vform,
   3150                                  LogicVRegister dst,
   3151                                  const LogicVRegister& src1,
   3152                                  const LogicVRegister& src2) {
   3153   SimVRegister temp1, temp2;
   3154   sxtl2(vform, temp1, src1);
   3155   sxtl2(vform, temp2, src2);
   3156   mls(vform, dst, temp1, temp2);
   3157   return dst;
   3158 }
   3159 
   3160 
   3161 LogicVRegister Simulator::umlal(VectorFormat vform,
   3162                                 LogicVRegister dst,
   3163                                 const LogicVRegister& src1,
   3164                                 const LogicVRegister& src2) {
   3165   SimVRegister temp1, temp2;
   3166   uxtl(vform, temp1, src1);
   3167   uxtl(vform, temp2, src2);
   3168   mla(vform, dst, temp1, temp2);
   3169   return dst;
   3170 }
   3171 
   3172 
   3173 LogicVRegister Simulator::umlal2(VectorFormat vform,
   3174                                  LogicVRegister dst,
   3175                                  const LogicVRegister& src1,
   3176                                  const LogicVRegister& src2) {
   3177   SimVRegister temp1, temp2;
   3178   uxtl2(vform, temp1, src1);
   3179   uxtl2(vform, temp2, src2);
   3180   mla(vform, dst, temp1, temp2);
   3181   return dst;
   3182 }
   3183 
   3184 
   3185 LogicVRegister Simulator::smlal(VectorFormat vform,
   3186                                 LogicVRegister dst,
   3187                                 const LogicVRegister& src1,
   3188                                 const LogicVRegister& src2) {
   3189   SimVRegister temp1, temp2;
   3190   sxtl(vform, temp1, src1);
   3191   sxtl(vform, temp2, src2);
   3192   mla(vform, dst, temp1, temp2);
   3193   return dst;
   3194 }
   3195 
   3196 
   3197 LogicVRegister Simulator::smlal2(VectorFormat vform,
   3198                                  LogicVRegister dst,
   3199                                  const LogicVRegister& src1,
   3200                                  const LogicVRegister& src2) {
   3201   SimVRegister temp1, temp2;
   3202   sxtl2(vform, temp1, src1);
   3203   sxtl2(vform, temp2, src2);
   3204   mla(vform, dst, temp1, temp2);
   3205   return dst;
   3206 }
   3207 
   3208 
   3209 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
   3210                                   LogicVRegister dst,
   3211                                   const LogicVRegister& src1,
   3212                                   const LogicVRegister& src2) {
   3213   SimVRegister temp;
   3214   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3215   return add(vform, dst, dst, product).SignedSaturate(vform);
   3216 }
   3217 
   3218 
   3219 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
   3220                                   LogicVRegister dst,
   3221                                   const LogicVRegister& src1,
   3222                                   const LogicVRegister& src2) {
   3223   SimVRegister temp;
   3224   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3225   return add(vform, dst, dst, product).SignedSaturate(vform);
   3226 }
   3227 
   3228 
   3229 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
   3230                                   LogicVRegister dst,
   3231                                   const LogicVRegister& src1,
   3232                                   const LogicVRegister& src2) {
   3233   SimVRegister temp;
   3234   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3235   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3236 }
   3237 
   3238 
   3239 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
   3240                                   LogicVRegister dst,
   3241                                   const LogicVRegister& src1,
   3242                                   const LogicVRegister& src2) {
   3243   SimVRegister temp;
   3244   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3245   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3246 }
   3247 
   3248 
   3249 LogicVRegister Simulator::sqdmull(VectorFormat vform,
   3250                                   LogicVRegister dst,
   3251                                   const LogicVRegister& src1,
   3252                                   const LogicVRegister& src2) {
   3253   SimVRegister temp;
   3254   LogicVRegister product = smull(vform, temp, src1, src2);
   3255   return add(vform, dst, product, product).SignedSaturate(vform);
   3256 }
   3257 
   3258 
   3259 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
   3260                                   LogicVRegister dst,
   3261                                   const LogicVRegister& src1,
   3262                                   const LogicVRegister& src2) {
   3263   SimVRegister temp;
   3264   LogicVRegister product = smull2(vform, temp, src1, src2);
   3265   return add(vform, dst, product, product).SignedSaturate(vform);
   3266 }
   3267 
   3268 
   3269 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
   3270                                    LogicVRegister dst,
   3271                                    const LogicVRegister& src1,
   3272                                    const LogicVRegister& src2,
   3273                                    bool round) {
   3274   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
   3275   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
   3276   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
   3277 
   3278   int esize = LaneSizeInBitsFromFormat(vform);
   3279   int round_const = round ? (1 << (esize - 2)) : 0;
   3280   int64_t product;
   3281 
   3282   dst.ClearForWrite(vform);
   3283   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3284     product = src1.Int(vform, i) * src2.Int(vform, i);
   3285     product += round_const;
   3286     product = product >> (esize - 1);
   3287 
   3288     if (product > MaxIntFromFormat(vform)) {
   3289       product = MaxIntFromFormat(vform);
   3290     } else if (product < MinIntFromFormat(vform)) {
   3291       product = MinIntFromFormat(vform);
   3292     }
   3293     dst.SetInt(vform, i, product);
   3294   }
   3295   return dst;
   3296 }
   3297 
   3298 
   3299 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
   3300                                   LogicVRegister dst,
   3301                                   const LogicVRegister& src1,
   3302                                   const LogicVRegister& src2) {
   3303   return sqrdmulh(vform, dst, src1, src2, false);
   3304 }
   3305 
   3306 
   3307 LogicVRegister Simulator::addhn(VectorFormat vform,
   3308                                 LogicVRegister dst,
   3309                                 const LogicVRegister& src1,
   3310                                 const LogicVRegister& src2) {
   3311   SimVRegister temp;
   3312   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3313   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3314   return dst;
   3315 }
   3316 
   3317 
   3318 LogicVRegister Simulator::addhn2(VectorFormat vform,
   3319                                  LogicVRegister dst,
   3320                                  const LogicVRegister& src1,
   3321                                  const LogicVRegister& src2) {
   3322   SimVRegister temp;
   3323   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3324   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3325   return dst;
   3326 }
   3327 
   3328 
   3329 LogicVRegister Simulator::raddhn(VectorFormat vform,
   3330                                  LogicVRegister dst,
   3331                                  const LogicVRegister& src1,
   3332                                  const LogicVRegister& src2) {
   3333   SimVRegister temp;
   3334   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3335   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3336   return dst;
   3337 }
   3338 
   3339 
   3340 LogicVRegister Simulator::raddhn2(VectorFormat vform,
   3341                                   LogicVRegister dst,
   3342                                   const LogicVRegister& src1,
   3343                                   const LogicVRegister& src2) {
   3344   SimVRegister temp;
   3345   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3346   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3347   return dst;
   3348 }
   3349 
   3350 
   3351 LogicVRegister Simulator::subhn(VectorFormat vform,
   3352                                 LogicVRegister dst,
   3353                                 const LogicVRegister& src1,
   3354                                 const LogicVRegister& src2) {
   3355   SimVRegister temp;
   3356   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3357   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3358   return dst;
   3359 }
   3360 
   3361 
   3362 LogicVRegister Simulator::subhn2(VectorFormat vform,
   3363                                  LogicVRegister dst,
   3364                                  const LogicVRegister& src1,
   3365                                  const LogicVRegister& src2) {
   3366   SimVRegister temp;
   3367   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3368   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3369   return dst;
   3370 }
   3371 
   3372 
   3373 LogicVRegister Simulator::rsubhn(VectorFormat vform,
   3374                                  LogicVRegister dst,
   3375                                  const LogicVRegister& src1,
   3376                                  const LogicVRegister& src2) {
   3377   SimVRegister temp;
   3378   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3379   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3380   return dst;
   3381 }
   3382 
   3383 
   3384 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
   3385                                   LogicVRegister dst,
   3386                                   const LogicVRegister& src1,
   3387                                   const LogicVRegister& src2) {
   3388   SimVRegister temp;
   3389   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3390   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3391   return dst;
   3392 }
   3393 
   3394 
   3395 LogicVRegister Simulator::trn1(VectorFormat vform,
   3396                                LogicVRegister dst,
   3397                                const LogicVRegister& src1,
   3398                                const LogicVRegister& src2) {
   3399   uint64_t result[16];
   3400   int laneCount = LaneCountFromFormat(vform);
   3401   int pairs = laneCount / 2;
   3402   for (int i = 0; i < pairs; ++i) {
   3403     result[2 * i]       = src1.Uint(vform, 2 * i);
   3404     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
   3405   }
   3406 
   3407   dst.ClearForWrite(vform);
   3408   for (int i = 0; i < laneCount; ++i) {
   3409     dst.SetUint(vform, i, result[i]);
   3410   }
   3411   return dst;
   3412 }
   3413 
   3414 
   3415 LogicVRegister Simulator::trn2(VectorFormat vform,
   3416                                LogicVRegister dst,
   3417                                const LogicVRegister& src1,
   3418                                const LogicVRegister& src2) {
   3419   uint64_t result[16];
   3420   int laneCount = LaneCountFromFormat(vform);
   3421   int pairs = laneCount / 2;
   3422   for (int i = 0; i < pairs; ++i) {
   3423     result[2 * i]       = src1.Uint(vform, (2 * i) + 1);
   3424     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
   3425   }
   3426 
   3427   dst.ClearForWrite(vform);
   3428   for (int i = 0; i < laneCount; ++i) {
   3429     dst.SetUint(vform, i, result[i]);
   3430   }
   3431   return dst;
   3432 }
   3433 
   3434 
   3435 LogicVRegister Simulator::zip1(VectorFormat vform,
   3436                                LogicVRegister dst,
   3437                                const LogicVRegister& src1,
   3438                                const LogicVRegister& src2) {
   3439   uint64_t result[16];
   3440   int laneCount = LaneCountFromFormat(vform);
   3441   int pairs = laneCount / 2;
   3442   for (int i = 0; i < pairs; ++i) {
   3443     result[2 * i]       = src1.Uint(vform, i);
   3444     result[(2 * i) + 1] = src2.Uint(vform, i);
   3445   }
   3446 
   3447   dst.ClearForWrite(vform);
   3448   for (int i = 0; i < laneCount; ++i) {
   3449     dst.SetUint(vform, i, result[i]);
   3450   }
   3451   return dst;
   3452 }
   3453 
   3454 
   3455 LogicVRegister Simulator::zip2(VectorFormat vform,
   3456                                LogicVRegister dst,
   3457                                const LogicVRegister& src1,
   3458                                const LogicVRegister& src2) {
   3459   uint64_t result[16];
   3460   int laneCount = LaneCountFromFormat(vform);
   3461   int pairs = laneCount / 2;
   3462   for (int i = 0; i < pairs; ++i) {
   3463     result[2 * i]       = src1.Uint(vform, pairs + i);
   3464     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
   3465   }
   3466 
   3467   dst.ClearForWrite(vform);
   3468   for (int i = 0; i < laneCount; ++i) {
   3469     dst.SetUint(vform, i, result[i]);
   3470   }
   3471   return dst;
   3472 }
   3473 
   3474 
   3475 LogicVRegister Simulator::uzp1(VectorFormat vform,
   3476                                LogicVRegister dst,
   3477                                const LogicVRegister& src1,
   3478                                const LogicVRegister& src2) {
   3479   uint64_t result[32];
   3480   int laneCount = LaneCountFromFormat(vform);
   3481   for (int i = 0; i < laneCount; ++i) {
   3482     result[i]             = src1.Uint(vform, i);
   3483     result[laneCount + i] = src2.Uint(vform, i);
   3484   }
   3485 
   3486   dst.ClearForWrite(vform);
   3487   for (int i = 0; i < laneCount; ++i) {
   3488     dst.SetUint(vform, i, result[2 * i]);
   3489   }
   3490   return dst;
   3491 }
   3492 
   3493 
   3494 LogicVRegister Simulator::uzp2(VectorFormat vform,
   3495                                LogicVRegister dst,
   3496                                const LogicVRegister& src1,
   3497                                const LogicVRegister& src2) {
   3498   uint64_t result[32];
   3499   int laneCount = LaneCountFromFormat(vform);
   3500   for (int i = 0; i < laneCount; ++i) {
   3501     result[i]             = src1.Uint(vform, i);
   3502     result[laneCount + i] = src2.Uint(vform, i);
   3503   }
   3504 
   3505   dst.ClearForWrite(vform);
   3506   for (int i = 0; i < laneCount; ++i) {
   3507     dst.SetUint(vform, i, result[ (2 * i) + 1]);
   3508   }
   3509   return dst;
   3510 }
   3511 
   3512 
   3513 template <typename T>
   3514 T Simulator::FPAdd(T op1, T op2) {
   3515   T result = FPProcessNaNs(op1, op2);
   3516   if (std::isnan(result)) return result;
   3517 
   3518   if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
   3519     // inf + -inf returns the default NaN.
   3520     FPProcessException();
   3521     return FPDefaultNaN<T>();
   3522   } else {
   3523     // Other cases should be handled by standard arithmetic.
   3524     return op1 + op2;
   3525   }
   3526 }
   3527 
   3528 
   3529 template <typename T>
   3530 T Simulator::FPSub(T op1, T op2) {
   3531   // NaNs should be handled elsewhere.
   3532   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3533 
   3534   if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
   3535     // inf - inf returns the default NaN.
   3536     FPProcessException();
   3537     return FPDefaultNaN<T>();
   3538   } else {
   3539     // Other cases should be handled by standard arithmetic.
   3540     return op1 - op2;
   3541   }
   3542 }
   3543 
   3544 
   3545 template <typename T>
   3546 T Simulator::FPMul(T op1, T op2) {
   3547   // NaNs should be handled elsewhere.
   3548   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3549 
   3550   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   3551     // inf * 0.0 returns the default NaN.
   3552     FPProcessException();
   3553     return FPDefaultNaN<T>();
   3554   } else {
   3555     // Other cases should be handled by standard arithmetic.
   3556     return op1 * op2;
   3557   }
   3558 }
   3559 
   3560 
   3561 template<typename T>
   3562 T Simulator::FPMulx(T op1, T op2) {
   3563   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   3564     // inf * 0.0 returns +/-2.0.
   3565     T two = 2.0;
   3566     return copysign(1.0, op1) * copysign(1.0, op2) * two;
   3567   }
   3568   return FPMul(op1, op2);
   3569 }
   3570 
   3571 
   3572 template<typename T>
   3573 T Simulator::FPMulAdd(T a, T op1, T op2) {
   3574   T result = FPProcessNaNs3(a, op1, op2);
   3575 
   3576   T sign_a = copysign(1.0, a);
   3577   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
   3578   bool isinf_prod = std::isinf(op1) || std::isinf(op2);
   3579   bool operation_generates_nan =
   3580       (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
   3581       (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
   3582       (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
   3583 
   3584   if (std::isnan(result)) {
   3585     // Generated NaNs override quiet NaNs propagated from a.
   3586     if (operation_generates_nan && IsQuietNaN(a)) {
   3587       FPProcessException();
   3588       return FPDefaultNaN<T>();
   3589     } else {
   3590       return result;
   3591     }
   3592   }
   3593 
   3594   // If the operation would produce a NaN, return the default NaN.
   3595   if (operation_generates_nan) {
   3596     FPProcessException();
   3597     return FPDefaultNaN<T>();
   3598   }
   3599 
   3600   // Work around broken fma implementations for exact zero results: The sign of
   3601   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
   3602   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
   3603     return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
   3604   }
   3605 
   3606   result = FusedMultiplyAdd(op1, op2, a);
   3607   VIXL_ASSERT(!std::isnan(result));
   3608 
   3609   // Work around broken fma implementations for rounded zero results: If a is
   3610   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
   3611   if ((a == 0.0) && (result == 0.0)) {
   3612     return copysign(0.0, sign_prod);
   3613   }
   3614 
   3615   return result;
   3616 }
   3617 
   3618 
   3619 template <typename T>
   3620 T Simulator::FPDiv(T op1, T op2) {
   3621   // NaNs should be handled elsewhere.
   3622   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3623 
   3624   if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
   3625     // inf / inf and 0.0 / 0.0 return the default NaN.
   3626     FPProcessException();
   3627     return FPDefaultNaN<T>();
   3628   } else {
   3629     if (op2 == 0.0) FPProcessException();
   3630 
   3631     // Other cases should be handled by standard arithmetic.
   3632     return op1 / op2;
   3633   }
   3634 }
   3635 
   3636 
   3637 template <typename T>
   3638 T Simulator::FPSqrt(T op) {
   3639   if (std::isnan(op)) {
   3640     return FPProcessNaN(op);
   3641   } else if (op < 0.0) {
   3642     FPProcessException();
   3643     return FPDefaultNaN<T>();
   3644   } else {
   3645     return sqrt(op);
   3646   }
   3647 }
   3648 
   3649 
   3650 template <typename T>
   3651 T Simulator::FPMax(T a, T b) {
   3652   T result = FPProcessNaNs(a, b);
   3653   if (std::isnan(result)) return result;
   3654 
   3655   if ((a == 0.0) && (b == 0.0) &&
   3656       (copysign(1.0, a) != copysign(1.0, b))) {
   3657     // a and b are zero, and the sign differs: return +0.0.
   3658     return 0.0;
   3659   } else {
   3660     return (a > b) ? a : b;
   3661   }
   3662 }
   3663 
   3664 
   3665 template <typename T>
   3666 T Simulator::FPMaxNM(T a, T b) {
   3667   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3668     a = kFP64NegativeInfinity;
   3669   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3670     b = kFP64NegativeInfinity;
   3671   }
   3672 
   3673   T result = FPProcessNaNs(a, b);
   3674   return std::isnan(result) ? result : FPMax(a, b);
   3675 }
   3676 
   3677 
   3678 template <typename T>
   3679 T Simulator::FPMin(T a, T b) {
   3680   T result = FPProcessNaNs(a, b);
   3681   if (std::isnan(result)) return result;
   3682 
   3683   if ((a == 0.0) && (b == 0.0) &&
   3684       (copysign(1.0, a) != copysign(1.0, b))) {
   3685     // a and b are zero, and the sign differs: return -0.0.
   3686     return -0.0;
   3687   } else {
   3688     return (a < b) ? a : b;
   3689   }
   3690 }
   3691 
   3692 
   3693 template <typename T>
   3694 T Simulator::FPMinNM(T a, T b) {
   3695   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3696     a = kFP64PositiveInfinity;
   3697   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3698     b = kFP64PositiveInfinity;
   3699   }
   3700 
   3701   T result = FPProcessNaNs(a, b);
   3702   return std::isnan(result) ? result : FPMin(a, b);
   3703 }
   3704 
   3705 
   3706 template <typename T>
   3707 T Simulator::FPRecipStepFused(T op1, T op2) {
   3708   const T two = 2.0;
   3709   if ((std::isinf(op1) && (op2 == 0.0))
   3710       || ((op1 == 0.0) && (std::isinf(op2)))) {
   3711     return two;
   3712   } else if (std::isinf(op1) || std::isinf(op2)) {
   3713     // Return +inf if signs match, otherwise -inf.
   3714     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3715                                           : kFP64NegativeInfinity;
   3716   } else {
   3717     return FusedMultiplyAdd(op1, op2, two);
   3718   }
   3719 }
   3720 
   3721 
   3722 template <typename T>
   3723 T Simulator::FPRSqrtStepFused(T op1, T op2) {
   3724   const T one_point_five = 1.5;
   3725   const T two = 2.0;
   3726 
   3727   if ((std::isinf(op1) && (op2 == 0.0))
   3728       || ((op1 == 0.0) && (std::isinf(op2)))) {
   3729     return one_point_five;
   3730   } else if (std::isinf(op1) || std::isinf(op2)) {
   3731     // Return +inf if signs match, otherwise -inf.
   3732     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3733                                           : kFP64NegativeInfinity;
   3734   } else {
   3735     // The multiply-add-halve operation must be fully fused, so avoid interim
   3736     // rounding by checking which operand can be losslessly divided by two
   3737     // before doing the multiply-add.
   3738     if (std::isnormal(op1 / two)) {
   3739       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
   3740     } else if (std::isnormal(op2 / two)) {
   3741       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
   3742     } else {
   3743       // Neither operand is normal after halving: the result is dominated by
   3744       // the addition term, so just return that.
   3745       return one_point_five;
   3746     }
   3747   }
   3748 }
   3749 
   3750 
   3751 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
   3752   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
   3753       (value == kFP64NegativeInfinity)) {
   3754     return value;
   3755   } else if (std::isnan(value)) {
   3756     return FPProcessNaN(value);
   3757   }
   3758 
   3759   double int_result = std::floor(value);
   3760   double error = value - int_result;
   3761   switch (round_mode) {
   3762     case FPTieAway: {
   3763       // Take care of correctly handling the range ]-0.5, -0.0], which must
   3764       // yield -0.0.
   3765       if ((-0.5 < value) && (value < 0.0)) {
   3766         int_result = -0.0;
   3767 
   3768       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
   3769         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3770         // result is positive, round up.
   3771         int_result++;
   3772       }
   3773       break;
   3774     }
   3775     case FPTieEven: {
   3776       // Take care of correctly handling the range [-0.5, -0.0], which must
   3777       // yield -0.0.
   3778       if ((-0.5 <= value) && (value < 0.0)) {
   3779         int_result = -0.0;
   3780 
   3781       // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3782       // result is odd, round up.
   3783       } else if ((error > 0.5) ||
   3784           ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
   3785         int_result++;
   3786       }
   3787       break;
   3788     }
   3789     case FPZero: {
   3790       // If value>0 then we take floor(value)
   3791       // otherwise, ceil(value).
   3792       if (value < 0) {
   3793          int_result = ceil(value);
   3794       }
   3795       break;
   3796     }
   3797     case FPNegativeInfinity: {
   3798       // We always use floor(value).
   3799       break;
   3800     }
   3801     case FPPositiveInfinity: {
   3802       // Take care of correctly handling the range ]-1.0, -0.0], which must
   3803       // yield -0.0.
   3804       if ((-1.0 < value) && (value < 0.0)) {
   3805         int_result = -0.0;
   3806 
   3807       // If the error is non-zero, round up.
   3808       } else if (error > 0.0) {
   3809         int_result++;
   3810       }
   3811       break;
   3812     }
   3813     default: VIXL_UNIMPLEMENTED();
   3814   }
   3815   return int_result;
   3816 }
   3817 
   3818 
   3819 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
   3820   value = FPRoundInt(value, rmode);
   3821   if (value >= kWMaxInt) {
   3822     return kWMaxInt;
   3823   } else if (value < kWMinInt) {
   3824     return kWMinInt;
   3825   }
   3826   return std::isnan(value) ? 0 : static_cast<int32_t>(value);
   3827 }
   3828 
   3829 
   3830 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
   3831   value = FPRoundInt(value, rmode);
   3832   if (value >= kXMaxInt) {
   3833     return kXMaxInt;
   3834   } else if (value < kXMinInt) {
   3835     return kXMinInt;
   3836   }
   3837   return std::isnan(value) ? 0 : static_cast<int64_t>(value);
   3838 }
   3839 
   3840 
   3841 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
   3842   value = FPRoundInt(value, rmode);
   3843   if (value >= kWMaxUInt) {
   3844     return kWMaxUInt;
   3845   } else if (value < 0.0) {
   3846     return 0;
   3847   }
   3848   return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
   3849 }
   3850 
   3851 
   3852 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
   3853   value = FPRoundInt(value, rmode);
   3854   if (value >= kXMaxUInt) {
   3855     return kXMaxUInt;
   3856   } else if (value < 0.0) {
   3857     return 0;
   3858   }
   3859   return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
   3860 }
   3861 
   3862 
   3863 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
   3864 template <typename T>                                            \
   3865 LogicVRegister Simulator::FN(VectorFormat vform,                 \
   3866                              LogicVRegister dst,                 \
   3867                              const LogicVRegister& src1,         \
   3868                              const LogicVRegister& src2) {       \
   3869   dst.ClearForWrite(vform);                                      \
   3870   for (int i = 0; i < LaneCountFromFormat(vform); i++) {         \
   3871     T op1 = src1.Float<T>(i);                                    \
   3872     T op2 = src2.Float<T>(i);                                    \
   3873     T result;                                                    \
   3874     if (PROCNAN) {                                               \
   3875       result = FPProcessNaNs(op1, op2);                          \
   3876       if (!std::isnan(result)) {                                      \
   3877         result = OP(op1, op2);                                   \
   3878       }                                                          \
   3879     } else {                                                     \
   3880       result = OP(op1, op2);                                     \
   3881     }                                                            \
   3882     dst.SetFloat(i, result);                                     \
   3883   }                                                              \
   3884   return dst;                                                    \
   3885 }                                                                \
   3886                                                                  \
   3887 LogicVRegister Simulator::FN(VectorFormat vform,                 \
   3888                              LogicVRegister dst,                 \
   3889                              const LogicVRegister& src1,         \
   3890                              const LogicVRegister& src2) {       \
   3891   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {            \
   3892     FN<float>(vform, dst, src1, src2);                           \
   3893   } else {                                                       \
   3894     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);   \
   3895     FN<double>(vform, dst, src1, src2);                          \
   3896   }                                                              \
   3897   return dst;                                                    \
   3898 }
   3899 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
   3900 #undef DEFINE_NEON_FP_VECTOR_OP
   3901 
   3902 
   3903 LogicVRegister Simulator::fnmul(VectorFormat vform,
   3904                                 LogicVRegister dst,
   3905                                 const LogicVRegister& src1,
   3906                                 const LogicVRegister& src2) {
   3907   SimVRegister temp;
   3908   LogicVRegister product = fmul(vform, temp, src1, src2);
   3909   return fneg(vform, dst, product);
   3910 }
   3911 
   3912 
   3913 template <typename T>
   3914 LogicVRegister Simulator::frecps(VectorFormat vform,
   3915                                  LogicVRegister dst,
   3916                                  const LogicVRegister& src1,
   3917                                  const LogicVRegister& src2) {
   3918   dst.ClearForWrite(vform);
   3919   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3920     T op1 = -src1.Float<T>(i);
   3921     T op2 = src2.Float<T>(i);
   3922     T result = FPProcessNaNs(op1, op2);
   3923     dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
   3924   }
   3925   return dst;
   3926 }
   3927 
   3928 
   3929 LogicVRegister Simulator::frecps(VectorFormat vform,
   3930                                  LogicVRegister dst,
   3931                                  const LogicVRegister& src1,
   3932                                  const LogicVRegister& src2) {
   3933   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   3934     frecps<float>(vform, dst, src1, src2);
   3935   } else {
   3936     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   3937     frecps<double>(vform, dst, src1, src2);
   3938   }
   3939   return dst;
   3940 }
   3941 
   3942 
   3943 template <typename T>
   3944 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   3945                                   LogicVRegister dst,
   3946                                   const LogicVRegister& src1,
   3947                                   const LogicVRegister& src2) {
   3948   dst.ClearForWrite(vform);
   3949   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3950     T op1 = -src1.Float<T>(i);
   3951     T op2 = src2.Float<T>(i);
   3952     T result = FPProcessNaNs(op1, op2);
   3953     dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
   3954   }
   3955   return dst;
   3956 }
   3957 
   3958 
   3959 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   3960                                   LogicVRegister dst,
   3961                                   const LogicVRegister& src1,
   3962                                   const LogicVRegister& src2) {
   3963   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   3964     frsqrts<float>(vform, dst, src1, src2);
   3965   } else {
   3966     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   3967     frsqrts<double>(vform, dst, src1, src2);
   3968   }
   3969   return dst;
   3970 }
   3971 
   3972 
   3973 template <typename T>
   3974 LogicVRegister Simulator::fcmp(VectorFormat vform,
   3975                                LogicVRegister dst,
   3976                                const LogicVRegister& src1,
   3977                                const LogicVRegister& src2,
   3978                                Condition cond) {
   3979   dst.ClearForWrite(vform);
   3980   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3981     bool result = false;
   3982     T op1 = src1.Float<T>(i);
   3983     T op2 = src2.Float<T>(i);
   3984     T nan_result = FPProcessNaNs(op1, op2);
   3985     if (!std::isnan(nan_result)) {
   3986       switch (cond) {
   3987         case eq: result = (op1 == op2); break;
   3988         case ge: result = (op1 >= op2); break;
   3989         case gt: result = (op1 > op2) ; break;
   3990         case le: result = (op1 <= op2); break;
   3991         case lt: result = (op1 < op2) ; break;
   3992         default: VIXL_UNREACHABLE(); break;
   3993       }
   3994     }
   3995     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
   3996   }
   3997   return dst;
   3998 }
   3999 
   4000 
   4001 LogicVRegister Simulator::fcmp(VectorFormat vform,
   4002                                LogicVRegister dst,
   4003                                const LogicVRegister& src1,
   4004                                const LogicVRegister& src2,
   4005                                Condition cond) {
   4006   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4007     fcmp<float>(vform, dst, src1, src2, cond);
   4008   } else {
   4009     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4010     fcmp<double>(vform, dst, src1, src2, cond);
   4011   }
   4012   return dst;
   4013 }
   4014 
   4015 
   4016 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
   4017                                     LogicVRegister dst,
   4018                                     const LogicVRegister& src,
   4019                                     Condition cond) {
   4020   SimVRegister temp;
   4021   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4022     LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0));
   4023     fcmp<float>(vform, dst, src, zero_reg, cond);
   4024   } else {
   4025     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4026     LogicVRegister zero_reg = dup_immediate(vform, temp,
   4027                                             double_to_rawbits(0.0));
   4028     fcmp<double>(vform, dst, src, zero_reg, cond);
   4029   }
   4030   return dst;
   4031 }
   4032 
   4033 
   4034 LogicVRegister Simulator::fabscmp(VectorFormat vform,
   4035                                   LogicVRegister dst,
   4036                                   const LogicVRegister& src1,
   4037                                   const LogicVRegister& src2,
   4038                                   Condition cond) {
   4039   SimVRegister temp1, temp2;
   4040   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4041     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
   4042     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
   4043     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
   4044   } else {
   4045     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4046     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
   4047     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
   4048     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
   4049   }
   4050   return dst;
   4051 }
   4052 
   4053 
   4054 template <typename T>
   4055 LogicVRegister Simulator::fmla(VectorFormat vform,
   4056                                LogicVRegister dst,
   4057                                const LogicVRegister& src1,
   4058                                const LogicVRegister& src2) {
   4059   dst.ClearForWrite(vform);
   4060   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4061     T op1 = src1.Float<T>(i);
   4062     T op2 = src2.Float<T>(i);
   4063     T acc = dst.Float<T>(i);
   4064     T result = FPMulAdd(acc, op1, op2);
   4065     dst.SetFloat(i, result);
   4066   }
   4067   return dst;
   4068 }
   4069 
   4070 
   4071 LogicVRegister Simulator::fmla(VectorFormat vform,
   4072                                LogicVRegister dst,
   4073                                const LogicVRegister& src1,
   4074                                const LogicVRegister& src2) {
   4075   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4076     fmla<float>(vform, dst, src1, src2);
   4077   } else {
   4078     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4079     fmla<double>(vform, dst, src1, src2);
   4080   }
   4081   return dst;
   4082 }
   4083 
   4084 
   4085 template <typename T>
   4086 LogicVRegister Simulator::fmls(VectorFormat vform,
   4087                                LogicVRegister dst,
   4088                                const LogicVRegister& src1,
   4089                                const LogicVRegister& src2) {
   4090   dst.ClearForWrite(vform);
   4091   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4092     T op1 = -src1.Float<T>(i);
   4093     T op2 = src2.Float<T>(i);
   4094     T acc = dst.Float<T>(i);
   4095     T result = FPMulAdd(acc, op1, op2);
   4096     dst.SetFloat(i, result);
   4097   }
   4098   return dst;
   4099 }
   4100 
   4101 
   4102 LogicVRegister Simulator::fmls(VectorFormat vform,
   4103                                LogicVRegister dst,
   4104                                const LogicVRegister& src1,
   4105                                const LogicVRegister& src2) {
   4106   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4107     fmls<float>(vform, dst, src1, src2);
   4108   } else {
   4109     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4110     fmls<double>(vform, dst, src1, src2);
   4111   }
   4112   return dst;
   4113 }
   4114 
   4115 
   4116 template <typename T>
   4117 LogicVRegister Simulator::fneg(VectorFormat vform,
   4118                                LogicVRegister dst,
   4119                                const LogicVRegister& src) {
   4120   dst.ClearForWrite(vform);
   4121   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4122     T op = src.Float<T>(i);
   4123     op = -op;
   4124     dst.SetFloat(i, op);
   4125   }
   4126   return dst;
   4127 }
   4128 
   4129 
   4130 LogicVRegister Simulator::fneg(VectorFormat vform,
   4131                                LogicVRegister dst,
   4132                                const LogicVRegister& src) {
   4133   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4134     fneg<float>(vform, dst, src);
   4135   } else {
   4136     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4137     fneg<double>(vform, dst, src);
   4138   }
   4139   return dst;
   4140 }
   4141 
   4142 
   4143 template <typename T>
   4144 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4145                                 LogicVRegister dst,
   4146                                 const LogicVRegister& src) {
   4147   dst.ClearForWrite(vform);
   4148   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4149     T op = src.Float<T>(i);
   4150     if (copysign(1.0, op) < 0.0) {
   4151       op = -op;
   4152     }
   4153     dst.SetFloat(i, op);
   4154   }
   4155   return dst;
   4156 }
   4157 
   4158 
   4159 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4160                                 LogicVRegister dst,
   4161                                 const LogicVRegister& src) {
   4162   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4163     fabs_<float>(vform, dst, src);
   4164   } else {
   4165     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4166     fabs_<double>(vform, dst, src);
   4167   }
   4168   return dst;
   4169 }
   4170 
   4171 
   4172 LogicVRegister Simulator::fabd(VectorFormat vform,
   4173                                LogicVRegister dst,
   4174                                const LogicVRegister& src1,
   4175                                const LogicVRegister& src2) {
   4176   SimVRegister temp;
   4177   fsub(vform, temp, src1, src2);
   4178   fabs_(vform, dst, temp);
   4179   return dst;
   4180 }
   4181 
   4182 
   4183 LogicVRegister Simulator::fsqrt(VectorFormat vform,
   4184                                 LogicVRegister dst,
   4185                                 const LogicVRegister& src) {
   4186   dst.ClearForWrite(vform);
   4187   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4188     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4189       float result = FPSqrt(src.Float<float>(i));
   4190       dst.SetFloat(i, result);
   4191     }
   4192   } else {
   4193     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4194     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4195       double result = FPSqrt(src.Float<double>(i));
   4196       dst.SetFloat(i, result);
   4197     }
   4198   }
   4199   return dst;
   4200 }
   4201 
   4202 
   4203 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                          \
   4204 LogicVRegister Simulator::FNP(VectorFormat vform,                    \
   4205                               LogicVRegister dst,                    \
   4206                               const LogicVRegister& src1,            \
   4207                               const LogicVRegister& src2) {          \
   4208   SimVRegister temp1, temp2;                                         \
   4209   uzp1(vform, temp1, src1, src2);                                    \
   4210   uzp2(vform, temp2, src1, src2);                                    \
   4211   FN(vform, dst, temp1, temp2);                                      \
   4212   return dst;                                                        \
   4213 }                                                                    \
   4214                                                                      \
   4215 LogicVRegister Simulator::FNP(VectorFormat vform,                    \
   4216                               LogicVRegister dst,                    \
   4217                               const LogicVRegister& src) {           \
   4218   if (vform == kFormatS) {                                           \
   4219     float result = OP(src.Float<float>(0), src.Float<float>(1));     \
   4220     dst.SetFloat(0, result);                                         \
   4221   } else {                                                           \
   4222     VIXL_ASSERT(vform == kFormatD);                                  \
   4223     double result = OP(src.Float<double>(0), src.Float<double>(1));  \
   4224     dst.SetFloat(0, result);                                         \
   4225   }                                                                  \
   4226   dst.ClearForWrite(vform);                                          \
   4227   return dst;                                                        \
   4228 }
   4229 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
   4230 #undef DEFINE_NEON_FP_PAIR_OP
   4231 
   4232 
   4233 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
   4234                                    LogicVRegister dst,
   4235                                    const LogicVRegister& src,
   4236                                    FPMinMaxOp Op) {
   4237   VIXL_ASSERT(vform == kFormat4S);
   4238   USE(vform);
   4239   float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
   4240   float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
   4241   float result = (this->*Op)(result1, result2);
   4242   dst.ClearForWrite(kFormatS);
   4243   dst.SetFloat<float>(0, result);
   4244   return dst;
   4245 }
   4246 
   4247 
   4248 LogicVRegister Simulator::fmaxv(VectorFormat vform,
   4249                                 LogicVRegister dst,
   4250                                 const LogicVRegister& src) {
   4251   return fminmaxv(vform, dst, src, &Simulator::FPMax);
   4252 }
   4253 
   4254 
   4255 LogicVRegister Simulator::fminv(VectorFormat vform,
   4256                                 LogicVRegister dst,
   4257                                 const LogicVRegister& src) {
   4258   return fminmaxv(vform, dst, src, &Simulator::FPMin);
   4259 }
   4260 
   4261 
   4262 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
   4263                                  LogicVRegister dst,
   4264                                  const LogicVRegister& src) {
   4265   return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
   4266 }
   4267 
   4268 
   4269 LogicVRegister Simulator::fminnmv(VectorFormat vform,
   4270                                   LogicVRegister dst,
   4271                                   const LogicVRegister& src) {
   4272   return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
   4273 }
   4274 
   4275 
   4276 LogicVRegister Simulator::fmul(VectorFormat vform,
   4277                                LogicVRegister dst,
   4278                                const LogicVRegister& src1,
   4279                                const LogicVRegister& src2,
   4280                                int index) {
   4281   dst.ClearForWrite(vform);
   4282   SimVRegister temp;
   4283   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4284     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4285     fmul<float>(vform, dst, src1, index_reg);
   4286 
   4287   } else {
   4288     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4289     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4290     fmul<double>(vform, dst, src1, index_reg);
   4291   }
   4292   return dst;
   4293 }
   4294 
   4295 
   4296 LogicVRegister Simulator::fmla(VectorFormat vform,
   4297                                LogicVRegister dst,
   4298                                const LogicVRegister& src1,
   4299                                const LogicVRegister& src2,
   4300                                int index) {
   4301   dst.ClearForWrite(vform);
   4302   SimVRegister temp;
   4303   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4304     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4305     fmla<float>(vform, dst, src1, index_reg);
   4306 
   4307   } else {
   4308     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4309     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4310     fmla<double>(vform, dst, src1, index_reg);
   4311   }
   4312   return dst;
   4313 }
   4314 
   4315 
   4316 LogicVRegister Simulator::fmls(VectorFormat vform,
   4317                                LogicVRegister dst,
   4318                                const LogicVRegister& src1,
   4319                                const LogicVRegister& src2,
   4320                                int index) {
   4321   dst.ClearForWrite(vform);
   4322   SimVRegister temp;
   4323   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4324     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4325     fmls<float>(vform, dst, src1, index_reg);
   4326 
   4327   } else {
   4328     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4329     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4330     fmls<double>(vform, dst, src1, index_reg);
   4331   }
   4332   return dst;
   4333 }
   4334 
   4335 
   4336 LogicVRegister Simulator::fmulx(VectorFormat vform,
   4337                                 LogicVRegister dst,
   4338                                 const LogicVRegister& src1,
   4339                                 const LogicVRegister& src2,
   4340                                 int index) {
   4341   dst.ClearForWrite(vform);
   4342   SimVRegister temp;
   4343   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4344     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4345     fmulx<float>(vform, dst, src1, index_reg);
   4346 
   4347   } else {
   4348     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4349     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4350     fmulx<double>(vform, dst, src1, index_reg);
   4351   }
   4352   return dst;
   4353 }
   4354 
   4355 
   4356 LogicVRegister Simulator::frint(VectorFormat vform,
   4357                                 LogicVRegister dst,
   4358                                 const LogicVRegister& src,
   4359                                 FPRounding rounding_mode,
   4360                                 bool inexact_exception) {
   4361   dst.ClearForWrite(vform);
   4362   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4363     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4364       float input = src.Float<float>(i);
   4365       float rounded = FPRoundInt(input, rounding_mode);
   4366       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   4367         FPProcessException();
   4368       }
   4369       dst.SetFloat<float>(i, rounded);
   4370     }
   4371   } else {
   4372     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4373     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4374       double input = src.Float<double>(i);
   4375       double rounded = FPRoundInt(input, rounding_mode);
   4376       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   4377         FPProcessException();
   4378       }
   4379       dst.SetFloat<double>(i, rounded);
   4380     }
   4381   }
   4382   return dst;
   4383 }
   4384 
   4385 
   4386 LogicVRegister Simulator::fcvts(VectorFormat vform,
   4387                                 LogicVRegister dst,
   4388                                 const LogicVRegister& src,
   4389                                 FPRounding rounding_mode,
   4390                                 int fbits) {
   4391   dst.ClearForWrite(vform);
   4392   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4393     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4394       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4395       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
   4396     }
   4397   } else {
   4398     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4399     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4400       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4401       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
   4402     }
   4403   }
   4404   return dst;
   4405 }
   4406 
   4407 
   4408 LogicVRegister Simulator::fcvtu(VectorFormat vform,
   4409                                 LogicVRegister dst,
   4410                                 const LogicVRegister& src,
   4411                                 FPRounding rounding_mode,
   4412                                 int fbits) {
   4413   dst.ClearForWrite(vform);
   4414   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4415     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4416       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4417       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
   4418     }
   4419   } else {
   4420     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4421     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4422       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4423       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
   4424     }
   4425   }
   4426   return dst;
   4427 }
   4428 
   4429 
   4430 LogicVRegister Simulator::fcvtl(VectorFormat vform,
   4431                                 LogicVRegister dst,
   4432                                 const LogicVRegister& src) {
   4433   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4434     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4435       dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
   4436     }
   4437   } else {
   4438     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4439     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4440       dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
   4441     }
   4442   }
   4443   return dst;
   4444 }
   4445 
   4446 
   4447 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
   4448                                  LogicVRegister dst,
   4449                                  const LogicVRegister& src) {
   4450   int lane_count = LaneCountFromFormat(vform);
   4451   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4452     for (int i = 0; i < lane_count; i++) {
   4453       dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
   4454     }
   4455   } else {
   4456     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4457     for (int i = 0; i < lane_count; i++) {
   4458       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
   4459     }
   4460   }
   4461   return dst;
   4462 }
   4463 
   4464 
   4465 LogicVRegister Simulator::fcvtn(VectorFormat vform,
   4466                                 LogicVRegister dst,
   4467                                 const LogicVRegister& src) {
   4468   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4469     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4470       dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
   4471     }
   4472   } else {
   4473     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4474     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4475       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
   4476     }
   4477   }
   4478   return dst;
   4479 }
   4480 
   4481 
   4482 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
   4483                                  LogicVRegister dst,
   4484                                  const LogicVRegister& src) {
   4485   int lane_count = LaneCountFromFormat(vform) / 2;
   4486   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4487     for (int i = lane_count - 1; i >= 0; i--) {
   4488       dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
   4489     }
   4490   } else {
   4491     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4492     for (int i = lane_count - 1; i >= 0; i--) {
   4493       dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
   4494     }
   4495   }
   4496   return dst;
   4497 }
   4498 
   4499 
   4500 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
   4501                                  LogicVRegister dst,
   4502                                  const LogicVRegister& src) {
   4503   dst.ClearForWrite(vform);
   4504   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4505   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4506     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
   4507   }
   4508   return dst;
   4509 }
   4510 
   4511 
   4512 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
   4513                                   LogicVRegister dst,
   4514                                   const LogicVRegister& src) {
   4515   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4516   int lane_count = LaneCountFromFormat(vform) / 2;
   4517   for (int i = lane_count - 1; i >= 0; i--) {
   4518     dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
   4519   }
   4520   return dst;
   4521 }
   4522 
   4523 
   4524 // Based on reference C function recip_sqrt_estimate from ARM ARM.
   4525 double Simulator::recip_sqrt_estimate(double a) {
   4526   int q0, q1, s;
   4527   double r;
   4528   if (a < 0.5) {
   4529     q0 = static_cast<int>(a * 512.0);
   4530     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
   4531   } else  {
   4532     q1 = static_cast<int>(a * 256.0);
   4533     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
   4534   }
   4535   s = static_cast<int>(256.0 * r + 0.5);
   4536   return static_cast<double>(s) / 256.0;
   4537 }
   4538 
   4539 
   4540 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
   4541   return unsigned_bitextract_64(start_bit, end_bit, val);
   4542 }
   4543 
   4544 
   4545 template <typename T>
   4546 T Simulator::FPRecipSqrtEstimate(T op) {
   4547   if (std::isnan(op)) {
   4548     return FPProcessNaN(op);
   4549   } else if (op == 0.0) {
   4550     if (copysign(1.0, op) < 0.0) {
   4551       return kFP64NegativeInfinity;
   4552     } else {
   4553       return kFP64PositiveInfinity;
   4554     }
   4555   } else if (copysign(1.0, op) < 0.0) {
   4556     FPProcessException();
   4557     return FPDefaultNaN<T>();
   4558   } else if (std::isinf(op)) {
   4559     return 0.0;
   4560   } else {
   4561     uint64_t fraction;
   4562     int exp, result_exp;
   4563 
   4564     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4565       exp = float_exp(op);
   4566       fraction = float_mantissa(op);
   4567       fraction <<= 29;
   4568     } else {
   4569       exp = double_exp(op);
   4570       fraction = double_mantissa(op);
   4571     }
   4572 
   4573     if (exp == 0) {
   4574       while (Bits(fraction, 51, 51) == 0) {
   4575         fraction = Bits(fraction, 50, 0) << 1;
   4576         exp -= 1;
   4577       }
   4578       fraction = Bits(fraction, 50, 0) << 1;
   4579     }
   4580 
   4581     double scaled;
   4582     if (Bits(exp, 0, 0) == 0) {
   4583       scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
   4584     } else {
   4585       scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
   4586     }
   4587 
   4588     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4589       result_exp = (380 - exp) / 2;
   4590     } else {
   4591       result_exp = (3068 - exp) / 2;
   4592     }
   4593 
   4594     uint64_t estimate = double_to_rawbits(recip_sqrt_estimate(scaled));
   4595 
   4596     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4597       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   4598       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
   4599       return float_pack(0, exp_bits, est_bits);
   4600     } else {
   4601       return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
   4602     }
   4603   }
   4604 }
   4605 
   4606 
   4607 LogicVRegister Simulator::frsqrte(VectorFormat vform,
   4608                                   LogicVRegister dst,
   4609                                   const LogicVRegister& src) {
   4610   dst.ClearForWrite(vform);
   4611   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4612     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4613       float input = src.Float<float>(i);
   4614       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
   4615     }
   4616   } else {
   4617     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4618     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4619       double input = src.Float<double>(i);
   4620       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
   4621     }
   4622   }
   4623   return dst;
   4624 }
   4625 
   4626 template <typename T>
   4627 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
   4628   uint32_t sign;
   4629 
   4630   if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4631     sign = float_sign(op);
   4632   } else {
   4633     sign = double_sign(op);
   4634   }
   4635 
   4636   if (std::isnan(op)) {
   4637     return FPProcessNaN(op);
   4638   } else if (std::isinf(op)) {
   4639     return (sign == 1) ? -0.0 : 0.0;
   4640   } else if (op == 0.0) {
   4641     FPProcessException();  // FPExc_DivideByZero exception.
   4642     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   4643   } else if (((sizeof(T) == sizeof(float)) &&  // NOLINT(runtime/sizeof)
   4644               (std::fabs(op) < std::pow(2.0, -128.0))) ||
   4645              ((sizeof(T) == sizeof(double)) &&  // NOLINT(runtime/sizeof)
   4646               (std::fabs(op) < std::pow(2.0, -1024.0)))) {
   4647     bool overflow_to_inf = false;
   4648     switch (rounding) {
   4649       case FPTieEven: overflow_to_inf = true; break;
   4650       case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
   4651       case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
   4652       case FPZero: overflow_to_inf = false; break;
   4653       default: break;
   4654     }
   4655     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
   4656     if (overflow_to_inf) {
   4657       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   4658     } else {
   4659       // Return FPMaxNormal(sign).
   4660       if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4661         return float_pack(sign, 0xfe, 0x07fffff);
   4662       } else {
   4663         return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
   4664       }
   4665     }
   4666   } else {
   4667     uint64_t fraction;
   4668     int exp, result_exp;
   4669     uint32_t sign;
   4670 
   4671     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4672       sign = float_sign(op);
   4673       exp = float_exp(op);
   4674       fraction = float_mantissa(op);
   4675       fraction <<= 29;
   4676     } else {
   4677       sign = double_sign(op);
   4678       exp = double_exp(op);
   4679       fraction = double_mantissa(op);
   4680     }
   4681 
   4682     if (exp == 0) {
   4683       if (Bits(fraction, 51, 51) == 0) {
   4684         exp -= 1;
   4685         fraction = Bits(fraction, 49, 0) << 2;
   4686       } else {
   4687         fraction = Bits(fraction, 50, 0) << 1;
   4688       }
   4689     }
   4690 
   4691     double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
   4692 
   4693     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4694       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
   4695     } else {
   4696       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
   4697     }
   4698 
   4699     double estimate = recip_estimate(scaled);
   4700 
   4701     fraction = double_mantissa(estimate);
   4702     if (result_exp == 0) {
   4703       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
   4704     } else if (result_exp == -1) {
   4705       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
   4706       result_exp = 0;
   4707     }
   4708     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4709       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   4710       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
   4711       return float_pack(sign, exp_bits, frac_bits);
   4712     } else {
   4713       return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
   4714     }
   4715   }
   4716 }
   4717 
   4718 
   4719 LogicVRegister Simulator::frecpe(VectorFormat vform,
   4720                                  LogicVRegister dst,
   4721                                  const LogicVRegister& src,
   4722                                  FPRounding round) {
   4723   dst.ClearForWrite(vform);
   4724   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4725     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4726       float input = src.Float<float>(i);
   4727       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
   4728     }
   4729   } else {
   4730     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4731     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4732       double input = src.Float<double>(i);
   4733       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
   4734     }
   4735   }
   4736   return dst;
   4737 }
   4738 
   4739 
   4740 LogicVRegister Simulator::ursqrte(VectorFormat vform,
   4741                                   LogicVRegister dst,
   4742                                   const LogicVRegister& src) {
   4743   dst.ClearForWrite(vform);
   4744   uint64_t operand;
   4745   uint32_t result;
   4746   double dp_operand, dp_result;
   4747   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4748     operand = src.Uint(vform, i);
   4749     if (operand <= 0x3FFFFFFF) {
   4750       result = 0xFFFFFFFF;
   4751     } else {
   4752       dp_operand = operand * std::pow(2.0, -32);
   4753       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
   4754       result = static_cast<uint32_t>(dp_result);
   4755     }
   4756     dst.SetUint(vform, i, result);
   4757   }
   4758   return dst;
   4759 }
   4760 
   4761 
   4762 // Based on reference C function recip_estimate from ARM ARM.
   4763 double Simulator::recip_estimate(double a) {
   4764   int q, s;
   4765   double r;
   4766   q = static_cast<int>(a * 512.0);
   4767   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
   4768   s = static_cast<int>(256.0 * r + 0.5);
   4769   return static_cast<double>(s) / 256.0;
   4770 }
   4771 
   4772 
   4773 LogicVRegister Simulator::urecpe(VectorFormat vform,
   4774                                  LogicVRegister dst,
   4775                                  const LogicVRegister& src) {
   4776   dst.ClearForWrite(vform);
   4777   uint64_t operand;
   4778   uint32_t result;
   4779   double dp_operand, dp_result;
   4780   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4781     operand = src.Uint(vform, i);
   4782     if (operand <= 0x7FFFFFFF) {
   4783       result = 0xFFFFFFFF;
   4784     } else {
   4785       dp_operand = operand * std::pow(2.0, -32);
   4786       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
   4787       result = static_cast<uint32_t>(dp_result);
   4788     }
   4789     dst.SetUint(vform, i, result);
   4790   }
   4791   return dst;
   4792 }
   4793 
   4794 template <typename T>
   4795 LogicVRegister Simulator::frecpx(VectorFormat vform,
   4796                                  LogicVRegister dst,
   4797                                  const LogicVRegister& src) {
   4798   dst.ClearForWrite(vform);
   4799   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4800     T op = src.Float<T>(i);
   4801     T result;
   4802     if (std::isnan(op)) {
   4803        result = FPProcessNaN(op);
   4804     } else {
   4805       int exp;
   4806       uint32_t sign;
   4807       if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4808         sign = float_sign(op);
   4809         exp = float_exp(op);
   4810         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
   4811         result = float_pack(sign, exp, 0);
   4812       } else {
   4813         sign = double_sign(op);
   4814         exp = double_exp(op);
   4815         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
   4816         result = double_pack(sign, exp, 0);
   4817       }
   4818     }
   4819     dst.SetFloat(i, result);
   4820   }
   4821   return dst;
   4822 }
   4823 
   4824 
   4825 LogicVRegister Simulator::frecpx(VectorFormat vform,
   4826                                  LogicVRegister dst,
   4827                                  const LogicVRegister& src) {
   4828   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4829     frecpx<float>(vform, dst, src);
   4830   } else {
   4831     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4832     frecpx<double>(vform, dst, src);
   4833   }
   4834   return dst;
   4835 }
   4836 
   4837 LogicVRegister Simulator::scvtf(VectorFormat vform,
   4838                                 LogicVRegister dst,
   4839                                 const LogicVRegister& src,
   4840                                 int fbits,
   4841                                 FPRounding round) {
   4842   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4843     if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4844       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
   4845       dst.SetFloat<float>(i, result);
   4846     } else {
   4847       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4848       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
   4849       dst.SetFloat<double>(i, result);
   4850     }
   4851   }
   4852   return dst;
   4853 }
   4854 
   4855 
   4856 LogicVRegister Simulator::ucvtf(VectorFormat vform,
   4857                                 LogicVRegister dst,
   4858                                 const LogicVRegister& src,
   4859                                 int fbits,
   4860                                 FPRounding round) {
   4861   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4862     if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4863       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
   4864       dst.SetFloat<float>(i, result);
   4865     } else {
   4866       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4867       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
   4868       dst.SetFloat<double>(i, result);
   4869     }
   4870   }
   4871   return dst;
   4872 }
   4873 
   4874 
   4875 }  // namespace vixl
   4876 
   4877 #endif  // VIXL_INCLUDE_SIMULATOR
   4878