Home | History | Annotate | Download | only in a64
      1 // Copyright 2015, ARM Limited
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #include <cmath>
     28 #include "vixl/a64/simulator-a64.h"
     29 
     30 namespace vixl {
     31 
     32 template<> double Simulator::FPDefaultNaN<double>() {
     33   return kFP64DefaultNaN;
     34 }
     35 
     36 
     37 template<> float Simulator::FPDefaultNaN<float>() {
     38   return kFP32DefaultNaN;
     39 }
     40 
     41 // See FPRound for a description of this function.
     42 static inline double FPRoundToDouble(int64_t sign, int64_t exponent,
     43                                      uint64_t mantissa, FPRounding round_mode) {
     44   int64_t bits =
     45       FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
     46                                                                  exponent,
     47                                                                  mantissa,
     48                                                                  round_mode);
     49   return rawbits_to_double(bits);
     50 }
     51 
     52 
     53 // See FPRound for a description of this function.
     54 static inline float FPRoundToFloat(int64_t sign, int64_t exponent,
     55                                    uint64_t mantissa, FPRounding round_mode) {
     56   int32_t bits =
     57       FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
     58                                                                exponent,
     59                                                                mantissa,
     60                                                                round_mode);
     61   return rawbits_to_float(bits);
     62 }
     63 
     64 
     65 // See FPRound for a description of this function.
     66 static inline float16 FPRoundToFloat16(int64_t sign,
     67                                        int64_t exponent,
     68                                        uint64_t mantissa,
     69                                        FPRounding round_mode) {
     70   return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
     71       sign, exponent, mantissa, round_mode);
     72 }
     73 
     74 
     75 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
     76   if (src >= 0) {
     77     return UFixedToDouble(src, fbits, round);
     78   } else {
     79     // This works for all negative values, including INT64_MIN.
     80     return -UFixedToDouble(-src, fbits, round);
     81   }
     82 }
     83 
     84 
     85 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
     86   // An input of 0 is a special case because the result is effectively
     87   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
     88   if (src == 0) {
     89     return 0.0;
     90   }
     91 
     92   // Calculate the exponent. The highest significant bit will have the value
     93   // 2^exponent.
     94   const int highest_significant_bit = 63 - CountLeadingZeros(src);
     95   const int64_t exponent = highest_significant_bit - fbits;
     96 
     97   return FPRoundToDouble(0, exponent, src, round);
     98 }
     99 
    100 
    101 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
    102   if (src >= 0) {
    103     return UFixedToFloat(src, fbits, round);
    104   } else {
    105     // This works for all negative values, including INT64_MIN.
    106     return -UFixedToFloat(-src, fbits, round);
    107   }
    108 }
    109 
    110 
    111 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
    112   // An input of 0 is a special case because the result is effectively
    113   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
    114   if (src == 0) {
    115     return 0.0f;
    116   }
    117 
    118   // Calculate the exponent. The highest significant bit will have the value
    119   // 2^exponent.
    120   const int highest_significant_bit = 63 - CountLeadingZeros(src);
    121   const int32_t exponent = highest_significant_bit - fbits;
    122 
    123   return FPRoundToFloat(0, exponent, src, round);
    124 }
    125 
    126 
    127 double Simulator::FPToDouble(float value) {
    128   switch (std::fpclassify(value)) {
    129     case FP_NAN: {
    130       if (IsSignallingNaN(value)) {
    131         FPProcessException();
    132       }
    133       if (DN()) return kFP64DefaultNaN;
    134 
    135       // Convert NaNs as the processor would:
    136       //  - The sign is propagated.
    137       //  - The payload (mantissa) is transferred entirely, except that the top
    138       //    bit is forced to '1', making the result a quiet NaN. The unused
    139       //    (low-order) payload bits are set to 0.
    140       uint32_t raw = float_to_rawbits(value);
    141 
    142       uint64_t sign = raw >> 31;
    143       uint64_t exponent = (1 << 11) - 1;
    144       uint64_t payload = unsigned_bitextract_64(21, 0, raw);
    145       payload <<= (52 - 23);  // The unused low-order bits should be 0.
    146       payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
    147 
    148       return rawbits_to_double((sign << 63) | (exponent << 52) | payload);
    149     }
    150 
    151     case FP_ZERO:
    152     case FP_NORMAL:
    153     case FP_SUBNORMAL:
    154     case FP_INFINITE: {
    155       // All other inputs are preserved in a standard cast, because every value
    156       // representable using an IEEE-754 float is also representable using an
    157       // IEEE-754 double.
    158       return static_cast<double>(value);
    159     }
    160   }
    161 
    162   VIXL_UNREACHABLE();
    163   return static_cast<double>(value);
    164 }
    165 
    166 
    167 float Simulator::FPToFloat(float16 value) {
    168   uint32_t sign = value >> 15;
    169   uint32_t exponent = unsigned_bitextract_32(
    170       kFloat16MantissaBits + kFloat16ExponentBits - 1, kFloat16MantissaBits,
    171       value);
    172   uint32_t mantissa = unsigned_bitextract_32(
    173       kFloat16MantissaBits - 1, 0, value);
    174 
    175   switch (float16classify(value)) {
    176     case FP_ZERO:
    177       return (sign == 0) ? 0.0f : -0.0f;
    178 
    179     case FP_INFINITE:
    180       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
    181 
    182     case FP_SUBNORMAL: {
    183       // Calculate shift required to put mantissa into the most-significant bits
    184       // of the destination mantissa.
    185       int shift = CountLeadingZeros(mantissa << (32 - 10));
    186 
    187       // Shift mantissa and discard implicit '1'.
    188       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
    189       mantissa &= (1 << kFloatMantissaBits) - 1;
    190 
    191       // Adjust the exponent for the shift applied, and rebias.
    192       exponent = exponent - shift + (-15 + 127);
    193       break;
    194     }
    195 
    196     case FP_NAN:
    197       if (IsSignallingNaN(value)) {
    198         FPProcessException();
    199       }
    200       if (DN()) return kFP32DefaultNaN;
    201 
    202       // Convert NaNs as the processor would:
    203       //  - The sign is propagated.
    204       //  - The payload (mantissa) is transferred entirely, except that the top
    205       //    bit is forced to '1', making the result a quiet NaN. The unused
    206       //    (low-order) payload bits are set to 0.
    207       exponent = (1 << kFloatExponentBits) - 1;
    208 
    209       // Increase bits in mantissa, making low-order bits 0.
    210       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    211       mantissa |= 1 << 22;  // Force a quiet NaN.
    212       break;
    213 
    214     case FP_NORMAL:
    215       // Increase bits in mantissa, making low-order bits 0.
    216       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    217 
    218       // Change exponent bias.
    219       exponent += (-15 + 127);
    220       break;
    221 
    222     default: VIXL_UNREACHABLE();
    223   }
    224   return rawbits_to_float((sign << 31) |
    225                           (exponent << kFloatMantissaBits) |
    226                           mantissa);
    227 }
    228 
    229 
    230 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
    231   // Only the FPTieEven rounding mode is implemented.
    232   VIXL_ASSERT(round_mode == FPTieEven);
    233   USE(round_mode);
    234 
    235   uint32_t raw = float_to_rawbits(value);
    236   int32_t sign = raw >> 31;
    237   int32_t exponent = unsigned_bitextract_32(30, 23, raw) - 127;
    238   uint32_t mantissa = unsigned_bitextract_32(22, 0, raw);
    239 
    240   switch (std::fpclassify(value)) {
    241     case FP_NAN: {
    242       if (IsSignallingNaN(value)) {
    243         FPProcessException();
    244       }
    245       if (DN()) return kFP16DefaultNaN;
    246 
    247       // Convert NaNs as the processor would:
    248       //  - The sign is propagated.
    249       //  - The payload (mantissa) is transferred as much as possible, except
    250       //    that the top bit is forced to '1', making the result a quiet NaN.
    251       float16 result = (sign == 0) ? kFP16PositiveInfinity
    252                                    : kFP16NegativeInfinity;
    253       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
    254       result |= (1 << 9);  // Force a quiet NaN;
    255       return result;
    256     }
    257 
    258     case FP_ZERO:
    259       return (sign == 0) ? 0 : 0x8000;
    260 
    261     case FP_INFINITE:
    262       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    263 
    264     case FP_NORMAL:
    265     case FP_SUBNORMAL: {
    266       // Convert float-to-half as the processor would, assuming that FPCR.FZ
    267       // (flush-to-zero) is not set.
    268 
    269       // Add the implicit '1' bit to the mantissa.
    270       mantissa += (1 << 23);
    271       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    272     }
    273   }
    274 
    275   VIXL_UNREACHABLE();
    276   return 0;
    277 }
    278 
    279 
    280 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
    281   // Only the FPTieEven rounding mode is implemented.
    282   VIXL_ASSERT(round_mode == FPTieEven);
    283   USE(round_mode);
    284 
    285   uint64_t raw = double_to_rawbits(value);
    286   int32_t sign = raw >> 63;
    287   int64_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
    288   uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
    289 
    290   switch (std::fpclassify(value)) {
    291     case FP_NAN: {
    292       if (IsSignallingNaN(value)) {
    293         FPProcessException();
    294       }
    295       if (DN()) return kFP16DefaultNaN;
    296 
    297       // Convert NaNs as the processor would:
    298       //  - The sign is propagated.
    299       //  - The payload (mantissa) is transferred as much as possible, except
    300       //    that the top bit is forced to '1', making the result a quiet NaN.
    301       float16 result = (sign == 0) ? kFP16PositiveInfinity
    302                                    : kFP16NegativeInfinity;
    303       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
    304       result |= (1 << 9);  // Force a quiet NaN;
    305       return result;
    306     }
    307 
    308     case FP_ZERO:
    309       return (sign == 0) ? 0 : 0x8000;
    310 
    311     case FP_INFINITE:
    312       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    313 
    314     case FP_NORMAL:
    315     case FP_SUBNORMAL: {
    316       // Convert double-to-half as the processor would, assuming that FPCR.FZ
    317       // (flush-to-zero) is not set.
    318 
    319       // Add the implicit '1' bit to the mantissa.
    320       mantissa += (UINT64_C(1) << 52);
    321       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    322     }
    323   }
    324 
    325   VIXL_UNREACHABLE();
    326   return 0;
    327 }
    328 
    329 
    330 float Simulator::FPToFloat(double value, FPRounding round_mode) {
    331   // Only the FPTieEven rounding mode is implemented.
    332   VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
    333   USE(round_mode);
    334 
    335   switch (std::fpclassify(value)) {
    336     case FP_NAN: {
    337       if (IsSignallingNaN(value)) {
    338         FPProcessException();
    339       }
    340       if (DN()) return kFP32DefaultNaN;
    341 
    342       // Convert NaNs as the processor would:
    343       //  - The sign is propagated.
    344       //  - The payload (mantissa) is transferred as much as possible, except
    345       //    that the top bit is forced to '1', making the result a quiet NaN.
    346       uint64_t raw = double_to_rawbits(value);
    347 
    348       uint32_t sign = raw >> 63;
    349       uint32_t exponent = (1 << 8) - 1;
    350       uint32_t payload = unsigned_bitextract_64(50, 52 - 23, raw);
    351       payload |= (1 << 22);   // Force a quiet NaN.
    352 
    353       return rawbits_to_float((sign << 31) | (exponent << 23) | payload);
    354     }
    355 
    356     case FP_ZERO:
    357     case FP_INFINITE: {
    358       // In a C++ cast, any value representable in the target type will be
    359       // unchanged. This is always the case for +/-0.0 and infinities.
    360       return static_cast<float>(value);
    361     }
    362 
    363     case FP_NORMAL:
    364     case FP_SUBNORMAL: {
    365       // Convert double-to-float as the processor would, assuming that FPCR.FZ
    366       // (flush-to-zero) is not set.
    367       uint64_t raw = double_to_rawbits(value);
    368       // Extract the IEEE-754 double components.
    369       uint32_t sign = raw >> 63;
    370       // Extract the exponent and remove the IEEE-754 encoding bias.
    371       int32_t exponent = unsigned_bitextract_64(62, 52, raw) - 1023;
    372       // Extract the mantissa and add the implicit '1' bit.
    373       uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
    374       if (std::fpclassify(value) == FP_NORMAL) {
    375         mantissa |= (UINT64_C(1) << 52);
    376       }
    377       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
    378     }
    379   }
    380 
    381   VIXL_UNREACHABLE();
    382   return value;
    383 }
    384 
    385 
    386 void Simulator::ld1(VectorFormat vform,
    387                     LogicVRegister dst,
    388                     uint64_t addr) {
    389   dst.ClearForWrite(vform);
    390   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    391     dst.ReadUintFromMem(vform, i, addr);
    392     addr += LaneSizeInBytesFromFormat(vform);
    393   }
    394 }
    395 
    396 
    397 void Simulator::ld1(VectorFormat vform,
    398                     LogicVRegister dst,
    399                     int index,
    400                     uint64_t addr) {
    401   dst.ReadUintFromMem(vform, index, addr);
    402 }
    403 
    404 
    405 void Simulator::ld1r(VectorFormat vform,
    406                      LogicVRegister dst,
    407                      uint64_t addr) {
    408   dst.ClearForWrite(vform);
    409   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    410     dst.ReadUintFromMem(vform, i, addr);
    411   }
    412 }
    413 
    414 
    415 void Simulator::ld2(VectorFormat vform,
    416                     LogicVRegister dst1,
    417                     LogicVRegister dst2,
    418                     uint64_t addr1) {
    419   dst1.ClearForWrite(vform);
    420   dst2.ClearForWrite(vform);
    421   int esize = LaneSizeInBytesFromFormat(vform);
    422   uint64_t addr2 = addr1 + esize;
    423   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    424     dst1.ReadUintFromMem(vform, i, addr1);
    425     dst2.ReadUintFromMem(vform, i, addr2);
    426     addr1 += 2 * esize;
    427     addr2 += 2 * esize;
    428   }
    429 }
    430 
    431 
    432 void Simulator::ld2(VectorFormat vform,
    433                     LogicVRegister dst1,
    434                     LogicVRegister dst2,
    435                     int index,
    436                     uint64_t addr1) {
    437   dst1.ClearForWrite(vform);
    438   dst2.ClearForWrite(vform);
    439   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    440   dst1.ReadUintFromMem(vform, index, addr1);
    441   dst2.ReadUintFromMem(vform, index, addr2);
    442 }
    443 
    444 
    445 void Simulator::ld2r(VectorFormat vform,
    446                      LogicVRegister dst1,
    447                      LogicVRegister dst2,
    448                      uint64_t addr) {
    449   dst1.ClearForWrite(vform);
    450   dst2.ClearForWrite(vform);
    451   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    452   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    453     dst1.ReadUintFromMem(vform, i, addr);
    454     dst2.ReadUintFromMem(vform, i, addr2);
    455   }
    456 }
    457 
    458 
    459 void Simulator::ld3(VectorFormat vform,
    460                     LogicVRegister dst1,
    461                     LogicVRegister dst2,
    462                     LogicVRegister dst3,
    463                     uint64_t addr1) {
    464   dst1.ClearForWrite(vform);
    465   dst2.ClearForWrite(vform);
    466   dst3.ClearForWrite(vform);
    467   int esize = LaneSizeInBytesFromFormat(vform);
    468   uint64_t addr2 = addr1 + esize;
    469   uint64_t addr3 = addr2 + esize;
    470   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    471     dst1.ReadUintFromMem(vform, i, addr1);
    472     dst2.ReadUintFromMem(vform, i, addr2);
    473     dst3.ReadUintFromMem(vform, i, addr3);
    474     addr1 += 3 * esize;
    475     addr2 += 3 * esize;
    476     addr3 += 3 * esize;
    477   }
    478 }
    479 
    480 
    481 void Simulator::ld3(VectorFormat vform,
    482                     LogicVRegister dst1,
    483                     LogicVRegister dst2,
    484                     LogicVRegister dst3,
    485                     int index,
    486                     uint64_t addr1) {
    487   dst1.ClearForWrite(vform);
    488   dst2.ClearForWrite(vform);
    489   dst3.ClearForWrite(vform);
    490   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    491   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    492   dst1.ReadUintFromMem(vform, index, addr1);
    493   dst2.ReadUintFromMem(vform, index, addr2);
    494   dst3.ReadUintFromMem(vform, index, addr3);
    495 }
    496 
    497 
    498 void Simulator::ld3r(VectorFormat vform,
    499                      LogicVRegister dst1,
    500                      LogicVRegister dst2,
    501                      LogicVRegister dst3,
    502                      uint64_t addr) {
    503   dst1.ClearForWrite(vform);
    504   dst2.ClearForWrite(vform);
    505   dst3.ClearForWrite(vform);
    506   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    507   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    508   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    509     dst1.ReadUintFromMem(vform, i, addr);
    510     dst2.ReadUintFromMem(vform, i, addr2);
    511     dst3.ReadUintFromMem(vform, i, addr3);
    512   }
    513 }
    514 
    515 
    516 void Simulator::ld4(VectorFormat vform,
    517                     LogicVRegister dst1,
    518                     LogicVRegister dst2,
    519                     LogicVRegister dst3,
    520                     LogicVRegister dst4,
    521                     uint64_t addr1) {
    522   dst1.ClearForWrite(vform);
    523   dst2.ClearForWrite(vform);
    524   dst3.ClearForWrite(vform);
    525   dst4.ClearForWrite(vform);
    526   int esize = LaneSizeInBytesFromFormat(vform);
    527   uint64_t addr2 = addr1 + esize;
    528   uint64_t addr3 = addr2 + esize;
    529   uint64_t addr4 = addr3 + esize;
    530   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    531     dst1.ReadUintFromMem(vform, i, addr1);
    532     dst2.ReadUintFromMem(vform, i, addr2);
    533     dst3.ReadUintFromMem(vform, i, addr3);
    534     dst4.ReadUintFromMem(vform, i, addr4);
    535     addr1 += 4 * esize;
    536     addr2 += 4 * esize;
    537     addr3 += 4 * esize;
    538     addr4 += 4 * esize;
    539   }
    540 }
    541 
    542 
    543 void Simulator::ld4(VectorFormat vform,
    544                     LogicVRegister dst1,
    545                     LogicVRegister dst2,
    546                     LogicVRegister dst3,
    547                     LogicVRegister dst4,
    548                     int index,
    549                     uint64_t addr1) {
    550   dst1.ClearForWrite(vform);
    551   dst2.ClearForWrite(vform);
    552   dst3.ClearForWrite(vform);
    553   dst4.ClearForWrite(vform);
    554   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    555   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    556   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    557   dst1.ReadUintFromMem(vform, index, addr1);
    558   dst2.ReadUintFromMem(vform, index, addr2);
    559   dst3.ReadUintFromMem(vform, index, addr3);
    560   dst4.ReadUintFromMem(vform, index, addr4);
    561 }
    562 
    563 
    564 void Simulator::ld4r(VectorFormat vform,
    565                      LogicVRegister dst1,
    566                      LogicVRegister dst2,
    567                      LogicVRegister dst3,
    568                      LogicVRegister dst4,
    569                      uint64_t addr) {
    570   dst1.ClearForWrite(vform);
    571   dst2.ClearForWrite(vform);
    572   dst3.ClearForWrite(vform);
    573   dst4.ClearForWrite(vform);
    574   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    575   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    576   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    577   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    578     dst1.ReadUintFromMem(vform, i, addr);
    579     dst2.ReadUintFromMem(vform, i, addr2);
    580     dst3.ReadUintFromMem(vform, i, addr3);
    581     dst4.ReadUintFromMem(vform, i, addr4);
    582   }
    583 }
    584 
    585 
    586 void Simulator::st1(VectorFormat vform,
    587                     LogicVRegister src,
    588                     uint64_t addr) {
    589   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    590     src.WriteUintToMem(vform, i, addr);
    591     addr += LaneSizeInBytesFromFormat(vform);
    592   }
    593 }
    594 
    595 
    596 void Simulator::st1(VectorFormat vform,
    597                     LogicVRegister src,
    598                     int index,
    599                     uint64_t addr) {
    600   src.WriteUintToMem(vform, index, addr);
    601 }
    602 
    603 
    604 void Simulator::st2(VectorFormat vform,
    605                     LogicVRegister dst,
    606                     LogicVRegister dst2,
    607                     uint64_t addr) {
    608   int esize = LaneSizeInBytesFromFormat(vform);
    609   uint64_t addr2 = addr + esize;
    610   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    611     dst.WriteUintToMem(vform, i, addr);
    612     dst2.WriteUintToMem(vform, i, addr2);
    613     addr += 2 * esize;
    614     addr2 += 2 * esize;
    615   }
    616 }
    617 
    618 
    619 void Simulator::st2(VectorFormat vform,
    620                     LogicVRegister dst,
    621                     LogicVRegister dst2,
    622                     int index,
    623                     uint64_t addr) {
    624   int esize = LaneSizeInBytesFromFormat(vform);
    625   dst.WriteUintToMem(vform, index, addr);
    626   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    627 }
    628 
    629 
    630 void Simulator::st3(VectorFormat vform,
    631                     LogicVRegister dst,
    632                     LogicVRegister dst2,
    633                     LogicVRegister dst3,
    634                     uint64_t addr) {
    635   int esize = LaneSizeInBytesFromFormat(vform);
    636   uint64_t addr2 = addr + esize;
    637   uint64_t addr3 = addr2 + esize;
    638   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    639     dst.WriteUintToMem(vform, i, addr);
    640     dst2.WriteUintToMem(vform, i, addr2);
    641     dst3.WriteUintToMem(vform, i, addr3);
    642     addr += 3 * esize;
    643     addr2 += 3 * esize;
    644     addr3 += 3 * esize;
    645   }
    646 }
    647 
    648 
    649 void Simulator::st3(VectorFormat vform,
    650                     LogicVRegister dst,
    651                     LogicVRegister dst2,
    652                     LogicVRegister dst3,
    653                     int index,
    654                     uint64_t addr) {
    655   int esize = LaneSizeInBytesFromFormat(vform);
    656   dst.WriteUintToMem(vform, index, addr);
    657   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    658   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    659 }
    660 
    661 
    662 void Simulator::st4(VectorFormat vform,
    663                     LogicVRegister dst,
    664                     LogicVRegister dst2,
    665                     LogicVRegister dst3,
    666                     LogicVRegister dst4,
    667                     uint64_t addr) {
    668   int esize = LaneSizeInBytesFromFormat(vform);
    669   uint64_t addr2 = addr + esize;
    670   uint64_t addr3 = addr2 + esize;
    671   uint64_t addr4 = addr3 + esize;
    672   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    673     dst.WriteUintToMem(vform, i, addr);
    674     dst2.WriteUintToMem(vform, i, addr2);
    675     dst3.WriteUintToMem(vform, i, addr3);
    676     dst4.WriteUintToMem(vform, i, addr4);
    677     addr += 4 * esize;
    678     addr2 += 4 * esize;
    679     addr3 += 4 * esize;
    680     addr4 += 4 * esize;
    681   }
    682 }
    683 
    684 
    685 void Simulator::st4(VectorFormat vform,
    686                     LogicVRegister dst,
    687                     LogicVRegister dst2,
    688                     LogicVRegister dst3,
    689                     LogicVRegister dst4,
    690                     int index,
    691                     uint64_t addr) {
    692   int esize = LaneSizeInBytesFromFormat(vform);
    693   dst.WriteUintToMem(vform, index, addr);
    694   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    695   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    696   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
    697 }
    698 
    699 
    700 LogicVRegister Simulator::cmp(VectorFormat vform,
    701                               LogicVRegister dst,
    702                               const LogicVRegister& src1,
    703                               const LogicVRegister& src2,
    704                               Condition cond) {
    705   dst.ClearForWrite(vform);
    706   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    707     int64_t  sa = src1.Int(vform, i);
    708     int64_t  sb = src2.Int(vform, i);
    709     uint64_t ua = src1.Uint(vform, i);
    710     uint64_t ub = src2.Uint(vform, i);
    711     bool result = false;
    712     switch (cond) {
    713       case eq: result = (ua == ub); break;
    714       case ge: result = (sa >= sb); break;
    715       case gt: result = (sa > sb) ; break;
    716       case hi: result = (ua > ub) ; break;
    717       case hs: result = (ua >= ub); break;
    718       case lt: result = (sa < sb) ; break;
    719       case le: result = (sa <= sb); break;
    720       default: VIXL_UNREACHABLE(); break;
    721     }
    722     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
    723   }
    724   return dst;
    725 }
    726 
    727 
    728 LogicVRegister Simulator::cmp(VectorFormat vform,
    729                               LogicVRegister dst,
    730                               const LogicVRegister& src1,
    731                               int imm,
    732                               Condition cond) {
    733   SimVRegister temp;
    734   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
    735   return cmp(vform, dst, src1, imm_reg, cond);
    736 }
    737 
    738 
    739 LogicVRegister Simulator::cmptst(VectorFormat vform,
    740                                  LogicVRegister dst,
    741                                  const LogicVRegister& src1,
    742                                  const LogicVRegister& src2) {
    743   dst.ClearForWrite(vform);
    744   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    745     uint64_t ua = src1.Uint(vform, i);
    746     uint64_t ub = src2.Uint(vform, i);
    747     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
    748   }
    749   return dst;
    750 }
    751 
    752 
    753 LogicVRegister Simulator::add(VectorFormat vform,
    754                               LogicVRegister dst,
    755                               const LogicVRegister& src1,
    756                               const LogicVRegister& src2) {
    757   dst.ClearForWrite(vform);
    758   // TODO(all): consider assigning the result of LaneCountFromFormat to a local.
    759   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    760     // Test for unsigned saturation.
    761     uint64_t ua = src1.UintLeftJustified(vform, i);
    762     uint64_t ub = src2.UintLeftJustified(vform, i);
    763     uint64_t ur = ua + ub;
    764     if (ur < ua) {
    765       dst.SetUnsignedSat(i, true);
    766     }
    767 
    768     // Test for signed saturation.
    769     int64_t sa = src1.IntLeftJustified(vform, i);
    770     int64_t sb = src2.IntLeftJustified(vform, i);
    771     int64_t sr = sa + sb;
    772     // If the signs of the operands are the same, but different from the result,
    773     // there was an overflow.
    774     if (((sa >= 0) == (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
    775       dst.SetSignedSat(i, sa >= 0);
    776     }
    777 
    778     dst.SetInt(vform, i, src1.Int(vform, i) + src2.Int(vform, i));
    779   }
    780   return dst;
    781 }
    782 
    783 
    784 LogicVRegister Simulator::addp(VectorFormat vform,
    785                                LogicVRegister dst,
    786                                const LogicVRegister& src1,
    787                                const LogicVRegister& src2) {
    788   SimVRegister temp1, temp2;
    789   uzp1(vform, temp1, src1, src2);
    790   uzp2(vform, temp2, src1, src2);
    791   add(vform, dst, temp1, temp2);
    792   return dst;
    793 }
    794 
    795 
    796 LogicVRegister Simulator::mla(VectorFormat vform,
    797                               LogicVRegister dst,
    798                               const LogicVRegister& src1,
    799                               const LogicVRegister& src2) {
    800   SimVRegister temp;
    801   mul(vform, temp, src1, src2);
    802   add(vform, dst, dst, temp);
    803   return dst;
    804 }
    805 
    806 
    807 LogicVRegister Simulator::mls(VectorFormat vform,
    808                               LogicVRegister dst,
    809                               const LogicVRegister& src1,
    810                               const LogicVRegister& src2) {
    811   SimVRegister temp;
    812   mul(vform, temp, src1, src2);
    813   sub(vform, dst, dst, temp);
    814   return dst;
    815 }
    816 
    817 
    818 LogicVRegister Simulator::mul(VectorFormat vform,
    819                               LogicVRegister dst,
    820                               const LogicVRegister& src1,
    821                               const LogicVRegister& src2) {
    822   dst.ClearForWrite(vform);
    823   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    824     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
    825   }
    826   return dst;
    827 }
    828 
    829 
    830 LogicVRegister Simulator::mul(VectorFormat vform,
    831                               LogicVRegister dst,
    832                               const LogicVRegister& src1,
    833                               const LogicVRegister& src2,
    834                               int index) {
    835   SimVRegister temp;
    836   VectorFormat indexform = VectorFormatFillQ(vform);
    837   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
    838 }
    839 
    840 
    841 LogicVRegister Simulator::mla(VectorFormat vform,
    842                               LogicVRegister dst,
    843                               const LogicVRegister& src1,
    844                               const LogicVRegister& src2,
    845                               int index) {
    846   SimVRegister temp;
    847   VectorFormat indexform = VectorFormatFillQ(vform);
    848   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
    849 }
    850 
    851 
    852 LogicVRegister Simulator::mls(VectorFormat vform,
    853                               LogicVRegister dst,
    854                               const LogicVRegister& src1,
    855                               const LogicVRegister& src2,
    856                               int index) {
    857   SimVRegister temp;
    858   VectorFormat indexform = VectorFormatFillQ(vform);
    859   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
    860 }
    861 
    862 
    863 LogicVRegister Simulator::smull(VectorFormat vform,
    864                                 LogicVRegister dst,
    865                                 const LogicVRegister& src1,
    866                                 const LogicVRegister& src2,
    867                                 int index) {
    868   SimVRegister temp;
    869   VectorFormat indexform =
    870                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    871   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    872 }
    873 
    874 
    875 LogicVRegister Simulator::smull2(VectorFormat vform,
    876                                 LogicVRegister dst,
    877                                 const LogicVRegister& src1,
    878                                 const LogicVRegister& src2,
    879                                 int index) {
    880   SimVRegister temp;
    881   VectorFormat indexform =
    882                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    883   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    884 }
    885 
    886 
    887 LogicVRegister Simulator::umull(VectorFormat vform,
    888                                 LogicVRegister dst,
    889                                 const LogicVRegister& src1,
    890                                 const LogicVRegister& src2,
    891                                 int index) {
    892   SimVRegister temp;
    893   VectorFormat indexform =
    894                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    895   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    896 }
    897 
    898 
    899 LogicVRegister Simulator::umull2(VectorFormat vform,
    900                                 LogicVRegister dst,
    901                                 const LogicVRegister& src1,
    902                                 const LogicVRegister& src2,
    903                                 int index) {
    904   SimVRegister temp;
    905   VectorFormat indexform =
    906                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    907   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    908 }
    909 
    910 
    911 LogicVRegister Simulator::smlal(VectorFormat vform,
    912                                 LogicVRegister dst,
    913                                 const LogicVRegister& src1,
    914                                 const LogicVRegister& src2,
    915                                 int index) {
    916   SimVRegister temp;
    917   VectorFormat indexform =
    918                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    919   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    920 }
    921 
    922 
    923 LogicVRegister Simulator::smlal2(VectorFormat vform,
    924                                 LogicVRegister dst,
    925                                 const LogicVRegister& src1,
    926                                 const LogicVRegister& src2,
    927                                 int index) {
    928   SimVRegister temp;
    929   VectorFormat indexform =
    930                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    931   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    932 }
    933 
    934 
    935 LogicVRegister Simulator::umlal(VectorFormat vform,
    936                                 LogicVRegister dst,
    937                                 const LogicVRegister& src1,
    938                                 const LogicVRegister& src2,
    939                                 int index) {
    940   SimVRegister temp;
    941   VectorFormat indexform =
    942                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    943   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    944 }
    945 
    946 
    947 LogicVRegister Simulator::umlal2(VectorFormat vform,
    948                                 LogicVRegister dst,
    949                                 const LogicVRegister& src1,
    950                                 const LogicVRegister& src2,
    951                                 int index) {
    952   SimVRegister temp;
    953   VectorFormat indexform =
    954                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    955   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    956 }
    957 
    958 
    959 LogicVRegister Simulator::smlsl(VectorFormat vform,
    960                                 LogicVRegister dst,
    961                                 const LogicVRegister& src1,
    962                                 const LogicVRegister& src2,
    963                                 int index) {
    964   SimVRegister temp;
    965   VectorFormat indexform =
    966                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    967   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    968 }
    969 
    970 
    971 LogicVRegister Simulator::smlsl2(VectorFormat vform,
    972                                 LogicVRegister dst,
    973                                 const LogicVRegister& src1,
    974                                 const LogicVRegister& src2,
    975                                 int index) {
    976   SimVRegister temp;
    977   VectorFormat indexform =
    978                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    979   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    980 }
    981 
    982 
    983 LogicVRegister Simulator::umlsl(VectorFormat vform,
    984                                 LogicVRegister dst,
    985                                 const LogicVRegister& src1,
    986                                 const LogicVRegister& src2,
    987                                 int index) {
    988   SimVRegister temp;
    989   VectorFormat indexform =
    990                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    991   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    992 }
    993 
    994 
    995 LogicVRegister Simulator::umlsl2(VectorFormat vform,
    996                                 LogicVRegister dst,
    997                                 const LogicVRegister& src1,
    998                                 const LogicVRegister& src2,
    999                                 int index) {
   1000   SimVRegister temp;
   1001   VectorFormat indexform =
   1002                VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1003   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1004 }
   1005 
   1006 
   1007 LogicVRegister Simulator::sqdmull(VectorFormat vform,
   1008                                   LogicVRegister dst,
   1009                                   const LogicVRegister& src1,
   1010                                   const LogicVRegister& src2,
   1011                                   int index) {
   1012   SimVRegister temp;
   1013   VectorFormat indexform =
   1014       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1015   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1016 }
   1017 
   1018 
   1019 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
   1020                                   LogicVRegister dst,
   1021                                   const LogicVRegister& src1,
   1022                                   const LogicVRegister& src2,
   1023                                   int index) {
   1024   SimVRegister temp;
   1025   VectorFormat indexform =
   1026       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1027   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1028 }
   1029 
   1030 
   1031 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
   1032                                   LogicVRegister dst,
   1033                                   const LogicVRegister& src1,
   1034                                   const LogicVRegister& src2,
   1035                                   int index) {
   1036   SimVRegister temp;
   1037   VectorFormat indexform =
   1038       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1039   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1040 }
   1041 
   1042 
   1043 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
   1044                                   LogicVRegister dst,
   1045                                   const LogicVRegister& src1,
   1046                                   const LogicVRegister& src2,
   1047                                   int index) {
   1048   SimVRegister temp;
   1049   VectorFormat indexform =
   1050       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1051   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1052 }
   1053 
   1054 
   1055 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
   1056                                   LogicVRegister dst,
   1057                                   const LogicVRegister& src1,
   1058                                   const LogicVRegister& src2,
   1059                                   int index) {
   1060   SimVRegister temp;
   1061   VectorFormat indexform =
   1062       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1063   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1064 }
   1065 
   1066 
   1067 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
   1068                                   LogicVRegister dst,
   1069                                   const LogicVRegister& src1,
   1070                                   const LogicVRegister& src2,
   1071                                   int index) {
   1072   SimVRegister temp;
   1073   VectorFormat indexform =
   1074       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1075   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1076 }
   1077 
   1078 
   1079 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
   1080                                   LogicVRegister dst,
   1081                                   const LogicVRegister& src1,
   1082                                   const LogicVRegister& src2,
   1083                                   int index) {
   1084   SimVRegister temp;
   1085   VectorFormat indexform = VectorFormatFillQ(vform);
   1086   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1087 }
   1088 
   1089 
   1090 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
   1091                                   LogicVRegister dst,
   1092                                   const LogicVRegister& src1,
   1093                                   const LogicVRegister& src2,
   1094                                   int index) {
   1095   SimVRegister temp;
   1096   VectorFormat indexform = VectorFormatFillQ(vform);
   1097   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1098 }
   1099 
   1100 
   1101 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
   1102   uint16_t result = 0;
   1103   uint16_t extended_op2 = op2;
   1104   for (int i = 0; i < 8; ++i) {
   1105     if ((op1 >> i) & 1) {
   1106       result = result ^ (extended_op2 << i);
   1107     }
   1108   }
   1109   return result;
   1110 }
   1111 
   1112 
   1113 LogicVRegister Simulator::pmul(VectorFormat vform,
   1114                                LogicVRegister dst,
   1115                                const LogicVRegister& src1,
   1116                                const LogicVRegister& src2) {
   1117   dst.ClearForWrite(vform);
   1118   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1119     dst.SetUint(vform, i,
   1120                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
   1121   }
   1122   return dst;
   1123 }
   1124 
   1125 
   1126 LogicVRegister Simulator::pmull(VectorFormat vform,
   1127                                LogicVRegister dst,
   1128                                const LogicVRegister& src1,
   1129                                const LogicVRegister& src2) {
   1130   VectorFormat vform_src = VectorFormatHalfWidth(vform);
   1131   dst.ClearForWrite(vform);
   1132   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1133     dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, i),
   1134                                          src2.Uint(vform_src, i)));
   1135   }
   1136   return dst;
   1137 }
   1138 
   1139 
   1140 LogicVRegister Simulator::pmull2(VectorFormat vform,
   1141                                 LogicVRegister dst,
   1142                                 const LogicVRegister& src1,
   1143                                 const LogicVRegister& src2) {
   1144   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
   1145   dst.ClearForWrite(vform);
   1146   int lane_count = LaneCountFromFormat(vform);
   1147   for (int i = 0; i < lane_count; i++) {
   1148     dst.SetUint(vform, i, PolynomialMult(src1.Uint(vform_src, lane_count + i),
   1149                                          src2.Uint(vform_src, lane_count + i)));
   1150   }
   1151   return dst;
   1152 }
   1153 
   1154 
   1155 LogicVRegister Simulator::sub(VectorFormat vform,
   1156                               LogicVRegister dst,
   1157                               const LogicVRegister& src1,
   1158                               const LogicVRegister& src2) {
   1159   dst.ClearForWrite(vform);
   1160   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1161     // Test for unsigned saturation.
   1162     if (src2.Uint(vform, i) > src1.Uint(vform, i)) {
   1163       dst.SetUnsignedSat(i, false);
   1164     }
   1165 
   1166     // Test for signed saturation.
   1167     int64_t sa = src1.IntLeftJustified(vform, i);
   1168     int64_t sb = src2.IntLeftJustified(vform, i);
   1169     int64_t sr = sa - sb;
   1170     // If the signs of the operands are different, and the sign of the first
   1171     // operand doesn't match the result, there was an overflow.
   1172     if (((sa >= 0) != (sb >= 0)) && ((sa >= 0) != (sr >= 0))) {
   1173       dst.SetSignedSat(i, sr < 0);
   1174     }
   1175 
   1176     dst.SetInt(vform, i, src1.Int(vform, i) - src2.Int(vform, i));
   1177   }
   1178   return dst;
   1179 }
   1180 
   1181 
   1182 LogicVRegister Simulator::and_(VectorFormat vform,
   1183                                LogicVRegister dst,
   1184                                const LogicVRegister& src1,
   1185                                const LogicVRegister& src2) {
   1186   dst.ClearForWrite(vform);
   1187   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1188     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
   1189   }
   1190   return dst;
   1191 }
   1192 
   1193 
   1194 LogicVRegister Simulator::orr(VectorFormat vform,
   1195                               LogicVRegister dst,
   1196                               const LogicVRegister& src1,
   1197                               const LogicVRegister& src2) {
   1198   dst.ClearForWrite(vform);
   1199   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1200     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
   1201   }
   1202   return dst;
   1203 }
   1204 
   1205 
   1206 LogicVRegister Simulator::orn(VectorFormat vform,
   1207                               LogicVRegister dst,
   1208                               const LogicVRegister& src1,
   1209                               const LogicVRegister& src2) {
   1210   dst.ClearForWrite(vform);
   1211   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1212     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
   1213   }
   1214   return dst;
   1215 }
   1216 
   1217 
   1218 LogicVRegister Simulator::eor(VectorFormat vform,
   1219                               LogicVRegister dst,
   1220                               const LogicVRegister& src1,
   1221                               const LogicVRegister& src2) {
   1222   dst.ClearForWrite(vform);
   1223   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1224     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
   1225   }
   1226   return dst;
   1227 }
   1228 
   1229 
   1230 LogicVRegister Simulator::bic(VectorFormat vform,
   1231                               LogicVRegister dst,
   1232                               const LogicVRegister& src1,
   1233                               const LogicVRegister& src2) {
   1234   dst.ClearForWrite(vform);
   1235   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1236     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
   1237   }
   1238   return dst;
   1239 }
   1240 
   1241 
   1242 LogicVRegister Simulator::bic(VectorFormat vform,
   1243                               LogicVRegister dst,
   1244                               const LogicVRegister& src,
   1245                               uint64_t imm) {
   1246   uint64_t result[16];
   1247   int laneCount = LaneCountFromFormat(vform);
   1248   for (int i = 0; i < laneCount; ++i) {
   1249     result[i] = src.Uint(vform, i) & ~imm;
   1250   }
   1251   dst.ClearForWrite(vform);
   1252   for (int i = 0; i < laneCount; ++i) {
   1253     dst.SetUint(vform, i, result[i]);
   1254   }
   1255   return dst;
   1256 }
   1257 
   1258 
   1259 LogicVRegister Simulator::bif(VectorFormat vform,
   1260                               LogicVRegister dst,
   1261                               const LogicVRegister& src1,
   1262                               const LogicVRegister& src2) {
   1263   dst.ClearForWrite(vform);
   1264   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1265     uint64_t operand1 = dst.Uint(vform, i);
   1266     uint64_t operand2 = ~src2.Uint(vform, i);
   1267     uint64_t operand3 = src1.Uint(vform, i);
   1268     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1269     dst.SetUint(vform, i, result);
   1270   }
   1271   return dst;
   1272 }
   1273 
   1274 
   1275 LogicVRegister Simulator::bit(VectorFormat vform,
   1276                               LogicVRegister dst,
   1277                               const LogicVRegister& src1,
   1278                               const LogicVRegister& src2) {
   1279   dst.ClearForWrite(vform);
   1280   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1281     uint64_t operand1 = dst.Uint(vform, i);
   1282     uint64_t operand2 = src2.Uint(vform, i);
   1283     uint64_t operand3 = src1.Uint(vform, i);
   1284     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1285     dst.SetUint(vform, i, result);
   1286   }
   1287   return dst;
   1288 }
   1289 
   1290 
   1291 LogicVRegister Simulator::bsl(VectorFormat vform,
   1292                               LogicVRegister dst,
   1293                               const LogicVRegister& src1,
   1294                               const LogicVRegister& src2) {
   1295   dst.ClearForWrite(vform);
   1296   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1297     uint64_t operand1 = src2.Uint(vform, i);
   1298     uint64_t operand2 = dst.Uint(vform, i);
   1299     uint64_t operand3 = src1.Uint(vform, i);
   1300     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1301     dst.SetUint(vform, i, result);
   1302   }
   1303   return dst;
   1304 }
   1305 
   1306 
   1307 LogicVRegister Simulator::sminmax(VectorFormat vform,
   1308                                   LogicVRegister dst,
   1309                                   const LogicVRegister& src1,
   1310                                   const LogicVRegister& src2,
   1311                                   bool max) {
   1312   dst.ClearForWrite(vform);
   1313   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1314     int64_t src1_val = src1.Int(vform, i);
   1315     int64_t src2_val = src2.Int(vform, i);
   1316     int64_t dst_val;
   1317     if (max == true) {
   1318       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1319     } else {
   1320       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1321     }
   1322     dst.SetInt(vform, i, dst_val);
   1323   }
   1324   return dst;
   1325 }
   1326 
   1327 
   1328 LogicVRegister Simulator::smax(VectorFormat vform,
   1329                                LogicVRegister dst,
   1330                                const LogicVRegister& src1,
   1331                                const LogicVRegister& src2) {
   1332   return sminmax(vform, dst, src1, src2, true);
   1333 }
   1334 
   1335 
   1336 LogicVRegister Simulator::smin(VectorFormat vform,
   1337                                LogicVRegister dst,
   1338                                const LogicVRegister& src1,
   1339                                const LogicVRegister& src2) {
   1340   return sminmax(vform, dst, src1, src2, false);
   1341 }
   1342 
   1343 
   1344 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
   1345                                    LogicVRegister dst,
   1346                                    int dst_index,
   1347                                    const LogicVRegister& src,
   1348                                    bool max) {
   1349   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
   1350     int64_t src1_val = src.Int(vform, i);
   1351     int64_t src2_val = src.Int(vform, i + 1);
   1352     int64_t dst_val;
   1353     if (max == true) {
   1354       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1355     } else {
   1356       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1357     }
   1358     dst.SetInt(vform, dst_index + (i >> 1), dst_val);
   1359   }
   1360   return dst;
   1361 }
   1362 
   1363 
   1364 LogicVRegister Simulator::smaxp(VectorFormat vform,
   1365                                 LogicVRegister dst,
   1366                                 const LogicVRegister& src1,
   1367                                 const LogicVRegister& src2) {
   1368   dst.ClearForWrite(vform);
   1369   sminmaxp(vform, dst, 0, src1, true);
   1370   sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
   1371   return dst;
   1372 }
   1373 
   1374 
   1375 LogicVRegister Simulator::sminp(VectorFormat vform,
   1376                                 LogicVRegister dst,
   1377                                 const LogicVRegister& src1,
   1378                                 const LogicVRegister& src2) {
   1379   dst.ClearForWrite(vform);
   1380   sminmaxp(vform, dst, 0, src1, false);
   1381   sminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
   1382   return dst;
   1383 }
   1384 
   1385 
   1386 LogicVRegister Simulator::addp(VectorFormat vform,
   1387                                LogicVRegister dst,
   1388                                const LogicVRegister& src) {
   1389   VIXL_ASSERT(vform == kFormatD);
   1390 
   1391   int64_t dst_val = src.Int(kFormat2D, 0) + src.Int(kFormat2D, 1);
   1392   dst.ClearForWrite(vform);
   1393   dst.SetInt(vform, 0, dst_val);
   1394   return dst;
   1395 }
   1396 
   1397 
   1398 LogicVRegister Simulator::addv(VectorFormat vform,
   1399                                LogicVRegister dst,
   1400                                const LogicVRegister& src) {
   1401   VectorFormat vform_dst
   1402     = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
   1403 
   1404 
   1405   int64_t dst_val = 0;
   1406   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1407     dst_val += src.Int(vform, i);
   1408   }
   1409 
   1410   dst.ClearForWrite(vform_dst);
   1411   dst.SetInt(vform_dst, 0, dst_val);
   1412   return dst;
   1413 }
   1414 
   1415 
   1416 LogicVRegister Simulator::saddlv(VectorFormat vform,
   1417                                  LogicVRegister dst,
   1418                                  const LogicVRegister& src) {
   1419   VectorFormat vform_dst
   1420     = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1421 
   1422   int64_t dst_val = 0;
   1423   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1424     dst_val += src.Int(vform, i);
   1425   }
   1426 
   1427   dst.ClearForWrite(vform_dst);
   1428   dst.SetInt(vform_dst, 0, dst_val);
   1429   return dst;
   1430 }
   1431 
   1432 
   1433 LogicVRegister Simulator::uaddlv(VectorFormat vform,
   1434                                  LogicVRegister dst,
   1435                                  const LogicVRegister& src) {
   1436   VectorFormat vform_dst
   1437     = ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1438 
   1439   uint64_t dst_val = 0;
   1440   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1441     dst_val += src.Uint(vform, i);
   1442   }
   1443 
   1444   dst.ClearForWrite(vform_dst);
   1445   dst.SetUint(vform_dst, 0, dst_val);
   1446   return dst;
   1447 }
   1448 
   1449 
   1450 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
   1451                                    LogicVRegister dst,
   1452                                    const LogicVRegister& src,
   1453                                    bool max) {
   1454   dst.ClearForWrite(vform);
   1455   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
   1456   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1457     dst.SetInt(vform, i, 0);
   1458     int64_t src_val = src.Int(vform, i);
   1459     if (max == true) {
   1460       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1461     } else {
   1462       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1463     }
   1464   }
   1465   dst.SetInt(vform, 0, dst_val);
   1466   return dst;
   1467 }
   1468 
   1469 
   1470 LogicVRegister Simulator::smaxv(VectorFormat vform,
   1471                                 LogicVRegister dst,
   1472                                 const LogicVRegister& src) {
   1473   sminmaxv(vform, dst, src, true);
   1474   return dst;
   1475 }
   1476 
   1477 
   1478 LogicVRegister Simulator::sminv(VectorFormat vform,
   1479                                 LogicVRegister dst,
   1480                                 const LogicVRegister& src) {
   1481   sminmaxv(vform, dst, src, false);
   1482   return dst;
   1483 }
   1484 
   1485 
   1486 LogicVRegister Simulator::uminmax(VectorFormat vform,
   1487                                   LogicVRegister dst,
   1488                                   const LogicVRegister& src1,
   1489                                   const LogicVRegister& src2,
   1490                                   bool max) {
   1491   dst.ClearForWrite(vform);
   1492   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1493     uint64_t src1_val = src1.Uint(vform, i);
   1494     uint64_t src2_val = src2.Uint(vform, i);
   1495     uint64_t dst_val;
   1496     if (max == true) {
   1497       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1498     } else {
   1499       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1500     }
   1501     dst.SetUint(vform, i, dst_val);
   1502   }
   1503   return dst;
   1504 }
   1505 
   1506 
   1507 LogicVRegister Simulator::umax(VectorFormat vform,
   1508                                LogicVRegister dst,
   1509                                const LogicVRegister& src1,
   1510                                const LogicVRegister& src2) {
   1511   return uminmax(vform, dst, src1, src2, true);
   1512 }
   1513 
   1514 
   1515 LogicVRegister Simulator::umin(VectorFormat vform,
   1516                                LogicVRegister dst,
   1517                                const LogicVRegister& src1,
   1518                                const LogicVRegister& src2) {
   1519   return uminmax(vform, dst, src1, src2, false);
   1520 }
   1521 
   1522 
   1523 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
   1524                                    LogicVRegister dst,
   1525                                    int dst_index,
   1526                                    const LogicVRegister& src,
   1527                                    bool max) {
   1528   for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
   1529     uint64_t src1_val = src.Uint(vform, i);
   1530     uint64_t src2_val = src.Uint(vform, i + 1);
   1531     uint64_t dst_val;
   1532     if (max == true) {
   1533       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1534     } else {
   1535       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1536     }
   1537     dst.SetUint(vform, dst_index + (i >> 1), dst_val);
   1538   }
   1539   return dst;
   1540 }
   1541 
   1542 
   1543 LogicVRegister Simulator::umaxp(VectorFormat vform,
   1544                                 LogicVRegister dst,
   1545                                 const LogicVRegister& src1,
   1546                                 const LogicVRegister& src2) {
   1547   dst.ClearForWrite(vform);
   1548   uminmaxp(vform, dst, 0, src1, true);
   1549   uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, true);
   1550   return dst;
   1551 }
   1552 
   1553 
   1554 LogicVRegister Simulator::uminp(VectorFormat vform,
   1555                                 LogicVRegister dst,
   1556                                 const LogicVRegister& src1,
   1557                                 const LogicVRegister& src2) {
   1558   dst.ClearForWrite(vform);
   1559   uminmaxp(vform, dst, 0, src1, false);
   1560   uminmaxp(vform, dst, LaneCountFromFormat(vform) >> 1, src2, false);
   1561   return dst;
   1562 }
   1563 
   1564 
   1565 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
   1566                                    LogicVRegister dst,
   1567                                    const LogicVRegister& src,
   1568                                    bool max) {
   1569   dst.ClearForWrite(vform);
   1570   uint64_t dst_val = max ? 0 : UINT64_MAX;
   1571   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1572     dst.SetUint(vform, i, 0);
   1573     uint64_t src_val = src.Uint(vform, i);
   1574     if (max == true) {
   1575       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1576     } else {
   1577       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1578     }
   1579   }
   1580   dst.SetUint(vform, 0, dst_val);
   1581   return dst;
   1582 }
   1583 
   1584 
   1585 LogicVRegister Simulator::umaxv(VectorFormat vform,
   1586                                 LogicVRegister dst,
   1587                                 const LogicVRegister& src) {
   1588   uminmaxv(vform, dst, src, true);
   1589   return dst;
   1590 }
   1591 
   1592 
   1593 LogicVRegister Simulator::uminv(VectorFormat vform,
   1594                                 LogicVRegister dst,
   1595                                 const LogicVRegister& src) {
   1596   uminmaxv(vform, dst, src, false);
   1597   return dst;
   1598 }
   1599 
   1600 
   1601 LogicVRegister Simulator::shl(VectorFormat vform,
   1602                               LogicVRegister dst,
   1603                               const LogicVRegister& src,
   1604                               int shift) {
   1605   VIXL_ASSERT(shift >= 0);
   1606   SimVRegister temp;
   1607   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1608   return ushl(vform, dst, src, shiftreg);
   1609 }
   1610 
   1611 
   1612 LogicVRegister Simulator::sshll(VectorFormat vform,
   1613                                 LogicVRegister dst,
   1614                                 const LogicVRegister& src,
   1615                                 int shift) {
   1616   VIXL_ASSERT(shift >= 0);
   1617   SimVRegister temp1, temp2;
   1618   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1619   LogicVRegister extendedreg = sxtl(vform, temp2, src);
   1620   return sshl(vform, dst, extendedreg, shiftreg);
   1621 }
   1622 
   1623 
   1624 LogicVRegister Simulator::sshll2(VectorFormat vform,
   1625                                  LogicVRegister dst,
   1626                                  const LogicVRegister& src,
   1627                                  int shift) {
   1628   VIXL_ASSERT(shift >= 0);
   1629   SimVRegister temp1, temp2;
   1630   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1631   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
   1632   return sshl(vform, dst, extendedreg, shiftreg);
   1633 }
   1634 
   1635 
   1636 LogicVRegister Simulator::shll(VectorFormat vform,
   1637                                LogicVRegister dst,
   1638                                const LogicVRegister& src) {
   1639   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1640   return sshll(vform, dst, src, shift);
   1641 }
   1642 
   1643 
   1644 LogicVRegister Simulator::shll2(VectorFormat vform,
   1645                                 LogicVRegister dst,
   1646                                 const LogicVRegister& src) {
   1647   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1648   return sshll2(vform, dst, src, shift);
   1649 }
   1650 
   1651 
   1652 LogicVRegister Simulator::ushll(VectorFormat vform,
   1653                                 LogicVRegister dst,
   1654                                 const LogicVRegister& src,
   1655                                 int shift) {
   1656   VIXL_ASSERT(shift >= 0);
   1657   SimVRegister temp1, temp2;
   1658   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1659   LogicVRegister extendedreg = uxtl(vform, temp2, src);
   1660   return ushl(vform, dst, extendedreg, shiftreg);
   1661 }
   1662 
   1663 
   1664 LogicVRegister Simulator::ushll2(VectorFormat vform,
   1665                                  LogicVRegister dst,
   1666                                  const LogicVRegister& src,
   1667                                  int shift) {
   1668   VIXL_ASSERT(shift >= 0);
   1669   SimVRegister temp1, temp2;
   1670   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1671   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
   1672   return ushl(vform, dst, extendedreg, shiftreg);
   1673 }
   1674 
   1675 
   1676 LogicVRegister Simulator::sli(VectorFormat vform,
   1677                               LogicVRegister dst,
   1678                               const LogicVRegister& src,
   1679                               int shift) {
   1680   dst.ClearForWrite(vform);
   1681   int laneCount = LaneCountFromFormat(vform);
   1682   for (int i = 0; i < laneCount; i++) {
   1683     uint64_t src_lane = src.Uint(vform, i);
   1684     uint64_t dst_lane = dst.Uint(vform, i);
   1685     uint64_t shifted = src_lane << shift;
   1686     uint64_t mask = MaxUintFromFormat(vform) << shift;
   1687     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1688   }
   1689   return dst;
   1690 }
   1691 
   1692 
   1693 LogicVRegister Simulator::sqshl(VectorFormat vform,
   1694                                 LogicVRegister dst,
   1695                                 const LogicVRegister& src,
   1696                                 int shift) {
   1697   VIXL_ASSERT(shift >= 0);
   1698   SimVRegister temp;
   1699   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1700   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
   1701 }
   1702 
   1703 
   1704 LogicVRegister Simulator::uqshl(VectorFormat vform,
   1705                                 LogicVRegister dst,
   1706                                 const LogicVRegister& src,
   1707                                 int shift) {
   1708   VIXL_ASSERT(shift >= 0);
   1709   SimVRegister temp;
   1710   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1711   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1712 }
   1713 
   1714 
   1715 LogicVRegister Simulator::sqshlu(VectorFormat vform,
   1716                                  LogicVRegister dst,
   1717                                  const LogicVRegister& src,
   1718                                  int shift) {
   1719   VIXL_ASSERT(shift >= 0);
   1720   SimVRegister temp;
   1721   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1722   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1723 }
   1724 
   1725 
   1726 LogicVRegister Simulator::sri(VectorFormat vform,
   1727                               LogicVRegister dst,
   1728                               const LogicVRegister& src,
   1729                               int shift) {
   1730   dst.ClearForWrite(vform);
   1731   int laneCount = LaneCountFromFormat(vform);
   1732   VIXL_ASSERT((shift > 0) &&
   1733               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
   1734   for (int i = 0; i < laneCount; i++) {
   1735     uint64_t src_lane = src.Uint(vform, i);
   1736     uint64_t dst_lane = dst.Uint(vform, i);
   1737     uint64_t shifted;
   1738     uint64_t mask;
   1739     if (shift == 64) {
   1740       shifted = 0;
   1741       mask = 0;
   1742     } else {
   1743       shifted = src_lane >> shift;
   1744       mask = MaxUintFromFormat(vform) >> shift;
   1745     }
   1746     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1747   }
   1748   return dst;
   1749 }
   1750 
   1751 
   1752 LogicVRegister Simulator::ushr(VectorFormat vform,
   1753                                LogicVRegister dst,
   1754                                const LogicVRegister& src,
   1755                                int shift) {
   1756   VIXL_ASSERT(shift >= 0);
   1757   SimVRegister temp;
   1758   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1759   return ushl(vform, dst, src, shiftreg);
   1760 }
   1761 
   1762 
   1763 LogicVRegister Simulator::sshr(VectorFormat vform,
   1764                                LogicVRegister dst,
   1765                                const LogicVRegister& src,
   1766                                int shift) {
   1767   VIXL_ASSERT(shift >= 0);
   1768   SimVRegister temp;
   1769   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1770   return sshl(vform, dst, src, shiftreg);
   1771 }
   1772 
   1773 
   1774 LogicVRegister Simulator::ssra(VectorFormat vform,
   1775                                LogicVRegister dst,
   1776                                const LogicVRegister& src,
   1777                                int shift) {
   1778   SimVRegister temp;
   1779   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
   1780   return add(vform, dst, dst, shifted_reg);
   1781 }
   1782 
   1783 
   1784 LogicVRegister Simulator::usra(VectorFormat vform,
   1785                                LogicVRegister dst,
   1786                                const LogicVRegister& src,
   1787                                int shift) {
   1788   SimVRegister temp;
   1789   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
   1790   return add(vform, dst, dst, shifted_reg);
   1791 }
   1792 
   1793 
   1794 LogicVRegister Simulator::srsra(VectorFormat vform,
   1795                                 LogicVRegister dst,
   1796                                 const LogicVRegister& src,
   1797                                 int shift) {
   1798   SimVRegister temp;
   1799   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
   1800   return add(vform, dst, dst, shifted_reg);
   1801 }
   1802 
   1803 
   1804 LogicVRegister Simulator::ursra(VectorFormat vform,
   1805                                 LogicVRegister dst,
   1806                                 const LogicVRegister& src,
   1807                                 int shift) {
   1808   SimVRegister temp;
   1809   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
   1810   return add(vform, dst, dst, shifted_reg);
   1811 }
   1812 
   1813 
   1814 LogicVRegister Simulator::cls(VectorFormat vform,
   1815                               LogicVRegister dst,
   1816                               const LogicVRegister& src) {
   1817   uint64_t result[16];
   1818   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   1819   int laneCount = LaneCountFromFormat(vform);
   1820   for (int i = 0; i < laneCount; i++) {
   1821     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
   1822   }
   1823 
   1824   dst.ClearForWrite(vform);
   1825   for (int i = 0; i < laneCount; ++i) {
   1826     dst.SetUint(vform, i, result[i]);
   1827   }
   1828   return dst;
   1829 }
   1830 
   1831 
   1832 LogicVRegister Simulator::clz(VectorFormat vform,
   1833                               LogicVRegister dst,
   1834                               const LogicVRegister& src) {
   1835   uint64_t result[16];
   1836   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   1837   int laneCount = LaneCountFromFormat(vform);
   1838   for (int i = 0; i < laneCount; i++) {
   1839     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
   1840   }
   1841 
   1842   dst.ClearForWrite(vform);
   1843   for (int i = 0; i < laneCount; ++i) {
   1844     dst.SetUint(vform, i, result[i]);
   1845   }
   1846   return dst;
   1847 }
   1848 
   1849 
   1850 LogicVRegister Simulator::cnt(VectorFormat vform,
   1851                               LogicVRegister dst,
   1852                               const LogicVRegister& src) {
   1853   uint64_t result[16];
   1854   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   1855   int laneCount = LaneCountFromFormat(vform);
   1856   for (int i = 0; i < laneCount; i++) {
   1857     uint64_t value = src.Uint(vform, i);
   1858     result[i] = 0;
   1859     for (int j = 0; j < laneSizeInBits; j++) {
   1860       result[i] += (value & 1);
   1861       value >>= 1;
   1862     }
   1863   }
   1864 
   1865   dst.ClearForWrite(vform);
   1866   for (int i = 0; i < laneCount; ++i) {
   1867     dst.SetUint(vform, i, result[i]);
   1868   }
   1869   return dst;
   1870 }
   1871 
   1872 
   1873 LogicVRegister Simulator::sshl(VectorFormat vform,
   1874                                LogicVRegister dst,
   1875                                const LogicVRegister& src1,
   1876                                const LogicVRegister& src2) {
   1877   dst.ClearForWrite(vform);
   1878   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1879     int8_t shift_val = src2.Int(vform, i);
   1880     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
   1881 
   1882     // Set signed saturation state.
   1883     if ((shift_val > CountLeadingSignBits(lj_src_val)) &&
   1884         (lj_src_val != 0)) {
   1885       dst.SetSignedSat(i, lj_src_val >= 0);
   1886     }
   1887 
   1888     // Set unsigned saturation state.
   1889     if (lj_src_val < 0) {
   1890       dst.SetUnsignedSat(i, false);
   1891     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
   1892                (lj_src_val != 0)) {
   1893       dst.SetUnsignedSat(i, true);
   1894     }
   1895 
   1896     int64_t src_val = src1.Int(vform, i);
   1897     if (shift_val > 63) {
   1898       dst.SetInt(vform, i, 0);
   1899     } else if (shift_val < -63) {
   1900       dst.SetRounding(i, src_val < 0);
   1901       dst.SetInt(vform, i, (src_val < 0) ? -1 : 0);
   1902     } else {
   1903       if (shift_val < 0) {
   1904         // Set rounding state. Rounding only needed on right shifts.
   1905         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
   1906           dst.SetRounding(i, true);
   1907         }
   1908         src_val >>= -shift_val;
   1909       } else {
   1910         src_val <<= shift_val;
   1911       }
   1912       dst.SetInt(vform, i, src_val);
   1913     }
   1914   }
   1915   return dst;
   1916 }
   1917 
   1918 
   1919 LogicVRegister Simulator::ushl(VectorFormat vform,
   1920                                LogicVRegister dst,
   1921                                const LogicVRegister& src1,
   1922                                const LogicVRegister& src2) {
   1923   dst.ClearForWrite(vform);
   1924   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1925     int8_t shift_val = src2.Int(vform, i);
   1926     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
   1927 
   1928     // Set saturation state.
   1929     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
   1930       dst.SetUnsignedSat(i, true);
   1931     }
   1932 
   1933     uint64_t src_val = src1.Uint(vform, i);
   1934     if ((shift_val > 63) || (shift_val < -64)) {
   1935       dst.SetUint(vform, i, 0);
   1936     } else {
   1937       if (shift_val < 0) {
   1938         // Set rounding state. Rounding only needed on right shifts.
   1939         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
   1940           dst.SetRounding(i, true);
   1941         }
   1942 
   1943         if (shift_val == -64) {
   1944           src_val = 0;
   1945         } else {
   1946           src_val >>= -shift_val;
   1947         }
   1948       } else {
   1949         src_val <<= shift_val;
   1950       }
   1951       dst.SetUint(vform, i, src_val);
   1952     }
   1953   }
   1954   return dst;
   1955 }
   1956 
   1957 
   1958 LogicVRegister Simulator::neg(VectorFormat vform,
   1959                               LogicVRegister dst,
   1960                               const LogicVRegister& src) {
   1961   dst.ClearForWrite(vform);
   1962   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1963     // Test for signed saturation.
   1964     int64_t sa = src.Int(vform, i);
   1965     if (sa == MinIntFromFormat(vform)) {
   1966       dst.SetSignedSat(i, true);
   1967     }
   1968     dst.SetInt(vform, i, -sa);
   1969   }
   1970   return dst;
   1971 }
   1972 
   1973 
   1974 LogicVRegister Simulator::suqadd(VectorFormat vform,
   1975                                  LogicVRegister dst,
   1976                                  const LogicVRegister& src) {
   1977   dst.ClearForWrite(vform);
   1978   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1979     int64_t  sa = dst.IntLeftJustified(vform, i);
   1980     uint64_t ub = src.UintLeftJustified(vform, i);
   1981     int64_t  sr = sa + ub;
   1982 
   1983     if (sr < sa) {  // Test for signed positive saturation.
   1984       dst.SetInt(vform, i, MaxIntFromFormat(vform));
   1985     } else {
   1986       dst.SetInt(vform, i, dst.Int(vform, i) + src.Int(vform, i));
   1987     }
   1988   }
   1989   return dst;
   1990 }
   1991 
   1992 
   1993 LogicVRegister Simulator::usqadd(VectorFormat vform,
   1994                                  LogicVRegister dst,
   1995                                  const LogicVRegister& src) {
   1996   dst.ClearForWrite(vform);
   1997   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1998     uint64_t  ua = dst.UintLeftJustified(vform, i);
   1999     int64_t   sb = src.IntLeftJustified(vform, i);
   2000     uint64_t  ur = ua + sb;
   2001 
   2002     if ((sb > 0) && (ur <= ua)) {
   2003       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
   2004     } else if ((sb < 0) && (ur >= ua)) {
   2005       dst.SetUint(vform, i, 0);                         // Negative saturation.
   2006     } else {
   2007       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
   2008     }
   2009   }
   2010   return dst;
   2011 }
   2012 
   2013 
   2014 LogicVRegister Simulator::abs(VectorFormat vform,
   2015                               LogicVRegister dst,
   2016                               const LogicVRegister& src) {
   2017   dst.ClearForWrite(vform);
   2018   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2019     // Test for signed saturation.
   2020     int64_t sa = src.Int(vform, i);
   2021     if (sa == MinIntFromFormat(vform)) {
   2022       dst.SetSignedSat(i, true);
   2023     }
   2024     if (sa < 0) {
   2025       dst.SetInt(vform, i, -sa);
   2026     } else {
   2027       dst.SetInt(vform, i, sa);
   2028     }
   2029   }
   2030   return dst;
   2031 }
   2032 
   2033 
   2034 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
   2035                                         LogicVRegister dst,
   2036                                         bool dstIsSigned,
   2037                                         const LogicVRegister& src,
   2038                                         bool srcIsSigned) {
   2039   bool upperhalf = false;
   2040   VectorFormat srcform = kFormatUndefined;
   2041   int64_t  ssrc[8];
   2042   uint64_t usrc[8];
   2043 
   2044   switch (dstform) {
   2045     case kFormat8B : upperhalf = false; srcform = kFormat8H; break;
   2046     case kFormat16B: upperhalf = true;  srcform = kFormat8H; break;
   2047     case kFormat4H : upperhalf = false; srcform = kFormat4S; break;
   2048     case kFormat8H : upperhalf = true;  srcform = kFormat4S; break;
   2049     case kFormat2S : upperhalf = false; srcform = kFormat2D; break;
   2050     case kFormat4S : upperhalf = true;  srcform = kFormat2D; break;
   2051     case kFormatB  : upperhalf = false; srcform = kFormatH;  break;
   2052     case kFormatH  : upperhalf = false; srcform = kFormatS;  break;
   2053     case kFormatS  : upperhalf = false; srcform = kFormatD;  break;
   2054     default:VIXL_UNIMPLEMENTED();
   2055   }
   2056 
   2057   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   2058     ssrc[i] = src.Int(srcform, i);
   2059     usrc[i] = src.Uint(srcform, i);
   2060   }
   2061 
   2062   int offset;
   2063   if (upperhalf) {
   2064     offset = LaneCountFromFormat(dstform) / 2;
   2065   } else {
   2066     offset = 0;
   2067     dst.ClearForWrite(dstform);
   2068   }
   2069 
   2070   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   2071     // Test for signed saturation
   2072     if (ssrc[i] > MaxIntFromFormat(dstform)) {
   2073       dst.SetSignedSat(offset + i, true);
   2074     } else if (ssrc[i] < MinIntFromFormat(dstform)) {
   2075       dst.SetSignedSat(offset + i, false);
   2076     }
   2077 
   2078     // Test for unsigned saturation
   2079     if (srcIsSigned) {
   2080       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
   2081         dst.SetUnsignedSat(offset + i, true);
   2082       } else if (ssrc[i] < 0) {
   2083         dst.SetUnsignedSat(offset + i, false);
   2084       }
   2085     } else {
   2086       if (usrc[i] > MaxUintFromFormat(dstform)) {
   2087         dst.SetUnsignedSat(offset + i, true);
   2088       }
   2089     }
   2090 
   2091     int64_t result;
   2092     if (srcIsSigned) {
   2093       result = ssrc[i] & MaxUintFromFormat(dstform);
   2094     } else {
   2095       result = usrc[i] & MaxUintFromFormat(dstform);
   2096     }
   2097 
   2098     if (dstIsSigned) {
   2099       dst.SetInt(dstform, offset + i, result);
   2100     } else {
   2101       dst.SetUint(dstform, offset + i, result);
   2102     }
   2103   }
   2104   return dst;
   2105 }
   2106 
   2107 
   2108 LogicVRegister Simulator::xtn(VectorFormat vform,
   2109                               LogicVRegister dst,
   2110                               const LogicVRegister& src) {
   2111   return extractnarrow(vform, dst, true, src, true);
   2112 }
   2113 
   2114 
   2115 LogicVRegister Simulator::sqxtn(VectorFormat vform,
   2116                                 LogicVRegister dst,
   2117                                 const LogicVRegister& src) {
   2118   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
   2119 }
   2120 
   2121 
   2122 LogicVRegister Simulator::sqxtun(VectorFormat vform,
   2123                                  LogicVRegister dst,
   2124                                  const LogicVRegister& src) {
   2125   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
   2126 }
   2127 
   2128 
   2129 LogicVRegister Simulator::uqxtn(VectorFormat vform,
   2130                                 LogicVRegister dst,
   2131                                 const LogicVRegister& src) {
   2132   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
   2133 }
   2134 
   2135 
   2136 LogicVRegister Simulator::absdiff(VectorFormat vform,
   2137                                   LogicVRegister dst,
   2138                                   const LogicVRegister& src1,
   2139                                   const LogicVRegister& src2,
   2140                                   bool issigned) {
   2141   dst.ClearForWrite(vform);
   2142   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2143     if (issigned) {
   2144       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
   2145       sr = sr > 0 ? sr : -sr;
   2146       dst.SetInt(vform, i, sr);
   2147     } else {
   2148       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
   2149       sr = sr > 0 ? sr : -sr;
   2150       dst.SetUint(vform, i, sr);
   2151     }
   2152   }
   2153   return dst;
   2154 }
   2155 
   2156 
   2157 LogicVRegister Simulator::saba(VectorFormat vform,
   2158                                LogicVRegister dst,
   2159                                const LogicVRegister& src1,
   2160                                const LogicVRegister& src2) {
   2161   SimVRegister temp;
   2162   dst.ClearForWrite(vform);
   2163   absdiff(vform, temp, src1, src2, true);
   2164   add(vform, dst, dst, temp);
   2165   return dst;
   2166 }
   2167 
   2168 
   2169 LogicVRegister Simulator::uaba(VectorFormat vform,
   2170                                LogicVRegister dst,
   2171                                const LogicVRegister& src1,
   2172                                const LogicVRegister& src2) {
   2173   SimVRegister temp;
   2174   dst.ClearForWrite(vform);
   2175   absdiff(vform, temp, src1, src2, false);
   2176   add(vform, dst, dst, temp);
   2177   return dst;
   2178 }
   2179 
   2180 
   2181 LogicVRegister Simulator::not_(VectorFormat vform,
   2182                                LogicVRegister dst,
   2183                                const LogicVRegister& src) {
   2184   dst.ClearForWrite(vform);
   2185   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2186     dst.SetUint(vform, i, ~src.Uint(vform, i));
   2187   }
   2188   return dst;
   2189 }
   2190 
   2191 
   2192 LogicVRegister Simulator::rbit(VectorFormat vform,
   2193                                LogicVRegister dst,
   2194                                const LogicVRegister& src) {
   2195   uint64_t result[16];
   2196   int laneCount = LaneCountFromFormat(vform);
   2197   int laneSizeInBits  = LaneSizeInBitsFromFormat(vform);
   2198   uint64_t reversed_value;
   2199   uint64_t value;
   2200   for (int i = 0; i < laneCount; i++) {
   2201     value = src.Uint(vform, i);
   2202     reversed_value = 0;
   2203     for (int j = 0; j < laneSizeInBits; j++) {
   2204       reversed_value = (reversed_value << 1) | (value & 1);
   2205       value >>= 1;
   2206     }
   2207     result[i] = reversed_value;
   2208   }
   2209 
   2210   dst.ClearForWrite(vform);
   2211   for (int i = 0; i < laneCount; ++i) {
   2212     dst.SetUint(vform, i, result[i]);
   2213   }
   2214   return dst;
   2215 }
   2216 
   2217 
   2218 LogicVRegister Simulator::rev(VectorFormat vform,
   2219                               LogicVRegister dst,
   2220                               const LogicVRegister& src,
   2221                               int revSize) {
   2222   uint64_t result[16];
   2223   int laneCount = LaneCountFromFormat(vform);
   2224   int laneSize = LaneSizeInBytesFromFormat(vform);
   2225   int lanesPerLoop =  revSize / laneSize;
   2226   for (int i = 0; i < laneCount; i += lanesPerLoop) {
   2227     for (int j = 0; j < lanesPerLoop; j++) {
   2228       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
   2229     }
   2230   }
   2231   dst.ClearForWrite(vform);
   2232   for (int i = 0; i < laneCount; ++i) {
   2233     dst.SetUint(vform, i, result[i]);
   2234   }
   2235   return dst;
   2236 }
   2237 
   2238 
   2239 LogicVRegister Simulator::rev16(VectorFormat vform,
   2240                                 LogicVRegister dst,
   2241                                 const LogicVRegister& src) {
   2242   return rev(vform, dst, src, 2);
   2243 }
   2244 
   2245 
   2246 LogicVRegister Simulator::rev32(VectorFormat vform,
   2247                                 LogicVRegister dst,
   2248                                 const LogicVRegister& src) {
   2249   return rev(vform, dst, src, 4);
   2250 }
   2251 
   2252 
   2253 LogicVRegister Simulator::rev64(VectorFormat vform,
   2254                                 LogicVRegister dst,
   2255                                 const LogicVRegister& src) {
   2256   return rev(vform, dst, src, 8);
   2257 }
   2258 
   2259 
   2260 LogicVRegister Simulator::addlp(VectorFormat vform,
   2261                                  LogicVRegister dst,
   2262                                  const LogicVRegister& src,
   2263                                  bool is_signed,
   2264                                  bool do_accumulate) {
   2265   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
   2266 
   2267   int64_t  sr[16];
   2268   uint64_t ur[16];
   2269 
   2270   int laneCount = LaneCountFromFormat(vform);
   2271   for (int i = 0; i < laneCount; ++i) {
   2272     if (is_signed) {
   2273       sr[i] = src.Int(vformsrc, 2 * i) + src.Int(vformsrc, 2 * i + 1);
   2274     } else {
   2275       ur[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
   2276     }
   2277   }
   2278 
   2279   dst.ClearForWrite(vform);
   2280   for (int i = 0; i < laneCount; ++i) {
   2281     if (do_accumulate) {
   2282       if (is_signed) {
   2283         dst.SetInt(vform, i, dst.Int(vform, i) + sr[i]);
   2284       } else {
   2285         dst.SetUint(vform, i, dst.Uint(vform, i) + ur[i]);
   2286       }
   2287     } else {
   2288       if (is_signed) {
   2289         dst.SetInt(vform, i, sr[i]);
   2290       } else {
   2291         dst.SetUint(vform, i, ur[i]);
   2292       }
   2293     }
   2294   }
   2295 
   2296   return dst;
   2297 }
   2298 
   2299 
   2300 LogicVRegister Simulator::saddlp(VectorFormat vform,
   2301                                  LogicVRegister dst,
   2302                                  const LogicVRegister& src) {
   2303   return addlp(vform, dst, src, true, false);
   2304 }
   2305 
   2306 
   2307 LogicVRegister Simulator::uaddlp(VectorFormat vform,
   2308                                  LogicVRegister dst,
   2309                                  const LogicVRegister& src) {
   2310   return addlp(vform, dst, src, false, false);
   2311 }
   2312 
   2313 
   2314 LogicVRegister Simulator::sadalp(VectorFormat vform,
   2315                                  LogicVRegister dst,
   2316                                  const LogicVRegister& src) {
   2317   return addlp(vform, dst, src, true, true);
   2318 }
   2319 
   2320 
   2321 LogicVRegister Simulator::uadalp(VectorFormat vform,
   2322                                  LogicVRegister dst,
   2323                                  const LogicVRegister& src) {
   2324   return addlp(vform, dst, src, false, true);
   2325 }
   2326 
   2327 
   2328 LogicVRegister Simulator::ext(VectorFormat vform,
   2329                               LogicVRegister dst,
   2330                               const LogicVRegister& src1,
   2331                               const LogicVRegister& src2,
   2332                               int index) {
   2333   uint8_t result[16];
   2334   int laneCount = LaneCountFromFormat(vform);
   2335   for (int i = 0; i < laneCount - index; ++i) {
   2336     result[i] = src1.Uint(vform, i + index);
   2337   }
   2338   for (int i = 0; i < index; ++i) {
   2339     result[laneCount - index + i] = src2.Uint(vform, i);
   2340   }
   2341   dst.ClearForWrite(vform);
   2342   for (int i = 0; i < laneCount; ++i) {
   2343     dst.SetUint(vform, i, result[i]);
   2344   }
   2345   return dst;
   2346 }
   2347 
   2348 
   2349 LogicVRegister Simulator::dup_element(VectorFormat vform,
   2350                                       LogicVRegister dst,
   2351                                       const LogicVRegister& src,
   2352                                       int src_index) {
   2353   int laneCount = LaneCountFromFormat(vform);
   2354   uint64_t value = src.Uint(vform, src_index);
   2355   dst.ClearForWrite(vform);
   2356   for (int i = 0; i < laneCount; ++i) {
   2357     dst.SetUint(vform, i, value);
   2358   }
   2359   return dst;
   2360 }
   2361 
   2362 
   2363 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
   2364                                         LogicVRegister dst,
   2365                                         uint64_t imm) {
   2366   int laneCount = LaneCountFromFormat(vform);
   2367   uint64_t value = imm & MaxUintFromFormat(vform);
   2368   dst.ClearForWrite(vform);
   2369   for (int i = 0; i < laneCount; ++i) {
   2370     dst.SetUint(vform, i, value);
   2371   }
   2372   return dst;
   2373 }
   2374 
   2375 
   2376 LogicVRegister Simulator::ins_element(VectorFormat vform,
   2377                                       LogicVRegister dst,
   2378                                       int dst_index,
   2379                                       const LogicVRegister& src,
   2380                                       int src_index) {
   2381   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
   2382   return dst;
   2383 }
   2384 
   2385 
   2386 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
   2387                                         LogicVRegister dst,
   2388                                         int dst_index,
   2389                                         uint64_t imm) {
   2390   uint64_t value = imm & MaxUintFromFormat(vform);
   2391   dst.SetUint(vform, dst_index, value);
   2392   return dst;
   2393 }
   2394 
   2395 
   2396 LogicVRegister Simulator::movi(VectorFormat vform,
   2397                                LogicVRegister dst,
   2398                                uint64_t imm) {
   2399   int laneCount = LaneCountFromFormat(vform);
   2400   dst.ClearForWrite(vform);
   2401   for (int i = 0; i < laneCount; ++i) {
   2402     dst.SetUint(vform, i, imm);
   2403   }
   2404   return dst;
   2405 }
   2406 
   2407 
   2408 LogicVRegister Simulator::mvni(VectorFormat vform,
   2409                                LogicVRegister dst,
   2410                                uint64_t imm) {
   2411   int laneCount = LaneCountFromFormat(vform);
   2412   dst.ClearForWrite(vform);
   2413   for (int i = 0; i < laneCount; ++i) {
   2414     dst.SetUint(vform, i, ~imm);
   2415   }
   2416   return dst;
   2417 }
   2418 
   2419 
   2420 LogicVRegister Simulator::orr(VectorFormat vform,
   2421                               LogicVRegister dst,
   2422                               const LogicVRegister& src,
   2423                               uint64_t imm) {
   2424   uint64_t result[16];
   2425   int laneCount = LaneCountFromFormat(vform);
   2426   for (int i = 0; i < laneCount; ++i) {
   2427     result[i] = src.Uint(vform, i) | imm;
   2428   }
   2429   dst.ClearForWrite(vform);
   2430   for (int i = 0; i < laneCount; ++i) {
   2431     dst.SetUint(vform, i, result[i]);
   2432   }
   2433   return dst;
   2434 }
   2435 
   2436 
   2437 LogicVRegister Simulator::uxtl(VectorFormat vform,
   2438                                LogicVRegister dst,
   2439                                const LogicVRegister& src) {
   2440   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2441 
   2442   dst.ClearForWrite(vform);
   2443   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2444     dst.SetUint(vform, i, src.Uint(vform_half, i));
   2445   }
   2446   return dst;
   2447 }
   2448 
   2449 
   2450 LogicVRegister Simulator::sxtl(VectorFormat vform,
   2451                                LogicVRegister dst,
   2452                                const LogicVRegister& src) {
   2453   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2454 
   2455   dst.ClearForWrite(vform);
   2456   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2457     dst.SetInt(vform, i, src.Int(vform_half, i));
   2458   }
   2459   return dst;
   2460 }
   2461 
   2462 
   2463 LogicVRegister Simulator::uxtl2(VectorFormat vform,
   2464                                 LogicVRegister dst,
   2465                                 const LogicVRegister& src) {
   2466   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2467   int lane_count = LaneCountFromFormat(vform);
   2468 
   2469   dst.ClearForWrite(vform);
   2470   for (int i = 0; i < lane_count; i++) {
   2471     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
   2472   }
   2473   return dst;
   2474 }
   2475 
   2476 
   2477 LogicVRegister Simulator::sxtl2(VectorFormat vform,
   2478                                 LogicVRegister dst,
   2479                                 const LogicVRegister& src) {
   2480   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2481   int lane_count = LaneCountFromFormat(vform);
   2482 
   2483   dst.ClearForWrite(vform);
   2484   for (int i = 0; i < lane_count; i++) {
   2485     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
   2486   }
   2487   return dst;
   2488 }
   2489 
   2490 
   2491 LogicVRegister Simulator::shrn(VectorFormat vform,
   2492                                LogicVRegister dst,
   2493                                const LogicVRegister& src,
   2494                                int shift) {
   2495   SimVRegister temp;
   2496   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
   2497   VectorFormat vform_dst = vform;
   2498   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
   2499   return extractnarrow(vform_dst, dst, false, shifted_src, false);
   2500 }
   2501 
   2502 
   2503 LogicVRegister Simulator::shrn2(VectorFormat vform,
   2504                                 LogicVRegister dst,
   2505                                 const LogicVRegister& src,
   2506                                 int shift) {
   2507   SimVRegister temp;
   2508   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2509   VectorFormat vformdst = vform;
   2510   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
   2511   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2512 }
   2513 
   2514 
   2515 LogicVRegister Simulator::rshrn(VectorFormat vform,
   2516                                 LogicVRegister dst,
   2517                                 const LogicVRegister& src,
   2518                                 int shift) {
   2519   SimVRegister temp;
   2520   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2521   VectorFormat vformdst = vform;
   2522   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2523   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2524 }
   2525 
   2526 
   2527 LogicVRegister Simulator::rshrn2(VectorFormat vform,
   2528                                  LogicVRegister dst,
   2529                                  const LogicVRegister& src,
   2530                                  int shift) {
   2531   SimVRegister temp;
   2532   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2533   VectorFormat vformdst = vform;
   2534   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2535   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2536 }
   2537 
   2538 
   2539 LogicVRegister Simulator::tbl(VectorFormat vform,
   2540                               LogicVRegister dst,
   2541                               const LogicVRegister& tab,
   2542                               const LogicVRegister& ind) {
   2543     movi(vform, dst, 0);
   2544     return tbx(vform, dst, tab, ind);
   2545 }
   2546 
   2547 
   2548 LogicVRegister Simulator::tbl(VectorFormat vform,
   2549                               LogicVRegister dst,
   2550                               const LogicVRegister& tab,
   2551                               const LogicVRegister& tab2,
   2552                               const LogicVRegister& ind) {
   2553     movi(vform, dst, 0);
   2554     return tbx(vform, dst, tab, tab2, ind);
   2555 }
   2556 
   2557 
   2558 LogicVRegister Simulator::tbl(VectorFormat vform,
   2559                               LogicVRegister dst,
   2560                               const LogicVRegister& tab,
   2561                               const LogicVRegister& tab2,
   2562                               const LogicVRegister& tab3,
   2563                               const LogicVRegister& ind) {
   2564     movi(vform, dst, 0);
   2565     return tbx(vform, dst, tab, tab2, tab3, ind);
   2566 }
   2567 
   2568 
   2569 LogicVRegister Simulator::tbl(VectorFormat vform,
   2570                               LogicVRegister dst,
   2571                               const LogicVRegister& tab,
   2572                               const LogicVRegister& tab2,
   2573                               const LogicVRegister& tab3,
   2574                               const LogicVRegister& tab4,
   2575                               const LogicVRegister& ind) {
   2576     movi(vform, dst, 0);
   2577     return tbx(vform, dst, tab, tab2, tab3, tab4, ind);
   2578 }
   2579 
   2580 
   2581 LogicVRegister Simulator::tbx(VectorFormat vform,
   2582                               LogicVRegister dst,
   2583                               const LogicVRegister& tab,
   2584                               const LogicVRegister& ind) {
   2585   dst.ClearForWrite(vform);
   2586   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2587     unsigned j = ind.Uint(vform, i);
   2588     switch (j >> 4) {
   2589       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2590     }
   2591   }
   2592   return dst;
   2593 }
   2594 
   2595 
   2596 LogicVRegister Simulator::tbx(VectorFormat vform,
   2597                               LogicVRegister dst,
   2598                               const LogicVRegister& tab,
   2599                               const LogicVRegister& tab2,
   2600                               const LogicVRegister& ind) {
   2601   dst.ClearForWrite(vform);
   2602   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2603     unsigned j = ind.Uint(vform, i);
   2604     switch (j >> 4) {
   2605       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2606       case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
   2607     }
   2608   }
   2609   return dst;
   2610 }
   2611 
   2612 
   2613 LogicVRegister Simulator::tbx(VectorFormat vform,
   2614                               LogicVRegister dst,
   2615                               const LogicVRegister& tab,
   2616                               const LogicVRegister& tab2,
   2617                               const LogicVRegister& tab3,
   2618                               const LogicVRegister& ind) {
   2619   dst.ClearForWrite(vform);
   2620   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2621     unsigned j = ind.Uint(vform, i);
   2622     switch (j >> 4) {
   2623       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2624       case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
   2625       case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
   2626     }
   2627   }
   2628   return dst;
   2629 }
   2630 
   2631 
   2632 LogicVRegister Simulator::tbx(VectorFormat vform,
   2633                               LogicVRegister dst,
   2634                               const LogicVRegister& tab,
   2635                               const LogicVRegister& tab2,
   2636                               const LogicVRegister& tab3,
   2637                               const LogicVRegister& tab4,
   2638                               const LogicVRegister& ind) {
   2639   dst.ClearForWrite(vform);
   2640   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2641     unsigned j = ind.Uint(vform, i);
   2642     switch (j >> 4) {
   2643       case 0: dst.SetUint(vform, i, tab.Uint(kFormat16B, j & 15)); break;
   2644       case 1: dst.SetUint(vform, i, tab2.Uint(kFormat16B, j & 15)); break;
   2645       case 2: dst.SetUint(vform, i, tab3.Uint(kFormat16B, j & 15)); break;
   2646       case 3: dst.SetUint(vform, i, tab4.Uint(kFormat16B, j & 15)); break;
   2647     }
   2648   }
   2649   return dst;
   2650 }
   2651 
   2652 
   2653 LogicVRegister Simulator::uqshrn(VectorFormat vform,
   2654                                  LogicVRegister dst,
   2655                                  const LogicVRegister& src,
   2656                                  int shift) {
   2657   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2658 }
   2659 
   2660 
   2661 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
   2662                                   LogicVRegister dst,
   2663                                   const LogicVRegister& src,
   2664                                   int shift) {
   2665   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2666 }
   2667 
   2668 
   2669 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
   2670                                   LogicVRegister dst,
   2671                                   const LogicVRegister& src,
   2672                                   int shift) {
   2673   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2674 }
   2675 
   2676 
   2677 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
   2678                                    LogicVRegister dst,
   2679                                    const LogicVRegister& src,
   2680                                    int shift) {
   2681   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2682 }
   2683 
   2684 
   2685 LogicVRegister Simulator::sqshrn(VectorFormat vform,
   2686                                  LogicVRegister dst,
   2687                                  const LogicVRegister& src,
   2688                                  int shift) {
   2689   SimVRegister temp;
   2690   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2691   VectorFormat vformdst = vform;
   2692   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2693   return sqxtn(vformdst, dst, shifted_src);
   2694 }
   2695 
   2696 
   2697 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
   2698                                   LogicVRegister dst,
   2699                                   const LogicVRegister& src,
   2700                                   int shift) {
   2701   SimVRegister temp;
   2702   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2703   VectorFormat vformdst = vform;
   2704   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2705   return sqxtn(vformdst, dst, shifted_src);
   2706 }
   2707 
   2708 
   2709 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
   2710                                   LogicVRegister dst,
   2711                                   const LogicVRegister& src,
   2712                                   int shift) {
   2713   SimVRegister temp;
   2714   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2715   VectorFormat vformdst = vform;
   2716   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2717   return sqxtn(vformdst, dst, shifted_src);
   2718 }
   2719 
   2720 
   2721 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
   2722                                    LogicVRegister dst,
   2723                                    const LogicVRegister& src,
   2724                                    int shift) {
   2725   SimVRegister temp;
   2726   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2727   VectorFormat vformdst = vform;
   2728   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2729   return sqxtn(vformdst, dst, shifted_src);
   2730 }
   2731 
   2732 
   2733 LogicVRegister Simulator::sqshrun(VectorFormat vform,
   2734                                   LogicVRegister dst,
   2735                                   const LogicVRegister& src,
   2736                                   int shift) {
   2737   SimVRegister temp;
   2738   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2739   VectorFormat vformdst = vform;
   2740   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2741   return sqxtun(vformdst, dst, shifted_src);
   2742 }
   2743 
   2744 
   2745 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
   2746                                    LogicVRegister dst,
   2747                                    const LogicVRegister& src,
   2748                                    int shift) {
   2749   SimVRegister temp;
   2750   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2751   VectorFormat vformdst = vform;
   2752   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2753   return sqxtun(vformdst, dst, shifted_src);
   2754 }
   2755 
   2756 
   2757 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
   2758                                    LogicVRegister dst,
   2759                                    const LogicVRegister& src,
   2760                                    int shift) {
   2761   SimVRegister temp;
   2762   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2763   VectorFormat vformdst = vform;
   2764   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2765   return sqxtun(vformdst, dst, shifted_src);
   2766 }
   2767 
   2768 
   2769 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
   2770                                     LogicVRegister dst,
   2771                                     const LogicVRegister& src,
   2772                                     int shift) {
   2773   SimVRegister temp;
   2774   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2775   VectorFormat vformdst = vform;
   2776   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2777   return sqxtun(vformdst, dst, shifted_src);
   2778 }
   2779 
   2780 
   2781 LogicVRegister Simulator::uaddl(VectorFormat vform,
   2782                                 LogicVRegister dst,
   2783                                 const LogicVRegister& src1,
   2784                                 const LogicVRegister& src2) {
   2785   SimVRegister temp1, temp2;
   2786   uxtl(vform, temp1, src1);
   2787   uxtl(vform, temp2, src2);
   2788   add(vform, dst, temp1, temp2);
   2789   return dst;
   2790 }
   2791 
   2792 
   2793 LogicVRegister Simulator::uaddl2(VectorFormat vform,
   2794                                  LogicVRegister dst,
   2795                                  const LogicVRegister& src1,
   2796                                  const LogicVRegister& src2) {
   2797   SimVRegister temp1, temp2;
   2798   uxtl2(vform, temp1, src1);
   2799   uxtl2(vform, temp2, src2);
   2800   add(vform, dst, temp1, temp2);
   2801   return dst;
   2802 }
   2803 
   2804 
   2805 LogicVRegister Simulator::uaddw(VectorFormat vform,
   2806                                 LogicVRegister dst,
   2807                                 const LogicVRegister& src1,
   2808                                 const LogicVRegister& src2) {
   2809   SimVRegister temp;
   2810   uxtl(vform, temp, src2);
   2811   add(vform, dst, src1, temp);
   2812   return dst;
   2813 }
   2814 
   2815 
   2816 LogicVRegister Simulator::uaddw2(VectorFormat vform,
   2817                                  LogicVRegister dst,
   2818                                  const LogicVRegister& src1,
   2819                                  const LogicVRegister& src2) {
   2820   SimVRegister temp;
   2821   uxtl2(vform, temp, src2);
   2822   add(vform, dst, src1, temp);
   2823   return dst;
   2824 }
   2825 
   2826 
   2827 LogicVRegister Simulator::saddl(VectorFormat vform,
   2828                                 LogicVRegister dst,
   2829                                 const LogicVRegister& src1,
   2830                                 const LogicVRegister& src2) {
   2831   SimVRegister temp1, temp2;
   2832   sxtl(vform, temp1, src1);
   2833   sxtl(vform, temp2, src2);
   2834   add(vform, dst, temp1, temp2);
   2835   return dst;
   2836 }
   2837 
   2838 
   2839 LogicVRegister Simulator::saddl2(VectorFormat vform,
   2840                                  LogicVRegister dst,
   2841                                  const LogicVRegister& src1,
   2842                                  const LogicVRegister& src2) {
   2843   SimVRegister temp1, temp2;
   2844   sxtl2(vform, temp1, src1);
   2845   sxtl2(vform, temp2, src2);
   2846   add(vform, dst, temp1, temp2);
   2847   return dst;
   2848 }
   2849 
   2850 
   2851 LogicVRegister Simulator::saddw(VectorFormat vform,
   2852                                 LogicVRegister dst,
   2853                                 const LogicVRegister& src1,
   2854                                 const LogicVRegister& src2) {
   2855   SimVRegister temp;
   2856   sxtl(vform, temp, src2);
   2857   add(vform, dst, src1, temp);
   2858   return dst;
   2859 }
   2860 
   2861 
   2862 LogicVRegister Simulator::saddw2(VectorFormat vform,
   2863                                  LogicVRegister dst,
   2864                                  const LogicVRegister& src1,
   2865                                  const LogicVRegister& src2) {
   2866   SimVRegister temp;
   2867   sxtl2(vform, temp, src2);
   2868   add(vform, dst, src1, temp);
   2869   return dst;
   2870 }
   2871 
   2872 
   2873 LogicVRegister Simulator::usubl(VectorFormat vform,
   2874                                 LogicVRegister dst,
   2875                                 const LogicVRegister& src1,
   2876                                 const LogicVRegister& src2) {
   2877   SimVRegister temp1, temp2;
   2878   uxtl(vform, temp1, src1);
   2879   uxtl(vform, temp2, src2);
   2880   sub(vform, dst, temp1, temp2);
   2881   return dst;
   2882 }
   2883 
   2884 
   2885 LogicVRegister Simulator::usubl2(VectorFormat vform,
   2886                                  LogicVRegister dst,
   2887                                  const LogicVRegister& src1,
   2888                                  const LogicVRegister& src2) {
   2889   SimVRegister temp1, temp2;
   2890   uxtl2(vform, temp1, src1);
   2891   uxtl2(vform, temp2, src2);
   2892   sub(vform, dst, temp1, temp2);
   2893   return dst;
   2894 }
   2895 
   2896 
   2897 LogicVRegister Simulator::usubw(VectorFormat vform,
   2898                                 LogicVRegister dst,
   2899                                 const LogicVRegister& src1,
   2900                                 const LogicVRegister& src2) {
   2901   SimVRegister temp;
   2902   uxtl(vform, temp, src2);
   2903   sub(vform, dst, src1, temp);
   2904   return dst;
   2905 }
   2906 
   2907 
   2908 LogicVRegister Simulator::usubw2(VectorFormat vform,
   2909                                  LogicVRegister dst,
   2910                                  const LogicVRegister& src1,
   2911                                  const LogicVRegister& src2) {
   2912   SimVRegister temp;
   2913   uxtl2(vform, temp, src2);
   2914   sub(vform, dst, src1, temp);
   2915   return dst;
   2916 }
   2917 
   2918 
   2919 LogicVRegister Simulator::ssubl(VectorFormat vform,
   2920                                 LogicVRegister dst,
   2921                                 const LogicVRegister& src1,
   2922                                 const LogicVRegister& src2) {
   2923   SimVRegister temp1, temp2;
   2924   sxtl(vform, temp1, src1);
   2925   sxtl(vform, temp2, src2);
   2926   sub(vform, dst, temp1, temp2);
   2927   return dst;
   2928 }
   2929 
   2930 
   2931 LogicVRegister Simulator::ssubl2(VectorFormat vform,
   2932                                  LogicVRegister dst,
   2933                                  const LogicVRegister& src1,
   2934                                  const LogicVRegister& src2) {
   2935   SimVRegister temp1, temp2;
   2936   sxtl2(vform, temp1, src1);
   2937   sxtl2(vform, temp2, src2);
   2938   sub(vform, dst, temp1, temp2);
   2939   return dst;
   2940 }
   2941 
   2942 
   2943 LogicVRegister Simulator::ssubw(VectorFormat vform,
   2944                                 LogicVRegister dst,
   2945                                 const LogicVRegister& src1,
   2946                                 const LogicVRegister& src2) {
   2947   SimVRegister temp;
   2948   sxtl(vform, temp, src2);
   2949   sub(vform, dst, src1, temp);
   2950   return dst;
   2951 }
   2952 
   2953 
   2954 LogicVRegister Simulator::ssubw2(VectorFormat vform,
   2955                                  LogicVRegister dst,
   2956                                  const LogicVRegister& src1,
   2957                                  const LogicVRegister& src2) {
   2958   SimVRegister temp;
   2959   sxtl2(vform, temp, src2);
   2960   sub(vform, dst, src1, temp);
   2961   return dst;
   2962 }
   2963 
   2964 
   2965 LogicVRegister Simulator::uabal(VectorFormat vform,
   2966                                 LogicVRegister dst,
   2967                                 const LogicVRegister& src1,
   2968                                 const LogicVRegister& src2) {
   2969   SimVRegister temp1, temp2;
   2970   uxtl(vform, temp1, src1);
   2971   uxtl(vform, temp2, src2);
   2972   uaba(vform, dst, temp1, temp2);
   2973   return dst;
   2974 }
   2975 
   2976 
   2977 LogicVRegister Simulator::uabal2(VectorFormat vform,
   2978                                  LogicVRegister dst,
   2979                                  const LogicVRegister& src1,
   2980                                  const LogicVRegister& src2) {
   2981   SimVRegister temp1, temp2;
   2982   uxtl2(vform, temp1, src1);
   2983   uxtl2(vform, temp2, src2);
   2984   uaba(vform, dst, temp1, temp2);
   2985   return dst;
   2986 }
   2987 
   2988 
   2989 LogicVRegister Simulator::sabal(VectorFormat vform,
   2990                                 LogicVRegister dst,
   2991                                 const LogicVRegister& src1,
   2992                                 const LogicVRegister& src2) {
   2993   SimVRegister temp1, temp2;
   2994   sxtl(vform, temp1, src1);
   2995   sxtl(vform, temp2, src2);
   2996   saba(vform, dst, temp1, temp2);
   2997   return dst;
   2998 }
   2999 
   3000 
   3001 LogicVRegister Simulator::sabal2(VectorFormat vform,
   3002                                  LogicVRegister dst,
   3003                                  const LogicVRegister& src1,
   3004                                  const LogicVRegister& src2) {
   3005   SimVRegister temp1, temp2;
   3006   sxtl2(vform, temp1, src1);
   3007   sxtl2(vform, temp2, src2);
   3008   saba(vform, dst, temp1, temp2);
   3009   return dst;
   3010 }
   3011 
   3012 
   3013 LogicVRegister Simulator::uabdl(VectorFormat vform,
   3014                                 LogicVRegister dst,
   3015                                 const LogicVRegister& src1,
   3016                                 const LogicVRegister& src2) {
   3017   SimVRegister temp1, temp2;
   3018   uxtl(vform, temp1, src1);
   3019   uxtl(vform, temp2, src2);
   3020   absdiff(vform, dst, temp1, temp2, false);
   3021   return dst;
   3022 }
   3023 
   3024 
   3025 LogicVRegister Simulator::uabdl2(VectorFormat vform,
   3026                                  LogicVRegister dst,
   3027                                  const LogicVRegister& src1,
   3028                                  const LogicVRegister& src2) {
   3029   SimVRegister temp1, temp2;
   3030   uxtl2(vform, temp1, src1);
   3031   uxtl2(vform, temp2, src2);
   3032   absdiff(vform, dst, temp1, temp2, false);
   3033   return dst;
   3034 }
   3035 
   3036 
   3037 LogicVRegister Simulator::sabdl(VectorFormat vform,
   3038                                 LogicVRegister dst,
   3039                                 const LogicVRegister& src1,
   3040                                 const LogicVRegister& src2) {
   3041   SimVRegister temp1, temp2;
   3042   sxtl(vform, temp1, src1);
   3043   sxtl(vform, temp2, src2);
   3044   absdiff(vform, dst, temp1, temp2, true);
   3045   return dst;
   3046 }
   3047 
   3048 
   3049 LogicVRegister Simulator::sabdl2(VectorFormat vform,
   3050                                  LogicVRegister dst,
   3051                                  const LogicVRegister& src1,
   3052                                  const LogicVRegister& src2) {
   3053   SimVRegister temp1, temp2;
   3054   sxtl2(vform, temp1, src1);
   3055   sxtl2(vform, temp2, src2);
   3056   absdiff(vform, dst, temp1, temp2, true);
   3057   return dst;
   3058 }
   3059 
   3060 
   3061 LogicVRegister Simulator::umull(VectorFormat vform,
   3062                                 LogicVRegister dst,
   3063                                 const LogicVRegister& src1,
   3064                                 const LogicVRegister& src2) {
   3065   SimVRegister temp1, temp2;
   3066   uxtl(vform, temp1, src1);
   3067   uxtl(vform, temp2, src2);
   3068   mul(vform, dst, temp1, temp2);
   3069   return dst;
   3070 }
   3071 
   3072 
   3073 LogicVRegister Simulator::umull2(VectorFormat vform,
   3074                                  LogicVRegister dst,
   3075                                  const LogicVRegister& src1,
   3076                                  const LogicVRegister& src2) {
   3077   SimVRegister temp1, temp2;
   3078   uxtl2(vform, temp1, src1);
   3079   uxtl2(vform, temp2, src2);
   3080   mul(vform, dst, temp1, temp2);
   3081   return dst;
   3082 }
   3083 
   3084 
   3085 LogicVRegister Simulator::smull(VectorFormat vform,
   3086                                 LogicVRegister dst,
   3087                                 const LogicVRegister& src1,
   3088                                 const LogicVRegister& src2) {
   3089   SimVRegister temp1, temp2;
   3090   sxtl(vform, temp1, src1);
   3091   sxtl(vform, temp2, src2);
   3092   mul(vform, dst, temp1, temp2);
   3093   return dst;
   3094 }
   3095 
   3096 
   3097 LogicVRegister Simulator::smull2(VectorFormat vform,
   3098                                  LogicVRegister dst,
   3099                                  const LogicVRegister& src1,
   3100                                  const LogicVRegister& src2) {
   3101   SimVRegister temp1, temp2;
   3102   sxtl2(vform, temp1, src1);
   3103   sxtl2(vform, temp2, src2);
   3104   mul(vform, dst, temp1, temp2);
   3105   return dst;
   3106 }
   3107 
   3108 
   3109 LogicVRegister Simulator::umlsl(VectorFormat vform,
   3110                                 LogicVRegister dst,
   3111                                 const LogicVRegister& src1,
   3112                                 const LogicVRegister& src2) {
   3113   SimVRegister temp1, temp2;
   3114   uxtl(vform, temp1, src1);
   3115   uxtl(vform, temp2, src2);
   3116   mls(vform, dst, temp1, temp2);
   3117   return dst;
   3118 }
   3119 
   3120 
   3121 LogicVRegister Simulator::umlsl2(VectorFormat vform,
   3122                                  LogicVRegister dst,
   3123                                  const LogicVRegister& src1,
   3124                                  const LogicVRegister& src2) {
   3125   SimVRegister temp1, temp2;
   3126   uxtl2(vform, temp1, src1);
   3127   uxtl2(vform, temp2, src2);
   3128   mls(vform, dst, temp1, temp2);
   3129   return dst;
   3130 }
   3131 
   3132 
   3133 LogicVRegister Simulator::smlsl(VectorFormat vform,
   3134                                 LogicVRegister dst,
   3135                                 const LogicVRegister& src1,
   3136                                 const LogicVRegister& src2) {
   3137   SimVRegister temp1, temp2;
   3138   sxtl(vform, temp1, src1);
   3139   sxtl(vform, temp2, src2);
   3140   mls(vform, dst, temp1, temp2);
   3141   return dst;
   3142 }
   3143 
   3144 
   3145 LogicVRegister Simulator::smlsl2(VectorFormat vform,
   3146                                  LogicVRegister dst,
   3147                                  const LogicVRegister& src1,
   3148                                  const LogicVRegister& src2) {
   3149   SimVRegister temp1, temp2;
   3150   sxtl2(vform, temp1, src1);
   3151   sxtl2(vform, temp2, src2);
   3152   mls(vform, dst, temp1, temp2);
   3153   return dst;
   3154 }
   3155 
   3156 
   3157 LogicVRegister Simulator::umlal(VectorFormat vform,
   3158                                 LogicVRegister dst,
   3159                                 const LogicVRegister& src1,
   3160                                 const LogicVRegister& src2) {
   3161   SimVRegister temp1, temp2;
   3162   uxtl(vform, temp1, src1);
   3163   uxtl(vform, temp2, src2);
   3164   mla(vform, dst, temp1, temp2);
   3165   return dst;
   3166 }
   3167 
   3168 
   3169 LogicVRegister Simulator::umlal2(VectorFormat vform,
   3170                                  LogicVRegister dst,
   3171                                  const LogicVRegister& src1,
   3172                                  const LogicVRegister& src2) {
   3173   SimVRegister temp1, temp2;
   3174   uxtl2(vform, temp1, src1);
   3175   uxtl2(vform, temp2, src2);
   3176   mla(vform, dst, temp1, temp2);
   3177   return dst;
   3178 }
   3179 
   3180 
   3181 LogicVRegister Simulator::smlal(VectorFormat vform,
   3182                                 LogicVRegister dst,
   3183                                 const LogicVRegister& src1,
   3184                                 const LogicVRegister& src2) {
   3185   SimVRegister temp1, temp2;
   3186   sxtl(vform, temp1, src1);
   3187   sxtl(vform, temp2, src2);
   3188   mla(vform, dst, temp1, temp2);
   3189   return dst;
   3190 }
   3191 
   3192 
   3193 LogicVRegister Simulator::smlal2(VectorFormat vform,
   3194                                  LogicVRegister dst,
   3195                                  const LogicVRegister& src1,
   3196                                  const LogicVRegister& src2) {
   3197   SimVRegister temp1, temp2;
   3198   sxtl2(vform, temp1, src1);
   3199   sxtl2(vform, temp2, src2);
   3200   mla(vform, dst, temp1, temp2);
   3201   return dst;
   3202 }
   3203 
   3204 
   3205 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
   3206                                   LogicVRegister dst,
   3207                                   const LogicVRegister& src1,
   3208                                   const LogicVRegister& src2) {
   3209   SimVRegister temp;
   3210   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3211   return add(vform, dst, dst, product).SignedSaturate(vform);
   3212 }
   3213 
   3214 
   3215 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
   3216                                   LogicVRegister dst,
   3217                                   const LogicVRegister& src1,
   3218                                   const LogicVRegister& src2) {
   3219   SimVRegister temp;
   3220   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3221   return add(vform, dst, dst, product).SignedSaturate(vform);
   3222 }
   3223 
   3224 
   3225 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
   3226                                   LogicVRegister dst,
   3227                                   const LogicVRegister& src1,
   3228                                   const LogicVRegister& src2) {
   3229   SimVRegister temp;
   3230   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3231   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3232 }
   3233 
   3234 
   3235 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
   3236                                   LogicVRegister dst,
   3237                                   const LogicVRegister& src1,
   3238                                   const LogicVRegister& src2) {
   3239   SimVRegister temp;
   3240   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3241   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3242 }
   3243 
   3244 
   3245 LogicVRegister Simulator::sqdmull(VectorFormat vform,
   3246                                   LogicVRegister dst,
   3247                                   const LogicVRegister& src1,
   3248                                   const LogicVRegister& src2) {
   3249   SimVRegister temp;
   3250   LogicVRegister product = smull(vform, temp, src1, src2);
   3251   return add(vform, dst, product, product).SignedSaturate(vform);
   3252 }
   3253 
   3254 
   3255 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
   3256                                   LogicVRegister dst,
   3257                                   const LogicVRegister& src1,
   3258                                   const LogicVRegister& src2) {
   3259   SimVRegister temp;
   3260   LogicVRegister product = smull2(vform, temp, src1, src2);
   3261   return add(vform, dst, product, product).SignedSaturate(vform);
   3262 }
   3263 
   3264 
   3265 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
   3266                                    LogicVRegister dst,
   3267                                    const LogicVRegister& src1,
   3268                                    const LogicVRegister& src2,
   3269                                    bool round) {
   3270   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
   3271   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
   3272   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
   3273 
   3274   int esize = LaneSizeInBitsFromFormat(vform);
   3275   int round_const = round ? (1 << (esize - 2)) : 0;
   3276   int64_t product;
   3277 
   3278   dst.ClearForWrite(vform);
   3279   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3280     product = src1.Int(vform, i) * src2.Int(vform, i);
   3281     product += round_const;
   3282     product = product >> (esize - 1);
   3283 
   3284     if (product > MaxIntFromFormat(vform)) {
   3285       product = MaxIntFromFormat(vform);
   3286     } else if (product < MinIntFromFormat(vform)) {
   3287       product = MinIntFromFormat(vform);
   3288     }
   3289     dst.SetInt(vform, i, product);
   3290   }
   3291   return dst;
   3292 }
   3293 
   3294 
   3295 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
   3296                                   LogicVRegister dst,
   3297                                   const LogicVRegister& src1,
   3298                                   const LogicVRegister& src2) {
   3299   return sqrdmulh(vform, dst, src1, src2, false);
   3300 }
   3301 
   3302 
   3303 LogicVRegister Simulator::addhn(VectorFormat vform,
   3304                                 LogicVRegister dst,
   3305                                 const LogicVRegister& src1,
   3306                                 const LogicVRegister& src2) {
   3307   SimVRegister temp;
   3308   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3309   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3310   return dst;
   3311 }
   3312 
   3313 
   3314 LogicVRegister Simulator::addhn2(VectorFormat vform,
   3315                                  LogicVRegister dst,
   3316                                  const LogicVRegister& src1,
   3317                                  const LogicVRegister& src2) {
   3318   SimVRegister temp;
   3319   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3320   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3321   return dst;
   3322 }
   3323 
   3324 
   3325 LogicVRegister Simulator::raddhn(VectorFormat vform,
   3326                                  LogicVRegister dst,
   3327                                  const LogicVRegister& src1,
   3328                                  const LogicVRegister& src2) {
   3329   SimVRegister temp;
   3330   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3331   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3332   return dst;
   3333 }
   3334 
   3335 
   3336 LogicVRegister Simulator::raddhn2(VectorFormat vform,
   3337                                   LogicVRegister dst,
   3338                                   const LogicVRegister& src1,
   3339                                   const LogicVRegister& src2) {
   3340   SimVRegister temp;
   3341   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3342   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3343   return dst;
   3344 }
   3345 
   3346 
   3347 LogicVRegister Simulator::subhn(VectorFormat vform,
   3348                                 LogicVRegister dst,
   3349                                 const LogicVRegister& src1,
   3350                                 const LogicVRegister& src2) {
   3351   SimVRegister temp;
   3352   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3353   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3354   return dst;
   3355 }
   3356 
   3357 
   3358 LogicVRegister Simulator::subhn2(VectorFormat vform,
   3359                                  LogicVRegister dst,
   3360                                  const LogicVRegister& src1,
   3361                                  const LogicVRegister& src2) {
   3362   SimVRegister temp;
   3363   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3364   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3365   return dst;
   3366 }
   3367 
   3368 
   3369 LogicVRegister Simulator::rsubhn(VectorFormat vform,
   3370                                  LogicVRegister dst,
   3371                                  const LogicVRegister& src1,
   3372                                  const LogicVRegister& src2) {
   3373   SimVRegister temp;
   3374   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3375   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3376   return dst;
   3377 }
   3378 
   3379 
   3380 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
   3381                                   LogicVRegister dst,
   3382                                   const LogicVRegister& src1,
   3383                                   const LogicVRegister& src2) {
   3384   SimVRegister temp;
   3385   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3386   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3387   return dst;
   3388 }
   3389 
   3390 
   3391 LogicVRegister Simulator::trn1(VectorFormat vform,
   3392                                LogicVRegister dst,
   3393                                const LogicVRegister& src1,
   3394                                const LogicVRegister& src2) {
   3395   uint64_t result[16];
   3396   int laneCount = LaneCountFromFormat(vform);
   3397   int pairs = laneCount / 2;
   3398   for (int i = 0; i < pairs; ++i) {
   3399     result[2 * i]       = src1.Uint(vform, 2 * i);
   3400     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
   3401   }
   3402 
   3403   dst.ClearForWrite(vform);
   3404   for (int i = 0; i < laneCount; ++i) {
   3405     dst.SetUint(vform, i, result[i]);
   3406   }
   3407   return dst;
   3408 }
   3409 
   3410 
   3411 LogicVRegister Simulator::trn2(VectorFormat vform,
   3412                                LogicVRegister dst,
   3413                                const LogicVRegister& src1,
   3414                                const LogicVRegister& src2) {
   3415   uint64_t result[16];
   3416   int laneCount = LaneCountFromFormat(vform);
   3417   int pairs = laneCount / 2;
   3418   for (int i = 0; i < pairs; ++i) {
   3419     result[2 * i]       = src1.Uint(vform, (2 * i) + 1);
   3420     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
   3421   }
   3422 
   3423   dst.ClearForWrite(vform);
   3424   for (int i = 0; i < laneCount; ++i) {
   3425     dst.SetUint(vform, i, result[i]);
   3426   }
   3427   return dst;
   3428 }
   3429 
   3430 
   3431 LogicVRegister Simulator::zip1(VectorFormat vform,
   3432                                LogicVRegister dst,
   3433                                const LogicVRegister& src1,
   3434                                const LogicVRegister& src2) {
   3435   uint64_t result[16];
   3436   int laneCount = LaneCountFromFormat(vform);
   3437   int pairs = laneCount / 2;
   3438   for (int i = 0; i < pairs; ++i) {
   3439     result[2 * i]       = src1.Uint(vform, i);
   3440     result[(2 * i) + 1] = src2.Uint(vform, i);
   3441   }
   3442 
   3443   dst.ClearForWrite(vform);
   3444   for (int i = 0; i < laneCount; ++i) {
   3445     dst.SetUint(vform, i, result[i]);
   3446   }
   3447   return dst;
   3448 }
   3449 
   3450 
   3451 LogicVRegister Simulator::zip2(VectorFormat vform,
   3452                                LogicVRegister dst,
   3453                                const LogicVRegister& src1,
   3454                                const LogicVRegister& src2) {
   3455   uint64_t result[16];
   3456   int laneCount = LaneCountFromFormat(vform);
   3457   int pairs = laneCount / 2;
   3458   for (int i = 0; i < pairs; ++i) {
   3459     result[2 * i]       = src1.Uint(vform, pairs + i);
   3460     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
   3461   }
   3462 
   3463   dst.ClearForWrite(vform);
   3464   for (int i = 0; i < laneCount; ++i) {
   3465     dst.SetUint(vform, i, result[i]);
   3466   }
   3467   return dst;
   3468 }
   3469 
   3470 
   3471 LogicVRegister Simulator::uzp1(VectorFormat vform,
   3472                                LogicVRegister dst,
   3473                                const LogicVRegister& src1,
   3474                                const LogicVRegister& src2) {
   3475   uint64_t result[32];
   3476   int laneCount = LaneCountFromFormat(vform);
   3477   for (int i = 0; i < laneCount; ++i) {
   3478     result[i]             = src1.Uint(vform, i);
   3479     result[laneCount + i] = src2.Uint(vform, i);
   3480   }
   3481 
   3482   dst.ClearForWrite(vform);
   3483   for (int i = 0; i < laneCount; ++i) {
   3484     dst.SetUint(vform, i, result[2 * i]);
   3485   }
   3486   return dst;
   3487 }
   3488 
   3489 
   3490 LogicVRegister Simulator::uzp2(VectorFormat vform,
   3491                                LogicVRegister dst,
   3492                                const LogicVRegister& src1,
   3493                                const LogicVRegister& src2) {
   3494   uint64_t result[32];
   3495   int laneCount = LaneCountFromFormat(vform);
   3496   for (int i = 0; i < laneCount; ++i) {
   3497     result[i]             = src1.Uint(vform, i);
   3498     result[laneCount + i] = src2.Uint(vform, i);
   3499   }
   3500 
   3501   dst.ClearForWrite(vform);
   3502   for (int i = 0; i < laneCount; ++i) {
   3503     dst.SetUint(vform, i, result[ (2 * i) + 1]);
   3504   }
   3505   return dst;
   3506 }
   3507 
   3508 
   3509 template <typename T>
   3510 T Simulator::FPAdd(T op1, T op2) {
   3511   T result = FPProcessNaNs(op1, op2);
   3512   if (std::isnan(result)) return result;
   3513 
   3514   if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
   3515     // inf + -inf returns the default NaN.
   3516     FPProcessException();
   3517     return FPDefaultNaN<T>();
   3518   } else {
   3519     // Other cases should be handled by standard arithmetic.
   3520     return op1 + op2;
   3521   }
   3522 }
   3523 
   3524 
   3525 template <typename T>
   3526 T Simulator::FPSub(T op1, T op2) {
   3527   // NaNs should be handled elsewhere.
   3528   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3529 
   3530   if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
   3531     // inf - inf returns the default NaN.
   3532     FPProcessException();
   3533     return FPDefaultNaN<T>();
   3534   } else {
   3535     // Other cases should be handled by standard arithmetic.
   3536     return op1 - op2;
   3537   }
   3538 }
   3539 
   3540 
   3541 template <typename T>
   3542 T Simulator::FPMul(T op1, T op2) {
   3543   // NaNs should be handled elsewhere.
   3544   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3545 
   3546   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   3547     // inf * 0.0 returns the default NaN.
   3548     FPProcessException();
   3549     return FPDefaultNaN<T>();
   3550   } else {
   3551     // Other cases should be handled by standard arithmetic.
   3552     return op1 * op2;
   3553   }
   3554 }
   3555 
   3556 
   3557 template<typename T>
   3558 T Simulator::FPMulx(T op1, T op2) {
   3559   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   3560     // inf * 0.0 returns +/-2.0.
   3561     T two = 2.0;
   3562     return copysign(1.0, op1) * copysign(1.0, op2) * two;
   3563   }
   3564   return FPMul(op1, op2);
   3565 }
   3566 
   3567 
   3568 template<typename T>
   3569 T Simulator::FPMulAdd(T a, T op1, T op2) {
   3570   T result = FPProcessNaNs3(a, op1, op2);
   3571 
   3572   T sign_a = copysign(1.0, a);
   3573   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
   3574   bool isinf_prod = std::isinf(op1) || std::isinf(op2);
   3575   bool operation_generates_nan =
   3576       (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
   3577       (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
   3578       (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
   3579 
   3580   if (std::isnan(result)) {
   3581     // Generated NaNs override quiet NaNs propagated from a.
   3582     if (operation_generates_nan && IsQuietNaN(a)) {
   3583       FPProcessException();
   3584       return FPDefaultNaN<T>();
   3585     } else {
   3586       return result;
   3587     }
   3588   }
   3589 
   3590   // If the operation would produce a NaN, return the default NaN.
   3591   if (operation_generates_nan) {
   3592     FPProcessException();
   3593     return FPDefaultNaN<T>();
   3594   }
   3595 
   3596   // Work around broken fma implementations for exact zero results: The sign of
   3597   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
   3598   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
   3599     return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
   3600   }
   3601 
   3602   result = FusedMultiplyAdd(op1, op2, a);
   3603   VIXL_ASSERT(!std::isnan(result));
   3604 
   3605   // Work around broken fma implementations for rounded zero results: If a is
   3606   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
   3607   if ((a == 0.0) && (result == 0.0)) {
   3608     return copysign(0.0, sign_prod);
   3609   }
   3610 
   3611   return result;
   3612 }
   3613 
   3614 
   3615 template <typename T>
   3616 T Simulator::FPDiv(T op1, T op2) {
   3617   // NaNs should be handled elsewhere.
   3618   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3619 
   3620   if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
   3621     // inf / inf and 0.0 / 0.0 return the default NaN.
   3622     FPProcessException();
   3623     return FPDefaultNaN<T>();
   3624   } else {
   3625     if (op2 == 0.0) FPProcessException();
   3626 
   3627     // Other cases should be handled by standard arithmetic.
   3628     return op1 / op2;
   3629   }
   3630 }
   3631 
   3632 
   3633 template <typename T>
   3634 T Simulator::FPSqrt(T op) {
   3635   if (std::isnan(op)) {
   3636     return FPProcessNaN(op);
   3637   } else if (op < 0.0) {
   3638     FPProcessException();
   3639     return FPDefaultNaN<T>();
   3640   } else {
   3641     return sqrt(op);
   3642   }
   3643 }
   3644 
   3645 
   3646 template <typename T>
   3647 T Simulator::FPMax(T a, T b) {
   3648   T result = FPProcessNaNs(a, b);
   3649   if (std::isnan(result)) return result;
   3650 
   3651   if ((a == 0.0) && (b == 0.0) &&
   3652       (copysign(1.0, a) != copysign(1.0, b))) {
   3653     // a and b are zero, and the sign differs: return +0.0.
   3654     return 0.0;
   3655   } else {
   3656     return (a > b) ? a : b;
   3657   }
   3658 }
   3659 
   3660 
   3661 template <typename T>
   3662 T Simulator::FPMaxNM(T a, T b) {
   3663   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3664     a = kFP64NegativeInfinity;
   3665   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3666     b = kFP64NegativeInfinity;
   3667   }
   3668 
   3669   T result = FPProcessNaNs(a, b);
   3670   return std::isnan(result) ? result : FPMax(a, b);
   3671 }
   3672 
   3673 
   3674 template <typename T>
   3675 T Simulator::FPMin(T a, T b) {
   3676   T result = FPProcessNaNs(a, b);
   3677   if (std::isnan(result)) return result;
   3678 
   3679   if ((a == 0.0) && (b == 0.0) &&
   3680       (copysign(1.0, a) != copysign(1.0, b))) {
   3681     // a and b are zero, and the sign differs: return -0.0.
   3682     return -0.0;
   3683   } else {
   3684     return (a < b) ? a : b;
   3685   }
   3686 }
   3687 
   3688 
   3689 template <typename T>
   3690 T Simulator::FPMinNM(T a, T b) {
   3691   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3692     a = kFP64PositiveInfinity;
   3693   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3694     b = kFP64PositiveInfinity;
   3695   }
   3696 
   3697   T result = FPProcessNaNs(a, b);
   3698   return std::isnan(result) ? result : FPMin(a, b);
   3699 }
   3700 
   3701 
   3702 template <typename T>
   3703 T Simulator::FPRecipStepFused(T op1, T op2) {
   3704   const T two = 2.0;
   3705   if ((std::isinf(op1) && (op2 == 0.0))
   3706       || ((op1 == 0.0) && (std::isinf(op2)))) {
   3707     return two;
   3708   } else if (std::isinf(op1) || std::isinf(op2)) {
   3709     // Return +inf if signs match, otherwise -inf.
   3710     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3711                                           : kFP64NegativeInfinity;
   3712   } else {
   3713     return FusedMultiplyAdd(op1, op2, two);
   3714   }
   3715 }
   3716 
   3717 
   3718 template <typename T>
   3719 T Simulator::FPRSqrtStepFused(T op1, T op2) {
   3720   const T one_point_five = 1.5;
   3721   const T two = 2.0;
   3722 
   3723   if ((std::isinf(op1) && (op2 == 0.0))
   3724       || ((op1 == 0.0) && (std::isinf(op2)))) {
   3725     return one_point_five;
   3726   } else if (std::isinf(op1) || std::isinf(op2)) {
   3727     // Return +inf if signs match, otherwise -inf.
   3728     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3729                                           : kFP64NegativeInfinity;
   3730   } else {
   3731     // The multiply-add-halve operation must be fully fused, so avoid interim
   3732     // rounding by checking which operand can be losslessly divided by two
   3733     // before doing the multiply-add.
   3734     if (std::isnormal(op1 / two)) {
   3735       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
   3736     } else if (std::isnormal(op2 / two)) {
   3737       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
   3738     } else {
   3739       // Neither operand is normal after halving: the result is dominated by
   3740       // the addition term, so just return that.
   3741       return one_point_five;
   3742     }
   3743   }
   3744 }
   3745 
   3746 
   3747 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
   3748   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
   3749       (value == kFP64NegativeInfinity)) {
   3750     return value;
   3751   } else if (std::isnan(value)) {
   3752     return FPProcessNaN(value);
   3753   }
   3754 
   3755   double int_result = std::floor(value);
   3756   double error = value - int_result;
   3757   switch (round_mode) {
   3758     case FPTieAway: {
   3759       // Take care of correctly handling the range ]-0.5, -0.0], which must
   3760       // yield -0.0.
   3761       if ((-0.5 < value) && (value < 0.0)) {
   3762         int_result = -0.0;
   3763 
   3764       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
   3765         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3766         // result is positive, round up.
   3767         int_result++;
   3768       }
   3769       break;
   3770     }
   3771     case FPTieEven: {
   3772       // Take care of correctly handling the range [-0.5, -0.0], which must
   3773       // yield -0.0.
   3774       if ((-0.5 <= value) && (value < 0.0)) {
   3775         int_result = -0.0;
   3776 
   3777       // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3778       // result is odd, round up.
   3779       } else if ((error > 0.5) ||
   3780           ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
   3781         int_result++;
   3782       }
   3783       break;
   3784     }
   3785     case FPZero: {
   3786       // If value>0 then we take floor(value)
   3787       // otherwise, ceil(value).
   3788       if (value < 0) {
   3789          int_result = ceil(value);
   3790       }
   3791       break;
   3792     }
   3793     case FPNegativeInfinity: {
   3794       // We always use floor(value).
   3795       break;
   3796     }
   3797     case FPPositiveInfinity: {
   3798       // Take care of correctly handling the range ]-1.0, -0.0], which must
   3799       // yield -0.0.
   3800       if ((-1.0 < value) && (value < 0.0)) {
   3801         int_result = -0.0;
   3802 
   3803       // If the error is non-zero, round up.
   3804       } else if (error > 0.0) {
   3805         int_result++;
   3806       }
   3807       break;
   3808     }
   3809     default: VIXL_UNIMPLEMENTED();
   3810   }
   3811   return int_result;
   3812 }
   3813 
   3814 
   3815 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
   3816   value = FPRoundInt(value, rmode);
   3817   if (value >= kWMaxInt) {
   3818     return kWMaxInt;
   3819   } else if (value < kWMinInt) {
   3820     return kWMinInt;
   3821   }
   3822   return std::isnan(value) ? 0 : static_cast<int32_t>(value);
   3823 }
   3824 
   3825 
   3826 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
   3827   value = FPRoundInt(value, rmode);
   3828   if (value >= kXMaxInt) {
   3829     return kXMaxInt;
   3830   } else if (value < kXMinInt) {
   3831     return kXMinInt;
   3832   }
   3833   return std::isnan(value) ? 0 : static_cast<int64_t>(value);
   3834 }
   3835 
   3836 
   3837 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
   3838   value = FPRoundInt(value, rmode);
   3839   if (value >= kWMaxUInt) {
   3840     return kWMaxUInt;
   3841   } else if (value < 0.0) {
   3842     return 0;
   3843   }
   3844   return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
   3845 }
   3846 
   3847 
   3848 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
   3849   value = FPRoundInt(value, rmode);
   3850   if (value >= kXMaxUInt) {
   3851     return kXMaxUInt;
   3852   } else if (value < 0.0) {
   3853     return 0;
   3854   }
   3855   return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
   3856 }
   3857 
   3858 
   3859 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
   3860 template <typename T>                                            \
   3861 LogicVRegister Simulator::FN(VectorFormat vform,                 \
   3862                              LogicVRegister dst,                 \
   3863                              const LogicVRegister& src1,         \
   3864                              const LogicVRegister& src2) {       \
   3865   dst.ClearForWrite(vform);                                      \
   3866   for (int i = 0; i < LaneCountFromFormat(vform); i++) {         \
   3867     T op1 = src1.Float<T>(i);                                    \
   3868     T op2 = src2.Float<T>(i);                                    \
   3869     T result;                                                    \
   3870     if (PROCNAN) {                                               \
   3871       result = FPProcessNaNs(op1, op2);                          \
   3872       if (!std::isnan(result)) {                                      \
   3873         result = OP(op1, op2);                                   \
   3874       }                                                          \
   3875     } else {                                                     \
   3876       result = OP(op1, op2);                                     \
   3877     }                                                            \
   3878     dst.SetFloat(i, result);                                     \
   3879   }                                                              \
   3880   return dst;                                                    \
   3881 }                                                                \
   3882                                                                  \
   3883 LogicVRegister Simulator::FN(VectorFormat vform,                 \
   3884                              LogicVRegister dst,                 \
   3885                              const LogicVRegister& src1,         \
   3886                              const LogicVRegister& src2) {       \
   3887   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {            \
   3888     FN<float>(vform, dst, src1, src2);                           \
   3889   } else {                                                       \
   3890     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);   \
   3891     FN<double>(vform, dst, src1, src2);                          \
   3892   }                                                              \
   3893   return dst;                                                    \
   3894 }
   3895 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
   3896 #undef DEFINE_NEON_FP_VECTOR_OP
   3897 
   3898 
   3899 LogicVRegister Simulator::fnmul(VectorFormat vform,
   3900                                 LogicVRegister dst,
   3901                                 const LogicVRegister& src1,
   3902                                 const LogicVRegister& src2) {
   3903   SimVRegister temp;
   3904   LogicVRegister product = fmul(vform, temp, src1, src2);
   3905   return fneg(vform, dst, product);
   3906 }
   3907 
   3908 
   3909 template <typename T>
   3910 LogicVRegister Simulator::frecps(VectorFormat vform,
   3911                                  LogicVRegister dst,
   3912                                  const LogicVRegister& src1,
   3913                                  const LogicVRegister& src2) {
   3914   dst.ClearForWrite(vform);
   3915   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3916     T op1 = -src1.Float<T>(i);
   3917     T op2 = src2.Float<T>(i);
   3918     T result = FPProcessNaNs(op1, op2);
   3919     dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
   3920   }
   3921   return dst;
   3922 }
   3923 
   3924 
   3925 LogicVRegister Simulator::frecps(VectorFormat vform,
   3926                                  LogicVRegister dst,
   3927                                  const LogicVRegister& src1,
   3928                                  const LogicVRegister& src2) {
   3929   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   3930     frecps<float>(vform, dst, src1, src2);
   3931   } else {
   3932     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   3933     frecps<double>(vform, dst, src1, src2);
   3934   }
   3935   return dst;
   3936 }
   3937 
   3938 
   3939 template <typename T>
   3940 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   3941                                   LogicVRegister dst,
   3942                                   const LogicVRegister& src1,
   3943                                   const LogicVRegister& src2) {
   3944   dst.ClearForWrite(vform);
   3945   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3946     T op1 = -src1.Float<T>(i);
   3947     T op2 = src2.Float<T>(i);
   3948     T result = FPProcessNaNs(op1, op2);
   3949     dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
   3950   }
   3951   return dst;
   3952 }
   3953 
   3954 
   3955 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   3956                                   LogicVRegister dst,
   3957                                   const LogicVRegister& src1,
   3958                                   const LogicVRegister& src2) {
   3959   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   3960     frsqrts<float>(vform, dst, src1, src2);
   3961   } else {
   3962     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   3963     frsqrts<double>(vform, dst, src1, src2);
   3964   }
   3965   return dst;
   3966 }
   3967 
   3968 
   3969 template <typename T>
   3970 LogicVRegister Simulator::fcmp(VectorFormat vform,
   3971                                LogicVRegister dst,
   3972                                const LogicVRegister& src1,
   3973                                const LogicVRegister& src2,
   3974                                Condition cond) {
   3975   dst.ClearForWrite(vform);
   3976   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3977     bool result = false;
   3978     T op1 = src1.Float<T>(i);
   3979     T op2 = src2.Float<T>(i);
   3980     T nan_result = FPProcessNaNs(op1, op2);
   3981     if (!std::isnan(nan_result)) {
   3982       switch (cond) {
   3983         case eq: result = (op1 == op2); break;
   3984         case ge: result = (op1 >= op2); break;
   3985         case gt: result = (op1 > op2) ; break;
   3986         case le: result = (op1 <= op2); break;
   3987         case lt: result = (op1 < op2) ; break;
   3988         default: VIXL_UNREACHABLE(); break;
   3989       }
   3990     }
   3991     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
   3992   }
   3993   return dst;
   3994 }
   3995 
   3996 
   3997 LogicVRegister Simulator::fcmp(VectorFormat vform,
   3998                                LogicVRegister dst,
   3999                                const LogicVRegister& src1,
   4000                                const LogicVRegister& src2,
   4001                                Condition cond) {
   4002   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4003     fcmp<float>(vform, dst, src1, src2, cond);
   4004   } else {
   4005     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4006     fcmp<double>(vform, dst, src1, src2, cond);
   4007   }
   4008   return dst;
   4009 }
   4010 
   4011 
   4012 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
   4013                                     LogicVRegister dst,
   4014                                     const LogicVRegister& src,
   4015                                     Condition cond) {
   4016   SimVRegister temp;
   4017   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4018     LogicVRegister zero_reg = dup_immediate(vform, temp, float_to_rawbits(0.0));
   4019     fcmp<float>(vform, dst, src, zero_reg, cond);
   4020   } else {
   4021     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4022     LogicVRegister zero_reg = dup_immediate(vform, temp,
   4023                                             double_to_rawbits(0.0));
   4024     fcmp<double>(vform, dst, src, zero_reg, cond);
   4025   }
   4026   return dst;
   4027 }
   4028 
   4029 
   4030 LogicVRegister Simulator::fabscmp(VectorFormat vform,
   4031                                   LogicVRegister dst,
   4032                                   const LogicVRegister& src1,
   4033                                   const LogicVRegister& src2,
   4034                                   Condition cond) {
   4035   SimVRegister temp1, temp2;
   4036   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4037     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
   4038     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
   4039     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
   4040   } else {
   4041     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4042     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
   4043     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
   4044     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
   4045   }
   4046   return dst;
   4047 }
   4048 
   4049 
   4050 template <typename T>
   4051 LogicVRegister Simulator::fmla(VectorFormat vform,
   4052                                LogicVRegister dst,
   4053                                const LogicVRegister& src1,
   4054                                const LogicVRegister& src2) {
   4055   dst.ClearForWrite(vform);
   4056   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4057     T op1 = src1.Float<T>(i);
   4058     T op2 = src2.Float<T>(i);
   4059     T acc = dst.Float<T>(i);
   4060     T result = FPMulAdd(acc, op1, op2);
   4061     dst.SetFloat(i, result);
   4062   }
   4063   return dst;
   4064 }
   4065 
   4066 
   4067 LogicVRegister Simulator::fmla(VectorFormat vform,
   4068                                LogicVRegister dst,
   4069                                const LogicVRegister& src1,
   4070                                const LogicVRegister& src2) {
   4071   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4072     fmla<float>(vform, dst, src1, src2);
   4073   } else {
   4074     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4075     fmla<double>(vform, dst, src1, src2);
   4076   }
   4077   return dst;
   4078 }
   4079 
   4080 
   4081 template <typename T>
   4082 LogicVRegister Simulator::fmls(VectorFormat vform,
   4083                                LogicVRegister dst,
   4084                                const LogicVRegister& src1,
   4085                                const LogicVRegister& src2) {
   4086   dst.ClearForWrite(vform);
   4087   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4088     T op1 = -src1.Float<T>(i);
   4089     T op2 = src2.Float<T>(i);
   4090     T acc = dst.Float<T>(i);
   4091     T result = FPMulAdd(acc, op1, op2);
   4092     dst.SetFloat(i, result);
   4093   }
   4094   return dst;
   4095 }
   4096 
   4097 
   4098 LogicVRegister Simulator::fmls(VectorFormat vform,
   4099                                LogicVRegister dst,
   4100                                const LogicVRegister& src1,
   4101                                const LogicVRegister& src2) {
   4102   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4103     fmls<float>(vform, dst, src1, src2);
   4104   } else {
   4105     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4106     fmls<double>(vform, dst, src1, src2);
   4107   }
   4108   return dst;
   4109 }
   4110 
   4111 
   4112 template <typename T>
   4113 LogicVRegister Simulator::fneg(VectorFormat vform,
   4114                                LogicVRegister dst,
   4115                                const LogicVRegister& src) {
   4116   dst.ClearForWrite(vform);
   4117   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4118     T op = src.Float<T>(i);
   4119     op = -op;
   4120     dst.SetFloat(i, op);
   4121   }
   4122   return dst;
   4123 }
   4124 
   4125 
   4126 LogicVRegister Simulator::fneg(VectorFormat vform,
   4127                                LogicVRegister dst,
   4128                                const LogicVRegister& src) {
   4129   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4130     fneg<float>(vform, dst, src);
   4131   } else {
   4132     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4133     fneg<double>(vform, dst, src);
   4134   }
   4135   return dst;
   4136 }
   4137 
   4138 
   4139 template <typename T>
   4140 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4141                                 LogicVRegister dst,
   4142                                 const LogicVRegister& src) {
   4143   dst.ClearForWrite(vform);
   4144   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4145     T op = src.Float<T>(i);
   4146     if (copysign(1.0, op) < 0.0) {
   4147       op = -op;
   4148     }
   4149     dst.SetFloat(i, op);
   4150   }
   4151   return dst;
   4152 }
   4153 
   4154 
   4155 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4156                                 LogicVRegister dst,
   4157                                 const LogicVRegister& src) {
   4158   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4159     fabs_<float>(vform, dst, src);
   4160   } else {
   4161     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4162     fabs_<double>(vform, dst, src);
   4163   }
   4164   return dst;
   4165 }
   4166 
   4167 
   4168 LogicVRegister Simulator::fabd(VectorFormat vform,
   4169                                LogicVRegister dst,
   4170                                const LogicVRegister& src1,
   4171                                const LogicVRegister& src2) {
   4172   SimVRegister temp;
   4173   fsub(vform, temp, src1, src2);
   4174   fabs_(vform, dst, temp);
   4175   return dst;
   4176 }
   4177 
   4178 
   4179 LogicVRegister Simulator::fsqrt(VectorFormat vform,
   4180                                 LogicVRegister dst,
   4181                                 const LogicVRegister& src) {
   4182   dst.ClearForWrite(vform);
   4183   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4184     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4185       float result = FPSqrt(src.Float<float>(i));
   4186       dst.SetFloat(i, result);
   4187     }
   4188   } else {
   4189     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4190     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4191       double result = FPSqrt(src.Float<double>(i));
   4192       dst.SetFloat(i, result);
   4193     }
   4194   }
   4195   return dst;
   4196 }
   4197 
   4198 
   4199 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                          \
   4200 LogicVRegister Simulator::FNP(VectorFormat vform,                    \
   4201                               LogicVRegister dst,                    \
   4202                               const LogicVRegister& src1,            \
   4203                               const LogicVRegister& src2) {          \
   4204   SimVRegister temp1, temp2;                                         \
   4205   uzp1(vform, temp1, src1, src2);                                    \
   4206   uzp2(vform, temp2, src1, src2);                                    \
   4207   FN(vform, dst, temp1, temp2);                                      \
   4208   return dst;                                                        \
   4209 }                                                                    \
   4210                                                                      \
   4211 LogicVRegister Simulator::FNP(VectorFormat vform,                    \
   4212                               LogicVRegister dst,                    \
   4213                               const LogicVRegister& src) {           \
   4214   if (vform == kFormatS) {                                           \
   4215     float result = OP(src.Float<float>(0), src.Float<float>(1));     \
   4216     dst.SetFloat(0, result);                                         \
   4217   } else {                                                           \
   4218     VIXL_ASSERT(vform == kFormatD);                                  \
   4219     double result = OP(src.Float<double>(0), src.Float<double>(1));  \
   4220     dst.SetFloat(0, result);                                         \
   4221   }                                                                  \
   4222   dst.ClearForWrite(vform);                                          \
   4223   return dst;                                                        \
   4224 }
   4225 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
   4226 #undef DEFINE_NEON_FP_PAIR_OP
   4227 
   4228 
   4229 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
   4230                                    LogicVRegister dst,
   4231                                    const LogicVRegister& src,
   4232                                    FPMinMaxOp Op) {
   4233   VIXL_ASSERT(vform == kFormat4S);
   4234   USE(vform);
   4235   float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
   4236   float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
   4237   float result = (this->*Op)(result1, result2);
   4238   dst.ClearForWrite(kFormatS);
   4239   dst.SetFloat<float>(0, result);
   4240   return dst;
   4241 }
   4242 
   4243 
   4244 LogicVRegister Simulator::fmaxv(VectorFormat vform,
   4245                                 LogicVRegister dst,
   4246                                 const LogicVRegister& src) {
   4247   return fminmaxv(vform, dst, src, &Simulator::FPMax);
   4248 }
   4249 
   4250 
   4251 LogicVRegister Simulator::fminv(VectorFormat vform,
   4252                                 LogicVRegister dst,
   4253                                 const LogicVRegister& src) {
   4254   return fminmaxv(vform, dst, src, &Simulator::FPMin);
   4255 }
   4256 
   4257 
   4258 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
   4259                                  LogicVRegister dst,
   4260                                  const LogicVRegister& src) {
   4261   return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
   4262 }
   4263 
   4264 
   4265 LogicVRegister Simulator::fminnmv(VectorFormat vform,
   4266                                   LogicVRegister dst,
   4267                                   const LogicVRegister& src) {
   4268   return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
   4269 }
   4270 
   4271 
   4272 LogicVRegister Simulator::fmul(VectorFormat vform,
   4273                                LogicVRegister dst,
   4274                                const LogicVRegister& src1,
   4275                                const LogicVRegister& src2,
   4276                                int index) {
   4277   dst.ClearForWrite(vform);
   4278   SimVRegister temp;
   4279   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4280     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4281     fmul<float>(vform, dst, src1, index_reg);
   4282 
   4283   } else {
   4284     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4285     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4286     fmul<double>(vform, dst, src1, index_reg);
   4287   }
   4288   return dst;
   4289 }
   4290 
   4291 
   4292 LogicVRegister Simulator::fmla(VectorFormat vform,
   4293                                LogicVRegister dst,
   4294                                const LogicVRegister& src1,
   4295                                const LogicVRegister& src2,
   4296                                int index) {
   4297   dst.ClearForWrite(vform);
   4298   SimVRegister temp;
   4299   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4300     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4301     fmla<float>(vform, dst, src1, index_reg);
   4302 
   4303   } else {
   4304     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4305     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4306     fmla<double>(vform, dst, src1, index_reg);
   4307   }
   4308   return dst;
   4309 }
   4310 
   4311 
   4312 LogicVRegister Simulator::fmls(VectorFormat vform,
   4313                                LogicVRegister dst,
   4314                                const LogicVRegister& src1,
   4315                                const LogicVRegister& src2,
   4316                                int index) {
   4317   dst.ClearForWrite(vform);
   4318   SimVRegister temp;
   4319   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4320     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4321     fmls<float>(vform, dst, src1, index_reg);
   4322 
   4323   } else {
   4324     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4325     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4326     fmls<double>(vform, dst, src1, index_reg);
   4327   }
   4328   return dst;
   4329 }
   4330 
   4331 
   4332 LogicVRegister Simulator::fmulx(VectorFormat vform,
   4333                                 LogicVRegister dst,
   4334                                 const LogicVRegister& src1,
   4335                                 const LogicVRegister& src2,
   4336                                 int index) {
   4337   dst.ClearForWrite(vform);
   4338   SimVRegister temp;
   4339   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4340     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4341     fmulx<float>(vform, dst, src1, index_reg);
   4342 
   4343   } else {
   4344     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4345     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4346     fmulx<double>(vform, dst, src1, index_reg);
   4347   }
   4348   return dst;
   4349 }
   4350 
   4351 
   4352 LogicVRegister Simulator::frint(VectorFormat vform,
   4353                                 LogicVRegister dst,
   4354                                 const LogicVRegister& src,
   4355                                 FPRounding rounding_mode,
   4356                                 bool inexact_exception) {
   4357   dst.ClearForWrite(vform);
   4358   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4359     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4360       float input = src.Float<float>(i);
   4361       float rounded = FPRoundInt(input, rounding_mode);
   4362       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   4363         FPProcessException();
   4364       }
   4365       dst.SetFloat<float>(i, rounded);
   4366     }
   4367   } else {
   4368     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4369     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4370       double input = src.Float<double>(i);
   4371       double rounded = FPRoundInt(input, rounding_mode);
   4372       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   4373         FPProcessException();
   4374       }
   4375       dst.SetFloat<double>(i, rounded);
   4376     }
   4377   }
   4378   return dst;
   4379 }
   4380 
   4381 
   4382 LogicVRegister Simulator::fcvts(VectorFormat vform,
   4383                                 LogicVRegister dst,
   4384                                 const LogicVRegister& src,
   4385                                 FPRounding rounding_mode,
   4386                                 int fbits) {
   4387   dst.ClearForWrite(vform);
   4388   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4389     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4390       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4391       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
   4392     }
   4393   } else {
   4394     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4395     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4396       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4397       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
   4398     }
   4399   }
   4400   return dst;
   4401 }
   4402 
   4403 
   4404 LogicVRegister Simulator::fcvtu(VectorFormat vform,
   4405                                 LogicVRegister dst,
   4406                                 const LogicVRegister& src,
   4407                                 FPRounding rounding_mode,
   4408                                 int fbits) {
   4409   dst.ClearForWrite(vform);
   4410   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4411     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4412       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4413       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
   4414     }
   4415   } else {
   4416     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4417     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4418       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4419       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
   4420     }
   4421   }
   4422   return dst;
   4423 }
   4424 
   4425 
   4426 LogicVRegister Simulator::fcvtl(VectorFormat vform,
   4427                                 LogicVRegister dst,
   4428                                 const LogicVRegister& src) {
   4429   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4430     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4431       dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
   4432     }
   4433   } else {
   4434     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4435     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4436       dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
   4437     }
   4438   }
   4439   return dst;
   4440 }
   4441 
   4442 
   4443 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
   4444                                  LogicVRegister dst,
   4445                                  const LogicVRegister& src) {
   4446   int lane_count = LaneCountFromFormat(vform);
   4447   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4448     for (int i = 0; i < lane_count; i++) {
   4449       dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
   4450     }
   4451   } else {
   4452     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4453     for (int i = 0; i < lane_count; i++) {
   4454       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
   4455     }
   4456   }
   4457   return dst;
   4458 }
   4459 
   4460 
   4461 LogicVRegister Simulator::fcvtn(VectorFormat vform,
   4462                                 LogicVRegister dst,
   4463                                 const LogicVRegister& src) {
   4464   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4465     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4466       dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
   4467     }
   4468   } else {
   4469     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4470     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4471       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
   4472     }
   4473   }
   4474   return dst;
   4475 }
   4476 
   4477 
   4478 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
   4479                                  LogicVRegister dst,
   4480                                  const LogicVRegister& src) {
   4481   int lane_count = LaneCountFromFormat(vform) / 2;
   4482   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4483     for (int i = lane_count - 1; i >= 0; i--) {
   4484       dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
   4485     }
   4486   } else {
   4487     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4488     for (int i = lane_count - 1; i >= 0; i--) {
   4489       dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
   4490     }
   4491   }
   4492   return dst;
   4493 }
   4494 
   4495 
   4496 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
   4497                                  LogicVRegister dst,
   4498                                  const LogicVRegister& src) {
   4499   dst.ClearForWrite(vform);
   4500   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4501   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4502     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
   4503   }
   4504   return dst;
   4505 }
   4506 
   4507 
   4508 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
   4509                                   LogicVRegister dst,
   4510                                   const LogicVRegister& src) {
   4511   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4512   int lane_count = LaneCountFromFormat(vform) / 2;
   4513   for (int i = lane_count - 1; i >= 0; i--) {
   4514     dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
   4515   }
   4516   return dst;
   4517 }
   4518 
   4519 
   4520 // Based on reference C function recip_sqrt_estimate from ARM ARM.
   4521 double Simulator::recip_sqrt_estimate(double a) {
   4522   int q0, q1, s;
   4523   double r;
   4524   if (a < 0.5) {
   4525     q0 = static_cast<int>(a * 512.0);
   4526     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
   4527   } else  {
   4528     q1 = static_cast<int>(a * 256.0);
   4529     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
   4530   }
   4531   s = static_cast<int>(256.0 * r + 0.5);
   4532   return static_cast<double>(s) / 256.0;
   4533 }
   4534 
   4535 
   4536 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
   4537   return unsigned_bitextract_64(start_bit, end_bit, val);
   4538 }
   4539 
   4540 
   4541 template <typename T>
   4542 T Simulator::FPRecipSqrtEstimate(T op) {
   4543   if (std::isnan(op)) {
   4544     return FPProcessNaN(op);
   4545   } else if (op == 0.0) {
   4546     if (copysign(1.0, op) < 0.0) {
   4547       return kFP64NegativeInfinity;
   4548     } else {
   4549       return kFP64PositiveInfinity;
   4550     }
   4551   } else if (copysign(1.0, op) < 0.0) {
   4552     FPProcessException();
   4553     return FPDefaultNaN<T>();
   4554   } else if (std::isinf(op)) {
   4555     return 0.0;
   4556   } else {
   4557     uint64_t fraction;
   4558     int exp, result_exp;
   4559 
   4560     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4561       exp = float_exp(op);
   4562       fraction = float_mantissa(op);
   4563       fraction <<= 29;
   4564     } else {
   4565       exp = double_exp(op);
   4566       fraction = double_mantissa(op);
   4567     }
   4568 
   4569     if (exp == 0) {
   4570       while (Bits(fraction, 51, 51) == 0) {
   4571         fraction = Bits(fraction, 50, 0) << 1;
   4572         exp -= 1;
   4573       }
   4574       fraction = Bits(fraction, 50, 0) << 1;
   4575     }
   4576 
   4577     double scaled;
   4578     if (Bits(exp, 0, 0) == 0) {
   4579       scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
   4580     } else {
   4581       scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
   4582     }
   4583 
   4584     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4585       result_exp = (380 - exp) / 2;
   4586     } else {
   4587       result_exp = (3068 - exp) / 2;
   4588     }
   4589 
   4590     double estimate = recip_sqrt_estimate(scaled);
   4591 
   4592     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4593       return float_pack(0, Bits(result_exp, 7, 0),
   4594           Bits(double_to_rawbits(estimate), 51, 29));
   4595     } else {
   4596       return double_pack(0, Bits(result_exp, 10, 0),
   4597           Bits(double_to_rawbits(estimate), 51, 0));
   4598     }
   4599   }
   4600 }
   4601 
   4602 
   4603 LogicVRegister Simulator::frsqrte(VectorFormat vform,
   4604                                   LogicVRegister dst,
   4605                                   const LogicVRegister& src) {
   4606   dst.ClearForWrite(vform);
   4607   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4608     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4609       float input = src.Float<float>(i);
   4610       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
   4611     }
   4612   } else {
   4613     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4614     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4615       double input = src.Float<double>(i);
   4616       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
   4617     }
   4618   }
   4619   return dst;
   4620 }
   4621 
   4622 template <typename T>
   4623 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
   4624   uint32_t sign;
   4625 
   4626   if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4627     sign = float_sign(op);
   4628   } else {
   4629     sign = double_sign(op);
   4630   }
   4631 
   4632   if (std::isnan(op)) {
   4633     return FPProcessNaN(op);
   4634   } else if (std::isinf(op)) {
   4635     return (sign == 1) ? -0.0 : 0.0;
   4636   } else if (op == 0.0) {
   4637     FPProcessException();  // FPExc_DivideByZero exception.
   4638     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   4639   } else if (((sizeof(T) == sizeof(float)) &&  // NOLINT(runtime/sizeof)
   4640               (std::fabs(op) < std::pow(2.0, -128.0))) ||
   4641              ((sizeof(T) == sizeof(double)) &&  // NOLINT(runtime/sizeof)
   4642               (std::fabs(op) < std::pow(2.0, -1024.0)))) {
   4643     bool overflow_to_inf = false;
   4644     switch (rounding) {
   4645       case FPTieEven: overflow_to_inf = true; break;
   4646       case FPPositiveInfinity: overflow_to_inf = (sign == 0); break;
   4647       case FPNegativeInfinity: overflow_to_inf = (sign == 1); break;
   4648       case FPZero: overflow_to_inf = false; break;
   4649       default: break;
   4650     }
   4651     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
   4652     if (overflow_to_inf) {
   4653       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   4654     } else {
   4655       // Return FPMaxNormal(sign).
   4656       if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4657         return float_pack(sign, 0xfe, 0x07fffff);
   4658       } else {
   4659         return double_pack(sign, 0x7fe, 0x0fffffffffffffl);
   4660       }
   4661     }
   4662   } else {
   4663     uint64_t fraction;
   4664     int exp, result_exp;
   4665     uint32_t sign;
   4666 
   4667     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4668       sign = float_sign(op);
   4669       exp = float_exp(op);
   4670       fraction = float_mantissa(op);
   4671       fraction <<= 29;
   4672     } else {
   4673       sign = double_sign(op);
   4674       exp = double_exp(op);
   4675       fraction = double_mantissa(op);
   4676     }
   4677 
   4678     if (exp == 0) {
   4679       if (Bits(fraction, 51, 51) == 0) {
   4680         exp -= 1;
   4681         fraction = Bits(fraction, 49, 0) << 2;
   4682       } else {
   4683         fraction = Bits(fraction, 50, 0) << 1;
   4684       }
   4685     }
   4686 
   4687     double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
   4688 
   4689     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4690       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
   4691     } else {
   4692       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
   4693     }
   4694 
   4695     double estimate = recip_estimate(scaled);
   4696 
   4697     fraction = double_mantissa(estimate);
   4698     if (result_exp == 0) {
   4699       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
   4700     } else if (result_exp == -1) {
   4701       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
   4702       result_exp = 0;
   4703     }
   4704     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4705       return float_pack(sign, Bits(result_exp, 7, 0), Bits(fraction, 51, 29));
   4706     } else {
   4707       return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
   4708     }
   4709   }
   4710 }
   4711 
   4712 
   4713 LogicVRegister Simulator::frecpe(VectorFormat vform,
   4714                                  LogicVRegister dst,
   4715                                  const LogicVRegister& src,
   4716                                  FPRounding round) {
   4717   dst.ClearForWrite(vform);
   4718   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4719     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4720       float input = src.Float<float>(i);
   4721       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
   4722     }
   4723   } else {
   4724     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4725     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4726       double input = src.Float<double>(i);
   4727       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
   4728     }
   4729   }
   4730   return dst;
   4731 }
   4732 
   4733 
   4734 LogicVRegister Simulator::ursqrte(VectorFormat vform,
   4735                                   LogicVRegister dst,
   4736                                   const LogicVRegister& src) {
   4737   dst.ClearForWrite(vform);
   4738   uint32_t operand, result;
   4739   double dp_operand, dp_result;
   4740   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4741     operand = src.Uint(vform, i);
   4742     if (operand <= 0x3FFFFFFF) {
   4743       result = 0xFFFFFFFF;
   4744     } else {
   4745       dp_operand = operand * std::pow(2.0, -32);
   4746       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
   4747       result = static_cast<uint32_t>(dp_result);
   4748     }
   4749     dst.SetUint(vform, i, result);
   4750   }
   4751   return dst;
   4752 }
   4753 
   4754 
   4755 // Based on reference C function recip_estimate from ARM ARM.
   4756 double Simulator::recip_estimate(double a) {
   4757   int q, s;
   4758   double r;
   4759   q = static_cast<int>(a * 512.0);
   4760   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
   4761   s = static_cast<int>(256.0 * r + 0.5);
   4762   return static_cast<double>(s) / 256.0;
   4763 }
   4764 
   4765 
   4766 LogicVRegister Simulator::urecpe(VectorFormat vform,
   4767                                  LogicVRegister dst,
   4768                                  const LogicVRegister& src) {
   4769   dst.ClearForWrite(vform);
   4770   uint32_t operand, result;
   4771   double dp_operand, dp_result;
   4772   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4773     operand = src.Uint(vform, i);
   4774     if (operand <= 0x7FFFFFFF) {
   4775       result = 0xFFFFFFFF;
   4776     } else {
   4777       dp_operand = operand * std::pow(2.0, -32);
   4778       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
   4779       result = static_cast<uint32_t>(dp_result);
   4780     }
   4781     dst.SetUint(vform, i, result);
   4782   }
   4783   return dst;
   4784 }
   4785 
   4786 template <typename T>
   4787 LogicVRegister Simulator::frecpx(VectorFormat vform,
   4788                                  LogicVRegister dst,
   4789                                  const LogicVRegister& src) {
   4790   dst.ClearForWrite(vform);
   4791   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4792     T op = src.Float<T>(i);
   4793     T result;
   4794     if (std::isnan(op)) {
   4795        result = FPProcessNaN(op);
   4796     } else {
   4797       int exp;
   4798       uint32_t sign;
   4799       if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4800         sign = float_sign(op);
   4801         exp = float_exp(op);
   4802         exp = (exp == 0) ? (0xFF - 1) : Bits(~exp, 7, 0);
   4803         result = float_pack(sign, exp, 0);
   4804       } else {
   4805         sign = double_sign(op);
   4806         exp = double_exp(op);
   4807         exp = (exp == 0) ? (0x7FF - 1) : Bits(~exp, 10, 0);
   4808         result = double_pack(sign, exp, 0);
   4809       }
   4810     }
   4811     dst.SetFloat(i, result);
   4812   }
   4813   return dst;
   4814 }
   4815 
   4816 
   4817 LogicVRegister Simulator::frecpx(VectorFormat vform,
   4818                                  LogicVRegister dst,
   4819                                  const LogicVRegister& src) {
   4820   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4821     frecpx<float>(vform, dst, src);
   4822   } else {
   4823     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4824     frecpx<double>(vform, dst, src);
   4825   }
   4826   return dst;
   4827 }
   4828 
   4829 LogicVRegister Simulator::scvtf(VectorFormat vform,
   4830                                 LogicVRegister dst,
   4831                                 const LogicVRegister& src,
   4832                                 int fbits,
   4833                                 FPRounding round) {
   4834   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4835     if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4836       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
   4837       dst.SetFloat<float>(i, result);
   4838     } else {
   4839       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4840       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
   4841       dst.SetFloat<double>(i, result);
   4842     }
   4843   }
   4844   return dst;
   4845 }
   4846 
   4847 
   4848 LogicVRegister Simulator::ucvtf(VectorFormat vform,
   4849                                 LogicVRegister dst,
   4850                                 const LogicVRegister& src,
   4851                                 int fbits,
   4852                                 FPRounding round) {
   4853   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4854     if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4855       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
   4856       dst.SetFloat<float>(i, result);
   4857     } else {
   4858       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4859       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
   4860       dst.SetFloat<double>(i, result);
   4861     }
   4862   }
   4863   return dst;
   4864 }
   4865 
   4866 
   4867 }  // namespace vixl
   4868