Home | History | Annotate | Download | only in aarch64
      1 // Copyright 2015, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     28 
     29 #include <cmath>
     30 
     31 #include "simulator-aarch64.h"
     32 
     33 namespace vixl {
     34 namespace aarch64 {
     35 
     36 template <>
     37 double Simulator::FPDefaultNaN<double>() {
     38   return kFP64DefaultNaN;
     39 }
     40 
     41 
     42 template <>
     43 float Simulator::FPDefaultNaN<float>() {
     44   return kFP32DefaultNaN;
     45 }
     46 
     47 // See FPRound for a description of this function.
     48 static inline double FPRoundToDouble(int64_t sign,
     49                                      int64_t exponent,
     50                                      uint64_t mantissa,
     51                                      FPRounding round_mode) {
     52   int64_t bits =
     53       FPRound<int64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
     54                                                                  exponent,
     55                                                                  mantissa,
     56                                                                  round_mode);
     57   return RawbitsToDouble(bits);
     58 }
     59 
     60 
     61 // See FPRound for a description of this function.
     62 static inline float FPRoundToFloat(int64_t sign,
     63                                    int64_t exponent,
     64                                    uint64_t mantissa,
     65                                    FPRounding round_mode) {
     66   int32_t bits =
     67       FPRound<int32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
     68                                                                exponent,
     69                                                                mantissa,
     70                                                                round_mode);
     71   return RawbitsToFloat(bits);
     72 }
     73 
     74 
     75 // See FPRound for a description of this function.
     76 static inline float16 FPRoundToFloat16(int64_t sign,
     77                                        int64_t exponent,
     78                                        uint64_t mantissa,
     79                                        FPRounding round_mode) {
     80   return FPRound<float16,
     81                  kFloat16ExponentBits,
     82                  kFloat16MantissaBits>(sign, exponent, mantissa, round_mode);
     83 }
     84 
     85 
     86 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
     87   if (src >= 0) {
     88     return UFixedToDouble(src, fbits, round);
     89   } else if (src == INT64_MIN) {
     90     return -UFixedToDouble(src, fbits, round);
     91   } else {
     92     return -UFixedToDouble(-src, fbits, round);
     93   }
     94 }
     95 
     96 
     97 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
     98   // An input of 0 is a special case because the result is effectively
     99   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
    100   if (src == 0) {
    101     return 0.0;
    102   }
    103 
    104   // Calculate the exponent. The highest significant bit will have the value
    105   // 2^exponent.
    106   const int highest_significant_bit = 63 - CountLeadingZeros(src);
    107   const int64_t exponent = highest_significant_bit - fbits;
    108 
    109   return FPRoundToDouble(0, exponent, src, round);
    110 }
    111 
    112 
    113 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
    114   if (src >= 0) {
    115     return UFixedToFloat(src, fbits, round);
    116   } else if (src == INT64_MIN) {
    117     return -UFixedToFloat(src, fbits, round);
    118   } else {
    119     return -UFixedToFloat(-src, fbits, round);
    120   }
    121 }
    122 
    123 
    124 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
    125   // An input of 0 is a special case because the result is effectively
    126   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
    127   if (src == 0) {
    128     return 0.0f;
    129   }
    130 
    131   // Calculate the exponent. The highest significant bit will have the value
    132   // 2^exponent.
    133   const int highest_significant_bit = 63 - CountLeadingZeros(src);
    134   const int32_t exponent = highest_significant_bit - fbits;
    135 
    136   return FPRoundToFloat(0, exponent, src, round);
    137 }
    138 
    139 
    140 double Simulator::FPToDouble(float value) {
    141   switch (std::fpclassify(value)) {
    142     case FP_NAN: {
    143       if (IsSignallingNaN(value)) {
    144         FPProcessException();
    145       }
    146       if (ReadDN()) return kFP64DefaultNaN;
    147 
    148       // Convert NaNs as the processor would:
    149       //  - The sign is propagated.
    150       //  - The payload (mantissa) is transferred entirely, except that the top
    151       //    bit is forced to '1', making the result a quiet NaN. The unused
    152       //    (low-order) payload bits are set to 0.
    153       uint32_t raw = FloatToRawbits(value);
    154 
    155       uint64_t sign = raw >> 31;
    156       uint64_t exponent = (1 << 11) - 1;
    157       uint64_t payload = ExtractUnsignedBitfield64(21, 0, raw);
    158       payload <<= (52 - 23);           // The unused low-order bits should be 0.
    159       payload |= (UINT64_C(1) << 51);  // Force a quiet NaN.
    160 
    161       return RawbitsToDouble((sign << 63) | (exponent << 52) | payload);
    162     }
    163 
    164     case FP_ZERO:
    165     case FP_NORMAL:
    166     case FP_SUBNORMAL:
    167     case FP_INFINITE: {
    168       // All other inputs are preserved in a standard cast, because every value
    169       // representable using an IEEE-754 float is also representable using an
    170       // IEEE-754 double.
    171       return static_cast<double>(value);
    172     }
    173   }
    174 
    175   VIXL_UNREACHABLE();
    176   return static_cast<double>(value);
    177 }
    178 
    179 
    180 float Simulator::FPToFloat(float16 value) {
    181   uint32_t sign = value >> 15;
    182   uint32_t exponent =
    183       ExtractUnsignedBitfield32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
    184                                 kFloat16MantissaBits,
    185                                 value);
    186   uint32_t mantissa =
    187       ExtractUnsignedBitfield32(kFloat16MantissaBits - 1, 0, value);
    188 
    189   switch (Float16Classify(value)) {
    190     case FP_ZERO:
    191       return (sign == 0) ? 0.0f : -0.0f;
    192 
    193     case FP_INFINITE:
    194       return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
    195 
    196     case FP_SUBNORMAL: {
    197       // Calculate shift required to put mantissa into the most-significant bits
    198       // of the destination mantissa.
    199       int shift = CountLeadingZeros(mantissa << (32 - 10));
    200 
    201       // Shift mantissa and discard implicit '1'.
    202       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
    203       mantissa &= (1 << kFloatMantissaBits) - 1;
    204 
    205       // Adjust the exponent for the shift applied, and rebias.
    206       exponent = exponent - shift + (-15 + 127);
    207       break;
    208     }
    209 
    210     case FP_NAN:
    211       if (IsSignallingNaN(value)) {
    212         FPProcessException();
    213       }
    214       if (ReadDN()) return kFP32DefaultNaN;
    215 
    216       // Convert NaNs as the processor would:
    217       //  - The sign is propagated.
    218       //  - The payload (mantissa) is transferred entirely, except that the top
    219       //    bit is forced to '1', making the result a quiet NaN. The unused
    220       //    (low-order) payload bits are set to 0.
    221       exponent = (1 << kFloatExponentBits) - 1;
    222 
    223       // Increase bits in mantissa, making low-order bits 0.
    224       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    225       mantissa |= 1 << 22;  // Force a quiet NaN.
    226       break;
    227 
    228     case FP_NORMAL:
    229       // Increase bits in mantissa, making low-order bits 0.
    230       mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
    231 
    232       // Change exponent bias.
    233       exponent += (-15 + 127);
    234       break;
    235 
    236     default:
    237       VIXL_UNREACHABLE();
    238   }
    239   return RawbitsToFloat((sign << 31) | (exponent << kFloatMantissaBits) |
    240                         mantissa);
    241 }
    242 
    243 
    244 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
    245   // Only the FPTieEven rounding mode is implemented.
    246   VIXL_ASSERT(round_mode == FPTieEven);
    247   USE(round_mode);
    248 
    249   uint32_t raw = FloatToRawbits(value);
    250   int32_t sign = raw >> 31;
    251   int32_t exponent = ExtractUnsignedBitfield32(30, 23, raw) - 127;
    252   uint32_t mantissa = ExtractUnsignedBitfield32(22, 0, raw);
    253 
    254   switch (std::fpclassify(value)) {
    255     case FP_NAN: {
    256       if (IsSignallingNaN(value)) {
    257         FPProcessException();
    258       }
    259       if (ReadDN()) return kFP16DefaultNaN;
    260 
    261       // Convert NaNs as the processor would:
    262       //  - The sign is propagated.
    263       //  - The payload (mantissa) is transferred as much as possible, except
    264       //    that the top bit is forced to '1', making the result a quiet NaN.
    265       float16 result =
    266           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    267       result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
    268       result |= (1 << 9);  // Force a quiet NaN;
    269       return result;
    270     }
    271 
    272     case FP_ZERO:
    273       return (sign == 0) ? 0 : 0x8000;
    274 
    275     case FP_INFINITE:
    276       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    277 
    278     case FP_NORMAL:
    279     case FP_SUBNORMAL: {
    280       // Convert float-to-half as the processor would, assuming that FPCR.FZ
    281       // (flush-to-zero) is not set.
    282 
    283       // Add the implicit '1' bit to the mantissa.
    284       mantissa += (1 << 23);
    285       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    286     }
    287   }
    288 
    289   VIXL_UNREACHABLE();
    290   return 0;
    291 }
    292 
    293 
    294 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
    295   // Only the FPTieEven rounding mode is implemented.
    296   VIXL_ASSERT(round_mode == FPTieEven);
    297   USE(round_mode);
    298 
    299   uint64_t raw = DoubleToRawbits(value);
    300   int32_t sign = raw >> 63;
    301   int64_t exponent = ExtractUnsignedBitfield64(62, 52, raw) - 1023;
    302   uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
    303 
    304   switch (std::fpclassify(value)) {
    305     case FP_NAN: {
    306       if (IsSignallingNaN(value)) {
    307         FPProcessException();
    308       }
    309       if (ReadDN()) return kFP16DefaultNaN;
    310 
    311       // Convert NaNs as the processor would:
    312       //  - The sign is propagated.
    313       //  - The payload (mantissa) is transferred as much as possible, except
    314       //    that the top bit is forced to '1', making the result a quiet NaN.
    315       float16 result =
    316           (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    317       result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
    318       result |= (1 << 9);  // Force a quiet NaN;
    319       return result;
    320     }
    321 
    322     case FP_ZERO:
    323       return (sign == 0) ? 0 : 0x8000;
    324 
    325     case FP_INFINITE:
    326       return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
    327 
    328     case FP_NORMAL:
    329     case FP_SUBNORMAL: {
    330       // Convert double-to-half as the processor would, assuming that FPCR.FZ
    331       // (flush-to-zero) is not set.
    332 
    333       // Add the implicit '1' bit to the mantissa.
    334       mantissa += (UINT64_C(1) << 52);
    335       return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
    336     }
    337   }
    338 
    339   VIXL_UNREACHABLE();
    340   return 0;
    341 }
    342 
    343 
    344 float Simulator::FPToFloat(double value, FPRounding round_mode) {
    345   // Only the FPTieEven rounding mode is implemented.
    346   VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
    347   USE(round_mode);
    348 
    349   switch (std::fpclassify(value)) {
    350     case FP_NAN: {
    351       if (IsSignallingNaN(value)) {
    352         FPProcessException();
    353       }
    354       if (ReadDN()) return kFP32DefaultNaN;
    355 
    356       // Convert NaNs as the processor would:
    357       //  - The sign is propagated.
    358       //  - The payload (mantissa) is transferred as much as possible, except
    359       //    that the top bit is forced to '1', making the result a quiet NaN.
    360       uint64_t raw = DoubleToRawbits(value);
    361 
    362       uint32_t sign = raw >> 63;
    363       uint32_t exponent = (1 << 8) - 1;
    364       uint32_t payload =
    365           static_cast<uint32_t>(ExtractUnsignedBitfield64(50, 52 - 23, raw));
    366       payload |= (1 << 22);  // Force a quiet NaN.
    367 
    368       return RawbitsToFloat((sign << 31) | (exponent << 23) | payload);
    369     }
    370 
    371     case FP_ZERO:
    372     case FP_INFINITE: {
    373       // In a C++ cast, any value representable in the target type will be
    374       // unchanged. This is always the case for +/-0.0 and infinities.
    375       return static_cast<float>(value);
    376     }
    377 
    378     case FP_NORMAL:
    379     case FP_SUBNORMAL: {
    380       // Convert double-to-float as the processor would, assuming that FPCR.FZ
    381       // (flush-to-zero) is not set.
    382       uint64_t raw = DoubleToRawbits(value);
    383       // Extract the IEEE-754 double components.
    384       uint32_t sign = raw >> 63;
    385       // Extract the exponent and remove the IEEE-754 encoding bias.
    386       int32_t exponent =
    387           static_cast<int32_t>(ExtractUnsignedBitfield64(62, 52, raw)) - 1023;
    388       // Extract the mantissa and add the implicit '1' bit.
    389       uint64_t mantissa = ExtractUnsignedBitfield64(51, 0, raw);
    390       if (std::fpclassify(value) == FP_NORMAL) {
    391         mantissa |= (UINT64_C(1) << 52);
    392       }
    393       return FPRoundToFloat(sign, exponent, mantissa, round_mode);
    394     }
    395   }
    396 
    397   VIXL_UNREACHABLE();
    398   return value;
    399 }
    400 
    401 
    402 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
    403   dst.ClearForWrite(vform);
    404   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    405     dst.ReadUintFromMem(vform, i, addr);
    406     addr += LaneSizeInBytesFromFormat(vform);
    407   }
    408 }
    409 
    410 
    411 void Simulator::ld1(VectorFormat vform,
    412                     LogicVRegister dst,
    413                     int index,
    414                     uint64_t addr) {
    415   dst.ReadUintFromMem(vform, index, addr);
    416 }
    417 
    418 
    419 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
    420   dst.ClearForWrite(vform);
    421   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    422     dst.ReadUintFromMem(vform, i, addr);
    423   }
    424 }
    425 
    426 
    427 void Simulator::ld2(VectorFormat vform,
    428                     LogicVRegister dst1,
    429                     LogicVRegister dst2,
    430                     uint64_t addr1) {
    431   dst1.ClearForWrite(vform);
    432   dst2.ClearForWrite(vform);
    433   int esize = LaneSizeInBytesFromFormat(vform);
    434   uint64_t addr2 = addr1 + esize;
    435   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    436     dst1.ReadUintFromMem(vform, i, addr1);
    437     dst2.ReadUintFromMem(vform, i, addr2);
    438     addr1 += 2 * esize;
    439     addr2 += 2 * esize;
    440   }
    441 }
    442 
    443 
    444 void Simulator::ld2(VectorFormat vform,
    445                     LogicVRegister dst1,
    446                     LogicVRegister dst2,
    447                     int index,
    448                     uint64_t addr1) {
    449   dst1.ClearForWrite(vform);
    450   dst2.ClearForWrite(vform);
    451   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    452   dst1.ReadUintFromMem(vform, index, addr1);
    453   dst2.ReadUintFromMem(vform, index, addr2);
    454 }
    455 
    456 
    457 void Simulator::ld2r(VectorFormat vform,
    458                      LogicVRegister dst1,
    459                      LogicVRegister dst2,
    460                      uint64_t addr) {
    461   dst1.ClearForWrite(vform);
    462   dst2.ClearForWrite(vform);
    463   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    464   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    465     dst1.ReadUintFromMem(vform, i, addr);
    466     dst2.ReadUintFromMem(vform, i, addr2);
    467   }
    468 }
    469 
    470 
    471 void Simulator::ld3(VectorFormat vform,
    472                     LogicVRegister dst1,
    473                     LogicVRegister dst2,
    474                     LogicVRegister dst3,
    475                     uint64_t addr1) {
    476   dst1.ClearForWrite(vform);
    477   dst2.ClearForWrite(vform);
    478   dst3.ClearForWrite(vform);
    479   int esize = LaneSizeInBytesFromFormat(vform);
    480   uint64_t addr2 = addr1 + esize;
    481   uint64_t addr3 = addr2 + esize;
    482   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    483     dst1.ReadUintFromMem(vform, i, addr1);
    484     dst2.ReadUintFromMem(vform, i, addr2);
    485     dst3.ReadUintFromMem(vform, i, addr3);
    486     addr1 += 3 * esize;
    487     addr2 += 3 * esize;
    488     addr3 += 3 * esize;
    489   }
    490 }
    491 
    492 
    493 void Simulator::ld3(VectorFormat vform,
    494                     LogicVRegister dst1,
    495                     LogicVRegister dst2,
    496                     LogicVRegister dst3,
    497                     int index,
    498                     uint64_t addr1) {
    499   dst1.ClearForWrite(vform);
    500   dst2.ClearForWrite(vform);
    501   dst3.ClearForWrite(vform);
    502   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    503   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    504   dst1.ReadUintFromMem(vform, index, addr1);
    505   dst2.ReadUintFromMem(vform, index, addr2);
    506   dst3.ReadUintFromMem(vform, index, addr3);
    507 }
    508 
    509 
    510 void Simulator::ld3r(VectorFormat vform,
    511                      LogicVRegister dst1,
    512                      LogicVRegister dst2,
    513                      LogicVRegister dst3,
    514                      uint64_t addr) {
    515   dst1.ClearForWrite(vform);
    516   dst2.ClearForWrite(vform);
    517   dst3.ClearForWrite(vform);
    518   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    519   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    520   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    521     dst1.ReadUintFromMem(vform, i, addr);
    522     dst2.ReadUintFromMem(vform, i, addr2);
    523     dst3.ReadUintFromMem(vform, i, addr3);
    524   }
    525 }
    526 
    527 
    528 void Simulator::ld4(VectorFormat vform,
    529                     LogicVRegister dst1,
    530                     LogicVRegister dst2,
    531                     LogicVRegister dst3,
    532                     LogicVRegister dst4,
    533                     uint64_t addr1) {
    534   dst1.ClearForWrite(vform);
    535   dst2.ClearForWrite(vform);
    536   dst3.ClearForWrite(vform);
    537   dst4.ClearForWrite(vform);
    538   int esize = LaneSizeInBytesFromFormat(vform);
    539   uint64_t addr2 = addr1 + esize;
    540   uint64_t addr3 = addr2 + esize;
    541   uint64_t addr4 = addr3 + esize;
    542   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    543     dst1.ReadUintFromMem(vform, i, addr1);
    544     dst2.ReadUintFromMem(vform, i, addr2);
    545     dst3.ReadUintFromMem(vform, i, addr3);
    546     dst4.ReadUintFromMem(vform, i, addr4);
    547     addr1 += 4 * esize;
    548     addr2 += 4 * esize;
    549     addr3 += 4 * esize;
    550     addr4 += 4 * esize;
    551   }
    552 }
    553 
    554 
    555 void Simulator::ld4(VectorFormat vform,
    556                     LogicVRegister dst1,
    557                     LogicVRegister dst2,
    558                     LogicVRegister dst3,
    559                     LogicVRegister dst4,
    560                     int index,
    561                     uint64_t addr1) {
    562   dst1.ClearForWrite(vform);
    563   dst2.ClearForWrite(vform);
    564   dst3.ClearForWrite(vform);
    565   dst4.ClearForWrite(vform);
    566   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    567   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    568   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    569   dst1.ReadUintFromMem(vform, index, addr1);
    570   dst2.ReadUintFromMem(vform, index, addr2);
    571   dst3.ReadUintFromMem(vform, index, addr3);
    572   dst4.ReadUintFromMem(vform, index, addr4);
    573 }
    574 
    575 
    576 void Simulator::ld4r(VectorFormat vform,
    577                      LogicVRegister dst1,
    578                      LogicVRegister dst2,
    579                      LogicVRegister dst3,
    580                      LogicVRegister dst4,
    581                      uint64_t addr) {
    582   dst1.ClearForWrite(vform);
    583   dst2.ClearForWrite(vform);
    584   dst3.ClearForWrite(vform);
    585   dst4.ClearForWrite(vform);
    586   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    587   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    588   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    589   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    590     dst1.ReadUintFromMem(vform, i, addr);
    591     dst2.ReadUintFromMem(vform, i, addr2);
    592     dst3.ReadUintFromMem(vform, i, addr3);
    593     dst4.ReadUintFromMem(vform, i, addr4);
    594   }
    595 }
    596 
    597 
    598 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
    599   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    600     src.WriteUintToMem(vform, i, addr);
    601     addr += LaneSizeInBytesFromFormat(vform);
    602   }
    603 }
    604 
    605 
    606 void Simulator::st1(VectorFormat vform,
    607                     LogicVRegister src,
    608                     int index,
    609                     uint64_t addr) {
    610   src.WriteUintToMem(vform, index, addr);
    611 }
    612 
    613 
    614 void Simulator::st2(VectorFormat vform,
    615                     LogicVRegister dst,
    616                     LogicVRegister dst2,
    617                     uint64_t addr) {
    618   int esize = LaneSizeInBytesFromFormat(vform);
    619   uint64_t addr2 = addr + esize;
    620   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    621     dst.WriteUintToMem(vform, i, addr);
    622     dst2.WriteUintToMem(vform, i, addr2);
    623     addr += 2 * esize;
    624     addr2 += 2 * esize;
    625   }
    626 }
    627 
    628 
    629 void Simulator::st2(VectorFormat vform,
    630                     LogicVRegister dst,
    631                     LogicVRegister dst2,
    632                     int index,
    633                     uint64_t addr) {
    634   int esize = LaneSizeInBytesFromFormat(vform);
    635   dst.WriteUintToMem(vform, index, addr);
    636   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    637 }
    638 
    639 
    640 void Simulator::st3(VectorFormat vform,
    641                     LogicVRegister dst,
    642                     LogicVRegister dst2,
    643                     LogicVRegister dst3,
    644                     uint64_t addr) {
    645   int esize = LaneSizeInBytesFromFormat(vform);
    646   uint64_t addr2 = addr + esize;
    647   uint64_t addr3 = addr2 + esize;
    648   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    649     dst.WriteUintToMem(vform, i, addr);
    650     dst2.WriteUintToMem(vform, i, addr2);
    651     dst3.WriteUintToMem(vform, i, addr3);
    652     addr += 3 * esize;
    653     addr2 += 3 * esize;
    654     addr3 += 3 * esize;
    655   }
    656 }
    657 
    658 
    659 void Simulator::st3(VectorFormat vform,
    660                     LogicVRegister dst,
    661                     LogicVRegister dst2,
    662                     LogicVRegister dst3,
    663                     int index,
    664                     uint64_t addr) {
    665   int esize = LaneSizeInBytesFromFormat(vform);
    666   dst.WriteUintToMem(vform, index, addr);
    667   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    668   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    669 }
    670 
    671 
    672 void Simulator::st4(VectorFormat vform,
    673                     LogicVRegister dst,
    674                     LogicVRegister dst2,
    675                     LogicVRegister dst3,
    676                     LogicVRegister dst4,
    677                     uint64_t addr) {
    678   int esize = LaneSizeInBytesFromFormat(vform);
    679   uint64_t addr2 = addr + esize;
    680   uint64_t addr3 = addr2 + esize;
    681   uint64_t addr4 = addr3 + esize;
    682   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    683     dst.WriteUintToMem(vform, i, addr);
    684     dst2.WriteUintToMem(vform, i, addr2);
    685     dst3.WriteUintToMem(vform, i, addr3);
    686     dst4.WriteUintToMem(vform, i, addr4);
    687     addr += 4 * esize;
    688     addr2 += 4 * esize;
    689     addr3 += 4 * esize;
    690     addr4 += 4 * esize;
    691   }
    692 }
    693 
    694 
    695 void Simulator::st4(VectorFormat vform,
    696                     LogicVRegister dst,
    697                     LogicVRegister dst2,
    698                     LogicVRegister dst3,
    699                     LogicVRegister dst4,
    700                     int index,
    701                     uint64_t addr) {
    702   int esize = LaneSizeInBytesFromFormat(vform);
    703   dst.WriteUintToMem(vform, index, addr);
    704   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    705   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    706   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
    707 }
    708 
    709 
    710 LogicVRegister Simulator::cmp(VectorFormat vform,
    711                               LogicVRegister dst,
    712                               const LogicVRegister& src1,
    713                               const LogicVRegister& src2,
    714                               Condition cond) {
    715   dst.ClearForWrite(vform);
    716   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    717     int64_t sa = src1.Int(vform, i);
    718     int64_t sb = src2.Int(vform, i);
    719     uint64_t ua = src1.Uint(vform, i);
    720     uint64_t ub = src2.Uint(vform, i);
    721     bool result = false;
    722     switch (cond) {
    723       case eq:
    724         result = (ua == ub);
    725         break;
    726       case ge:
    727         result = (sa >= sb);
    728         break;
    729       case gt:
    730         result = (sa > sb);
    731         break;
    732       case hi:
    733         result = (ua > ub);
    734         break;
    735       case hs:
    736         result = (ua >= ub);
    737         break;
    738       case lt:
    739         result = (sa < sb);
    740         break;
    741       case le:
    742         result = (sa <= sb);
    743         break;
    744       default:
    745         VIXL_UNREACHABLE();
    746         break;
    747     }
    748     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
    749   }
    750   return dst;
    751 }
    752 
    753 
    754 LogicVRegister Simulator::cmp(VectorFormat vform,
    755                               LogicVRegister dst,
    756                               const LogicVRegister& src1,
    757                               int imm,
    758                               Condition cond) {
    759   SimVRegister temp;
    760   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
    761   return cmp(vform, dst, src1, imm_reg, cond);
    762 }
    763 
    764 
    765 LogicVRegister Simulator::cmptst(VectorFormat vform,
    766                                  LogicVRegister dst,
    767                                  const LogicVRegister& src1,
    768                                  const LogicVRegister& src2) {
    769   dst.ClearForWrite(vform);
    770   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    771     uint64_t ua = src1.Uint(vform, i);
    772     uint64_t ub = src2.Uint(vform, i);
    773     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
    774   }
    775   return dst;
    776 }
    777 
    778 
    779 LogicVRegister Simulator::add(VectorFormat vform,
    780                               LogicVRegister dst,
    781                               const LogicVRegister& src1,
    782                               const LogicVRegister& src2) {
    783   int lane_size = LaneSizeInBitsFromFormat(vform);
    784   dst.ClearForWrite(vform);
    785   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    786     // Test for unsigned saturation.
    787     uint64_t ua = src1.UintLeftJustified(vform, i);
    788     uint64_t ub = src2.UintLeftJustified(vform, i);
    789     uint64_t ur = ua + ub;
    790     if (ur < ua) {
    791       dst.SetUnsignedSat(i, true);
    792     }
    793 
    794     // Test for signed saturation.
    795     bool pos_a = (ua >> 63) == 0;
    796     bool pos_b = (ub >> 63) == 0;
    797     bool pos_r = (ur >> 63) == 0;
    798     // If the signs of the operands are the same, but different from the result,
    799     // there was an overflow.
    800     if ((pos_a == pos_b) && (pos_a != pos_r)) {
    801       dst.SetSignedSat(i, pos_a);
    802     }
    803 
    804     dst.SetInt(vform, i, ur >> (64 - lane_size));
    805   }
    806   return dst;
    807 }
    808 
    809 
    810 LogicVRegister Simulator::addp(VectorFormat vform,
    811                                LogicVRegister dst,
    812                                const LogicVRegister& src1,
    813                                const LogicVRegister& src2) {
    814   SimVRegister temp1, temp2;
    815   uzp1(vform, temp1, src1, src2);
    816   uzp2(vform, temp2, src1, src2);
    817   add(vform, dst, temp1, temp2);
    818   return dst;
    819 }
    820 
    821 
    822 LogicVRegister Simulator::mla(VectorFormat vform,
    823                               LogicVRegister dst,
    824                               const LogicVRegister& src1,
    825                               const LogicVRegister& src2) {
    826   SimVRegister temp;
    827   mul(vform, temp, src1, src2);
    828   add(vform, dst, dst, temp);
    829   return dst;
    830 }
    831 
    832 
    833 LogicVRegister Simulator::mls(VectorFormat vform,
    834                               LogicVRegister dst,
    835                               const LogicVRegister& src1,
    836                               const LogicVRegister& src2) {
    837   SimVRegister temp;
    838   mul(vform, temp, src1, src2);
    839   sub(vform, dst, dst, temp);
    840   return dst;
    841 }
    842 
    843 
    844 LogicVRegister Simulator::mul(VectorFormat vform,
    845                               LogicVRegister dst,
    846                               const LogicVRegister& src1,
    847                               const LogicVRegister& src2) {
    848   dst.ClearForWrite(vform);
    849   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    850     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
    851   }
    852   return dst;
    853 }
    854 
    855 
    856 LogicVRegister Simulator::mul(VectorFormat vform,
    857                               LogicVRegister dst,
    858                               const LogicVRegister& src1,
    859                               const LogicVRegister& src2,
    860                               int index) {
    861   SimVRegister temp;
    862   VectorFormat indexform = VectorFormatFillQ(vform);
    863   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
    864 }
    865 
    866 
    867 LogicVRegister Simulator::mla(VectorFormat vform,
    868                               LogicVRegister dst,
    869                               const LogicVRegister& src1,
    870                               const LogicVRegister& src2,
    871                               int index) {
    872   SimVRegister temp;
    873   VectorFormat indexform = VectorFormatFillQ(vform);
    874   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
    875 }
    876 
    877 
    878 LogicVRegister Simulator::mls(VectorFormat vform,
    879                               LogicVRegister dst,
    880                               const LogicVRegister& src1,
    881                               const LogicVRegister& src2,
    882                               int index) {
    883   SimVRegister temp;
    884   VectorFormat indexform = VectorFormatFillQ(vform);
    885   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
    886 }
    887 
    888 
    889 LogicVRegister Simulator::smull(VectorFormat vform,
    890                                 LogicVRegister dst,
    891                                 const LogicVRegister& src1,
    892                                 const LogicVRegister& src2,
    893                                 int index) {
    894   SimVRegister temp;
    895   VectorFormat indexform =
    896       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    897   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    898 }
    899 
    900 
    901 LogicVRegister Simulator::smull2(VectorFormat vform,
    902                                  LogicVRegister dst,
    903                                  const LogicVRegister& src1,
    904                                  const LogicVRegister& src2,
    905                                  int index) {
    906   SimVRegister temp;
    907   VectorFormat indexform =
    908       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    909   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    910 }
    911 
    912 
    913 LogicVRegister Simulator::umull(VectorFormat vform,
    914                                 LogicVRegister dst,
    915                                 const LogicVRegister& src1,
    916                                 const LogicVRegister& src2,
    917                                 int index) {
    918   SimVRegister temp;
    919   VectorFormat indexform =
    920       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    921   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    922 }
    923 
    924 
    925 LogicVRegister Simulator::umull2(VectorFormat vform,
    926                                  LogicVRegister dst,
    927                                  const LogicVRegister& src1,
    928                                  const LogicVRegister& src2,
    929                                  int index) {
    930   SimVRegister temp;
    931   VectorFormat indexform =
    932       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    933   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    934 }
    935 
    936 
    937 LogicVRegister Simulator::smlal(VectorFormat vform,
    938                                 LogicVRegister dst,
    939                                 const LogicVRegister& src1,
    940                                 const LogicVRegister& src2,
    941                                 int index) {
    942   SimVRegister temp;
    943   VectorFormat indexform =
    944       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    945   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    946 }
    947 
    948 
    949 LogicVRegister Simulator::smlal2(VectorFormat vform,
    950                                  LogicVRegister dst,
    951                                  const LogicVRegister& src1,
    952                                  const LogicVRegister& src2,
    953                                  int index) {
    954   SimVRegister temp;
    955   VectorFormat indexform =
    956       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    957   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    958 }
    959 
    960 
    961 LogicVRegister Simulator::umlal(VectorFormat vform,
    962                                 LogicVRegister dst,
    963                                 const LogicVRegister& src1,
    964                                 const LogicVRegister& src2,
    965                                 int index) {
    966   SimVRegister temp;
    967   VectorFormat indexform =
    968       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    969   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    970 }
    971 
    972 
    973 LogicVRegister Simulator::umlal2(VectorFormat vform,
    974                                  LogicVRegister dst,
    975                                  const LogicVRegister& src1,
    976                                  const LogicVRegister& src2,
    977                                  int index) {
    978   SimVRegister temp;
    979   VectorFormat indexform =
    980       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    981   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    982 }
    983 
    984 
    985 LogicVRegister Simulator::smlsl(VectorFormat vform,
    986                                 LogicVRegister dst,
    987                                 const LogicVRegister& src1,
    988                                 const LogicVRegister& src2,
    989                                 int index) {
    990   SimVRegister temp;
    991   VectorFormat indexform =
    992       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    993   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    994 }
    995 
    996 
    997 LogicVRegister Simulator::smlsl2(VectorFormat vform,
    998                                  LogicVRegister dst,
    999                                  const LogicVRegister& src1,
   1000                                  const LogicVRegister& src2,
   1001                                  int index) {
   1002   SimVRegister temp;
   1003   VectorFormat indexform =
   1004       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1005   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1006 }
   1007 
   1008 
   1009 LogicVRegister Simulator::umlsl(VectorFormat vform,
   1010                                 LogicVRegister dst,
   1011                                 const LogicVRegister& src1,
   1012                                 const LogicVRegister& src2,
   1013                                 int index) {
   1014   SimVRegister temp;
   1015   VectorFormat indexform =
   1016       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1017   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1018 }
   1019 
   1020 
   1021 LogicVRegister Simulator::umlsl2(VectorFormat vform,
   1022                                  LogicVRegister dst,
   1023                                  const LogicVRegister& src1,
   1024                                  const LogicVRegister& src2,
   1025                                  int index) {
   1026   SimVRegister temp;
   1027   VectorFormat indexform =
   1028       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1029   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1030 }
   1031 
   1032 
   1033 LogicVRegister Simulator::sqdmull(VectorFormat vform,
   1034                                   LogicVRegister dst,
   1035                                   const LogicVRegister& src1,
   1036                                   const LogicVRegister& src2,
   1037                                   int index) {
   1038   SimVRegister temp;
   1039   VectorFormat indexform =
   1040       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1041   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1042 }
   1043 
   1044 
   1045 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
   1046                                    LogicVRegister dst,
   1047                                    const LogicVRegister& src1,
   1048                                    const LogicVRegister& src2,
   1049                                    int index) {
   1050   SimVRegister temp;
   1051   VectorFormat indexform =
   1052       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1053   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1054 }
   1055 
   1056 
   1057 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
   1058                                   LogicVRegister dst,
   1059                                   const LogicVRegister& src1,
   1060                                   const LogicVRegister& src2,
   1061                                   int index) {
   1062   SimVRegister temp;
   1063   VectorFormat indexform =
   1064       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1065   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1066 }
   1067 
   1068 
   1069 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
   1070                                    LogicVRegister dst,
   1071                                    const LogicVRegister& src1,
   1072                                    const LogicVRegister& src2,
   1073                                    int index) {
   1074   SimVRegister temp;
   1075   VectorFormat indexform =
   1076       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1077   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1078 }
   1079 
   1080 
   1081 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
   1082                                   LogicVRegister dst,
   1083                                   const LogicVRegister& src1,
   1084                                   const LogicVRegister& src2,
   1085                                   int index) {
   1086   SimVRegister temp;
   1087   VectorFormat indexform =
   1088       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1089   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1090 }
   1091 
   1092 
   1093 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
   1094                                    LogicVRegister dst,
   1095                                    const LogicVRegister& src1,
   1096                                    const LogicVRegister& src2,
   1097                                    int index) {
   1098   SimVRegister temp;
   1099   VectorFormat indexform =
   1100       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
   1101   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1102 }
   1103 
   1104 
   1105 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
   1106                                   LogicVRegister dst,
   1107                                   const LogicVRegister& src1,
   1108                                   const LogicVRegister& src2,
   1109                                   int index) {
   1110   SimVRegister temp;
   1111   VectorFormat indexform = VectorFormatFillQ(vform);
   1112   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1113 }
   1114 
   1115 
   1116 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
   1117                                    LogicVRegister dst,
   1118                                    const LogicVRegister& src1,
   1119                                    const LogicVRegister& src2,
   1120                                    int index) {
   1121   SimVRegister temp;
   1122   VectorFormat indexform = VectorFormatFillQ(vform);
   1123   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
   1124 }
   1125 
   1126 
   1127 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
   1128   uint16_t result = 0;
   1129   uint16_t extended_op2 = op2;
   1130   for (int i = 0; i < 8; ++i) {
   1131     if ((op1 >> i) & 1) {
   1132       result = result ^ (extended_op2 << i);
   1133     }
   1134   }
   1135   return result;
   1136 }
   1137 
   1138 
   1139 LogicVRegister Simulator::pmul(VectorFormat vform,
   1140                                LogicVRegister dst,
   1141                                const LogicVRegister& src1,
   1142                                const LogicVRegister& src2) {
   1143   dst.ClearForWrite(vform);
   1144   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1145     dst.SetUint(vform,
   1146                 i,
   1147                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
   1148   }
   1149   return dst;
   1150 }
   1151 
   1152 
   1153 LogicVRegister Simulator::pmull(VectorFormat vform,
   1154                                 LogicVRegister dst,
   1155                                 const LogicVRegister& src1,
   1156                                 const LogicVRegister& src2) {
   1157   VectorFormat vform_src = VectorFormatHalfWidth(vform);
   1158   dst.ClearForWrite(vform);
   1159   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1160     dst.SetUint(vform,
   1161                 i,
   1162                 PolynomialMult(src1.Uint(vform_src, i),
   1163                                src2.Uint(vform_src, i)));
   1164   }
   1165   return dst;
   1166 }
   1167 
   1168 
   1169 LogicVRegister Simulator::pmull2(VectorFormat vform,
   1170                                  LogicVRegister dst,
   1171                                  const LogicVRegister& src1,
   1172                                  const LogicVRegister& src2) {
   1173   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
   1174   dst.ClearForWrite(vform);
   1175   int lane_count = LaneCountFromFormat(vform);
   1176   for (int i = 0; i < lane_count; i++) {
   1177     dst.SetUint(vform,
   1178                 i,
   1179                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
   1180                                src2.Uint(vform_src, lane_count + i)));
   1181   }
   1182   return dst;
   1183 }
   1184 
   1185 
   1186 LogicVRegister Simulator::sub(VectorFormat vform,
   1187                               LogicVRegister dst,
   1188                               const LogicVRegister& src1,
   1189                               const LogicVRegister& src2) {
   1190   int lane_size = LaneSizeInBitsFromFormat(vform);
   1191   dst.ClearForWrite(vform);
   1192   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1193     // Test for unsigned saturation.
   1194     uint64_t ua = src1.UintLeftJustified(vform, i);
   1195     uint64_t ub = src2.UintLeftJustified(vform, i);
   1196     uint64_t ur = ua - ub;
   1197     if (ub > ua) {
   1198       dst.SetUnsignedSat(i, false);
   1199     }
   1200 
   1201     // Test for signed saturation.
   1202     bool pos_a = (ua >> 63) == 0;
   1203     bool pos_b = (ub >> 63) == 0;
   1204     bool pos_r = (ur >> 63) == 0;
   1205     // If the signs of the operands are different, and the sign of the first
   1206     // operand doesn't match the result, there was an overflow.
   1207     if ((pos_a != pos_b) && (pos_a != pos_r)) {
   1208       dst.SetSignedSat(i, pos_a);
   1209     }
   1210 
   1211     dst.SetInt(vform, i, ur >> (64 - lane_size));
   1212   }
   1213   return dst;
   1214 }
   1215 
   1216 
   1217 LogicVRegister Simulator::and_(VectorFormat vform,
   1218                                LogicVRegister dst,
   1219                                const LogicVRegister& src1,
   1220                                const LogicVRegister& src2) {
   1221   dst.ClearForWrite(vform);
   1222   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1223     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
   1224   }
   1225   return dst;
   1226 }
   1227 
   1228 
   1229 LogicVRegister Simulator::orr(VectorFormat vform,
   1230                               LogicVRegister dst,
   1231                               const LogicVRegister& src1,
   1232                               const LogicVRegister& src2) {
   1233   dst.ClearForWrite(vform);
   1234   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1235     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
   1236   }
   1237   return dst;
   1238 }
   1239 
   1240 
   1241 LogicVRegister Simulator::orn(VectorFormat vform,
   1242                               LogicVRegister dst,
   1243                               const LogicVRegister& src1,
   1244                               const LogicVRegister& src2) {
   1245   dst.ClearForWrite(vform);
   1246   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1247     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
   1248   }
   1249   return dst;
   1250 }
   1251 
   1252 
   1253 LogicVRegister Simulator::eor(VectorFormat vform,
   1254                               LogicVRegister dst,
   1255                               const LogicVRegister& src1,
   1256                               const LogicVRegister& src2) {
   1257   dst.ClearForWrite(vform);
   1258   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1259     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
   1260   }
   1261   return dst;
   1262 }
   1263 
   1264 
   1265 LogicVRegister Simulator::bic(VectorFormat vform,
   1266                               LogicVRegister dst,
   1267                               const LogicVRegister& src1,
   1268                               const LogicVRegister& src2) {
   1269   dst.ClearForWrite(vform);
   1270   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1271     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
   1272   }
   1273   return dst;
   1274 }
   1275 
   1276 
   1277 LogicVRegister Simulator::bic(VectorFormat vform,
   1278                               LogicVRegister dst,
   1279                               const LogicVRegister& src,
   1280                               uint64_t imm) {
   1281   uint64_t result[16];
   1282   int laneCount = LaneCountFromFormat(vform);
   1283   for (int i = 0; i < laneCount; ++i) {
   1284     result[i] = src.Uint(vform, i) & ~imm;
   1285   }
   1286   dst.ClearForWrite(vform);
   1287   for (int i = 0; i < laneCount; ++i) {
   1288     dst.SetUint(vform, i, result[i]);
   1289   }
   1290   return dst;
   1291 }
   1292 
   1293 
   1294 LogicVRegister Simulator::bif(VectorFormat vform,
   1295                               LogicVRegister dst,
   1296                               const LogicVRegister& src1,
   1297                               const LogicVRegister& src2) {
   1298   dst.ClearForWrite(vform);
   1299   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1300     uint64_t operand1 = dst.Uint(vform, i);
   1301     uint64_t operand2 = ~src2.Uint(vform, i);
   1302     uint64_t operand3 = src1.Uint(vform, i);
   1303     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1304     dst.SetUint(vform, i, result);
   1305   }
   1306   return dst;
   1307 }
   1308 
   1309 
   1310 LogicVRegister Simulator::bit(VectorFormat vform,
   1311                               LogicVRegister dst,
   1312                               const LogicVRegister& src1,
   1313                               const LogicVRegister& src2) {
   1314   dst.ClearForWrite(vform);
   1315   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1316     uint64_t operand1 = dst.Uint(vform, i);
   1317     uint64_t operand2 = src2.Uint(vform, i);
   1318     uint64_t operand3 = src1.Uint(vform, i);
   1319     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1320     dst.SetUint(vform, i, result);
   1321   }
   1322   return dst;
   1323 }
   1324 
   1325 
   1326 LogicVRegister Simulator::bsl(VectorFormat vform,
   1327                               LogicVRegister dst,
   1328                               const LogicVRegister& src1,
   1329                               const LogicVRegister& src2) {
   1330   dst.ClearForWrite(vform);
   1331   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1332     uint64_t operand1 = src2.Uint(vform, i);
   1333     uint64_t operand2 = dst.Uint(vform, i);
   1334     uint64_t operand3 = src1.Uint(vform, i);
   1335     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1336     dst.SetUint(vform, i, result);
   1337   }
   1338   return dst;
   1339 }
   1340 
   1341 
   1342 LogicVRegister Simulator::sminmax(VectorFormat vform,
   1343                                   LogicVRegister dst,
   1344                                   const LogicVRegister& src1,
   1345                                   const LogicVRegister& src2,
   1346                                   bool max) {
   1347   dst.ClearForWrite(vform);
   1348   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1349     int64_t src1_val = src1.Int(vform, i);
   1350     int64_t src2_val = src2.Int(vform, i);
   1351     int64_t dst_val;
   1352     if (max) {
   1353       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1354     } else {
   1355       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1356     }
   1357     dst.SetInt(vform, i, dst_val);
   1358   }
   1359   return dst;
   1360 }
   1361 
   1362 
   1363 LogicVRegister Simulator::smax(VectorFormat vform,
   1364                                LogicVRegister dst,
   1365                                const LogicVRegister& src1,
   1366                                const LogicVRegister& src2) {
   1367   return sminmax(vform, dst, src1, src2, true);
   1368 }
   1369 
   1370 
   1371 LogicVRegister Simulator::smin(VectorFormat vform,
   1372                                LogicVRegister dst,
   1373                                const LogicVRegister& src1,
   1374                                const LogicVRegister& src2) {
   1375   return sminmax(vform, dst, src1, src2, false);
   1376 }
   1377 
   1378 
   1379 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
   1380                                    LogicVRegister dst,
   1381                                    const LogicVRegister& src1,
   1382                                    const LogicVRegister& src2,
   1383                                    bool max) {
   1384   int lanes = LaneCountFromFormat(vform);
   1385   int64_t result[kMaxLanesPerVector];
   1386   const LogicVRegister* src = &src1;
   1387   for (int j = 0; j < 2; j++) {
   1388     for (int i = 0; i < lanes; i += 2) {
   1389       int64_t first_val = src->Int(vform, i);
   1390       int64_t second_val = src->Int(vform, i + 1);
   1391       int64_t dst_val;
   1392       if (max) {
   1393         dst_val = (first_val > second_val) ? first_val : second_val;
   1394       } else {
   1395         dst_val = (first_val < second_val) ? first_val : second_val;
   1396       }
   1397       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
   1398       result[(i >> 1) + (j * lanes / 2)] = dst_val;
   1399     }
   1400     src = &src2;
   1401   }
   1402   dst.SetIntArray(vform, result);
   1403   return dst;
   1404 }
   1405 
   1406 
   1407 LogicVRegister Simulator::smaxp(VectorFormat vform,
   1408                                 LogicVRegister dst,
   1409                                 const LogicVRegister& src1,
   1410                                 const LogicVRegister& src2) {
   1411   return sminmaxp(vform, dst, src1, src2, true);
   1412 }
   1413 
   1414 
   1415 LogicVRegister Simulator::sminp(VectorFormat vform,
   1416                                 LogicVRegister dst,
   1417                                 const LogicVRegister& src1,
   1418                                 const LogicVRegister& src2) {
   1419   return sminmaxp(vform, dst, src1, src2, false);
   1420 }
   1421 
   1422 
   1423 LogicVRegister Simulator::addp(VectorFormat vform,
   1424                                LogicVRegister dst,
   1425                                const LogicVRegister& src) {
   1426   VIXL_ASSERT(vform == kFormatD);
   1427 
   1428   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
   1429   dst.ClearForWrite(vform);
   1430   dst.SetUint(vform, 0, dst_val);
   1431   return dst;
   1432 }
   1433 
   1434 
   1435 LogicVRegister Simulator::addv(VectorFormat vform,
   1436                                LogicVRegister dst,
   1437                                const LogicVRegister& src) {
   1438   VectorFormat vform_dst =
   1439       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
   1440 
   1441 
   1442   int64_t dst_val = 0;
   1443   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1444     dst_val += src.Int(vform, i);
   1445   }
   1446 
   1447   dst.ClearForWrite(vform_dst);
   1448   dst.SetInt(vform_dst, 0, dst_val);
   1449   return dst;
   1450 }
   1451 
   1452 
   1453 LogicVRegister Simulator::saddlv(VectorFormat vform,
   1454                                  LogicVRegister dst,
   1455                                  const LogicVRegister& src) {
   1456   VectorFormat vform_dst =
   1457       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1458 
   1459   int64_t dst_val = 0;
   1460   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1461     dst_val += src.Int(vform, i);
   1462   }
   1463 
   1464   dst.ClearForWrite(vform_dst);
   1465   dst.SetInt(vform_dst, 0, dst_val);
   1466   return dst;
   1467 }
   1468 
   1469 
   1470 LogicVRegister Simulator::uaddlv(VectorFormat vform,
   1471                                  LogicVRegister dst,
   1472                                  const LogicVRegister& src) {
   1473   VectorFormat vform_dst =
   1474       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1475 
   1476   uint64_t dst_val = 0;
   1477   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1478     dst_val += src.Uint(vform, i);
   1479   }
   1480 
   1481   dst.ClearForWrite(vform_dst);
   1482   dst.SetUint(vform_dst, 0, dst_val);
   1483   return dst;
   1484 }
   1485 
   1486 
   1487 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
   1488                                    LogicVRegister dst,
   1489                                    const LogicVRegister& src,
   1490                                    bool max) {
   1491   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
   1492   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1493     int64_t src_val = src.Int(vform, i);
   1494     if (max) {
   1495       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1496     } else {
   1497       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1498     }
   1499   }
   1500   dst.ClearForWrite(ScalarFormatFromFormat(vform));
   1501   dst.SetInt(vform, 0, dst_val);
   1502   return dst;
   1503 }
   1504 
   1505 
   1506 LogicVRegister Simulator::smaxv(VectorFormat vform,
   1507                                 LogicVRegister dst,
   1508                                 const LogicVRegister& src) {
   1509   sminmaxv(vform, dst, src, true);
   1510   return dst;
   1511 }
   1512 
   1513 
   1514 LogicVRegister Simulator::sminv(VectorFormat vform,
   1515                                 LogicVRegister dst,
   1516                                 const LogicVRegister& src) {
   1517   sminmaxv(vform, dst, src, false);
   1518   return dst;
   1519 }
   1520 
   1521 
   1522 LogicVRegister Simulator::uminmax(VectorFormat vform,
   1523                                   LogicVRegister dst,
   1524                                   const LogicVRegister& src1,
   1525                                   const LogicVRegister& src2,
   1526                                   bool max) {
   1527   dst.ClearForWrite(vform);
   1528   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1529     uint64_t src1_val = src1.Uint(vform, i);
   1530     uint64_t src2_val = src2.Uint(vform, i);
   1531     uint64_t dst_val;
   1532     if (max) {
   1533       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1534     } else {
   1535       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1536     }
   1537     dst.SetUint(vform, i, dst_val);
   1538   }
   1539   return dst;
   1540 }
   1541 
   1542 
   1543 LogicVRegister Simulator::umax(VectorFormat vform,
   1544                                LogicVRegister dst,
   1545                                const LogicVRegister& src1,
   1546                                const LogicVRegister& src2) {
   1547   return uminmax(vform, dst, src1, src2, true);
   1548 }
   1549 
   1550 
   1551 LogicVRegister Simulator::umin(VectorFormat vform,
   1552                                LogicVRegister dst,
   1553                                const LogicVRegister& src1,
   1554                                const LogicVRegister& src2) {
   1555   return uminmax(vform, dst, src1, src2, false);
   1556 }
   1557 
   1558 
   1559 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
   1560                                    LogicVRegister dst,
   1561                                    const LogicVRegister& src1,
   1562                                    const LogicVRegister& src2,
   1563                                    bool max) {
   1564   int lanes = LaneCountFromFormat(vform);
   1565   uint64_t result[kMaxLanesPerVector];
   1566   const LogicVRegister* src = &src1;
   1567   for (int j = 0; j < 2; j++) {
   1568     for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
   1569       uint64_t first_val = src->Uint(vform, i);
   1570       uint64_t second_val = src->Uint(vform, i + 1);
   1571       uint64_t dst_val;
   1572       if (max) {
   1573         dst_val = (first_val > second_val) ? first_val : second_val;
   1574       } else {
   1575         dst_val = (first_val < second_val) ? first_val : second_val;
   1576       }
   1577       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
   1578       result[(i >> 1) + (j * lanes / 2)] = dst_val;
   1579     }
   1580     src = &src2;
   1581   }
   1582   dst.SetUintArray(vform, result);
   1583   return dst;
   1584 }
   1585 
   1586 
   1587 LogicVRegister Simulator::umaxp(VectorFormat vform,
   1588                                 LogicVRegister dst,
   1589                                 const LogicVRegister& src1,
   1590                                 const LogicVRegister& src2) {
   1591   return uminmaxp(vform, dst, src1, src2, true);
   1592 }
   1593 
   1594 
   1595 LogicVRegister Simulator::uminp(VectorFormat vform,
   1596                                 LogicVRegister dst,
   1597                                 const LogicVRegister& src1,
   1598                                 const LogicVRegister& src2) {
   1599   return uminmaxp(vform, dst, src1, src2, false);
   1600 }
   1601 
   1602 
   1603 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
   1604                                    LogicVRegister dst,
   1605                                    const LogicVRegister& src,
   1606                                    bool max) {
   1607   uint64_t dst_val = max ? 0 : UINT64_MAX;
   1608   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1609     uint64_t src_val = src.Uint(vform, i);
   1610     if (max) {
   1611       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1612     } else {
   1613       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1614     }
   1615   }
   1616   dst.ClearForWrite(ScalarFormatFromFormat(vform));
   1617   dst.SetUint(vform, 0, dst_val);
   1618   return dst;
   1619 }
   1620 
   1621 
   1622 LogicVRegister Simulator::umaxv(VectorFormat vform,
   1623                                 LogicVRegister dst,
   1624                                 const LogicVRegister& src) {
   1625   uminmaxv(vform, dst, src, true);
   1626   return dst;
   1627 }
   1628 
   1629 
   1630 LogicVRegister Simulator::uminv(VectorFormat vform,
   1631                                 LogicVRegister dst,
   1632                                 const LogicVRegister& src) {
   1633   uminmaxv(vform, dst, src, false);
   1634   return dst;
   1635 }
   1636 
   1637 
   1638 LogicVRegister Simulator::shl(VectorFormat vform,
   1639                               LogicVRegister dst,
   1640                               const LogicVRegister& src,
   1641                               int shift) {
   1642   VIXL_ASSERT(shift >= 0);
   1643   SimVRegister temp;
   1644   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1645   return ushl(vform, dst, src, shiftreg);
   1646 }
   1647 
   1648 
   1649 LogicVRegister Simulator::sshll(VectorFormat vform,
   1650                                 LogicVRegister dst,
   1651                                 const LogicVRegister& src,
   1652                                 int shift) {
   1653   VIXL_ASSERT(shift >= 0);
   1654   SimVRegister temp1, temp2;
   1655   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1656   LogicVRegister extendedreg = sxtl(vform, temp2, src);
   1657   return sshl(vform, dst, extendedreg, shiftreg);
   1658 }
   1659 
   1660 
   1661 LogicVRegister Simulator::sshll2(VectorFormat vform,
   1662                                  LogicVRegister dst,
   1663                                  const LogicVRegister& src,
   1664                                  int shift) {
   1665   VIXL_ASSERT(shift >= 0);
   1666   SimVRegister temp1, temp2;
   1667   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1668   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
   1669   return sshl(vform, dst, extendedreg, shiftreg);
   1670 }
   1671 
   1672 
   1673 LogicVRegister Simulator::shll(VectorFormat vform,
   1674                                LogicVRegister dst,
   1675                                const LogicVRegister& src) {
   1676   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1677   return sshll(vform, dst, src, shift);
   1678 }
   1679 
   1680 
   1681 LogicVRegister Simulator::shll2(VectorFormat vform,
   1682                                 LogicVRegister dst,
   1683                                 const LogicVRegister& src) {
   1684   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1685   return sshll2(vform, dst, src, shift);
   1686 }
   1687 
   1688 
   1689 LogicVRegister Simulator::ushll(VectorFormat vform,
   1690                                 LogicVRegister dst,
   1691                                 const LogicVRegister& src,
   1692                                 int shift) {
   1693   VIXL_ASSERT(shift >= 0);
   1694   SimVRegister temp1, temp2;
   1695   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1696   LogicVRegister extendedreg = uxtl(vform, temp2, src);
   1697   return ushl(vform, dst, extendedreg, shiftreg);
   1698 }
   1699 
   1700 
   1701 LogicVRegister Simulator::ushll2(VectorFormat vform,
   1702                                  LogicVRegister dst,
   1703                                  const LogicVRegister& src,
   1704                                  int shift) {
   1705   VIXL_ASSERT(shift >= 0);
   1706   SimVRegister temp1, temp2;
   1707   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1708   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
   1709   return ushl(vform, dst, extendedreg, shiftreg);
   1710 }
   1711 
   1712 
   1713 LogicVRegister Simulator::sli(VectorFormat vform,
   1714                               LogicVRegister dst,
   1715                               const LogicVRegister& src,
   1716                               int shift) {
   1717   dst.ClearForWrite(vform);
   1718   int laneCount = LaneCountFromFormat(vform);
   1719   for (int i = 0; i < laneCount; i++) {
   1720     uint64_t src_lane = src.Uint(vform, i);
   1721     uint64_t dst_lane = dst.Uint(vform, i);
   1722     uint64_t shifted = src_lane << shift;
   1723     uint64_t mask = MaxUintFromFormat(vform) << shift;
   1724     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1725   }
   1726   return dst;
   1727 }
   1728 
   1729 
   1730 LogicVRegister Simulator::sqshl(VectorFormat vform,
   1731                                 LogicVRegister dst,
   1732                                 const LogicVRegister& src,
   1733                                 int shift) {
   1734   VIXL_ASSERT(shift >= 0);
   1735   SimVRegister temp;
   1736   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1737   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
   1738 }
   1739 
   1740 
   1741 LogicVRegister Simulator::uqshl(VectorFormat vform,
   1742                                 LogicVRegister dst,
   1743                                 const LogicVRegister& src,
   1744                                 int shift) {
   1745   VIXL_ASSERT(shift >= 0);
   1746   SimVRegister temp;
   1747   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1748   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1749 }
   1750 
   1751 
   1752 LogicVRegister Simulator::sqshlu(VectorFormat vform,
   1753                                  LogicVRegister dst,
   1754                                  const LogicVRegister& src,
   1755                                  int shift) {
   1756   VIXL_ASSERT(shift >= 0);
   1757   SimVRegister temp;
   1758   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1759   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1760 }
   1761 
   1762 
   1763 LogicVRegister Simulator::sri(VectorFormat vform,
   1764                               LogicVRegister dst,
   1765                               const LogicVRegister& src,
   1766                               int shift) {
   1767   dst.ClearForWrite(vform);
   1768   int laneCount = LaneCountFromFormat(vform);
   1769   VIXL_ASSERT((shift > 0) &&
   1770               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
   1771   for (int i = 0; i < laneCount; i++) {
   1772     uint64_t src_lane = src.Uint(vform, i);
   1773     uint64_t dst_lane = dst.Uint(vform, i);
   1774     uint64_t shifted;
   1775     uint64_t mask;
   1776     if (shift == 64) {
   1777       shifted = 0;
   1778       mask = 0;
   1779     } else {
   1780       shifted = src_lane >> shift;
   1781       mask = MaxUintFromFormat(vform) >> shift;
   1782     }
   1783     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1784   }
   1785   return dst;
   1786 }
   1787 
   1788 
   1789 LogicVRegister Simulator::ushr(VectorFormat vform,
   1790                                LogicVRegister dst,
   1791                                const LogicVRegister& src,
   1792                                int shift) {
   1793   VIXL_ASSERT(shift >= 0);
   1794   SimVRegister temp;
   1795   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1796   return ushl(vform, dst, src, shiftreg);
   1797 }
   1798 
   1799 
   1800 LogicVRegister Simulator::sshr(VectorFormat vform,
   1801                                LogicVRegister dst,
   1802                                const LogicVRegister& src,
   1803                                int shift) {
   1804   VIXL_ASSERT(shift >= 0);
   1805   SimVRegister temp;
   1806   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1807   return sshl(vform, dst, src, shiftreg);
   1808 }
   1809 
   1810 
   1811 LogicVRegister Simulator::ssra(VectorFormat vform,
   1812                                LogicVRegister dst,
   1813                                const LogicVRegister& src,
   1814                                int shift) {
   1815   SimVRegister temp;
   1816   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
   1817   return add(vform, dst, dst, shifted_reg);
   1818 }
   1819 
   1820 
   1821 LogicVRegister Simulator::usra(VectorFormat vform,
   1822                                LogicVRegister dst,
   1823                                const LogicVRegister& src,
   1824                                int shift) {
   1825   SimVRegister temp;
   1826   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
   1827   return add(vform, dst, dst, shifted_reg);
   1828 }
   1829 
   1830 
   1831 LogicVRegister Simulator::srsra(VectorFormat vform,
   1832                                 LogicVRegister dst,
   1833                                 const LogicVRegister& src,
   1834                                 int shift) {
   1835   SimVRegister temp;
   1836   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
   1837   return add(vform, dst, dst, shifted_reg);
   1838 }
   1839 
   1840 
   1841 LogicVRegister Simulator::ursra(VectorFormat vform,
   1842                                 LogicVRegister dst,
   1843                                 const LogicVRegister& src,
   1844                                 int shift) {
   1845   SimVRegister temp;
   1846   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
   1847   return add(vform, dst, dst, shifted_reg);
   1848 }
   1849 
   1850 
   1851 LogicVRegister Simulator::cls(VectorFormat vform,
   1852                               LogicVRegister dst,
   1853                               const LogicVRegister& src) {
   1854   uint64_t result[16];
   1855   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1856   int laneCount = LaneCountFromFormat(vform);
   1857   for (int i = 0; i < laneCount; i++) {
   1858     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
   1859   }
   1860 
   1861   dst.ClearForWrite(vform);
   1862   for (int i = 0; i < laneCount; ++i) {
   1863     dst.SetUint(vform, i, result[i]);
   1864   }
   1865   return dst;
   1866 }
   1867 
   1868 
   1869 LogicVRegister Simulator::clz(VectorFormat vform,
   1870                               LogicVRegister dst,
   1871                               const LogicVRegister& src) {
   1872   uint64_t result[16];
   1873   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1874   int laneCount = LaneCountFromFormat(vform);
   1875   for (int i = 0; i < laneCount; i++) {
   1876     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
   1877   }
   1878 
   1879   dst.ClearForWrite(vform);
   1880   for (int i = 0; i < laneCount; ++i) {
   1881     dst.SetUint(vform, i, result[i]);
   1882   }
   1883   return dst;
   1884 }
   1885 
   1886 
   1887 LogicVRegister Simulator::cnt(VectorFormat vform,
   1888                               LogicVRegister dst,
   1889                               const LogicVRegister& src) {
   1890   uint64_t result[16];
   1891   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1892   int laneCount = LaneCountFromFormat(vform);
   1893   for (int i = 0; i < laneCount; i++) {
   1894     uint64_t value = src.Uint(vform, i);
   1895     result[i] = 0;
   1896     for (int j = 0; j < laneSizeInBits; j++) {
   1897       result[i] += (value & 1);
   1898       value >>= 1;
   1899     }
   1900   }
   1901 
   1902   dst.ClearForWrite(vform);
   1903   for (int i = 0; i < laneCount; ++i) {
   1904     dst.SetUint(vform, i, result[i]);
   1905   }
   1906   return dst;
   1907 }
   1908 
   1909 
   1910 LogicVRegister Simulator::sshl(VectorFormat vform,
   1911                                LogicVRegister dst,
   1912                                const LogicVRegister& src1,
   1913                                const LogicVRegister& src2) {
   1914   dst.ClearForWrite(vform);
   1915   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1916     int8_t shift_val = src2.Int(vform, i);
   1917     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
   1918 
   1919     // Set signed saturation state.
   1920     if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
   1921       dst.SetSignedSat(i, lj_src_val >= 0);
   1922     }
   1923 
   1924     // Set unsigned saturation state.
   1925     if (lj_src_val < 0) {
   1926       dst.SetUnsignedSat(i, false);
   1927     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
   1928                (lj_src_val != 0)) {
   1929       dst.SetUnsignedSat(i, true);
   1930     }
   1931 
   1932     int64_t src_val = src1.Int(vform, i);
   1933     bool src_is_negative = src_val < 0;
   1934     if (shift_val > 63) {
   1935       dst.SetInt(vform, i, 0);
   1936     } else if (shift_val < -63) {
   1937       dst.SetRounding(i, src_is_negative);
   1938       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
   1939     } else {
   1940       // Use unsigned types for shifts, as behaviour is undefined for signed
   1941       // lhs.
   1942       uint64_t usrc_val = static_cast<uint64_t>(src_val);
   1943 
   1944       if (shift_val < 0) {
   1945         // Convert to right shift.
   1946         shift_val = -shift_val;
   1947 
   1948         // Set rounding state by testing most-significant bit shifted out.
   1949         // Rounding only needed on right shifts.
   1950         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
   1951           dst.SetRounding(i, true);
   1952         }
   1953 
   1954         usrc_val >>= shift_val;
   1955 
   1956         if (src_is_negative) {
   1957           // Simulate sign-extension.
   1958           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
   1959         }
   1960       } else {
   1961         usrc_val <<= shift_val;
   1962       }
   1963       dst.SetUint(vform, i, usrc_val);
   1964     }
   1965   }
   1966   return dst;
   1967 }
   1968 
   1969 
   1970 LogicVRegister Simulator::ushl(VectorFormat vform,
   1971                                LogicVRegister dst,
   1972                                const LogicVRegister& src1,
   1973                                const LogicVRegister& src2) {
   1974   dst.ClearForWrite(vform);
   1975   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1976     int8_t shift_val = src2.Int(vform, i);
   1977     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
   1978 
   1979     // Set saturation state.
   1980     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
   1981       dst.SetUnsignedSat(i, true);
   1982     }
   1983 
   1984     uint64_t src_val = src1.Uint(vform, i);
   1985     if ((shift_val > 63) || (shift_val < -64)) {
   1986       dst.SetUint(vform, i, 0);
   1987     } else {
   1988       if (shift_val < 0) {
   1989         // Set rounding state. Rounding only needed on right shifts.
   1990         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
   1991           dst.SetRounding(i, true);
   1992         }
   1993 
   1994         if (shift_val == -64) {
   1995           src_val = 0;
   1996         } else {
   1997           src_val >>= -shift_val;
   1998         }
   1999       } else {
   2000         src_val <<= shift_val;
   2001       }
   2002       dst.SetUint(vform, i, src_val);
   2003     }
   2004   }
   2005   return dst;
   2006 }
   2007 
   2008 
   2009 LogicVRegister Simulator::neg(VectorFormat vform,
   2010                               LogicVRegister dst,
   2011                               const LogicVRegister& src) {
   2012   dst.ClearForWrite(vform);
   2013   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2014     // Test for signed saturation.
   2015     int64_t sa = src.Int(vform, i);
   2016     if (sa == MinIntFromFormat(vform)) {
   2017       dst.SetSignedSat(i, true);
   2018     }
   2019     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
   2020   }
   2021   return dst;
   2022 }
   2023 
   2024 
   2025 LogicVRegister Simulator::suqadd(VectorFormat vform,
   2026                                  LogicVRegister dst,
   2027                                  const LogicVRegister& src) {
   2028   dst.ClearForWrite(vform);
   2029   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2030     int64_t sa = dst.IntLeftJustified(vform, i);
   2031     uint64_t ub = src.UintLeftJustified(vform, i);
   2032     uint64_t ur = sa + ub;
   2033 
   2034     int64_t sr;
   2035     memcpy(&sr, &ur, sizeof(sr));
   2036     if (sr < sa) {  // Test for signed positive saturation.
   2037       dst.SetInt(vform, i, MaxIntFromFormat(vform));
   2038     } else {
   2039       dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
   2040     }
   2041   }
   2042   return dst;
   2043 }
   2044 
   2045 
   2046 LogicVRegister Simulator::usqadd(VectorFormat vform,
   2047                                  LogicVRegister dst,
   2048                                  const LogicVRegister& src) {
   2049   dst.ClearForWrite(vform);
   2050   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2051     uint64_t ua = dst.UintLeftJustified(vform, i);
   2052     int64_t sb = src.IntLeftJustified(vform, i);
   2053     uint64_t ur = ua + sb;
   2054 
   2055     if ((sb > 0) && (ur <= ua)) {
   2056       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
   2057     } else if ((sb < 0) && (ur >= ua)) {
   2058       dst.SetUint(vform, i, 0);  // Negative saturation.
   2059     } else {
   2060       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
   2061     }
   2062   }
   2063   return dst;
   2064 }
   2065 
   2066 
   2067 LogicVRegister Simulator::abs(VectorFormat vform,
   2068                               LogicVRegister dst,
   2069                               const LogicVRegister& src) {
   2070   dst.ClearForWrite(vform);
   2071   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2072     // Test for signed saturation.
   2073     int64_t sa = src.Int(vform, i);
   2074     if (sa == MinIntFromFormat(vform)) {
   2075       dst.SetSignedSat(i, true);
   2076     }
   2077     if (sa < 0) {
   2078       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
   2079     } else {
   2080       dst.SetInt(vform, i, sa);
   2081     }
   2082   }
   2083   return dst;
   2084 }
   2085 
   2086 
   2087 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
   2088                                         LogicVRegister dst,
   2089                                         bool dstIsSigned,
   2090                                         const LogicVRegister& src,
   2091                                         bool srcIsSigned) {
   2092   bool upperhalf = false;
   2093   VectorFormat srcform = kFormatUndefined;
   2094   int64_t ssrc[8];
   2095   uint64_t usrc[8];
   2096 
   2097   switch (dstform) {
   2098     case kFormat8B:
   2099       upperhalf = false;
   2100       srcform = kFormat8H;
   2101       break;
   2102     case kFormat16B:
   2103       upperhalf = true;
   2104       srcform = kFormat8H;
   2105       break;
   2106     case kFormat4H:
   2107       upperhalf = false;
   2108       srcform = kFormat4S;
   2109       break;
   2110     case kFormat8H:
   2111       upperhalf = true;
   2112       srcform = kFormat4S;
   2113       break;
   2114     case kFormat2S:
   2115       upperhalf = false;
   2116       srcform = kFormat2D;
   2117       break;
   2118     case kFormat4S:
   2119       upperhalf = true;
   2120       srcform = kFormat2D;
   2121       break;
   2122     case kFormatB:
   2123       upperhalf = false;
   2124       srcform = kFormatH;
   2125       break;
   2126     case kFormatH:
   2127       upperhalf = false;
   2128       srcform = kFormatS;
   2129       break;
   2130     case kFormatS:
   2131       upperhalf = false;
   2132       srcform = kFormatD;
   2133       break;
   2134     default:
   2135       VIXL_UNIMPLEMENTED();
   2136   }
   2137 
   2138   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   2139     ssrc[i] = src.Int(srcform, i);
   2140     usrc[i] = src.Uint(srcform, i);
   2141   }
   2142 
   2143   int offset;
   2144   if (upperhalf) {
   2145     offset = LaneCountFromFormat(dstform) / 2;
   2146   } else {
   2147     offset = 0;
   2148     dst.ClearForWrite(dstform);
   2149   }
   2150 
   2151   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   2152     // Test for signed saturation
   2153     if (ssrc[i] > MaxIntFromFormat(dstform)) {
   2154       dst.SetSignedSat(offset + i, true);
   2155     } else if (ssrc[i] < MinIntFromFormat(dstform)) {
   2156       dst.SetSignedSat(offset + i, false);
   2157     }
   2158 
   2159     // Test for unsigned saturation
   2160     if (srcIsSigned) {
   2161       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
   2162         dst.SetUnsignedSat(offset + i, true);
   2163       } else if (ssrc[i] < 0) {
   2164         dst.SetUnsignedSat(offset + i, false);
   2165       }
   2166     } else {
   2167       if (usrc[i] > MaxUintFromFormat(dstform)) {
   2168         dst.SetUnsignedSat(offset + i, true);
   2169       }
   2170     }
   2171 
   2172     int64_t result;
   2173     if (srcIsSigned) {
   2174       result = ssrc[i] & MaxUintFromFormat(dstform);
   2175     } else {
   2176       result = usrc[i] & MaxUintFromFormat(dstform);
   2177     }
   2178 
   2179     if (dstIsSigned) {
   2180       dst.SetInt(dstform, offset + i, result);
   2181     } else {
   2182       dst.SetUint(dstform, offset + i, result);
   2183     }
   2184   }
   2185   return dst;
   2186 }
   2187 
   2188 
   2189 LogicVRegister Simulator::xtn(VectorFormat vform,
   2190                               LogicVRegister dst,
   2191                               const LogicVRegister& src) {
   2192   return extractnarrow(vform, dst, true, src, true);
   2193 }
   2194 
   2195 
   2196 LogicVRegister Simulator::sqxtn(VectorFormat vform,
   2197                                 LogicVRegister dst,
   2198                                 const LogicVRegister& src) {
   2199   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
   2200 }
   2201 
   2202 
   2203 LogicVRegister Simulator::sqxtun(VectorFormat vform,
   2204                                  LogicVRegister dst,
   2205                                  const LogicVRegister& src) {
   2206   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
   2207 }
   2208 
   2209 
   2210 LogicVRegister Simulator::uqxtn(VectorFormat vform,
   2211                                 LogicVRegister dst,
   2212                                 const LogicVRegister& src) {
   2213   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
   2214 }
   2215 
   2216 
   2217 LogicVRegister Simulator::absdiff(VectorFormat vform,
   2218                                   LogicVRegister dst,
   2219                                   const LogicVRegister& src1,
   2220                                   const LogicVRegister& src2,
   2221                                   bool issigned) {
   2222   dst.ClearForWrite(vform);
   2223   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2224     if (issigned) {
   2225       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
   2226       sr = sr > 0 ? sr : -sr;
   2227       dst.SetInt(vform, i, sr);
   2228     } else {
   2229       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
   2230       sr = sr > 0 ? sr : -sr;
   2231       dst.SetUint(vform, i, sr);
   2232     }
   2233   }
   2234   return dst;
   2235 }
   2236 
   2237 
   2238 LogicVRegister Simulator::saba(VectorFormat vform,
   2239                                LogicVRegister dst,
   2240                                const LogicVRegister& src1,
   2241                                const LogicVRegister& src2) {
   2242   SimVRegister temp;
   2243   dst.ClearForWrite(vform);
   2244   absdiff(vform, temp, src1, src2, true);
   2245   add(vform, dst, dst, temp);
   2246   return dst;
   2247 }
   2248 
   2249 
   2250 LogicVRegister Simulator::uaba(VectorFormat vform,
   2251                                LogicVRegister dst,
   2252                                const LogicVRegister& src1,
   2253                                const LogicVRegister& src2) {
   2254   SimVRegister temp;
   2255   dst.ClearForWrite(vform);
   2256   absdiff(vform, temp, src1, src2, false);
   2257   add(vform, dst, dst, temp);
   2258   return dst;
   2259 }
   2260 
   2261 
   2262 LogicVRegister Simulator::not_(VectorFormat vform,
   2263                                LogicVRegister dst,
   2264                                const LogicVRegister& src) {
   2265   dst.ClearForWrite(vform);
   2266   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2267     dst.SetUint(vform, i, ~src.Uint(vform, i));
   2268   }
   2269   return dst;
   2270 }
   2271 
   2272 
   2273 LogicVRegister Simulator::rbit(VectorFormat vform,
   2274                                LogicVRegister dst,
   2275                                const LogicVRegister& src) {
   2276   uint64_t result[16];
   2277   int laneCount = LaneCountFromFormat(vform);
   2278   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   2279   uint64_t reversed_value;
   2280   uint64_t value;
   2281   for (int i = 0; i < laneCount; i++) {
   2282     value = src.Uint(vform, i);
   2283     reversed_value = 0;
   2284     for (int j = 0; j < laneSizeInBits; j++) {
   2285       reversed_value = (reversed_value << 1) | (value & 1);
   2286       value >>= 1;
   2287     }
   2288     result[i] = reversed_value;
   2289   }
   2290 
   2291   dst.ClearForWrite(vform);
   2292   for (int i = 0; i < laneCount; ++i) {
   2293     dst.SetUint(vform, i, result[i]);
   2294   }
   2295   return dst;
   2296 }
   2297 
   2298 
   2299 LogicVRegister Simulator::rev(VectorFormat vform,
   2300                               LogicVRegister dst,
   2301                               const LogicVRegister& src,
   2302                               int revSize) {
   2303   uint64_t result[16];
   2304   int laneCount = LaneCountFromFormat(vform);
   2305   int laneSize = LaneSizeInBytesFromFormat(vform);
   2306   int lanesPerLoop = revSize / laneSize;
   2307   for (int i = 0; i < laneCount; i += lanesPerLoop) {
   2308     for (int j = 0; j < lanesPerLoop; j++) {
   2309       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
   2310     }
   2311   }
   2312   dst.ClearForWrite(vform);
   2313   for (int i = 0; i < laneCount; ++i) {
   2314     dst.SetUint(vform, i, result[i]);
   2315   }
   2316   return dst;
   2317 }
   2318 
   2319 
   2320 LogicVRegister Simulator::rev16(VectorFormat vform,
   2321                                 LogicVRegister dst,
   2322                                 const LogicVRegister& src) {
   2323   return rev(vform, dst, src, 2);
   2324 }
   2325 
   2326 
   2327 LogicVRegister Simulator::rev32(VectorFormat vform,
   2328                                 LogicVRegister dst,
   2329                                 const LogicVRegister& src) {
   2330   return rev(vform, dst, src, 4);
   2331 }
   2332 
   2333 
   2334 LogicVRegister Simulator::rev64(VectorFormat vform,
   2335                                 LogicVRegister dst,
   2336                                 const LogicVRegister& src) {
   2337   return rev(vform, dst, src, 8);
   2338 }
   2339 
   2340 
   2341 LogicVRegister Simulator::addlp(VectorFormat vform,
   2342                                 LogicVRegister dst,
   2343                                 const LogicVRegister& src,
   2344                                 bool is_signed,
   2345                                 bool do_accumulate) {
   2346   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
   2347   VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
   2348   VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
   2349 
   2350   uint64_t result[8];
   2351   int lane_count = LaneCountFromFormat(vform);
   2352   for (int i = 0; i < lane_count; i++) {
   2353     if (is_signed) {
   2354       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
   2355                                         src.Int(vformsrc, 2 * i + 1));
   2356     } else {
   2357       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
   2358     }
   2359   }
   2360 
   2361   dst.ClearForWrite(vform);
   2362   for (int i = 0; i < lane_count; ++i) {
   2363     if (do_accumulate) {
   2364       result[i] += dst.Uint(vform, i);
   2365     }
   2366     dst.SetUint(vform, i, result[i]);
   2367   }
   2368 
   2369   return dst;
   2370 }
   2371 
   2372 
   2373 LogicVRegister Simulator::saddlp(VectorFormat vform,
   2374                                  LogicVRegister dst,
   2375                                  const LogicVRegister& src) {
   2376   return addlp(vform, dst, src, true, false);
   2377 }
   2378 
   2379 
   2380 LogicVRegister Simulator::uaddlp(VectorFormat vform,
   2381                                  LogicVRegister dst,
   2382                                  const LogicVRegister& src) {
   2383   return addlp(vform, dst, src, false, false);
   2384 }
   2385 
   2386 
   2387 LogicVRegister Simulator::sadalp(VectorFormat vform,
   2388                                  LogicVRegister dst,
   2389                                  const LogicVRegister& src) {
   2390   return addlp(vform, dst, src, true, true);
   2391 }
   2392 
   2393 
   2394 LogicVRegister Simulator::uadalp(VectorFormat vform,
   2395                                  LogicVRegister dst,
   2396                                  const LogicVRegister& src) {
   2397   return addlp(vform, dst, src, false, true);
   2398 }
   2399 
   2400 
   2401 LogicVRegister Simulator::ext(VectorFormat vform,
   2402                               LogicVRegister dst,
   2403                               const LogicVRegister& src1,
   2404                               const LogicVRegister& src2,
   2405                               int index) {
   2406   uint8_t result[16];
   2407   int laneCount = LaneCountFromFormat(vform);
   2408   for (int i = 0; i < laneCount - index; ++i) {
   2409     result[i] = src1.Uint(vform, i + index);
   2410   }
   2411   for (int i = 0; i < index; ++i) {
   2412     result[laneCount - index + i] = src2.Uint(vform, i);
   2413   }
   2414   dst.ClearForWrite(vform);
   2415   for (int i = 0; i < laneCount; ++i) {
   2416     dst.SetUint(vform, i, result[i]);
   2417   }
   2418   return dst;
   2419 }
   2420 
   2421 
   2422 LogicVRegister Simulator::dup_element(VectorFormat vform,
   2423                                       LogicVRegister dst,
   2424                                       const LogicVRegister& src,
   2425                                       int src_index) {
   2426   int laneCount = LaneCountFromFormat(vform);
   2427   uint64_t value = src.Uint(vform, src_index);
   2428   dst.ClearForWrite(vform);
   2429   for (int i = 0; i < laneCount; ++i) {
   2430     dst.SetUint(vform, i, value);
   2431   }
   2432   return dst;
   2433 }
   2434 
   2435 
   2436 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
   2437                                         LogicVRegister dst,
   2438                                         uint64_t imm) {
   2439   int laneCount = LaneCountFromFormat(vform);
   2440   uint64_t value = imm & MaxUintFromFormat(vform);
   2441   dst.ClearForWrite(vform);
   2442   for (int i = 0; i < laneCount; ++i) {
   2443     dst.SetUint(vform, i, value);
   2444   }
   2445   return dst;
   2446 }
   2447 
   2448 
   2449 LogicVRegister Simulator::ins_element(VectorFormat vform,
   2450                                       LogicVRegister dst,
   2451                                       int dst_index,
   2452                                       const LogicVRegister& src,
   2453                                       int src_index) {
   2454   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
   2455   return dst;
   2456 }
   2457 
   2458 
   2459 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
   2460                                         LogicVRegister dst,
   2461                                         int dst_index,
   2462                                         uint64_t imm) {
   2463   uint64_t value = imm & MaxUintFromFormat(vform);
   2464   dst.SetUint(vform, dst_index, value);
   2465   return dst;
   2466 }
   2467 
   2468 
   2469 LogicVRegister Simulator::movi(VectorFormat vform,
   2470                                LogicVRegister dst,
   2471                                uint64_t imm) {
   2472   int laneCount = LaneCountFromFormat(vform);
   2473   dst.ClearForWrite(vform);
   2474   for (int i = 0; i < laneCount; ++i) {
   2475     dst.SetUint(vform, i, imm);
   2476   }
   2477   return dst;
   2478 }
   2479 
   2480 
   2481 LogicVRegister Simulator::mvni(VectorFormat vform,
   2482                                LogicVRegister dst,
   2483                                uint64_t imm) {
   2484   int laneCount = LaneCountFromFormat(vform);
   2485   dst.ClearForWrite(vform);
   2486   for (int i = 0; i < laneCount; ++i) {
   2487     dst.SetUint(vform, i, ~imm);
   2488   }
   2489   return dst;
   2490 }
   2491 
   2492 
   2493 LogicVRegister Simulator::orr(VectorFormat vform,
   2494                               LogicVRegister dst,
   2495                               const LogicVRegister& src,
   2496                               uint64_t imm) {
   2497   uint64_t result[16];
   2498   int laneCount = LaneCountFromFormat(vform);
   2499   for (int i = 0; i < laneCount; ++i) {
   2500     result[i] = src.Uint(vform, i) | imm;
   2501   }
   2502   dst.ClearForWrite(vform);
   2503   for (int i = 0; i < laneCount; ++i) {
   2504     dst.SetUint(vform, i, result[i]);
   2505   }
   2506   return dst;
   2507 }
   2508 
   2509 
   2510 LogicVRegister Simulator::uxtl(VectorFormat vform,
   2511                                LogicVRegister dst,
   2512                                const LogicVRegister& src) {
   2513   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2514 
   2515   dst.ClearForWrite(vform);
   2516   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2517     dst.SetUint(vform, i, src.Uint(vform_half, i));
   2518   }
   2519   return dst;
   2520 }
   2521 
   2522 
   2523 LogicVRegister Simulator::sxtl(VectorFormat vform,
   2524                                LogicVRegister dst,
   2525                                const LogicVRegister& src) {
   2526   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2527 
   2528   dst.ClearForWrite(vform);
   2529   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2530     dst.SetInt(vform, i, src.Int(vform_half, i));
   2531   }
   2532   return dst;
   2533 }
   2534 
   2535 
   2536 LogicVRegister Simulator::uxtl2(VectorFormat vform,
   2537                                 LogicVRegister dst,
   2538                                 const LogicVRegister& src) {
   2539   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2540   int lane_count = LaneCountFromFormat(vform);
   2541 
   2542   dst.ClearForWrite(vform);
   2543   for (int i = 0; i < lane_count; i++) {
   2544     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
   2545   }
   2546   return dst;
   2547 }
   2548 
   2549 
   2550 LogicVRegister Simulator::sxtl2(VectorFormat vform,
   2551                                 LogicVRegister dst,
   2552                                 const LogicVRegister& src) {
   2553   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2554   int lane_count = LaneCountFromFormat(vform);
   2555 
   2556   dst.ClearForWrite(vform);
   2557   for (int i = 0; i < lane_count; i++) {
   2558     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
   2559   }
   2560   return dst;
   2561 }
   2562 
   2563 
   2564 LogicVRegister Simulator::shrn(VectorFormat vform,
   2565                                LogicVRegister dst,
   2566                                const LogicVRegister& src,
   2567                                int shift) {
   2568   SimVRegister temp;
   2569   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
   2570   VectorFormat vform_dst = vform;
   2571   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
   2572   return extractnarrow(vform_dst, dst, false, shifted_src, false);
   2573 }
   2574 
   2575 
   2576 LogicVRegister Simulator::shrn2(VectorFormat vform,
   2577                                 LogicVRegister dst,
   2578                                 const LogicVRegister& src,
   2579                                 int shift) {
   2580   SimVRegister temp;
   2581   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2582   VectorFormat vformdst = vform;
   2583   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
   2584   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2585 }
   2586 
   2587 
   2588 LogicVRegister Simulator::rshrn(VectorFormat vform,
   2589                                 LogicVRegister dst,
   2590                                 const LogicVRegister& src,
   2591                                 int shift) {
   2592   SimVRegister temp;
   2593   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2594   VectorFormat vformdst = vform;
   2595   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2596   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2597 }
   2598 
   2599 
   2600 LogicVRegister Simulator::rshrn2(VectorFormat vform,
   2601                                  LogicVRegister dst,
   2602                                  const LogicVRegister& src,
   2603                                  int shift) {
   2604   SimVRegister temp;
   2605   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2606   VectorFormat vformdst = vform;
   2607   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2608   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2609 }
   2610 
   2611 
   2612 LogicVRegister Simulator::Table(VectorFormat vform,
   2613                                 LogicVRegister dst,
   2614                                 const LogicVRegister& ind,
   2615                                 bool zero_out_of_bounds,
   2616                                 const LogicVRegister* tab1,
   2617                                 const LogicVRegister* tab2,
   2618                                 const LogicVRegister* tab3,
   2619                                 const LogicVRegister* tab4) {
   2620   VIXL_ASSERT(tab1 != NULL);
   2621   const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
   2622   uint64_t result[kMaxLanesPerVector];
   2623   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2624     result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
   2625   }
   2626   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2627     uint64_t j = ind.Uint(vform, i);
   2628     int tab_idx = static_cast<int>(j >> 4);
   2629     int j_idx = static_cast<int>(j & 15);
   2630     if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
   2631       result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
   2632     }
   2633   }
   2634   dst.SetUintArray(vform, result);
   2635   return dst;
   2636 }
   2637 
   2638 
   2639 LogicVRegister Simulator::tbl(VectorFormat vform,
   2640                               LogicVRegister dst,
   2641                               const LogicVRegister& tab,
   2642                               const LogicVRegister& ind) {
   2643   return Table(vform, dst, ind, true, &tab);
   2644 }
   2645 
   2646 
   2647 LogicVRegister Simulator::tbl(VectorFormat vform,
   2648                               LogicVRegister dst,
   2649                               const LogicVRegister& tab,
   2650                               const LogicVRegister& tab2,
   2651                               const LogicVRegister& ind) {
   2652   return Table(vform, dst, ind, true, &tab, &tab2);
   2653 }
   2654 
   2655 
   2656 LogicVRegister Simulator::tbl(VectorFormat vform,
   2657                               LogicVRegister dst,
   2658                               const LogicVRegister& tab,
   2659                               const LogicVRegister& tab2,
   2660                               const LogicVRegister& tab3,
   2661                               const LogicVRegister& ind) {
   2662   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
   2663 }
   2664 
   2665 
   2666 LogicVRegister Simulator::tbl(VectorFormat vform,
   2667                               LogicVRegister dst,
   2668                               const LogicVRegister& tab,
   2669                               const LogicVRegister& tab2,
   2670                               const LogicVRegister& tab3,
   2671                               const LogicVRegister& tab4,
   2672                               const LogicVRegister& ind) {
   2673   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
   2674 }
   2675 
   2676 
   2677 LogicVRegister Simulator::tbx(VectorFormat vform,
   2678                               LogicVRegister dst,
   2679                               const LogicVRegister& tab,
   2680                               const LogicVRegister& ind) {
   2681   return Table(vform, dst, ind, false, &tab);
   2682 }
   2683 
   2684 
   2685 LogicVRegister Simulator::tbx(VectorFormat vform,
   2686                               LogicVRegister dst,
   2687                               const LogicVRegister& tab,
   2688                               const LogicVRegister& tab2,
   2689                               const LogicVRegister& ind) {
   2690   return Table(vform, dst, ind, false, &tab, &tab2);
   2691 }
   2692 
   2693 
   2694 LogicVRegister Simulator::tbx(VectorFormat vform,
   2695                               LogicVRegister dst,
   2696                               const LogicVRegister& tab,
   2697                               const LogicVRegister& tab2,
   2698                               const LogicVRegister& tab3,
   2699                               const LogicVRegister& ind) {
   2700   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
   2701 }
   2702 
   2703 
   2704 LogicVRegister Simulator::tbx(VectorFormat vform,
   2705                               LogicVRegister dst,
   2706                               const LogicVRegister& tab,
   2707                               const LogicVRegister& tab2,
   2708                               const LogicVRegister& tab3,
   2709                               const LogicVRegister& tab4,
   2710                               const LogicVRegister& ind) {
   2711   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
   2712 }
   2713 
   2714 
   2715 LogicVRegister Simulator::uqshrn(VectorFormat vform,
   2716                                  LogicVRegister dst,
   2717                                  const LogicVRegister& src,
   2718                                  int shift) {
   2719   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2720 }
   2721 
   2722 
   2723 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
   2724                                   LogicVRegister dst,
   2725                                   const LogicVRegister& src,
   2726                                   int shift) {
   2727   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2728 }
   2729 
   2730 
   2731 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
   2732                                   LogicVRegister dst,
   2733                                   const LogicVRegister& src,
   2734                                   int shift) {
   2735   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2736 }
   2737 
   2738 
   2739 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
   2740                                    LogicVRegister dst,
   2741                                    const LogicVRegister& src,
   2742                                    int shift) {
   2743   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2744 }
   2745 
   2746 
   2747 LogicVRegister Simulator::sqshrn(VectorFormat vform,
   2748                                  LogicVRegister dst,
   2749                                  const LogicVRegister& src,
   2750                                  int shift) {
   2751   SimVRegister temp;
   2752   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2753   VectorFormat vformdst = vform;
   2754   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2755   return sqxtn(vformdst, dst, shifted_src);
   2756 }
   2757 
   2758 
   2759 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
   2760                                   LogicVRegister dst,
   2761                                   const LogicVRegister& src,
   2762                                   int shift) {
   2763   SimVRegister temp;
   2764   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2765   VectorFormat vformdst = vform;
   2766   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2767   return sqxtn(vformdst, dst, shifted_src);
   2768 }
   2769 
   2770 
   2771 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
   2772                                   LogicVRegister dst,
   2773                                   const LogicVRegister& src,
   2774                                   int shift) {
   2775   SimVRegister temp;
   2776   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2777   VectorFormat vformdst = vform;
   2778   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2779   return sqxtn(vformdst, dst, shifted_src);
   2780 }
   2781 
   2782 
   2783 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
   2784                                    LogicVRegister dst,
   2785                                    const LogicVRegister& src,
   2786                                    int shift) {
   2787   SimVRegister temp;
   2788   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2789   VectorFormat vformdst = vform;
   2790   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2791   return sqxtn(vformdst, dst, shifted_src);
   2792 }
   2793 
   2794 
   2795 LogicVRegister Simulator::sqshrun(VectorFormat vform,
   2796                                   LogicVRegister dst,
   2797                                   const LogicVRegister& src,
   2798                                   int shift) {
   2799   SimVRegister temp;
   2800   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2801   VectorFormat vformdst = vform;
   2802   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2803   return sqxtun(vformdst, dst, shifted_src);
   2804 }
   2805 
   2806 
   2807 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
   2808                                    LogicVRegister dst,
   2809                                    const LogicVRegister& src,
   2810                                    int shift) {
   2811   SimVRegister temp;
   2812   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2813   VectorFormat vformdst = vform;
   2814   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2815   return sqxtun(vformdst, dst, shifted_src);
   2816 }
   2817 
   2818 
   2819 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
   2820                                    LogicVRegister dst,
   2821                                    const LogicVRegister& src,
   2822                                    int shift) {
   2823   SimVRegister temp;
   2824   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2825   VectorFormat vformdst = vform;
   2826   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2827   return sqxtun(vformdst, dst, shifted_src);
   2828 }
   2829 
   2830 
   2831 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
   2832                                     LogicVRegister dst,
   2833                                     const LogicVRegister& src,
   2834                                     int shift) {
   2835   SimVRegister temp;
   2836   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2837   VectorFormat vformdst = vform;
   2838   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2839   return sqxtun(vformdst, dst, shifted_src);
   2840 }
   2841 
   2842 
   2843 LogicVRegister Simulator::uaddl(VectorFormat vform,
   2844                                 LogicVRegister dst,
   2845                                 const LogicVRegister& src1,
   2846                                 const LogicVRegister& src2) {
   2847   SimVRegister temp1, temp2;
   2848   uxtl(vform, temp1, src1);
   2849   uxtl(vform, temp2, src2);
   2850   add(vform, dst, temp1, temp2);
   2851   return dst;
   2852 }
   2853 
   2854 
   2855 LogicVRegister Simulator::uaddl2(VectorFormat vform,
   2856                                  LogicVRegister dst,
   2857                                  const LogicVRegister& src1,
   2858                                  const LogicVRegister& src2) {
   2859   SimVRegister temp1, temp2;
   2860   uxtl2(vform, temp1, src1);
   2861   uxtl2(vform, temp2, src2);
   2862   add(vform, dst, temp1, temp2);
   2863   return dst;
   2864 }
   2865 
   2866 
   2867 LogicVRegister Simulator::uaddw(VectorFormat vform,
   2868                                 LogicVRegister dst,
   2869                                 const LogicVRegister& src1,
   2870                                 const LogicVRegister& src2) {
   2871   SimVRegister temp;
   2872   uxtl(vform, temp, src2);
   2873   add(vform, dst, src1, temp);
   2874   return dst;
   2875 }
   2876 
   2877 
   2878 LogicVRegister Simulator::uaddw2(VectorFormat vform,
   2879                                  LogicVRegister dst,
   2880                                  const LogicVRegister& src1,
   2881                                  const LogicVRegister& src2) {
   2882   SimVRegister temp;
   2883   uxtl2(vform, temp, src2);
   2884   add(vform, dst, src1, temp);
   2885   return dst;
   2886 }
   2887 
   2888 
   2889 LogicVRegister Simulator::saddl(VectorFormat vform,
   2890                                 LogicVRegister dst,
   2891                                 const LogicVRegister& src1,
   2892                                 const LogicVRegister& src2) {
   2893   SimVRegister temp1, temp2;
   2894   sxtl(vform, temp1, src1);
   2895   sxtl(vform, temp2, src2);
   2896   add(vform, dst, temp1, temp2);
   2897   return dst;
   2898 }
   2899 
   2900 
   2901 LogicVRegister Simulator::saddl2(VectorFormat vform,
   2902                                  LogicVRegister dst,
   2903                                  const LogicVRegister& src1,
   2904                                  const LogicVRegister& src2) {
   2905   SimVRegister temp1, temp2;
   2906   sxtl2(vform, temp1, src1);
   2907   sxtl2(vform, temp2, src2);
   2908   add(vform, dst, temp1, temp2);
   2909   return dst;
   2910 }
   2911 
   2912 
   2913 LogicVRegister Simulator::saddw(VectorFormat vform,
   2914                                 LogicVRegister dst,
   2915                                 const LogicVRegister& src1,
   2916                                 const LogicVRegister& src2) {
   2917   SimVRegister temp;
   2918   sxtl(vform, temp, src2);
   2919   add(vform, dst, src1, temp);
   2920   return dst;
   2921 }
   2922 
   2923 
   2924 LogicVRegister Simulator::saddw2(VectorFormat vform,
   2925                                  LogicVRegister dst,
   2926                                  const LogicVRegister& src1,
   2927                                  const LogicVRegister& src2) {
   2928   SimVRegister temp;
   2929   sxtl2(vform, temp, src2);
   2930   add(vform, dst, src1, temp);
   2931   return dst;
   2932 }
   2933 
   2934 
   2935 LogicVRegister Simulator::usubl(VectorFormat vform,
   2936                                 LogicVRegister dst,
   2937                                 const LogicVRegister& src1,
   2938                                 const LogicVRegister& src2) {
   2939   SimVRegister temp1, temp2;
   2940   uxtl(vform, temp1, src1);
   2941   uxtl(vform, temp2, src2);
   2942   sub(vform, dst, temp1, temp2);
   2943   return dst;
   2944 }
   2945 
   2946 
   2947 LogicVRegister Simulator::usubl2(VectorFormat vform,
   2948                                  LogicVRegister dst,
   2949                                  const LogicVRegister& src1,
   2950                                  const LogicVRegister& src2) {
   2951   SimVRegister temp1, temp2;
   2952   uxtl2(vform, temp1, src1);
   2953   uxtl2(vform, temp2, src2);
   2954   sub(vform, dst, temp1, temp2);
   2955   return dst;
   2956 }
   2957 
   2958 
   2959 LogicVRegister Simulator::usubw(VectorFormat vform,
   2960                                 LogicVRegister dst,
   2961                                 const LogicVRegister& src1,
   2962                                 const LogicVRegister& src2) {
   2963   SimVRegister temp;
   2964   uxtl(vform, temp, src2);
   2965   sub(vform, dst, src1, temp);
   2966   return dst;
   2967 }
   2968 
   2969 
   2970 LogicVRegister Simulator::usubw2(VectorFormat vform,
   2971                                  LogicVRegister dst,
   2972                                  const LogicVRegister& src1,
   2973                                  const LogicVRegister& src2) {
   2974   SimVRegister temp;
   2975   uxtl2(vform, temp, src2);
   2976   sub(vform, dst, src1, temp);
   2977   return dst;
   2978 }
   2979 
   2980 
   2981 LogicVRegister Simulator::ssubl(VectorFormat vform,
   2982                                 LogicVRegister dst,
   2983                                 const LogicVRegister& src1,
   2984                                 const LogicVRegister& src2) {
   2985   SimVRegister temp1, temp2;
   2986   sxtl(vform, temp1, src1);
   2987   sxtl(vform, temp2, src2);
   2988   sub(vform, dst, temp1, temp2);
   2989   return dst;
   2990 }
   2991 
   2992 
   2993 LogicVRegister Simulator::ssubl2(VectorFormat vform,
   2994                                  LogicVRegister dst,
   2995                                  const LogicVRegister& src1,
   2996                                  const LogicVRegister& src2) {
   2997   SimVRegister temp1, temp2;
   2998   sxtl2(vform, temp1, src1);
   2999   sxtl2(vform, temp2, src2);
   3000   sub(vform, dst, temp1, temp2);
   3001   return dst;
   3002 }
   3003 
   3004 
   3005 LogicVRegister Simulator::ssubw(VectorFormat vform,
   3006                                 LogicVRegister dst,
   3007                                 const LogicVRegister& src1,
   3008                                 const LogicVRegister& src2) {
   3009   SimVRegister temp;
   3010   sxtl(vform, temp, src2);
   3011   sub(vform, dst, src1, temp);
   3012   return dst;
   3013 }
   3014 
   3015 
   3016 LogicVRegister Simulator::ssubw2(VectorFormat vform,
   3017                                  LogicVRegister dst,
   3018                                  const LogicVRegister& src1,
   3019                                  const LogicVRegister& src2) {
   3020   SimVRegister temp;
   3021   sxtl2(vform, temp, src2);
   3022   sub(vform, dst, src1, temp);
   3023   return dst;
   3024 }
   3025 
   3026 
   3027 LogicVRegister Simulator::uabal(VectorFormat vform,
   3028                                 LogicVRegister dst,
   3029                                 const LogicVRegister& src1,
   3030                                 const LogicVRegister& src2) {
   3031   SimVRegister temp1, temp2;
   3032   uxtl(vform, temp1, src1);
   3033   uxtl(vform, temp2, src2);
   3034   uaba(vform, dst, temp1, temp2);
   3035   return dst;
   3036 }
   3037 
   3038 
   3039 LogicVRegister Simulator::uabal2(VectorFormat vform,
   3040                                  LogicVRegister dst,
   3041                                  const LogicVRegister& src1,
   3042                                  const LogicVRegister& src2) {
   3043   SimVRegister temp1, temp2;
   3044   uxtl2(vform, temp1, src1);
   3045   uxtl2(vform, temp2, src2);
   3046   uaba(vform, dst, temp1, temp2);
   3047   return dst;
   3048 }
   3049 
   3050 
   3051 LogicVRegister Simulator::sabal(VectorFormat vform,
   3052                                 LogicVRegister dst,
   3053                                 const LogicVRegister& src1,
   3054                                 const LogicVRegister& src2) {
   3055   SimVRegister temp1, temp2;
   3056   sxtl(vform, temp1, src1);
   3057   sxtl(vform, temp2, src2);
   3058   saba(vform, dst, temp1, temp2);
   3059   return dst;
   3060 }
   3061 
   3062 
   3063 LogicVRegister Simulator::sabal2(VectorFormat vform,
   3064                                  LogicVRegister dst,
   3065                                  const LogicVRegister& src1,
   3066                                  const LogicVRegister& src2) {
   3067   SimVRegister temp1, temp2;
   3068   sxtl2(vform, temp1, src1);
   3069   sxtl2(vform, temp2, src2);
   3070   saba(vform, dst, temp1, temp2);
   3071   return dst;
   3072 }
   3073 
   3074 
   3075 LogicVRegister Simulator::uabdl(VectorFormat vform,
   3076                                 LogicVRegister dst,
   3077                                 const LogicVRegister& src1,
   3078                                 const LogicVRegister& src2) {
   3079   SimVRegister temp1, temp2;
   3080   uxtl(vform, temp1, src1);
   3081   uxtl(vform, temp2, src2);
   3082   absdiff(vform, dst, temp1, temp2, false);
   3083   return dst;
   3084 }
   3085 
   3086 
   3087 LogicVRegister Simulator::uabdl2(VectorFormat vform,
   3088                                  LogicVRegister dst,
   3089                                  const LogicVRegister& src1,
   3090                                  const LogicVRegister& src2) {
   3091   SimVRegister temp1, temp2;
   3092   uxtl2(vform, temp1, src1);
   3093   uxtl2(vform, temp2, src2);
   3094   absdiff(vform, dst, temp1, temp2, false);
   3095   return dst;
   3096 }
   3097 
   3098 
   3099 LogicVRegister Simulator::sabdl(VectorFormat vform,
   3100                                 LogicVRegister dst,
   3101                                 const LogicVRegister& src1,
   3102                                 const LogicVRegister& src2) {
   3103   SimVRegister temp1, temp2;
   3104   sxtl(vform, temp1, src1);
   3105   sxtl(vform, temp2, src2);
   3106   absdiff(vform, dst, temp1, temp2, true);
   3107   return dst;
   3108 }
   3109 
   3110 
   3111 LogicVRegister Simulator::sabdl2(VectorFormat vform,
   3112                                  LogicVRegister dst,
   3113                                  const LogicVRegister& src1,
   3114                                  const LogicVRegister& src2) {
   3115   SimVRegister temp1, temp2;
   3116   sxtl2(vform, temp1, src1);
   3117   sxtl2(vform, temp2, src2);
   3118   absdiff(vform, dst, temp1, temp2, true);
   3119   return dst;
   3120 }
   3121 
   3122 
   3123 LogicVRegister Simulator::umull(VectorFormat vform,
   3124                                 LogicVRegister dst,
   3125                                 const LogicVRegister& src1,
   3126                                 const LogicVRegister& src2) {
   3127   SimVRegister temp1, temp2;
   3128   uxtl(vform, temp1, src1);
   3129   uxtl(vform, temp2, src2);
   3130   mul(vform, dst, temp1, temp2);
   3131   return dst;
   3132 }
   3133 
   3134 
   3135 LogicVRegister Simulator::umull2(VectorFormat vform,
   3136                                  LogicVRegister dst,
   3137                                  const LogicVRegister& src1,
   3138                                  const LogicVRegister& src2) {
   3139   SimVRegister temp1, temp2;
   3140   uxtl2(vform, temp1, src1);
   3141   uxtl2(vform, temp2, src2);
   3142   mul(vform, dst, temp1, temp2);
   3143   return dst;
   3144 }
   3145 
   3146 
   3147 LogicVRegister Simulator::smull(VectorFormat vform,
   3148                                 LogicVRegister dst,
   3149                                 const LogicVRegister& src1,
   3150                                 const LogicVRegister& src2) {
   3151   SimVRegister temp1, temp2;
   3152   sxtl(vform, temp1, src1);
   3153   sxtl(vform, temp2, src2);
   3154   mul(vform, dst, temp1, temp2);
   3155   return dst;
   3156 }
   3157 
   3158 
   3159 LogicVRegister Simulator::smull2(VectorFormat vform,
   3160                                  LogicVRegister dst,
   3161                                  const LogicVRegister& src1,
   3162                                  const LogicVRegister& src2) {
   3163   SimVRegister temp1, temp2;
   3164   sxtl2(vform, temp1, src1);
   3165   sxtl2(vform, temp2, src2);
   3166   mul(vform, dst, temp1, temp2);
   3167   return dst;
   3168 }
   3169 
   3170 
   3171 LogicVRegister Simulator::umlsl(VectorFormat vform,
   3172                                 LogicVRegister dst,
   3173                                 const LogicVRegister& src1,
   3174                                 const LogicVRegister& src2) {
   3175   SimVRegister temp1, temp2;
   3176   uxtl(vform, temp1, src1);
   3177   uxtl(vform, temp2, src2);
   3178   mls(vform, dst, temp1, temp2);
   3179   return dst;
   3180 }
   3181 
   3182 
   3183 LogicVRegister Simulator::umlsl2(VectorFormat vform,
   3184                                  LogicVRegister dst,
   3185                                  const LogicVRegister& src1,
   3186                                  const LogicVRegister& src2) {
   3187   SimVRegister temp1, temp2;
   3188   uxtl2(vform, temp1, src1);
   3189   uxtl2(vform, temp2, src2);
   3190   mls(vform, dst, temp1, temp2);
   3191   return dst;
   3192 }
   3193 
   3194 
   3195 LogicVRegister Simulator::smlsl(VectorFormat vform,
   3196                                 LogicVRegister dst,
   3197                                 const LogicVRegister& src1,
   3198                                 const LogicVRegister& src2) {
   3199   SimVRegister temp1, temp2;
   3200   sxtl(vform, temp1, src1);
   3201   sxtl(vform, temp2, src2);
   3202   mls(vform, dst, temp1, temp2);
   3203   return dst;
   3204 }
   3205 
   3206 
   3207 LogicVRegister Simulator::smlsl2(VectorFormat vform,
   3208                                  LogicVRegister dst,
   3209                                  const LogicVRegister& src1,
   3210                                  const LogicVRegister& src2) {
   3211   SimVRegister temp1, temp2;
   3212   sxtl2(vform, temp1, src1);
   3213   sxtl2(vform, temp2, src2);
   3214   mls(vform, dst, temp1, temp2);
   3215   return dst;
   3216 }
   3217 
   3218 
   3219 LogicVRegister Simulator::umlal(VectorFormat vform,
   3220                                 LogicVRegister dst,
   3221                                 const LogicVRegister& src1,
   3222                                 const LogicVRegister& src2) {
   3223   SimVRegister temp1, temp2;
   3224   uxtl(vform, temp1, src1);
   3225   uxtl(vform, temp2, src2);
   3226   mla(vform, dst, temp1, temp2);
   3227   return dst;
   3228 }
   3229 
   3230 
   3231 LogicVRegister Simulator::umlal2(VectorFormat vform,
   3232                                  LogicVRegister dst,
   3233                                  const LogicVRegister& src1,
   3234                                  const LogicVRegister& src2) {
   3235   SimVRegister temp1, temp2;
   3236   uxtl2(vform, temp1, src1);
   3237   uxtl2(vform, temp2, src2);
   3238   mla(vform, dst, temp1, temp2);
   3239   return dst;
   3240 }
   3241 
   3242 
   3243 LogicVRegister Simulator::smlal(VectorFormat vform,
   3244                                 LogicVRegister dst,
   3245                                 const LogicVRegister& src1,
   3246                                 const LogicVRegister& src2) {
   3247   SimVRegister temp1, temp2;
   3248   sxtl(vform, temp1, src1);
   3249   sxtl(vform, temp2, src2);
   3250   mla(vform, dst, temp1, temp2);
   3251   return dst;
   3252 }
   3253 
   3254 
   3255 LogicVRegister Simulator::smlal2(VectorFormat vform,
   3256                                  LogicVRegister dst,
   3257                                  const LogicVRegister& src1,
   3258                                  const LogicVRegister& src2) {
   3259   SimVRegister temp1, temp2;
   3260   sxtl2(vform, temp1, src1);
   3261   sxtl2(vform, temp2, src2);
   3262   mla(vform, dst, temp1, temp2);
   3263   return dst;
   3264 }
   3265 
   3266 
   3267 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
   3268                                   LogicVRegister dst,
   3269                                   const LogicVRegister& src1,
   3270                                   const LogicVRegister& src2) {
   3271   SimVRegister temp;
   3272   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3273   return add(vform, dst, dst, product).SignedSaturate(vform);
   3274 }
   3275 
   3276 
   3277 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
   3278                                    LogicVRegister dst,
   3279                                    const LogicVRegister& src1,
   3280                                    const LogicVRegister& src2) {
   3281   SimVRegister temp;
   3282   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3283   return add(vform, dst, dst, product).SignedSaturate(vform);
   3284 }
   3285 
   3286 
   3287 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
   3288                                   LogicVRegister dst,
   3289                                   const LogicVRegister& src1,
   3290                                   const LogicVRegister& src2) {
   3291   SimVRegister temp;
   3292   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3293   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3294 }
   3295 
   3296 
   3297 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
   3298                                    LogicVRegister dst,
   3299                                    const LogicVRegister& src1,
   3300                                    const LogicVRegister& src2) {
   3301   SimVRegister temp;
   3302   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3303   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3304 }
   3305 
   3306 
   3307 LogicVRegister Simulator::sqdmull(VectorFormat vform,
   3308                                   LogicVRegister dst,
   3309                                   const LogicVRegister& src1,
   3310                                   const LogicVRegister& src2) {
   3311   SimVRegister temp;
   3312   LogicVRegister product = smull(vform, temp, src1, src2);
   3313   return add(vform, dst, product, product).SignedSaturate(vform);
   3314 }
   3315 
   3316 
   3317 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
   3318                                    LogicVRegister dst,
   3319                                    const LogicVRegister& src1,
   3320                                    const LogicVRegister& src2) {
   3321   SimVRegister temp;
   3322   LogicVRegister product = smull2(vform, temp, src1, src2);
   3323   return add(vform, dst, product, product).SignedSaturate(vform);
   3324 }
   3325 
   3326 
   3327 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
   3328                                    LogicVRegister dst,
   3329                                    const LogicVRegister& src1,
   3330                                    const LogicVRegister& src2,
   3331                                    bool round) {
   3332   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
   3333   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
   3334   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
   3335 
   3336   int esize = LaneSizeInBitsFromFormat(vform);
   3337   int round_const = round ? (1 << (esize - 2)) : 0;
   3338   int64_t product;
   3339 
   3340   dst.ClearForWrite(vform);
   3341   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3342     product = src1.Int(vform, i) * src2.Int(vform, i);
   3343     product += round_const;
   3344     product = product >> (esize - 1);
   3345 
   3346     if (product > MaxIntFromFormat(vform)) {
   3347       product = MaxIntFromFormat(vform);
   3348     } else if (product < MinIntFromFormat(vform)) {
   3349       product = MinIntFromFormat(vform);
   3350     }
   3351     dst.SetInt(vform, i, product);
   3352   }
   3353   return dst;
   3354 }
   3355 
   3356 
   3357 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
   3358                                   LogicVRegister dst,
   3359                                   const LogicVRegister& src1,
   3360                                   const LogicVRegister& src2) {
   3361   return sqrdmulh(vform, dst, src1, src2, false);
   3362 }
   3363 
   3364 
   3365 LogicVRegister Simulator::addhn(VectorFormat vform,
   3366                                 LogicVRegister dst,
   3367                                 const LogicVRegister& src1,
   3368                                 const LogicVRegister& src2) {
   3369   SimVRegister temp;
   3370   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3371   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3372   return dst;
   3373 }
   3374 
   3375 
   3376 LogicVRegister Simulator::addhn2(VectorFormat vform,
   3377                                  LogicVRegister dst,
   3378                                  const LogicVRegister& src1,
   3379                                  const LogicVRegister& src2) {
   3380   SimVRegister temp;
   3381   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3382   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3383   return dst;
   3384 }
   3385 
   3386 
   3387 LogicVRegister Simulator::raddhn(VectorFormat vform,
   3388                                  LogicVRegister dst,
   3389                                  const LogicVRegister& src1,
   3390                                  const LogicVRegister& src2) {
   3391   SimVRegister temp;
   3392   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3393   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3394   return dst;
   3395 }
   3396 
   3397 
   3398 LogicVRegister Simulator::raddhn2(VectorFormat vform,
   3399                                   LogicVRegister dst,
   3400                                   const LogicVRegister& src1,
   3401                                   const LogicVRegister& src2) {
   3402   SimVRegister temp;
   3403   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3404   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3405   return dst;
   3406 }
   3407 
   3408 
   3409 LogicVRegister Simulator::subhn(VectorFormat vform,
   3410                                 LogicVRegister dst,
   3411                                 const LogicVRegister& src1,
   3412                                 const LogicVRegister& src2) {
   3413   SimVRegister temp;
   3414   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3415   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3416   return dst;
   3417 }
   3418 
   3419 
   3420 LogicVRegister Simulator::subhn2(VectorFormat vform,
   3421                                  LogicVRegister dst,
   3422                                  const LogicVRegister& src1,
   3423                                  const LogicVRegister& src2) {
   3424   SimVRegister temp;
   3425   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3426   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3427   return dst;
   3428 }
   3429 
   3430 
   3431 LogicVRegister Simulator::rsubhn(VectorFormat vform,
   3432                                  LogicVRegister dst,
   3433                                  const LogicVRegister& src1,
   3434                                  const LogicVRegister& src2) {
   3435   SimVRegister temp;
   3436   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3437   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3438   return dst;
   3439 }
   3440 
   3441 
   3442 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
   3443                                   LogicVRegister dst,
   3444                                   const LogicVRegister& src1,
   3445                                   const LogicVRegister& src2) {
   3446   SimVRegister temp;
   3447   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3448   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3449   return dst;
   3450 }
   3451 
   3452 
   3453 LogicVRegister Simulator::trn1(VectorFormat vform,
   3454                                LogicVRegister dst,
   3455                                const LogicVRegister& src1,
   3456                                const LogicVRegister& src2) {
   3457   uint64_t result[16];
   3458   int laneCount = LaneCountFromFormat(vform);
   3459   int pairs = laneCount / 2;
   3460   for (int i = 0; i < pairs; ++i) {
   3461     result[2 * i] = src1.Uint(vform, 2 * i);
   3462     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
   3463   }
   3464 
   3465   dst.ClearForWrite(vform);
   3466   for (int i = 0; i < laneCount; ++i) {
   3467     dst.SetUint(vform, i, result[i]);
   3468   }
   3469   return dst;
   3470 }
   3471 
   3472 
   3473 LogicVRegister Simulator::trn2(VectorFormat vform,
   3474                                LogicVRegister dst,
   3475                                const LogicVRegister& src1,
   3476                                const LogicVRegister& src2) {
   3477   uint64_t result[16];
   3478   int laneCount = LaneCountFromFormat(vform);
   3479   int pairs = laneCount / 2;
   3480   for (int i = 0; i < pairs; ++i) {
   3481     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
   3482     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
   3483   }
   3484 
   3485   dst.ClearForWrite(vform);
   3486   for (int i = 0; i < laneCount; ++i) {
   3487     dst.SetUint(vform, i, result[i]);
   3488   }
   3489   return dst;
   3490 }
   3491 
   3492 
   3493 LogicVRegister Simulator::zip1(VectorFormat vform,
   3494                                LogicVRegister dst,
   3495                                const LogicVRegister& src1,
   3496                                const LogicVRegister& src2) {
   3497   uint64_t result[16];
   3498   int laneCount = LaneCountFromFormat(vform);
   3499   int pairs = laneCount / 2;
   3500   for (int i = 0; i < pairs; ++i) {
   3501     result[2 * i] = src1.Uint(vform, i);
   3502     result[(2 * i) + 1] = src2.Uint(vform, i);
   3503   }
   3504 
   3505   dst.ClearForWrite(vform);
   3506   for (int i = 0; i < laneCount; ++i) {
   3507     dst.SetUint(vform, i, result[i]);
   3508   }
   3509   return dst;
   3510 }
   3511 
   3512 
   3513 LogicVRegister Simulator::zip2(VectorFormat vform,
   3514                                LogicVRegister dst,
   3515                                const LogicVRegister& src1,
   3516                                const LogicVRegister& src2) {
   3517   uint64_t result[16];
   3518   int laneCount = LaneCountFromFormat(vform);
   3519   int pairs = laneCount / 2;
   3520   for (int i = 0; i < pairs; ++i) {
   3521     result[2 * i] = src1.Uint(vform, pairs + i);
   3522     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
   3523   }
   3524 
   3525   dst.ClearForWrite(vform);
   3526   for (int i = 0; i < laneCount; ++i) {
   3527     dst.SetUint(vform, i, result[i]);
   3528   }
   3529   return dst;
   3530 }
   3531 
   3532 
   3533 LogicVRegister Simulator::uzp1(VectorFormat vform,
   3534                                LogicVRegister dst,
   3535                                const LogicVRegister& src1,
   3536                                const LogicVRegister& src2) {
   3537   uint64_t result[32];
   3538   int laneCount = LaneCountFromFormat(vform);
   3539   for (int i = 0; i < laneCount; ++i) {
   3540     result[i] = src1.Uint(vform, i);
   3541     result[laneCount + i] = src2.Uint(vform, i);
   3542   }
   3543 
   3544   dst.ClearForWrite(vform);
   3545   for (int i = 0; i < laneCount; ++i) {
   3546     dst.SetUint(vform, i, result[2 * i]);
   3547   }
   3548   return dst;
   3549 }
   3550 
   3551 
   3552 LogicVRegister Simulator::uzp2(VectorFormat vform,
   3553                                LogicVRegister dst,
   3554                                const LogicVRegister& src1,
   3555                                const LogicVRegister& src2) {
   3556   uint64_t result[32];
   3557   int laneCount = LaneCountFromFormat(vform);
   3558   for (int i = 0; i < laneCount; ++i) {
   3559     result[i] = src1.Uint(vform, i);
   3560     result[laneCount + i] = src2.Uint(vform, i);
   3561   }
   3562 
   3563   dst.ClearForWrite(vform);
   3564   for (int i = 0; i < laneCount; ++i) {
   3565     dst.SetUint(vform, i, result[(2 * i) + 1]);
   3566   }
   3567   return dst;
   3568 }
   3569 
   3570 
   3571 template <typename T>
   3572 T Simulator::FPAdd(T op1, T op2) {
   3573   T result = FPProcessNaNs(op1, op2);
   3574   if (std::isnan(result)) return result;
   3575 
   3576   if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
   3577     // inf + -inf returns the default NaN.
   3578     FPProcessException();
   3579     return FPDefaultNaN<T>();
   3580   } else {
   3581     // Other cases should be handled by standard arithmetic.
   3582     return op1 + op2;
   3583   }
   3584 }
   3585 
   3586 
   3587 template <typename T>
   3588 T Simulator::FPSub(T op1, T op2) {
   3589   // NaNs should be handled elsewhere.
   3590   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3591 
   3592   if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
   3593     // inf - inf returns the default NaN.
   3594     FPProcessException();
   3595     return FPDefaultNaN<T>();
   3596   } else {
   3597     // Other cases should be handled by standard arithmetic.
   3598     return op1 - op2;
   3599   }
   3600 }
   3601 
   3602 
   3603 template <typename T>
   3604 T Simulator::FPMul(T op1, T op2) {
   3605   // NaNs should be handled elsewhere.
   3606   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3607 
   3608   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   3609     // inf * 0.0 returns the default NaN.
   3610     FPProcessException();
   3611     return FPDefaultNaN<T>();
   3612   } else {
   3613     // Other cases should be handled by standard arithmetic.
   3614     return op1 * op2;
   3615   }
   3616 }
   3617 
   3618 
   3619 template <typename T>
   3620 T Simulator::FPMulx(T op1, T op2) {
   3621   if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
   3622     // inf * 0.0 returns +/-2.0.
   3623     T two = 2.0;
   3624     return copysign(1.0, op1) * copysign(1.0, op2) * two;
   3625   }
   3626   return FPMul(op1, op2);
   3627 }
   3628 
   3629 
   3630 template <typename T>
   3631 T Simulator::FPMulAdd(T a, T op1, T op2) {
   3632   T result = FPProcessNaNs3(a, op1, op2);
   3633 
   3634   T sign_a = copysign(1.0, a);
   3635   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
   3636   bool isinf_prod = std::isinf(op1) || std::isinf(op2);
   3637   bool operation_generates_nan =
   3638       (std::isinf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
   3639       (std::isinf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
   3640       (std::isinf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
   3641 
   3642   if (std::isnan(result)) {
   3643     // Generated NaNs override quiet NaNs propagated from a.
   3644     if (operation_generates_nan && IsQuietNaN(a)) {
   3645       FPProcessException();
   3646       return FPDefaultNaN<T>();
   3647     } else {
   3648       return result;
   3649     }
   3650   }
   3651 
   3652   // If the operation would produce a NaN, return the default NaN.
   3653   if (operation_generates_nan) {
   3654     FPProcessException();
   3655     return FPDefaultNaN<T>();
   3656   }
   3657 
   3658   // Work around broken fma implementations for exact zero results: The sign of
   3659   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
   3660   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
   3661     return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
   3662   }
   3663 
   3664   result = FusedMultiplyAdd(op1, op2, a);
   3665   VIXL_ASSERT(!std::isnan(result));
   3666 
   3667   // Work around broken fma implementations for rounded zero results: If a is
   3668   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
   3669   if ((a == 0.0) && (result == 0.0)) {
   3670     return copysign(0.0, sign_prod);
   3671   }
   3672 
   3673   return result;
   3674 }
   3675 
   3676 
   3677 template <typename T>
   3678 T Simulator::FPDiv(T op1, T op2) {
   3679   // NaNs should be handled elsewhere.
   3680   VIXL_ASSERT(!std::isnan(op1) && !std::isnan(op2));
   3681 
   3682   if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
   3683     // inf / inf and 0.0 / 0.0 return the default NaN.
   3684     FPProcessException();
   3685     return FPDefaultNaN<T>();
   3686   } else {
   3687     if (op2 == 0.0) {
   3688       FPProcessException();
   3689       if (!std::isnan(op1)) {
   3690         double op1_sign = copysign(1.0, op1);
   3691         double op2_sign = copysign(1.0, op2);
   3692         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
   3693       }
   3694     }
   3695 
   3696     // Other cases should be handled by standard arithmetic.
   3697     return op1 / op2;
   3698   }
   3699 }
   3700 
   3701 
   3702 template <typename T>
   3703 T Simulator::FPSqrt(T op) {
   3704   if (std::isnan(op)) {
   3705     return FPProcessNaN(op);
   3706   } else if (op < 0.0) {
   3707     FPProcessException();
   3708     return FPDefaultNaN<T>();
   3709   } else {
   3710     return sqrt(op);
   3711   }
   3712 }
   3713 
   3714 
   3715 template <typename T>
   3716 T Simulator::FPMax(T a, T b) {
   3717   T result = FPProcessNaNs(a, b);
   3718   if (std::isnan(result)) return result;
   3719 
   3720   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
   3721     // a and b are zero, and the sign differs: return +0.0.
   3722     return 0.0;
   3723   } else {
   3724     return (a > b) ? a : b;
   3725   }
   3726 }
   3727 
   3728 
   3729 template <typename T>
   3730 T Simulator::FPMaxNM(T a, T b) {
   3731   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3732     a = kFP64NegativeInfinity;
   3733   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3734     b = kFP64NegativeInfinity;
   3735   }
   3736 
   3737   T result = FPProcessNaNs(a, b);
   3738   return std::isnan(result) ? result : FPMax(a, b);
   3739 }
   3740 
   3741 
   3742 template <typename T>
   3743 T Simulator::FPMin(T a, T b) {
   3744   T result = FPProcessNaNs(a, b);
   3745   if (std::isnan(result)) return result;
   3746 
   3747   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
   3748     // a and b are zero, and the sign differs: return -0.0.
   3749     return -0.0;
   3750   } else {
   3751     return (a < b) ? a : b;
   3752   }
   3753 }
   3754 
   3755 
   3756 template <typename T>
   3757 T Simulator::FPMinNM(T a, T b) {
   3758   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3759     a = kFP64PositiveInfinity;
   3760   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3761     b = kFP64PositiveInfinity;
   3762   }
   3763 
   3764   T result = FPProcessNaNs(a, b);
   3765   return std::isnan(result) ? result : FPMin(a, b);
   3766 }
   3767 
   3768 
   3769 template <typename T>
   3770 T Simulator::FPRecipStepFused(T op1, T op2) {
   3771   const T two = 2.0;
   3772   if ((std::isinf(op1) && (op2 == 0.0)) ||
   3773       ((op1 == 0.0) && (std::isinf(op2)))) {
   3774     return two;
   3775   } else if (std::isinf(op1) || std::isinf(op2)) {
   3776     // Return +inf if signs match, otherwise -inf.
   3777     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3778                                           : kFP64NegativeInfinity;
   3779   } else {
   3780     return FusedMultiplyAdd(op1, op2, two);
   3781   }
   3782 }
   3783 
   3784 
   3785 template <typename T>
   3786 T Simulator::FPRSqrtStepFused(T op1, T op2) {
   3787   const T one_point_five = 1.5;
   3788   const T two = 2.0;
   3789 
   3790   if ((std::isinf(op1) && (op2 == 0.0)) ||
   3791       ((op1 == 0.0) && (std::isinf(op2)))) {
   3792     return one_point_five;
   3793   } else if (std::isinf(op1) || std::isinf(op2)) {
   3794     // Return +inf if signs match, otherwise -inf.
   3795     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3796                                           : kFP64NegativeInfinity;
   3797   } else {
   3798     // The multiply-add-halve operation must be fully fused, so avoid interim
   3799     // rounding by checking which operand can be losslessly divided by two
   3800     // before doing the multiply-add.
   3801     if (std::isnormal(op1 / two)) {
   3802       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
   3803     } else if (std::isnormal(op2 / two)) {
   3804       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
   3805     } else {
   3806       // Neither operand is normal after halving: the result is dominated by
   3807       // the addition term, so just return that.
   3808       return one_point_five;
   3809     }
   3810   }
   3811 }
   3812 
   3813 
   3814 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
   3815   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
   3816       (value == kFP64NegativeInfinity)) {
   3817     return value;
   3818   } else if (std::isnan(value)) {
   3819     return FPProcessNaN(value);
   3820   }
   3821 
   3822   double int_result = std::floor(value);
   3823   double error = value - int_result;
   3824   switch (round_mode) {
   3825     case FPTieAway: {
   3826       // Take care of correctly handling the range ]-0.5, -0.0], which must
   3827       // yield -0.0.
   3828       if ((-0.5 < value) && (value < 0.0)) {
   3829         int_result = -0.0;
   3830 
   3831       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
   3832         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3833         // result is positive, round up.
   3834         int_result++;
   3835       }
   3836       break;
   3837     }
   3838     case FPTieEven: {
   3839       // Take care of correctly handling the range [-0.5, -0.0], which must
   3840       // yield -0.0.
   3841       if ((-0.5 <= value) && (value < 0.0)) {
   3842         int_result = -0.0;
   3843 
   3844         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   3845         // result is odd, round up.
   3846       } else if ((error > 0.5) ||
   3847                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
   3848         int_result++;
   3849       }
   3850       break;
   3851     }
   3852     case FPZero: {
   3853       // If value>0 then we take floor(value)
   3854       // otherwise, ceil(value).
   3855       if (value < 0) {
   3856         int_result = ceil(value);
   3857       }
   3858       break;
   3859     }
   3860     case FPNegativeInfinity: {
   3861       // We always use floor(value).
   3862       break;
   3863     }
   3864     case FPPositiveInfinity: {
   3865       // Take care of correctly handling the range ]-1.0, -0.0], which must
   3866       // yield -0.0.
   3867       if ((-1.0 < value) && (value < 0.0)) {
   3868         int_result = -0.0;
   3869 
   3870         // If the error is non-zero, round up.
   3871       } else if (error > 0.0) {
   3872         int_result++;
   3873       }
   3874       break;
   3875     }
   3876     default:
   3877       VIXL_UNIMPLEMENTED();
   3878   }
   3879   return int_result;
   3880 }
   3881 
   3882 
   3883 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
   3884   value = FPRoundInt(value, rmode);
   3885   if (value >= kWMaxInt) {
   3886     return kWMaxInt;
   3887   } else if (value < kWMinInt) {
   3888     return kWMinInt;
   3889   }
   3890   return std::isnan(value) ? 0 : static_cast<int32_t>(value);
   3891 }
   3892 
   3893 
   3894 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
   3895   value = FPRoundInt(value, rmode);
   3896   if (value >= kXMaxInt) {
   3897     return kXMaxInt;
   3898   } else if (value < kXMinInt) {
   3899     return kXMinInt;
   3900   }
   3901   return std::isnan(value) ? 0 : static_cast<int64_t>(value);
   3902 }
   3903 
   3904 
   3905 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
   3906   value = FPRoundInt(value, rmode);
   3907   if (value >= kWMaxUInt) {
   3908     return kWMaxUInt;
   3909   } else if (value < 0.0) {
   3910     return 0;
   3911   }
   3912   return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
   3913 }
   3914 
   3915 
   3916 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
   3917   value = FPRoundInt(value, rmode);
   3918   if (value >= kXMaxUInt) {
   3919     return kXMaxUInt;
   3920   } else if (value < 0.0) {
   3921     return 0;
   3922   }
   3923   return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
   3924 }
   3925 
   3926 
   3927 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
   3928   template <typename T>                                          \
   3929   LogicVRegister Simulator::FN(VectorFormat vform,               \
   3930                                LogicVRegister dst,               \
   3931                                const LogicVRegister& src1,       \
   3932                                const LogicVRegister& src2) {     \
   3933     dst.ClearForWrite(vform);                                    \
   3934     for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
   3935       T op1 = src1.Float<T>(i);                                  \
   3936       T op2 = src2.Float<T>(i);                                  \
   3937       T result;                                                  \
   3938       if (PROCNAN) {                                             \
   3939         result = FPProcessNaNs(op1, op2);                        \
   3940         if (!std::isnan(result)) {                               \
   3941           result = OP(op1, op2);                                 \
   3942         }                                                        \
   3943       } else {                                                   \
   3944         result = OP(op1, op2);                                   \
   3945       }                                                          \
   3946       dst.SetFloat(i, result);                                   \
   3947     }                                                            \
   3948     return dst;                                                  \
   3949   }                                                              \
   3950                                                                  \
   3951   LogicVRegister Simulator::FN(VectorFormat vform,               \
   3952                                LogicVRegister dst,               \
   3953                                const LogicVRegister& src1,       \
   3954                                const LogicVRegister& src2) {     \
   3955     if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {          \
   3956       FN<float>(vform, dst, src1, src2);                         \
   3957     } else {                                                     \
   3958       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
   3959       FN<double>(vform, dst, src1, src2);                        \
   3960     }                                                            \
   3961     return dst;                                                  \
   3962   }
   3963 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
   3964 #undef DEFINE_NEON_FP_VECTOR_OP
   3965 
   3966 
   3967 LogicVRegister Simulator::fnmul(VectorFormat vform,
   3968                                 LogicVRegister dst,
   3969                                 const LogicVRegister& src1,
   3970                                 const LogicVRegister& src2) {
   3971   SimVRegister temp;
   3972   LogicVRegister product = fmul(vform, temp, src1, src2);
   3973   return fneg(vform, dst, product);
   3974 }
   3975 
   3976 
   3977 template <typename T>
   3978 LogicVRegister Simulator::frecps(VectorFormat vform,
   3979                                  LogicVRegister dst,
   3980                                  const LogicVRegister& src1,
   3981                                  const LogicVRegister& src2) {
   3982   dst.ClearForWrite(vform);
   3983   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3984     T op1 = -src1.Float<T>(i);
   3985     T op2 = src2.Float<T>(i);
   3986     T result = FPProcessNaNs(op1, op2);
   3987     dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
   3988   }
   3989   return dst;
   3990 }
   3991 
   3992 
   3993 LogicVRegister Simulator::frecps(VectorFormat vform,
   3994                                  LogicVRegister dst,
   3995                                  const LogicVRegister& src1,
   3996                                  const LogicVRegister& src2) {
   3997   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   3998     frecps<float>(vform, dst, src1, src2);
   3999   } else {
   4000     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4001     frecps<double>(vform, dst, src1, src2);
   4002   }
   4003   return dst;
   4004 }
   4005 
   4006 
   4007 template <typename T>
   4008 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   4009                                   LogicVRegister dst,
   4010                                   const LogicVRegister& src1,
   4011                                   const LogicVRegister& src2) {
   4012   dst.ClearForWrite(vform);
   4013   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4014     T op1 = -src1.Float<T>(i);
   4015     T op2 = src2.Float<T>(i);
   4016     T result = FPProcessNaNs(op1, op2);
   4017     dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
   4018   }
   4019   return dst;
   4020 }
   4021 
   4022 
   4023 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   4024                                   LogicVRegister dst,
   4025                                   const LogicVRegister& src1,
   4026                                   const LogicVRegister& src2) {
   4027   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4028     frsqrts<float>(vform, dst, src1, src2);
   4029   } else {
   4030     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4031     frsqrts<double>(vform, dst, src1, src2);
   4032   }
   4033   return dst;
   4034 }
   4035 
   4036 
   4037 template <typename T>
   4038 LogicVRegister Simulator::fcmp(VectorFormat vform,
   4039                                LogicVRegister dst,
   4040                                const LogicVRegister& src1,
   4041                                const LogicVRegister& src2,
   4042                                Condition cond) {
   4043   dst.ClearForWrite(vform);
   4044   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4045     bool result = false;
   4046     T op1 = src1.Float<T>(i);
   4047     T op2 = src2.Float<T>(i);
   4048     T nan_result = FPProcessNaNs(op1, op2);
   4049     if (!std::isnan(nan_result)) {
   4050       switch (cond) {
   4051         case eq:
   4052           result = (op1 == op2);
   4053           break;
   4054         case ge:
   4055           result = (op1 >= op2);
   4056           break;
   4057         case gt:
   4058           result = (op1 > op2);
   4059           break;
   4060         case le:
   4061           result = (op1 <= op2);
   4062           break;
   4063         case lt:
   4064           result = (op1 < op2);
   4065           break;
   4066         default:
   4067           VIXL_UNREACHABLE();
   4068           break;
   4069       }
   4070     }
   4071     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
   4072   }
   4073   return dst;
   4074 }
   4075 
   4076 
   4077 LogicVRegister Simulator::fcmp(VectorFormat vform,
   4078                                LogicVRegister dst,
   4079                                const LogicVRegister& src1,
   4080                                const LogicVRegister& src2,
   4081                                Condition cond) {
   4082   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4083     fcmp<float>(vform, dst, src1, src2, cond);
   4084   } else {
   4085     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4086     fcmp<double>(vform, dst, src1, src2, cond);
   4087   }
   4088   return dst;
   4089 }
   4090 
   4091 
   4092 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
   4093                                     LogicVRegister dst,
   4094                                     const LogicVRegister& src,
   4095                                     Condition cond) {
   4096   SimVRegister temp;
   4097   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4098     LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
   4099     fcmp<float>(vform, dst, src, zero_reg, cond);
   4100   } else {
   4101     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4102     LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
   4103     fcmp<double>(vform, dst, src, zero_reg, cond);
   4104   }
   4105   return dst;
   4106 }
   4107 
   4108 
   4109 LogicVRegister Simulator::fabscmp(VectorFormat vform,
   4110                                   LogicVRegister dst,
   4111                                   const LogicVRegister& src1,
   4112                                   const LogicVRegister& src2,
   4113                                   Condition cond) {
   4114   SimVRegister temp1, temp2;
   4115   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4116     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
   4117     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
   4118     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
   4119   } else {
   4120     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4121     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
   4122     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
   4123     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
   4124   }
   4125   return dst;
   4126 }
   4127 
   4128 
   4129 template <typename T>
   4130 LogicVRegister Simulator::fmla(VectorFormat vform,
   4131                                LogicVRegister dst,
   4132                                const LogicVRegister& src1,
   4133                                const LogicVRegister& src2) {
   4134   dst.ClearForWrite(vform);
   4135   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4136     T op1 = src1.Float<T>(i);
   4137     T op2 = src2.Float<T>(i);
   4138     T acc = dst.Float<T>(i);
   4139     T result = FPMulAdd(acc, op1, op2);
   4140     dst.SetFloat(i, result);
   4141   }
   4142   return dst;
   4143 }
   4144 
   4145 
   4146 LogicVRegister Simulator::fmla(VectorFormat vform,
   4147                                LogicVRegister dst,
   4148                                const LogicVRegister& src1,
   4149                                const LogicVRegister& src2) {
   4150   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4151     fmla<float>(vform, dst, src1, src2);
   4152   } else {
   4153     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4154     fmla<double>(vform, dst, src1, src2);
   4155   }
   4156   return dst;
   4157 }
   4158 
   4159 
   4160 template <typename T>
   4161 LogicVRegister Simulator::fmls(VectorFormat vform,
   4162                                LogicVRegister dst,
   4163                                const LogicVRegister& src1,
   4164                                const LogicVRegister& src2) {
   4165   dst.ClearForWrite(vform);
   4166   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4167     T op1 = -src1.Float<T>(i);
   4168     T op2 = src2.Float<T>(i);
   4169     T acc = dst.Float<T>(i);
   4170     T result = FPMulAdd(acc, op1, op2);
   4171     dst.SetFloat(i, result);
   4172   }
   4173   return dst;
   4174 }
   4175 
   4176 
   4177 LogicVRegister Simulator::fmls(VectorFormat vform,
   4178                                LogicVRegister dst,
   4179                                const LogicVRegister& src1,
   4180                                const LogicVRegister& src2) {
   4181   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4182     fmls<float>(vform, dst, src1, src2);
   4183   } else {
   4184     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4185     fmls<double>(vform, dst, src1, src2);
   4186   }
   4187   return dst;
   4188 }
   4189 
   4190 
   4191 template <typename T>
   4192 LogicVRegister Simulator::fneg(VectorFormat vform,
   4193                                LogicVRegister dst,
   4194                                const LogicVRegister& src) {
   4195   dst.ClearForWrite(vform);
   4196   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4197     T op = src.Float<T>(i);
   4198     op = -op;
   4199     dst.SetFloat(i, op);
   4200   }
   4201   return dst;
   4202 }
   4203 
   4204 
   4205 LogicVRegister Simulator::fneg(VectorFormat vform,
   4206                                LogicVRegister dst,
   4207                                const LogicVRegister& src) {
   4208   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4209     fneg<float>(vform, dst, src);
   4210   } else {
   4211     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4212     fneg<double>(vform, dst, src);
   4213   }
   4214   return dst;
   4215 }
   4216 
   4217 
   4218 template <typename T>
   4219 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4220                                 LogicVRegister dst,
   4221                                 const LogicVRegister& src) {
   4222   dst.ClearForWrite(vform);
   4223   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4224     T op = src.Float<T>(i);
   4225     if (copysign(1.0, op) < 0.0) {
   4226       op = -op;
   4227     }
   4228     dst.SetFloat(i, op);
   4229   }
   4230   return dst;
   4231 }
   4232 
   4233 
   4234 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4235                                 LogicVRegister dst,
   4236                                 const LogicVRegister& src) {
   4237   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4238     fabs_<float>(vform, dst, src);
   4239   } else {
   4240     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4241     fabs_<double>(vform, dst, src);
   4242   }
   4243   return dst;
   4244 }
   4245 
   4246 
   4247 LogicVRegister Simulator::fabd(VectorFormat vform,
   4248                                LogicVRegister dst,
   4249                                const LogicVRegister& src1,
   4250                                const LogicVRegister& src2) {
   4251   SimVRegister temp;
   4252   fsub(vform, temp, src1, src2);
   4253   fabs_(vform, dst, temp);
   4254   return dst;
   4255 }
   4256 
   4257 
   4258 LogicVRegister Simulator::fsqrt(VectorFormat vform,
   4259                                 LogicVRegister dst,
   4260                                 const LogicVRegister& src) {
   4261   dst.ClearForWrite(vform);
   4262   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4263     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4264       float result = FPSqrt(src.Float<float>(i));
   4265       dst.SetFloat(i, result);
   4266     }
   4267   } else {
   4268     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4269     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4270       double result = FPSqrt(src.Float<double>(i));
   4271       dst.SetFloat(i, result);
   4272     }
   4273   }
   4274   return dst;
   4275 }
   4276 
   4277 
   4278 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                           \
   4279   LogicVRegister Simulator::FNP(VectorFormat vform,                   \
   4280                                 LogicVRegister dst,                   \
   4281                                 const LogicVRegister& src1,           \
   4282                                 const LogicVRegister& src2) {         \
   4283     SimVRegister temp1, temp2;                                        \
   4284     uzp1(vform, temp1, src1, src2);                                   \
   4285     uzp2(vform, temp2, src1, src2);                                   \
   4286     FN(vform, dst, temp1, temp2);                                     \
   4287     return dst;                                                       \
   4288   }                                                                   \
   4289                                                                       \
   4290   LogicVRegister Simulator::FNP(VectorFormat vform,                   \
   4291                                 LogicVRegister dst,                   \
   4292                                 const LogicVRegister& src) {          \
   4293     if (vform == kFormatS) {                                          \
   4294       float result = OP(src.Float<float>(0), src.Float<float>(1));    \
   4295       dst.SetFloat(0, result);                                        \
   4296     } else {                                                          \
   4297       VIXL_ASSERT(vform == kFormatD);                                 \
   4298       double result = OP(src.Float<double>(0), src.Float<double>(1)); \
   4299       dst.SetFloat(0, result);                                        \
   4300     }                                                                 \
   4301     dst.ClearForWrite(vform);                                         \
   4302     return dst;                                                       \
   4303   }
   4304 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
   4305 #undef DEFINE_NEON_FP_PAIR_OP
   4306 
   4307 
   4308 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
   4309                                    LogicVRegister dst,
   4310                                    const LogicVRegister& src,
   4311                                    FPMinMaxOp Op) {
   4312   VIXL_ASSERT(vform == kFormat4S);
   4313   USE(vform);
   4314   float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
   4315   float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
   4316   float result = (this->*Op)(result1, result2);
   4317   dst.ClearForWrite(kFormatS);
   4318   dst.SetFloat<float>(0, result);
   4319   return dst;
   4320 }
   4321 
   4322 
   4323 LogicVRegister Simulator::fmaxv(VectorFormat vform,
   4324                                 LogicVRegister dst,
   4325                                 const LogicVRegister& src) {
   4326   return fminmaxv(vform, dst, src, &Simulator::FPMax);
   4327 }
   4328 
   4329 
   4330 LogicVRegister Simulator::fminv(VectorFormat vform,
   4331                                 LogicVRegister dst,
   4332                                 const LogicVRegister& src) {
   4333   return fminmaxv(vform, dst, src, &Simulator::FPMin);
   4334 }
   4335 
   4336 
   4337 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
   4338                                   LogicVRegister dst,
   4339                                   const LogicVRegister& src) {
   4340   return fminmaxv(vform, dst, src, &Simulator::FPMaxNM);
   4341 }
   4342 
   4343 
   4344 LogicVRegister Simulator::fminnmv(VectorFormat vform,
   4345                                   LogicVRegister dst,
   4346                                   const LogicVRegister& src) {
   4347   return fminmaxv(vform, dst, src, &Simulator::FPMinNM);
   4348 }
   4349 
   4350 
   4351 LogicVRegister Simulator::fmul(VectorFormat vform,
   4352                                LogicVRegister dst,
   4353                                const LogicVRegister& src1,
   4354                                const LogicVRegister& src2,
   4355                                int index) {
   4356   dst.ClearForWrite(vform);
   4357   SimVRegister temp;
   4358   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4359     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4360     fmul<float>(vform, dst, src1, index_reg);
   4361 
   4362   } else {
   4363     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4364     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4365     fmul<double>(vform, dst, src1, index_reg);
   4366   }
   4367   return dst;
   4368 }
   4369 
   4370 
   4371 LogicVRegister Simulator::fmla(VectorFormat vform,
   4372                                LogicVRegister dst,
   4373                                const LogicVRegister& src1,
   4374                                const LogicVRegister& src2,
   4375                                int index) {
   4376   dst.ClearForWrite(vform);
   4377   SimVRegister temp;
   4378   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4379     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4380     fmla<float>(vform, dst, src1, index_reg);
   4381 
   4382   } else {
   4383     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4384     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4385     fmla<double>(vform, dst, src1, index_reg);
   4386   }
   4387   return dst;
   4388 }
   4389 
   4390 
   4391 LogicVRegister Simulator::fmls(VectorFormat vform,
   4392                                LogicVRegister dst,
   4393                                const LogicVRegister& src1,
   4394                                const LogicVRegister& src2,
   4395                                int index) {
   4396   dst.ClearForWrite(vform);
   4397   SimVRegister temp;
   4398   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4399     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4400     fmls<float>(vform, dst, src1, index_reg);
   4401 
   4402   } else {
   4403     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4404     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4405     fmls<double>(vform, dst, src1, index_reg);
   4406   }
   4407   return dst;
   4408 }
   4409 
   4410 
   4411 LogicVRegister Simulator::fmulx(VectorFormat vform,
   4412                                 LogicVRegister dst,
   4413                                 const LogicVRegister& src1,
   4414                                 const LogicVRegister& src2,
   4415                                 int index) {
   4416   dst.ClearForWrite(vform);
   4417   SimVRegister temp;
   4418   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4419     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4420     fmulx<float>(vform, dst, src1, index_reg);
   4421 
   4422   } else {
   4423     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4424     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4425     fmulx<double>(vform, dst, src1, index_reg);
   4426   }
   4427   return dst;
   4428 }
   4429 
   4430 
   4431 LogicVRegister Simulator::frint(VectorFormat vform,
   4432                                 LogicVRegister dst,
   4433                                 const LogicVRegister& src,
   4434                                 FPRounding rounding_mode,
   4435                                 bool inexact_exception) {
   4436   dst.ClearForWrite(vform);
   4437   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4438     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4439       float input = src.Float<float>(i);
   4440       float rounded = FPRoundInt(input, rounding_mode);
   4441       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   4442         FPProcessException();
   4443       }
   4444       dst.SetFloat<float>(i, rounded);
   4445     }
   4446   } else {
   4447     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4448     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4449       double input = src.Float<double>(i);
   4450       double rounded = FPRoundInt(input, rounding_mode);
   4451       if (inexact_exception && !std::isnan(input) && (input != rounded)) {
   4452         FPProcessException();
   4453       }
   4454       dst.SetFloat<double>(i, rounded);
   4455     }
   4456   }
   4457   return dst;
   4458 }
   4459 
   4460 
   4461 LogicVRegister Simulator::fcvts(VectorFormat vform,
   4462                                 LogicVRegister dst,
   4463                                 const LogicVRegister& src,
   4464                                 FPRounding rounding_mode,
   4465                                 int fbits) {
   4466   dst.ClearForWrite(vform);
   4467   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4468     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4469       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4470       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
   4471     }
   4472   } else {
   4473     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4474     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4475       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4476       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
   4477     }
   4478   }
   4479   return dst;
   4480 }
   4481 
   4482 
   4483 LogicVRegister Simulator::fcvtu(VectorFormat vform,
   4484                                 LogicVRegister dst,
   4485                                 const LogicVRegister& src,
   4486                                 FPRounding rounding_mode,
   4487                                 int fbits) {
   4488   dst.ClearForWrite(vform);
   4489   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4490     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4491       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4492       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
   4493     }
   4494   } else {
   4495     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4496     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4497       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4498       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
   4499     }
   4500   }
   4501   return dst;
   4502 }
   4503 
   4504 
   4505 LogicVRegister Simulator::fcvtl(VectorFormat vform,
   4506                                 LogicVRegister dst,
   4507                                 const LogicVRegister& src) {
   4508   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4509     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4510       dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
   4511     }
   4512   } else {
   4513     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4514     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4515       dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
   4516     }
   4517   }
   4518   return dst;
   4519 }
   4520 
   4521 
   4522 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
   4523                                  LogicVRegister dst,
   4524                                  const LogicVRegister& src) {
   4525   int lane_count = LaneCountFromFormat(vform);
   4526   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4527     for (int i = 0; i < lane_count; i++) {
   4528       dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
   4529     }
   4530   } else {
   4531     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4532     for (int i = 0; i < lane_count; i++) {
   4533       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
   4534     }
   4535   }
   4536   return dst;
   4537 }
   4538 
   4539 
   4540 LogicVRegister Simulator::fcvtn(VectorFormat vform,
   4541                                 LogicVRegister dst,
   4542                                 const LogicVRegister& src) {
   4543   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4544     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4545       dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
   4546     }
   4547   } else {
   4548     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4549     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4550       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
   4551     }
   4552   }
   4553   return dst;
   4554 }
   4555 
   4556 
   4557 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
   4558                                  LogicVRegister dst,
   4559                                  const LogicVRegister& src) {
   4560   int lane_count = LaneCountFromFormat(vform) / 2;
   4561   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4562     for (int i = lane_count - 1; i >= 0; i--) {
   4563       dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
   4564     }
   4565   } else {
   4566     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4567     for (int i = lane_count - 1; i >= 0; i--) {
   4568       dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
   4569     }
   4570   }
   4571   return dst;
   4572 }
   4573 
   4574 
   4575 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
   4576                                  LogicVRegister dst,
   4577                                  const LogicVRegister& src) {
   4578   dst.ClearForWrite(vform);
   4579   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4580   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4581     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
   4582   }
   4583   return dst;
   4584 }
   4585 
   4586 
   4587 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
   4588                                   LogicVRegister dst,
   4589                                   const LogicVRegister& src) {
   4590   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4591   int lane_count = LaneCountFromFormat(vform) / 2;
   4592   for (int i = lane_count - 1; i >= 0; i--) {
   4593     dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
   4594   }
   4595   return dst;
   4596 }
   4597 
   4598 
   4599 // Based on reference C function recip_sqrt_estimate from ARM ARM.
   4600 double Simulator::recip_sqrt_estimate(double a) {
   4601   int q0, q1, s;
   4602   double r;
   4603   if (a < 0.5) {
   4604     q0 = static_cast<int>(a * 512.0);
   4605     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
   4606   } else {
   4607     q1 = static_cast<int>(a * 256.0);
   4608     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
   4609   }
   4610   s = static_cast<int>(256.0 * r + 0.5);
   4611   return static_cast<double>(s) / 256.0;
   4612 }
   4613 
   4614 
   4615 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
   4616   return ExtractUnsignedBitfield64(start_bit, end_bit, val);
   4617 }
   4618 
   4619 
   4620 template <typename T>
   4621 T Simulator::FPRecipSqrtEstimate(T op) {
   4622   if (std::isnan(op)) {
   4623     return FPProcessNaN(op);
   4624   } else if (op == 0.0) {
   4625     if (copysign(1.0, op) < 0.0) {
   4626       return kFP64NegativeInfinity;
   4627     } else {
   4628       return kFP64PositiveInfinity;
   4629     }
   4630   } else if (copysign(1.0, op) < 0.0) {
   4631     FPProcessException();
   4632     return FPDefaultNaN<T>();
   4633   } else if (std::isinf(op)) {
   4634     return 0.0;
   4635   } else {
   4636     uint64_t fraction;
   4637     int exp, result_exp;
   4638 
   4639     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4640       exp = FloatExp(op);
   4641       fraction = FloatMantissa(op);
   4642       fraction <<= 29;
   4643     } else {
   4644       exp = DoubleExp(op);
   4645       fraction = DoubleMantissa(op);
   4646     }
   4647 
   4648     if (exp == 0) {
   4649       while (Bits(fraction, 51, 51) == 0) {
   4650         fraction = Bits(fraction, 50, 0) << 1;
   4651         exp -= 1;
   4652       }
   4653       fraction = Bits(fraction, 50, 0) << 1;
   4654     }
   4655 
   4656     double scaled;
   4657     if (Bits(exp, 0, 0) == 0) {
   4658       scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
   4659     } else {
   4660       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
   4661     }
   4662 
   4663     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4664       result_exp = (380 - exp) / 2;
   4665     } else {
   4666       result_exp = (3068 - exp) / 2;
   4667     }
   4668 
   4669     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
   4670 
   4671     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4672       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   4673       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
   4674       return FloatPack(0, exp_bits, est_bits);
   4675     } else {
   4676       return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
   4677     }
   4678   }
   4679 }
   4680 
   4681 
   4682 LogicVRegister Simulator::frsqrte(VectorFormat vform,
   4683                                   LogicVRegister dst,
   4684                                   const LogicVRegister& src) {
   4685   dst.ClearForWrite(vform);
   4686   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4687     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4688       float input = src.Float<float>(i);
   4689       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
   4690     }
   4691   } else {
   4692     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4693     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4694       double input = src.Float<double>(i);
   4695       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
   4696     }
   4697   }
   4698   return dst;
   4699 }
   4700 
   4701 template <typename T>
   4702 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
   4703   uint32_t sign;
   4704 
   4705   if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4706     sign = FloatSign(op);
   4707   } else {
   4708     sign = DoubleSign(op);
   4709   }
   4710 
   4711   if (std::isnan(op)) {
   4712     return FPProcessNaN(op);
   4713   } else if (std::isinf(op)) {
   4714     return (sign == 1) ? -0.0 : 0.0;
   4715   } else if (op == 0.0) {
   4716     FPProcessException();  // FPExc_DivideByZero exception.
   4717     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   4718   } else if (((sizeof(T) == sizeof(float)) &&  // NOLINT(runtime/sizeof)
   4719               (std::fabs(op) < std::pow(2.0, -128.0))) ||
   4720              ((sizeof(T) == sizeof(double)) &&  // NOLINT(runtime/sizeof)
   4721               (std::fabs(op) < std::pow(2.0, -1024.0)))) {
   4722     bool overflow_to_inf = false;
   4723     switch (rounding) {
   4724       case FPTieEven:
   4725         overflow_to_inf = true;
   4726         break;
   4727       case FPPositiveInfinity:
   4728         overflow_to_inf = (sign == 0);
   4729         break;
   4730       case FPNegativeInfinity:
   4731         overflow_to_inf = (sign == 1);
   4732         break;
   4733       case FPZero:
   4734         overflow_to_inf = false;
   4735         break;
   4736       default:
   4737         break;
   4738     }
   4739     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
   4740     if (overflow_to_inf) {
   4741       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   4742     } else {
   4743       // Return FPMaxNormal(sign).
   4744       if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4745         return FloatPack(sign, 0xfe, 0x07fffff);
   4746       } else {
   4747         return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
   4748       }
   4749     }
   4750   } else {
   4751     uint64_t fraction;
   4752     int exp, result_exp;
   4753     uint32_t sign;
   4754 
   4755     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4756       sign = FloatSign(op);
   4757       exp = FloatExp(op);
   4758       fraction = FloatMantissa(op);
   4759       fraction <<= 29;
   4760     } else {
   4761       sign = DoubleSign(op);
   4762       exp = DoubleExp(op);
   4763       fraction = DoubleMantissa(op);
   4764     }
   4765 
   4766     if (exp == 0) {
   4767       if (Bits(fraction, 51, 51) == 0) {
   4768         exp -= 1;
   4769         fraction = Bits(fraction, 49, 0) << 2;
   4770       } else {
   4771         fraction = Bits(fraction, 50, 0) << 1;
   4772       }
   4773     }
   4774 
   4775     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
   4776 
   4777     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4778       result_exp = (253 - exp);        // In range 253-254 = -1 to 253+1 = 254.
   4779     } else {
   4780       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
   4781     }
   4782 
   4783     double estimate = recip_estimate(scaled);
   4784 
   4785     fraction = DoubleMantissa(estimate);
   4786     if (result_exp == 0) {
   4787       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
   4788     } else if (result_exp == -1) {
   4789       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
   4790       result_exp = 0;
   4791     }
   4792     if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4793       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   4794       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
   4795       return FloatPack(sign, exp_bits, frac_bits);
   4796     } else {
   4797       return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
   4798     }
   4799   }
   4800 }
   4801 
   4802 
   4803 LogicVRegister Simulator::frecpe(VectorFormat vform,
   4804                                  LogicVRegister dst,
   4805                                  const LogicVRegister& src,
   4806                                  FPRounding round) {
   4807   dst.ClearForWrite(vform);
   4808   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4809     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4810       float input = src.Float<float>(i);
   4811       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
   4812     }
   4813   } else {
   4814     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4815     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4816       double input = src.Float<double>(i);
   4817       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
   4818     }
   4819   }
   4820   return dst;
   4821 }
   4822 
   4823 
   4824 LogicVRegister Simulator::ursqrte(VectorFormat vform,
   4825                                   LogicVRegister dst,
   4826                                   const LogicVRegister& src) {
   4827   dst.ClearForWrite(vform);
   4828   uint64_t operand;
   4829   uint32_t result;
   4830   double dp_operand, dp_result;
   4831   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4832     operand = src.Uint(vform, i);
   4833     if (operand <= 0x3FFFFFFF) {
   4834       result = 0xFFFFFFFF;
   4835     } else {
   4836       dp_operand = operand * std::pow(2.0, -32);
   4837       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
   4838       result = static_cast<uint32_t>(dp_result);
   4839     }
   4840     dst.SetUint(vform, i, result);
   4841   }
   4842   return dst;
   4843 }
   4844 
   4845 
   4846 // Based on reference C function recip_estimate from ARM ARM.
   4847 double Simulator::recip_estimate(double a) {
   4848   int q, s;
   4849   double r;
   4850   q = static_cast<int>(a * 512.0);
   4851   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
   4852   s = static_cast<int>(256.0 * r + 0.5);
   4853   return static_cast<double>(s) / 256.0;
   4854 }
   4855 
   4856 
   4857 LogicVRegister Simulator::urecpe(VectorFormat vform,
   4858                                  LogicVRegister dst,
   4859                                  const LogicVRegister& src) {
   4860   dst.ClearForWrite(vform);
   4861   uint64_t operand;
   4862   uint32_t result;
   4863   double dp_operand, dp_result;
   4864   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4865     operand = src.Uint(vform, i);
   4866     if (operand <= 0x7FFFFFFF) {
   4867       result = 0xFFFFFFFF;
   4868     } else {
   4869       dp_operand = operand * std::pow(2.0, -32);
   4870       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
   4871       result = static_cast<uint32_t>(dp_result);
   4872     }
   4873     dst.SetUint(vform, i, result);
   4874   }
   4875   return dst;
   4876 }
   4877 
   4878 template <typename T>
   4879 LogicVRegister Simulator::frecpx(VectorFormat vform,
   4880                                  LogicVRegister dst,
   4881                                  const LogicVRegister& src) {
   4882   dst.ClearForWrite(vform);
   4883   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4884     T op = src.Float<T>(i);
   4885     T result;
   4886     if (std::isnan(op)) {
   4887       result = FPProcessNaN(op);
   4888     } else {
   4889       int exp;
   4890       uint32_t sign;
   4891       if (sizeof(T) == sizeof(float)) {  // NOLINT(runtime/sizeof)
   4892         sign = FloatSign(op);
   4893         exp = FloatExp(op);
   4894         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
   4895         result = FloatPack(sign, exp, 0);
   4896       } else {
   4897         sign = DoubleSign(op);
   4898         exp = DoubleExp(op);
   4899         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
   4900         result = DoublePack(sign, exp, 0);
   4901       }
   4902     }
   4903     dst.SetFloat(i, result);
   4904   }
   4905   return dst;
   4906 }
   4907 
   4908 
   4909 LogicVRegister Simulator::frecpx(VectorFormat vform,
   4910                                  LogicVRegister dst,
   4911                                  const LogicVRegister& src) {
   4912   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4913     frecpx<float>(vform, dst, src);
   4914   } else {
   4915     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4916     frecpx<double>(vform, dst, src);
   4917   }
   4918   return dst;
   4919 }
   4920 
   4921 LogicVRegister Simulator::scvtf(VectorFormat vform,
   4922                                 LogicVRegister dst,
   4923                                 const LogicVRegister& src,
   4924                                 int fbits,
   4925                                 FPRounding round) {
   4926   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4927     if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4928       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
   4929       dst.SetFloat<float>(i, result);
   4930     } else {
   4931       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4932       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
   4933       dst.SetFloat<double>(i, result);
   4934     }
   4935   }
   4936   return dst;
   4937 }
   4938 
   4939 
   4940 LogicVRegister Simulator::ucvtf(VectorFormat vform,
   4941                                 LogicVRegister dst,
   4942                                 const LogicVRegister& src,
   4943                                 int fbits,
   4944                                 FPRounding round) {
   4945   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4946     if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4947       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
   4948       dst.SetFloat<float>(i, result);
   4949     } else {
   4950       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4951       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
   4952       dst.SetFloat<double>(i, result);
   4953     }
   4954   }
   4955   return dst;
   4956 }
   4957 
   4958 
   4959 }  // namespace aarch64
   4960 }  // namespace vixl
   4961 
   4962 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
   4963