Home | History | Annotate | Download | only in aarch64
      1 // Copyright 2015, VIXL authors
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are met:
      6 //
      7 //   * Redistributions of source code must retain the above copyright notice,
      8 //     this list of conditions and the following disclaimer.
      9 //   * Redistributions in binary form must reproduce the above copyright notice,
     10 //     this list of conditions and the following disclaimer in the documentation
     11 //     and/or other materials provided with the distribution.
     12 //   * Neither the name of ARM Limited nor the names of its contributors may be
     13 //     used to endorse or promote products derived from this software without
     14 //     specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
     17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
     20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
     24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26 
     27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
     28 
     29 #include <cmath>
     30 
     31 #include "simulator-aarch64.h"
     32 
     33 namespace vixl {
     34 namespace aarch64 {
     35 
     36 using vixl::internal::SimFloat16;
     37 
     38 template <typename T>
     39 bool IsFloat64() {
     40   return false;
     41 }
     42 template <>
     43 bool IsFloat64<double>() {
     44   return true;
     45 }
     46 
     47 template <typename T>
     48 bool IsFloat32() {
     49   return false;
     50 }
     51 template <>
     52 bool IsFloat32<float>() {
     53   return true;
     54 }
     55 
     56 template <typename T>
     57 bool IsFloat16() {
     58   return false;
     59 }
     60 template <>
     61 bool IsFloat16<Float16>() {
     62   return true;
     63 }
     64 template <>
     65 bool IsFloat16<SimFloat16>() {
     66   return true;
     67 }
     68 
     69 template <>
     70 double Simulator::FPDefaultNaN<double>() {
     71   return kFP64DefaultNaN;
     72 }
     73 
     74 
     75 template <>
     76 float Simulator::FPDefaultNaN<float>() {
     77   return kFP32DefaultNaN;
     78 }
     79 
     80 
     81 template <>
     82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
     83   return SimFloat16(kFP16DefaultNaN);
     84 }
     85 
     86 
     87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
     88   if (src >= 0) {
     89     return UFixedToDouble(src, fbits, round);
     90   } else if (src == INT64_MIN) {
     91     return -UFixedToDouble(src, fbits, round);
     92   } else {
     93     return -UFixedToDouble(-src, fbits, round);
     94   }
     95 }
     96 
     97 
     98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
     99   // An input of 0 is a special case because the result is effectively
    100   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
    101   if (src == 0) {
    102     return 0.0;
    103   }
    104 
    105   // Calculate the exponent. The highest significant bit will have the value
    106   // 2^exponent.
    107   const int highest_significant_bit = 63 - CountLeadingZeros(src);
    108   const int64_t exponent = highest_significant_bit - fbits;
    109 
    110   return FPRoundToDouble(0, exponent, src, round);
    111 }
    112 
    113 
    114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
    115   if (src >= 0) {
    116     return UFixedToFloat(src, fbits, round);
    117   } else if (src == INT64_MIN) {
    118     return -UFixedToFloat(src, fbits, round);
    119   } else {
    120     return -UFixedToFloat(-src, fbits, round);
    121   }
    122 }
    123 
    124 
    125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
    126   // An input of 0 is a special case because the result is effectively
    127   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
    128   if (src == 0) {
    129     return 0.0f;
    130   }
    131 
    132   // Calculate the exponent. The highest significant bit will have the value
    133   // 2^exponent.
    134   const int highest_significant_bit = 63 - CountLeadingZeros(src);
    135   const int32_t exponent = highest_significant_bit - fbits;
    136 
    137   return FPRoundToFloat(0, exponent, src, round);
    138 }
    139 
    140 
    141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
    142   if (src >= 0) {
    143     return UFixedToFloat16(src, fbits, round);
    144   } else if (src == INT64_MIN) {
    145     return -UFixedToFloat16(src, fbits, round);
    146   } else {
    147     return -UFixedToFloat16(-src, fbits, round);
    148   }
    149 }
    150 
    151 
    152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
    153                                       int fbits,
    154                                       FPRounding round) {
    155   // An input of 0 is a special case because the result is effectively
    156   // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
    157   if (src == 0) {
    158     return 0.0f;
    159   }
    160 
    161   // Calculate the exponent. The highest significant bit will have the value
    162   // 2^exponent.
    163   const int highest_significant_bit = 63 - CountLeadingZeros(src);
    164   const int16_t exponent = highest_significant_bit - fbits;
    165 
    166   return FPRoundToFloat16(0, exponent, src, round);
    167 }
    168 
    169 
    170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
    171   dst.ClearForWrite(vform);
    172   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    173     dst.ReadUintFromMem(vform, i, addr);
    174     addr += LaneSizeInBytesFromFormat(vform);
    175   }
    176 }
    177 
    178 
    179 void Simulator::ld1(VectorFormat vform,
    180                     LogicVRegister dst,
    181                     int index,
    182                     uint64_t addr) {
    183   dst.ReadUintFromMem(vform, index, addr);
    184 }
    185 
    186 
    187 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
    188   dst.ClearForWrite(vform);
    189   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    190     dst.ReadUintFromMem(vform, i, addr);
    191   }
    192 }
    193 
    194 
    195 void Simulator::ld2(VectorFormat vform,
    196                     LogicVRegister dst1,
    197                     LogicVRegister dst2,
    198                     uint64_t addr1) {
    199   dst1.ClearForWrite(vform);
    200   dst2.ClearForWrite(vform);
    201   int esize = LaneSizeInBytesFromFormat(vform);
    202   uint64_t addr2 = addr1 + esize;
    203   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    204     dst1.ReadUintFromMem(vform, i, addr1);
    205     dst2.ReadUintFromMem(vform, i, addr2);
    206     addr1 += 2 * esize;
    207     addr2 += 2 * esize;
    208   }
    209 }
    210 
    211 
    212 void Simulator::ld2(VectorFormat vform,
    213                     LogicVRegister dst1,
    214                     LogicVRegister dst2,
    215                     int index,
    216                     uint64_t addr1) {
    217   dst1.ClearForWrite(vform);
    218   dst2.ClearForWrite(vform);
    219   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    220   dst1.ReadUintFromMem(vform, index, addr1);
    221   dst2.ReadUintFromMem(vform, index, addr2);
    222 }
    223 
    224 
    225 void Simulator::ld2r(VectorFormat vform,
    226                      LogicVRegister dst1,
    227                      LogicVRegister dst2,
    228                      uint64_t addr) {
    229   dst1.ClearForWrite(vform);
    230   dst2.ClearForWrite(vform);
    231   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    232   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    233     dst1.ReadUintFromMem(vform, i, addr);
    234     dst2.ReadUintFromMem(vform, i, addr2);
    235   }
    236 }
    237 
    238 
    239 void Simulator::ld3(VectorFormat vform,
    240                     LogicVRegister dst1,
    241                     LogicVRegister dst2,
    242                     LogicVRegister dst3,
    243                     uint64_t addr1) {
    244   dst1.ClearForWrite(vform);
    245   dst2.ClearForWrite(vform);
    246   dst3.ClearForWrite(vform);
    247   int esize = LaneSizeInBytesFromFormat(vform);
    248   uint64_t addr2 = addr1 + esize;
    249   uint64_t addr3 = addr2 + esize;
    250   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    251     dst1.ReadUintFromMem(vform, i, addr1);
    252     dst2.ReadUintFromMem(vform, i, addr2);
    253     dst3.ReadUintFromMem(vform, i, addr3);
    254     addr1 += 3 * esize;
    255     addr2 += 3 * esize;
    256     addr3 += 3 * esize;
    257   }
    258 }
    259 
    260 
    261 void Simulator::ld3(VectorFormat vform,
    262                     LogicVRegister dst1,
    263                     LogicVRegister dst2,
    264                     LogicVRegister dst3,
    265                     int index,
    266                     uint64_t addr1) {
    267   dst1.ClearForWrite(vform);
    268   dst2.ClearForWrite(vform);
    269   dst3.ClearForWrite(vform);
    270   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    271   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    272   dst1.ReadUintFromMem(vform, index, addr1);
    273   dst2.ReadUintFromMem(vform, index, addr2);
    274   dst3.ReadUintFromMem(vform, index, addr3);
    275 }
    276 
    277 
    278 void Simulator::ld3r(VectorFormat vform,
    279                      LogicVRegister dst1,
    280                      LogicVRegister dst2,
    281                      LogicVRegister dst3,
    282                      uint64_t addr) {
    283   dst1.ClearForWrite(vform);
    284   dst2.ClearForWrite(vform);
    285   dst3.ClearForWrite(vform);
    286   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    287   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    288   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    289     dst1.ReadUintFromMem(vform, i, addr);
    290     dst2.ReadUintFromMem(vform, i, addr2);
    291     dst3.ReadUintFromMem(vform, i, addr3);
    292   }
    293 }
    294 
    295 
    296 void Simulator::ld4(VectorFormat vform,
    297                     LogicVRegister dst1,
    298                     LogicVRegister dst2,
    299                     LogicVRegister dst3,
    300                     LogicVRegister dst4,
    301                     uint64_t addr1) {
    302   dst1.ClearForWrite(vform);
    303   dst2.ClearForWrite(vform);
    304   dst3.ClearForWrite(vform);
    305   dst4.ClearForWrite(vform);
    306   int esize = LaneSizeInBytesFromFormat(vform);
    307   uint64_t addr2 = addr1 + esize;
    308   uint64_t addr3 = addr2 + esize;
    309   uint64_t addr4 = addr3 + esize;
    310   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    311     dst1.ReadUintFromMem(vform, i, addr1);
    312     dst2.ReadUintFromMem(vform, i, addr2);
    313     dst3.ReadUintFromMem(vform, i, addr3);
    314     dst4.ReadUintFromMem(vform, i, addr4);
    315     addr1 += 4 * esize;
    316     addr2 += 4 * esize;
    317     addr3 += 4 * esize;
    318     addr4 += 4 * esize;
    319   }
    320 }
    321 
    322 
    323 void Simulator::ld4(VectorFormat vform,
    324                     LogicVRegister dst1,
    325                     LogicVRegister dst2,
    326                     LogicVRegister dst3,
    327                     LogicVRegister dst4,
    328                     int index,
    329                     uint64_t addr1) {
    330   dst1.ClearForWrite(vform);
    331   dst2.ClearForWrite(vform);
    332   dst3.ClearForWrite(vform);
    333   dst4.ClearForWrite(vform);
    334   uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
    335   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    336   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    337   dst1.ReadUintFromMem(vform, index, addr1);
    338   dst2.ReadUintFromMem(vform, index, addr2);
    339   dst3.ReadUintFromMem(vform, index, addr3);
    340   dst4.ReadUintFromMem(vform, index, addr4);
    341 }
    342 
    343 
    344 void Simulator::ld4r(VectorFormat vform,
    345                      LogicVRegister dst1,
    346                      LogicVRegister dst2,
    347                      LogicVRegister dst3,
    348                      LogicVRegister dst4,
    349                      uint64_t addr) {
    350   dst1.ClearForWrite(vform);
    351   dst2.ClearForWrite(vform);
    352   dst3.ClearForWrite(vform);
    353   dst4.ClearForWrite(vform);
    354   uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
    355   uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
    356   uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
    357   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    358     dst1.ReadUintFromMem(vform, i, addr);
    359     dst2.ReadUintFromMem(vform, i, addr2);
    360     dst3.ReadUintFromMem(vform, i, addr3);
    361     dst4.ReadUintFromMem(vform, i, addr4);
    362   }
    363 }
    364 
    365 
    366 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
    367   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    368     src.WriteUintToMem(vform, i, addr);
    369     addr += LaneSizeInBytesFromFormat(vform);
    370   }
    371 }
    372 
    373 
    374 void Simulator::st1(VectorFormat vform,
    375                     LogicVRegister src,
    376                     int index,
    377                     uint64_t addr) {
    378   src.WriteUintToMem(vform, index, addr);
    379 }
    380 
    381 
    382 void Simulator::st2(VectorFormat vform,
    383                     LogicVRegister dst,
    384                     LogicVRegister dst2,
    385                     uint64_t addr) {
    386   int esize = LaneSizeInBytesFromFormat(vform);
    387   uint64_t addr2 = addr + esize;
    388   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    389     dst.WriteUintToMem(vform, i, addr);
    390     dst2.WriteUintToMem(vform, i, addr2);
    391     addr += 2 * esize;
    392     addr2 += 2 * esize;
    393   }
    394 }
    395 
    396 
    397 void Simulator::st2(VectorFormat vform,
    398                     LogicVRegister dst,
    399                     LogicVRegister dst2,
    400                     int index,
    401                     uint64_t addr) {
    402   int esize = LaneSizeInBytesFromFormat(vform);
    403   dst.WriteUintToMem(vform, index, addr);
    404   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    405 }
    406 
    407 
    408 void Simulator::st3(VectorFormat vform,
    409                     LogicVRegister dst,
    410                     LogicVRegister dst2,
    411                     LogicVRegister dst3,
    412                     uint64_t addr) {
    413   int esize = LaneSizeInBytesFromFormat(vform);
    414   uint64_t addr2 = addr + esize;
    415   uint64_t addr3 = addr2 + esize;
    416   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    417     dst.WriteUintToMem(vform, i, addr);
    418     dst2.WriteUintToMem(vform, i, addr2);
    419     dst3.WriteUintToMem(vform, i, addr3);
    420     addr += 3 * esize;
    421     addr2 += 3 * esize;
    422     addr3 += 3 * esize;
    423   }
    424 }
    425 
    426 
    427 void Simulator::st3(VectorFormat vform,
    428                     LogicVRegister dst,
    429                     LogicVRegister dst2,
    430                     LogicVRegister dst3,
    431                     int index,
    432                     uint64_t addr) {
    433   int esize = LaneSizeInBytesFromFormat(vform);
    434   dst.WriteUintToMem(vform, index, addr);
    435   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    436   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    437 }
    438 
    439 
    440 void Simulator::st4(VectorFormat vform,
    441                     LogicVRegister dst,
    442                     LogicVRegister dst2,
    443                     LogicVRegister dst3,
    444                     LogicVRegister dst4,
    445                     uint64_t addr) {
    446   int esize = LaneSizeInBytesFromFormat(vform);
    447   uint64_t addr2 = addr + esize;
    448   uint64_t addr3 = addr2 + esize;
    449   uint64_t addr4 = addr3 + esize;
    450   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    451     dst.WriteUintToMem(vform, i, addr);
    452     dst2.WriteUintToMem(vform, i, addr2);
    453     dst3.WriteUintToMem(vform, i, addr3);
    454     dst4.WriteUintToMem(vform, i, addr4);
    455     addr += 4 * esize;
    456     addr2 += 4 * esize;
    457     addr3 += 4 * esize;
    458     addr4 += 4 * esize;
    459   }
    460 }
    461 
    462 
    463 void Simulator::st4(VectorFormat vform,
    464                     LogicVRegister dst,
    465                     LogicVRegister dst2,
    466                     LogicVRegister dst3,
    467                     LogicVRegister dst4,
    468                     int index,
    469                     uint64_t addr) {
    470   int esize = LaneSizeInBytesFromFormat(vform);
    471   dst.WriteUintToMem(vform, index, addr);
    472   dst2.WriteUintToMem(vform, index, addr + 1 * esize);
    473   dst3.WriteUintToMem(vform, index, addr + 2 * esize);
    474   dst4.WriteUintToMem(vform, index, addr + 3 * esize);
    475 }
    476 
    477 
    478 LogicVRegister Simulator::cmp(VectorFormat vform,
    479                               LogicVRegister dst,
    480                               const LogicVRegister& src1,
    481                               const LogicVRegister& src2,
    482                               Condition cond) {
    483   dst.ClearForWrite(vform);
    484   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    485     int64_t sa = src1.Int(vform, i);
    486     int64_t sb = src2.Int(vform, i);
    487     uint64_t ua = src1.Uint(vform, i);
    488     uint64_t ub = src2.Uint(vform, i);
    489     bool result = false;
    490     switch (cond) {
    491       case eq:
    492         result = (ua == ub);
    493         break;
    494       case ge:
    495         result = (sa >= sb);
    496         break;
    497       case gt:
    498         result = (sa > sb);
    499         break;
    500       case hi:
    501         result = (ua > ub);
    502         break;
    503       case hs:
    504         result = (ua >= ub);
    505         break;
    506       case lt:
    507         result = (sa < sb);
    508         break;
    509       case le:
    510         result = (sa <= sb);
    511         break;
    512       default:
    513         VIXL_UNREACHABLE();
    514         break;
    515     }
    516     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
    517   }
    518   return dst;
    519 }
    520 
    521 
    522 LogicVRegister Simulator::cmp(VectorFormat vform,
    523                               LogicVRegister dst,
    524                               const LogicVRegister& src1,
    525                               int imm,
    526                               Condition cond) {
    527   SimVRegister temp;
    528   LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
    529   return cmp(vform, dst, src1, imm_reg, cond);
    530 }
    531 
    532 
    533 LogicVRegister Simulator::cmptst(VectorFormat vform,
    534                                  LogicVRegister dst,
    535                                  const LogicVRegister& src1,
    536                                  const LogicVRegister& src2) {
    537   dst.ClearForWrite(vform);
    538   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    539     uint64_t ua = src1.Uint(vform, i);
    540     uint64_t ub = src2.Uint(vform, i);
    541     dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
    542   }
    543   return dst;
    544 }
    545 
    546 
    547 LogicVRegister Simulator::add(VectorFormat vform,
    548                               LogicVRegister dst,
    549                               const LogicVRegister& src1,
    550                               const LogicVRegister& src2) {
    551   int lane_size = LaneSizeInBitsFromFormat(vform);
    552   dst.ClearForWrite(vform);
    553   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    554     // Test for unsigned saturation.
    555     uint64_t ua = src1.UintLeftJustified(vform, i);
    556     uint64_t ub = src2.UintLeftJustified(vform, i);
    557     uint64_t ur = ua + ub;
    558     if (ur < ua) {
    559       dst.SetUnsignedSat(i, true);
    560     }
    561 
    562     // Test for signed saturation.
    563     bool pos_a = (ua >> 63) == 0;
    564     bool pos_b = (ub >> 63) == 0;
    565     bool pos_r = (ur >> 63) == 0;
    566     // If the signs of the operands are the same, but different from the result,
    567     // there was an overflow.
    568     if ((pos_a == pos_b) && (pos_a != pos_r)) {
    569       dst.SetSignedSat(i, pos_a);
    570     }
    571 
    572     dst.SetInt(vform, i, ur >> (64 - lane_size));
    573   }
    574   return dst;
    575 }
    576 
    577 
    578 LogicVRegister Simulator::addp(VectorFormat vform,
    579                                LogicVRegister dst,
    580                                const LogicVRegister& src1,
    581                                const LogicVRegister& src2) {
    582   SimVRegister temp1, temp2;
    583   uzp1(vform, temp1, src1, src2);
    584   uzp2(vform, temp2, src1, src2);
    585   add(vform, dst, temp1, temp2);
    586   return dst;
    587 }
    588 
    589 
    590 LogicVRegister Simulator::mla(VectorFormat vform,
    591                               LogicVRegister dst,
    592                               const LogicVRegister& src1,
    593                               const LogicVRegister& src2) {
    594   SimVRegister temp;
    595   mul(vform, temp, src1, src2);
    596   add(vform, dst, dst, temp);
    597   return dst;
    598 }
    599 
    600 
    601 LogicVRegister Simulator::mls(VectorFormat vform,
    602                               LogicVRegister dst,
    603                               const LogicVRegister& src1,
    604                               const LogicVRegister& src2) {
    605   SimVRegister temp;
    606   mul(vform, temp, src1, src2);
    607   sub(vform, dst, dst, temp);
    608   return dst;
    609 }
    610 
    611 
    612 LogicVRegister Simulator::mul(VectorFormat vform,
    613                               LogicVRegister dst,
    614                               const LogicVRegister& src1,
    615                               const LogicVRegister& src2) {
    616   dst.ClearForWrite(vform);
    617   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    618     dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
    619   }
    620   return dst;
    621 }
    622 
    623 
    624 LogicVRegister Simulator::mul(VectorFormat vform,
    625                               LogicVRegister dst,
    626                               const LogicVRegister& src1,
    627                               const LogicVRegister& src2,
    628                               int index) {
    629   SimVRegister temp;
    630   VectorFormat indexform = VectorFormatFillQ(vform);
    631   return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
    632 }
    633 
    634 
    635 LogicVRegister Simulator::mla(VectorFormat vform,
    636                               LogicVRegister dst,
    637                               const LogicVRegister& src1,
    638                               const LogicVRegister& src2,
    639                               int index) {
    640   SimVRegister temp;
    641   VectorFormat indexform = VectorFormatFillQ(vform);
    642   return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
    643 }
    644 
    645 
    646 LogicVRegister Simulator::mls(VectorFormat vform,
    647                               LogicVRegister dst,
    648                               const LogicVRegister& src1,
    649                               const LogicVRegister& src2,
    650                               int index) {
    651   SimVRegister temp;
    652   VectorFormat indexform = VectorFormatFillQ(vform);
    653   return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
    654 }
    655 
    656 
    657 LogicVRegister Simulator::smull(VectorFormat vform,
    658                                 LogicVRegister dst,
    659                                 const LogicVRegister& src1,
    660                                 const LogicVRegister& src2,
    661                                 int index) {
    662   SimVRegister temp;
    663   VectorFormat indexform =
    664       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    665   return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    666 }
    667 
    668 
    669 LogicVRegister Simulator::smull2(VectorFormat vform,
    670                                  LogicVRegister dst,
    671                                  const LogicVRegister& src1,
    672                                  const LogicVRegister& src2,
    673                                  int index) {
    674   SimVRegister temp;
    675   VectorFormat indexform =
    676       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    677   return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    678 }
    679 
    680 
    681 LogicVRegister Simulator::umull(VectorFormat vform,
    682                                 LogicVRegister dst,
    683                                 const LogicVRegister& src1,
    684                                 const LogicVRegister& src2,
    685                                 int index) {
    686   SimVRegister temp;
    687   VectorFormat indexform =
    688       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    689   return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    690 }
    691 
    692 
    693 LogicVRegister Simulator::umull2(VectorFormat vform,
    694                                  LogicVRegister dst,
    695                                  const LogicVRegister& src1,
    696                                  const LogicVRegister& src2,
    697                                  int index) {
    698   SimVRegister temp;
    699   VectorFormat indexform =
    700       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    701   return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    702 }
    703 
    704 
    705 LogicVRegister Simulator::smlal(VectorFormat vform,
    706                                 LogicVRegister dst,
    707                                 const LogicVRegister& src1,
    708                                 const LogicVRegister& src2,
    709                                 int index) {
    710   SimVRegister temp;
    711   VectorFormat indexform =
    712       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    713   return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    714 }
    715 
    716 
    717 LogicVRegister Simulator::smlal2(VectorFormat vform,
    718                                  LogicVRegister dst,
    719                                  const LogicVRegister& src1,
    720                                  const LogicVRegister& src2,
    721                                  int index) {
    722   SimVRegister temp;
    723   VectorFormat indexform =
    724       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    725   return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    726 }
    727 
    728 
    729 LogicVRegister Simulator::umlal(VectorFormat vform,
    730                                 LogicVRegister dst,
    731                                 const LogicVRegister& src1,
    732                                 const LogicVRegister& src2,
    733                                 int index) {
    734   SimVRegister temp;
    735   VectorFormat indexform =
    736       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    737   return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    738 }
    739 
    740 
    741 LogicVRegister Simulator::umlal2(VectorFormat vform,
    742                                  LogicVRegister dst,
    743                                  const LogicVRegister& src1,
    744                                  const LogicVRegister& src2,
    745                                  int index) {
    746   SimVRegister temp;
    747   VectorFormat indexform =
    748       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    749   return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    750 }
    751 
    752 
    753 LogicVRegister Simulator::smlsl(VectorFormat vform,
    754                                 LogicVRegister dst,
    755                                 const LogicVRegister& src1,
    756                                 const LogicVRegister& src2,
    757                                 int index) {
    758   SimVRegister temp;
    759   VectorFormat indexform =
    760       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    761   return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    762 }
    763 
    764 
    765 LogicVRegister Simulator::smlsl2(VectorFormat vform,
    766                                  LogicVRegister dst,
    767                                  const LogicVRegister& src1,
    768                                  const LogicVRegister& src2,
    769                                  int index) {
    770   SimVRegister temp;
    771   VectorFormat indexform =
    772       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    773   return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    774 }
    775 
    776 
    777 LogicVRegister Simulator::umlsl(VectorFormat vform,
    778                                 LogicVRegister dst,
    779                                 const LogicVRegister& src1,
    780                                 const LogicVRegister& src2,
    781                                 int index) {
    782   SimVRegister temp;
    783   VectorFormat indexform =
    784       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    785   return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    786 }
    787 
    788 
    789 LogicVRegister Simulator::umlsl2(VectorFormat vform,
    790                                  LogicVRegister dst,
    791                                  const LogicVRegister& src1,
    792                                  const LogicVRegister& src2,
    793                                  int index) {
    794   SimVRegister temp;
    795   VectorFormat indexform =
    796       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    797   return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    798 }
    799 
    800 
    801 LogicVRegister Simulator::sqdmull(VectorFormat vform,
    802                                   LogicVRegister dst,
    803                                   const LogicVRegister& src1,
    804                                   const LogicVRegister& src2,
    805                                   int index) {
    806   SimVRegister temp;
    807   VectorFormat indexform =
    808       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    809   return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
    810 }
    811 
    812 
    813 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
    814                                    LogicVRegister dst,
    815                                    const LogicVRegister& src1,
    816                                    const LogicVRegister& src2,
    817                                    int index) {
    818   SimVRegister temp;
    819   VectorFormat indexform =
    820       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    821   return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    822 }
    823 
    824 
    825 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
    826                                   LogicVRegister dst,
    827                                   const LogicVRegister& src1,
    828                                   const LogicVRegister& src2,
    829                                   int index) {
    830   SimVRegister temp;
    831   VectorFormat indexform =
    832       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    833   return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
    834 }
    835 
    836 
    837 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
    838                                    LogicVRegister dst,
    839                                    const LogicVRegister& src1,
    840                                    const LogicVRegister& src2,
    841                                    int index) {
    842   SimVRegister temp;
    843   VectorFormat indexform =
    844       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    845   return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    846 }
    847 
    848 
    849 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
    850                                   LogicVRegister dst,
    851                                   const LogicVRegister& src1,
    852                                   const LogicVRegister& src2,
    853                                   int index) {
    854   SimVRegister temp;
    855   VectorFormat indexform =
    856       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    857   return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
    858 }
    859 
    860 
    861 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
    862                                    LogicVRegister dst,
    863                                    const LogicVRegister& src1,
    864                                    const LogicVRegister& src2,
    865                                    int index) {
    866   SimVRegister temp;
    867   VectorFormat indexform =
    868       VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
    869   return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
    870 }
    871 
    872 
    873 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
    874                                   LogicVRegister dst,
    875                                   const LogicVRegister& src1,
    876                                   const LogicVRegister& src2,
    877                                   int index) {
    878   SimVRegister temp;
    879   VectorFormat indexform = VectorFormatFillQ(vform);
    880   return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
    881 }
    882 
    883 
    884 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
    885                                    LogicVRegister dst,
    886                                    const LogicVRegister& src1,
    887                                    const LogicVRegister& src2,
    888                                    int index) {
    889   SimVRegister temp;
    890   VectorFormat indexform = VectorFormatFillQ(vform);
    891   return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
    892 }
    893 
    894 
    895 LogicVRegister Simulator::sdot(VectorFormat vform,
    896                                LogicVRegister dst,
    897                                const LogicVRegister& src1,
    898                                const LogicVRegister& src2,
    899                                int index) {
    900   SimVRegister temp;
    901   VectorFormat indexform = VectorFormatFillQ(vform);
    902   return sdot(vform, dst, src1, dup_element(indexform, temp, src2, index));
    903 }
    904 
    905 
    906 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
    907                                    LogicVRegister dst,
    908                                    const LogicVRegister& src1,
    909                                    const LogicVRegister& src2,
    910                                    int index) {
    911   SimVRegister temp;
    912   VectorFormat indexform = VectorFormatFillQ(vform);
    913   return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
    914 }
    915 
    916 
    917 LogicVRegister Simulator::udot(VectorFormat vform,
    918                                LogicVRegister dst,
    919                                const LogicVRegister& src1,
    920                                const LogicVRegister& src2,
    921                                int index) {
    922   SimVRegister temp;
    923   VectorFormat indexform = VectorFormatFillQ(vform);
    924   return udot(vform, dst, src1, dup_element(indexform, temp, src2, index));
    925 }
    926 
    927 
    928 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
    929                                    LogicVRegister dst,
    930                                    const LogicVRegister& src1,
    931                                    const LogicVRegister& src2,
    932                                    int index) {
    933   SimVRegister temp;
    934   VectorFormat indexform = VectorFormatFillQ(vform);
    935   return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
    936 }
    937 
    938 
    939 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
    940   uint16_t result = 0;
    941   uint16_t extended_op2 = op2;
    942   for (int i = 0; i < 8; ++i) {
    943     if ((op1 >> i) & 1) {
    944       result = result ^ (extended_op2 << i);
    945     }
    946   }
    947   return result;
    948 }
    949 
    950 
    951 LogicVRegister Simulator::pmul(VectorFormat vform,
    952                                LogicVRegister dst,
    953                                const LogicVRegister& src1,
    954                                const LogicVRegister& src2) {
    955   dst.ClearForWrite(vform);
    956   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    957     dst.SetUint(vform,
    958                 i,
    959                 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
    960   }
    961   return dst;
    962 }
    963 
    964 
    965 LogicVRegister Simulator::pmull(VectorFormat vform,
    966                                 LogicVRegister dst,
    967                                 const LogicVRegister& src1,
    968                                 const LogicVRegister& src2) {
    969   VectorFormat vform_src = VectorFormatHalfWidth(vform);
    970   dst.ClearForWrite(vform);
    971   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
    972     dst.SetUint(vform,
    973                 i,
    974                 PolynomialMult(src1.Uint(vform_src, i),
    975                                src2.Uint(vform_src, i)));
    976   }
    977   return dst;
    978 }
    979 
    980 
    981 LogicVRegister Simulator::pmull2(VectorFormat vform,
    982                                  LogicVRegister dst,
    983                                  const LogicVRegister& src1,
    984                                  const LogicVRegister& src2) {
    985   VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
    986   dst.ClearForWrite(vform);
    987   int lane_count = LaneCountFromFormat(vform);
    988   for (int i = 0; i < lane_count; i++) {
    989     dst.SetUint(vform,
    990                 i,
    991                 PolynomialMult(src1.Uint(vform_src, lane_count + i),
    992                                src2.Uint(vform_src, lane_count + i)));
    993   }
    994   return dst;
    995 }
    996 
    997 
    998 LogicVRegister Simulator::sub(VectorFormat vform,
    999                               LogicVRegister dst,
   1000                               const LogicVRegister& src1,
   1001                               const LogicVRegister& src2) {
   1002   int lane_size = LaneSizeInBitsFromFormat(vform);
   1003   dst.ClearForWrite(vform);
   1004   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1005     // Test for unsigned saturation.
   1006     uint64_t ua = src1.UintLeftJustified(vform, i);
   1007     uint64_t ub = src2.UintLeftJustified(vform, i);
   1008     uint64_t ur = ua - ub;
   1009     if (ub > ua) {
   1010       dst.SetUnsignedSat(i, false);
   1011     }
   1012 
   1013     // Test for signed saturation.
   1014     bool pos_a = (ua >> 63) == 0;
   1015     bool pos_b = (ub >> 63) == 0;
   1016     bool pos_r = (ur >> 63) == 0;
   1017     // If the signs of the operands are different, and the sign of the first
   1018     // operand doesn't match the result, there was an overflow.
   1019     if ((pos_a != pos_b) && (pos_a != pos_r)) {
   1020       dst.SetSignedSat(i, pos_a);
   1021     }
   1022 
   1023     dst.SetInt(vform, i, ur >> (64 - lane_size));
   1024   }
   1025   return dst;
   1026 }
   1027 
   1028 
   1029 LogicVRegister Simulator::and_(VectorFormat vform,
   1030                                LogicVRegister dst,
   1031                                const LogicVRegister& src1,
   1032                                const LogicVRegister& src2) {
   1033   dst.ClearForWrite(vform);
   1034   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1035     dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
   1036   }
   1037   return dst;
   1038 }
   1039 
   1040 
   1041 LogicVRegister Simulator::orr(VectorFormat vform,
   1042                               LogicVRegister dst,
   1043                               const LogicVRegister& src1,
   1044                               const LogicVRegister& src2) {
   1045   dst.ClearForWrite(vform);
   1046   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1047     dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
   1048   }
   1049   return dst;
   1050 }
   1051 
   1052 
   1053 LogicVRegister Simulator::orn(VectorFormat vform,
   1054                               LogicVRegister dst,
   1055                               const LogicVRegister& src1,
   1056                               const LogicVRegister& src2) {
   1057   dst.ClearForWrite(vform);
   1058   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1059     dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
   1060   }
   1061   return dst;
   1062 }
   1063 
   1064 
   1065 LogicVRegister Simulator::eor(VectorFormat vform,
   1066                               LogicVRegister dst,
   1067                               const LogicVRegister& src1,
   1068                               const LogicVRegister& src2) {
   1069   dst.ClearForWrite(vform);
   1070   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1071     dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
   1072   }
   1073   return dst;
   1074 }
   1075 
   1076 
   1077 LogicVRegister Simulator::bic(VectorFormat vform,
   1078                               LogicVRegister dst,
   1079                               const LogicVRegister& src1,
   1080                               const LogicVRegister& src2) {
   1081   dst.ClearForWrite(vform);
   1082   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1083     dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
   1084   }
   1085   return dst;
   1086 }
   1087 
   1088 
   1089 LogicVRegister Simulator::bic(VectorFormat vform,
   1090                               LogicVRegister dst,
   1091                               const LogicVRegister& src,
   1092                               uint64_t imm) {
   1093   uint64_t result[16];
   1094   int laneCount = LaneCountFromFormat(vform);
   1095   for (int i = 0; i < laneCount; ++i) {
   1096     result[i] = src.Uint(vform, i) & ~imm;
   1097   }
   1098   dst.ClearForWrite(vform);
   1099   for (int i = 0; i < laneCount; ++i) {
   1100     dst.SetUint(vform, i, result[i]);
   1101   }
   1102   return dst;
   1103 }
   1104 
   1105 
   1106 LogicVRegister Simulator::bif(VectorFormat vform,
   1107                               LogicVRegister dst,
   1108                               const LogicVRegister& src1,
   1109                               const LogicVRegister& src2) {
   1110   dst.ClearForWrite(vform);
   1111   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1112     uint64_t operand1 = dst.Uint(vform, i);
   1113     uint64_t operand2 = ~src2.Uint(vform, i);
   1114     uint64_t operand3 = src1.Uint(vform, i);
   1115     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1116     dst.SetUint(vform, i, result);
   1117   }
   1118   return dst;
   1119 }
   1120 
   1121 
   1122 LogicVRegister Simulator::bit(VectorFormat vform,
   1123                               LogicVRegister dst,
   1124                               const LogicVRegister& src1,
   1125                               const LogicVRegister& src2) {
   1126   dst.ClearForWrite(vform);
   1127   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1128     uint64_t operand1 = dst.Uint(vform, i);
   1129     uint64_t operand2 = src2.Uint(vform, i);
   1130     uint64_t operand3 = src1.Uint(vform, i);
   1131     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1132     dst.SetUint(vform, i, result);
   1133   }
   1134   return dst;
   1135 }
   1136 
   1137 
   1138 LogicVRegister Simulator::bsl(VectorFormat vform,
   1139                               LogicVRegister dst,
   1140                               const LogicVRegister& src1,
   1141                               const LogicVRegister& src2) {
   1142   dst.ClearForWrite(vform);
   1143   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1144     uint64_t operand1 = src2.Uint(vform, i);
   1145     uint64_t operand2 = dst.Uint(vform, i);
   1146     uint64_t operand3 = src1.Uint(vform, i);
   1147     uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
   1148     dst.SetUint(vform, i, result);
   1149   }
   1150   return dst;
   1151 }
   1152 
   1153 
   1154 LogicVRegister Simulator::sminmax(VectorFormat vform,
   1155                                   LogicVRegister dst,
   1156                                   const LogicVRegister& src1,
   1157                                   const LogicVRegister& src2,
   1158                                   bool max) {
   1159   dst.ClearForWrite(vform);
   1160   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1161     int64_t src1_val = src1.Int(vform, i);
   1162     int64_t src2_val = src2.Int(vform, i);
   1163     int64_t dst_val;
   1164     if (max) {
   1165       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1166     } else {
   1167       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1168     }
   1169     dst.SetInt(vform, i, dst_val);
   1170   }
   1171   return dst;
   1172 }
   1173 
   1174 
   1175 LogicVRegister Simulator::smax(VectorFormat vform,
   1176                                LogicVRegister dst,
   1177                                const LogicVRegister& src1,
   1178                                const LogicVRegister& src2) {
   1179   return sminmax(vform, dst, src1, src2, true);
   1180 }
   1181 
   1182 
   1183 LogicVRegister Simulator::smin(VectorFormat vform,
   1184                                LogicVRegister dst,
   1185                                const LogicVRegister& src1,
   1186                                const LogicVRegister& src2) {
   1187   return sminmax(vform, dst, src1, src2, false);
   1188 }
   1189 
   1190 
   1191 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
   1192                                    LogicVRegister dst,
   1193                                    const LogicVRegister& src1,
   1194                                    const LogicVRegister& src2,
   1195                                    bool max) {
   1196   int lanes = LaneCountFromFormat(vform);
   1197   int64_t result[kMaxLanesPerVector];
   1198   const LogicVRegister* src = &src1;
   1199   for (int j = 0; j < 2; j++) {
   1200     for (int i = 0; i < lanes; i += 2) {
   1201       int64_t first_val = src->Int(vform, i);
   1202       int64_t second_val = src->Int(vform, i + 1);
   1203       int64_t dst_val;
   1204       if (max) {
   1205         dst_val = (first_val > second_val) ? first_val : second_val;
   1206       } else {
   1207         dst_val = (first_val < second_val) ? first_val : second_val;
   1208       }
   1209       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
   1210       result[(i >> 1) + (j * lanes / 2)] = dst_val;
   1211     }
   1212     src = &src2;
   1213   }
   1214   dst.SetIntArray(vform, result);
   1215   return dst;
   1216 }
   1217 
   1218 
   1219 LogicVRegister Simulator::smaxp(VectorFormat vform,
   1220                                 LogicVRegister dst,
   1221                                 const LogicVRegister& src1,
   1222                                 const LogicVRegister& src2) {
   1223   return sminmaxp(vform, dst, src1, src2, true);
   1224 }
   1225 
   1226 
   1227 LogicVRegister Simulator::sminp(VectorFormat vform,
   1228                                 LogicVRegister dst,
   1229                                 const LogicVRegister& src1,
   1230                                 const LogicVRegister& src2) {
   1231   return sminmaxp(vform, dst, src1, src2, false);
   1232 }
   1233 
   1234 
   1235 LogicVRegister Simulator::addp(VectorFormat vform,
   1236                                LogicVRegister dst,
   1237                                const LogicVRegister& src) {
   1238   VIXL_ASSERT(vform == kFormatD);
   1239 
   1240   uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
   1241   dst.ClearForWrite(vform);
   1242   dst.SetUint(vform, 0, dst_val);
   1243   return dst;
   1244 }
   1245 
   1246 
   1247 LogicVRegister Simulator::addv(VectorFormat vform,
   1248                                LogicVRegister dst,
   1249                                const LogicVRegister& src) {
   1250   VectorFormat vform_dst =
   1251       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
   1252 
   1253 
   1254   int64_t dst_val = 0;
   1255   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1256     dst_val += src.Int(vform, i);
   1257   }
   1258 
   1259   dst.ClearForWrite(vform_dst);
   1260   dst.SetInt(vform_dst, 0, dst_val);
   1261   return dst;
   1262 }
   1263 
   1264 
   1265 LogicVRegister Simulator::saddlv(VectorFormat vform,
   1266                                  LogicVRegister dst,
   1267                                  const LogicVRegister& src) {
   1268   VectorFormat vform_dst =
   1269       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1270 
   1271   int64_t dst_val = 0;
   1272   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1273     dst_val += src.Int(vform, i);
   1274   }
   1275 
   1276   dst.ClearForWrite(vform_dst);
   1277   dst.SetInt(vform_dst, 0, dst_val);
   1278   return dst;
   1279 }
   1280 
   1281 
   1282 LogicVRegister Simulator::uaddlv(VectorFormat vform,
   1283                                  LogicVRegister dst,
   1284                                  const LogicVRegister& src) {
   1285   VectorFormat vform_dst =
   1286       ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
   1287 
   1288   uint64_t dst_val = 0;
   1289   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1290     dst_val += src.Uint(vform, i);
   1291   }
   1292 
   1293   dst.ClearForWrite(vform_dst);
   1294   dst.SetUint(vform_dst, 0, dst_val);
   1295   return dst;
   1296 }
   1297 
   1298 
   1299 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
   1300                                    LogicVRegister dst,
   1301                                    const LogicVRegister& src,
   1302                                    bool max) {
   1303   int64_t dst_val = max ? INT64_MIN : INT64_MAX;
   1304   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1305     int64_t src_val = src.Int(vform, i);
   1306     if (max) {
   1307       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1308     } else {
   1309       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1310     }
   1311   }
   1312   dst.ClearForWrite(ScalarFormatFromFormat(vform));
   1313   dst.SetInt(vform, 0, dst_val);
   1314   return dst;
   1315 }
   1316 
   1317 
   1318 LogicVRegister Simulator::smaxv(VectorFormat vform,
   1319                                 LogicVRegister dst,
   1320                                 const LogicVRegister& src) {
   1321   sminmaxv(vform, dst, src, true);
   1322   return dst;
   1323 }
   1324 
   1325 
   1326 LogicVRegister Simulator::sminv(VectorFormat vform,
   1327                                 LogicVRegister dst,
   1328                                 const LogicVRegister& src) {
   1329   sminmaxv(vform, dst, src, false);
   1330   return dst;
   1331 }
   1332 
   1333 
   1334 LogicVRegister Simulator::uminmax(VectorFormat vform,
   1335                                   LogicVRegister dst,
   1336                                   const LogicVRegister& src1,
   1337                                   const LogicVRegister& src2,
   1338                                   bool max) {
   1339   dst.ClearForWrite(vform);
   1340   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1341     uint64_t src1_val = src1.Uint(vform, i);
   1342     uint64_t src2_val = src2.Uint(vform, i);
   1343     uint64_t dst_val;
   1344     if (max) {
   1345       dst_val = (src1_val > src2_val) ? src1_val : src2_val;
   1346     } else {
   1347       dst_val = (src1_val < src2_val) ? src1_val : src2_val;
   1348     }
   1349     dst.SetUint(vform, i, dst_val);
   1350   }
   1351   return dst;
   1352 }
   1353 
   1354 
   1355 LogicVRegister Simulator::umax(VectorFormat vform,
   1356                                LogicVRegister dst,
   1357                                const LogicVRegister& src1,
   1358                                const LogicVRegister& src2) {
   1359   return uminmax(vform, dst, src1, src2, true);
   1360 }
   1361 
   1362 
   1363 LogicVRegister Simulator::umin(VectorFormat vform,
   1364                                LogicVRegister dst,
   1365                                const LogicVRegister& src1,
   1366                                const LogicVRegister& src2) {
   1367   return uminmax(vform, dst, src1, src2, false);
   1368 }
   1369 
   1370 
   1371 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
   1372                                    LogicVRegister dst,
   1373                                    const LogicVRegister& src1,
   1374                                    const LogicVRegister& src2,
   1375                                    bool max) {
   1376   int lanes = LaneCountFromFormat(vform);
   1377   uint64_t result[kMaxLanesPerVector];
   1378   const LogicVRegister* src = &src1;
   1379   for (int j = 0; j < 2; j++) {
   1380     for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
   1381       uint64_t first_val = src->Uint(vform, i);
   1382       uint64_t second_val = src->Uint(vform, i + 1);
   1383       uint64_t dst_val;
   1384       if (max) {
   1385         dst_val = (first_val > second_val) ? first_val : second_val;
   1386       } else {
   1387         dst_val = (first_val < second_val) ? first_val : second_val;
   1388       }
   1389       VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
   1390       result[(i >> 1) + (j * lanes / 2)] = dst_val;
   1391     }
   1392     src = &src2;
   1393   }
   1394   dst.SetUintArray(vform, result);
   1395   return dst;
   1396 }
   1397 
   1398 
   1399 LogicVRegister Simulator::umaxp(VectorFormat vform,
   1400                                 LogicVRegister dst,
   1401                                 const LogicVRegister& src1,
   1402                                 const LogicVRegister& src2) {
   1403   return uminmaxp(vform, dst, src1, src2, true);
   1404 }
   1405 
   1406 
   1407 LogicVRegister Simulator::uminp(VectorFormat vform,
   1408                                 LogicVRegister dst,
   1409                                 const LogicVRegister& src1,
   1410                                 const LogicVRegister& src2) {
   1411   return uminmaxp(vform, dst, src1, src2, false);
   1412 }
   1413 
   1414 
   1415 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
   1416                                    LogicVRegister dst,
   1417                                    const LogicVRegister& src,
   1418                                    bool max) {
   1419   uint64_t dst_val = max ? 0 : UINT64_MAX;
   1420   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1421     uint64_t src_val = src.Uint(vform, i);
   1422     if (max) {
   1423       dst_val = (src_val > dst_val) ? src_val : dst_val;
   1424     } else {
   1425       dst_val = (src_val < dst_val) ? src_val : dst_val;
   1426     }
   1427   }
   1428   dst.ClearForWrite(ScalarFormatFromFormat(vform));
   1429   dst.SetUint(vform, 0, dst_val);
   1430   return dst;
   1431 }
   1432 
   1433 
   1434 LogicVRegister Simulator::umaxv(VectorFormat vform,
   1435                                 LogicVRegister dst,
   1436                                 const LogicVRegister& src) {
   1437   uminmaxv(vform, dst, src, true);
   1438   return dst;
   1439 }
   1440 
   1441 
   1442 LogicVRegister Simulator::uminv(VectorFormat vform,
   1443                                 LogicVRegister dst,
   1444                                 const LogicVRegister& src) {
   1445   uminmaxv(vform, dst, src, false);
   1446   return dst;
   1447 }
   1448 
   1449 
   1450 LogicVRegister Simulator::shl(VectorFormat vform,
   1451                               LogicVRegister dst,
   1452                               const LogicVRegister& src,
   1453                               int shift) {
   1454   VIXL_ASSERT(shift >= 0);
   1455   SimVRegister temp;
   1456   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1457   return ushl(vform, dst, src, shiftreg);
   1458 }
   1459 
   1460 
   1461 LogicVRegister Simulator::sshll(VectorFormat vform,
   1462                                 LogicVRegister dst,
   1463                                 const LogicVRegister& src,
   1464                                 int shift) {
   1465   VIXL_ASSERT(shift >= 0);
   1466   SimVRegister temp1, temp2;
   1467   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1468   LogicVRegister extendedreg = sxtl(vform, temp2, src);
   1469   return sshl(vform, dst, extendedreg, shiftreg);
   1470 }
   1471 
   1472 
   1473 LogicVRegister Simulator::sshll2(VectorFormat vform,
   1474                                  LogicVRegister dst,
   1475                                  const LogicVRegister& src,
   1476                                  int shift) {
   1477   VIXL_ASSERT(shift >= 0);
   1478   SimVRegister temp1, temp2;
   1479   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1480   LogicVRegister extendedreg = sxtl2(vform, temp2, src);
   1481   return sshl(vform, dst, extendedreg, shiftreg);
   1482 }
   1483 
   1484 
   1485 LogicVRegister Simulator::shll(VectorFormat vform,
   1486                                LogicVRegister dst,
   1487                                const LogicVRegister& src) {
   1488   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1489   return sshll(vform, dst, src, shift);
   1490 }
   1491 
   1492 
   1493 LogicVRegister Simulator::shll2(VectorFormat vform,
   1494                                 LogicVRegister dst,
   1495                                 const LogicVRegister& src) {
   1496   int shift = LaneSizeInBitsFromFormat(vform) / 2;
   1497   return sshll2(vform, dst, src, shift);
   1498 }
   1499 
   1500 
   1501 LogicVRegister Simulator::ushll(VectorFormat vform,
   1502                                 LogicVRegister dst,
   1503                                 const LogicVRegister& src,
   1504                                 int shift) {
   1505   VIXL_ASSERT(shift >= 0);
   1506   SimVRegister temp1, temp2;
   1507   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1508   LogicVRegister extendedreg = uxtl(vform, temp2, src);
   1509   return ushl(vform, dst, extendedreg, shiftreg);
   1510 }
   1511 
   1512 
   1513 LogicVRegister Simulator::ushll2(VectorFormat vform,
   1514                                  LogicVRegister dst,
   1515                                  const LogicVRegister& src,
   1516                                  int shift) {
   1517   VIXL_ASSERT(shift >= 0);
   1518   SimVRegister temp1, temp2;
   1519   LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
   1520   LogicVRegister extendedreg = uxtl2(vform, temp2, src);
   1521   return ushl(vform, dst, extendedreg, shiftreg);
   1522 }
   1523 
   1524 
   1525 LogicVRegister Simulator::sli(VectorFormat vform,
   1526                               LogicVRegister dst,
   1527                               const LogicVRegister& src,
   1528                               int shift) {
   1529   dst.ClearForWrite(vform);
   1530   int laneCount = LaneCountFromFormat(vform);
   1531   for (int i = 0; i < laneCount; i++) {
   1532     uint64_t src_lane = src.Uint(vform, i);
   1533     uint64_t dst_lane = dst.Uint(vform, i);
   1534     uint64_t shifted = src_lane << shift;
   1535     uint64_t mask = MaxUintFromFormat(vform) << shift;
   1536     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1537   }
   1538   return dst;
   1539 }
   1540 
   1541 
   1542 LogicVRegister Simulator::sqshl(VectorFormat vform,
   1543                                 LogicVRegister dst,
   1544                                 const LogicVRegister& src,
   1545                                 int shift) {
   1546   VIXL_ASSERT(shift >= 0);
   1547   SimVRegister temp;
   1548   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1549   return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
   1550 }
   1551 
   1552 
   1553 LogicVRegister Simulator::uqshl(VectorFormat vform,
   1554                                 LogicVRegister dst,
   1555                                 const LogicVRegister& src,
   1556                                 int shift) {
   1557   VIXL_ASSERT(shift >= 0);
   1558   SimVRegister temp;
   1559   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1560   return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1561 }
   1562 
   1563 
   1564 LogicVRegister Simulator::sqshlu(VectorFormat vform,
   1565                                  LogicVRegister dst,
   1566                                  const LogicVRegister& src,
   1567                                  int shift) {
   1568   VIXL_ASSERT(shift >= 0);
   1569   SimVRegister temp;
   1570   LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
   1571   return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
   1572 }
   1573 
   1574 
   1575 LogicVRegister Simulator::sri(VectorFormat vform,
   1576                               LogicVRegister dst,
   1577                               const LogicVRegister& src,
   1578                               int shift) {
   1579   dst.ClearForWrite(vform);
   1580   int laneCount = LaneCountFromFormat(vform);
   1581   VIXL_ASSERT((shift > 0) &&
   1582               (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
   1583   for (int i = 0; i < laneCount; i++) {
   1584     uint64_t src_lane = src.Uint(vform, i);
   1585     uint64_t dst_lane = dst.Uint(vform, i);
   1586     uint64_t shifted;
   1587     uint64_t mask;
   1588     if (shift == 64) {
   1589       shifted = 0;
   1590       mask = 0;
   1591     } else {
   1592       shifted = src_lane >> shift;
   1593       mask = MaxUintFromFormat(vform) >> shift;
   1594     }
   1595     dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
   1596   }
   1597   return dst;
   1598 }
   1599 
   1600 
   1601 LogicVRegister Simulator::ushr(VectorFormat vform,
   1602                                LogicVRegister dst,
   1603                                const LogicVRegister& src,
   1604                                int shift) {
   1605   VIXL_ASSERT(shift >= 0);
   1606   SimVRegister temp;
   1607   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1608   return ushl(vform, dst, src, shiftreg);
   1609 }
   1610 
   1611 
   1612 LogicVRegister Simulator::sshr(VectorFormat vform,
   1613                                LogicVRegister dst,
   1614                                const LogicVRegister& src,
   1615                                int shift) {
   1616   VIXL_ASSERT(shift >= 0);
   1617   SimVRegister temp;
   1618   LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
   1619   return sshl(vform, dst, src, shiftreg);
   1620 }
   1621 
   1622 
   1623 LogicVRegister Simulator::ssra(VectorFormat vform,
   1624                                LogicVRegister dst,
   1625                                const LogicVRegister& src,
   1626                                int shift) {
   1627   SimVRegister temp;
   1628   LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
   1629   return add(vform, dst, dst, shifted_reg);
   1630 }
   1631 
   1632 
   1633 LogicVRegister Simulator::usra(VectorFormat vform,
   1634                                LogicVRegister dst,
   1635                                const LogicVRegister& src,
   1636                                int shift) {
   1637   SimVRegister temp;
   1638   LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
   1639   return add(vform, dst, dst, shifted_reg);
   1640 }
   1641 
   1642 
   1643 LogicVRegister Simulator::srsra(VectorFormat vform,
   1644                                 LogicVRegister dst,
   1645                                 const LogicVRegister& src,
   1646                                 int shift) {
   1647   SimVRegister temp;
   1648   LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
   1649   return add(vform, dst, dst, shifted_reg);
   1650 }
   1651 
   1652 
   1653 LogicVRegister Simulator::ursra(VectorFormat vform,
   1654                                 LogicVRegister dst,
   1655                                 const LogicVRegister& src,
   1656                                 int shift) {
   1657   SimVRegister temp;
   1658   LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
   1659   return add(vform, dst, dst, shifted_reg);
   1660 }
   1661 
   1662 
   1663 LogicVRegister Simulator::cls(VectorFormat vform,
   1664                               LogicVRegister dst,
   1665                               const LogicVRegister& src) {
   1666   uint64_t result[16];
   1667   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1668   int laneCount = LaneCountFromFormat(vform);
   1669   for (int i = 0; i < laneCount; i++) {
   1670     result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
   1671   }
   1672 
   1673   dst.ClearForWrite(vform);
   1674   for (int i = 0; i < laneCount; ++i) {
   1675     dst.SetUint(vform, i, result[i]);
   1676   }
   1677   return dst;
   1678 }
   1679 
   1680 
   1681 LogicVRegister Simulator::clz(VectorFormat vform,
   1682                               LogicVRegister dst,
   1683                               const LogicVRegister& src) {
   1684   uint64_t result[16];
   1685   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1686   int laneCount = LaneCountFromFormat(vform);
   1687   for (int i = 0; i < laneCount; i++) {
   1688     result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
   1689   }
   1690 
   1691   dst.ClearForWrite(vform);
   1692   for (int i = 0; i < laneCount; ++i) {
   1693     dst.SetUint(vform, i, result[i]);
   1694   }
   1695   return dst;
   1696 }
   1697 
   1698 
   1699 LogicVRegister Simulator::cnt(VectorFormat vform,
   1700                               LogicVRegister dst,
   1701                               const LogicVRegister& src) {
   1702   uint64_t result[16];
   1703   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   1704   int laneCount = LaneCountFromFormat(vform);
   1705   for (int i = 0; i < laneCount; i++) {
   1706     uint64_t value = src.Uint(vform, i);
   1707     result[i] = 0;
   1708     for (int j = 0; j < laneSizeInBits; j++) {
   1709       result[i] += (value & 1);
   1710       value >>= 1;
   1711     }
   1712   }
   1713 
   1714   dst.ClearForWrite(vform);
   1715   for (int i = 0; i < laneCount; ++i) {
   1716     dst.SetUint(vform, i, result[i]);
   1717   }
   1718   return dst;
   1719 }
   1720 
   1721 
   1722 LogicVRegister Simulator::sshl(VectorFormat vform,
   1723                                LogicVRegister dst,
   1724                                const LogicVRegister& src1,
   1725                                const LogicVRegister& src2) {
   1726   dst.ClearForWrite(vform);
   1727   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1728     int8_t shift_val = src2.Int(vform, i);
   1729     int64_t lj_src_val = src1.IntLeftJustified(vform, i);
   1730 
   1731     // Set signed saturation state.
   1732     if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
   1733       dst.SetSignedSat(i, lj_src_val >= 0);
   1734     }
   1735 
   1736     // Set unsigned saturation state.
   1737     if (lj_src_val < 0) {
   1738       dst.SetUnsignedSat(i, false);
   1739     } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
   1740                (lj_src_val != 0)) {
   1741       dst.SetUnsignedSat(i, true);
   1742     }
   1743 
   1744     int64_t src_val = src1.Int(vform, i);
   1745     bool src_is_negative = src_val < 0;
   1746     if (shift_val > 63) {
   1747       dst.SetInt(vform, i, 0);
   1748     } else if (shift_val < -63) {
   1749       dst.SetRounding(i, src_is_negative);
   1750       dst.SetInt(vform, i, src_is_negative ? -1 : 0);
   1751     } else {
   1752       // Use unsigned types for shifts, as behaviour is undefined for signed
   1753       // lhs.
   1754       uint64_t usrc_val = static_cast<uint64_t>(src_val);
   1755 
   1756       if (shift_val < 0) {
   1757         // Convert to right shift.
   1758         shift_val = -shift_val;
   1759 
   1760         // Set rounding state by testing most-significant bit shifted out.
   1761         // Rounding only needed on right shifts.
   1762         if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
   1763           dst.SetRounding(i, true);
   1764         }
   1765 
   1766         usrc_val >>= shift_val;
   1767 
   1768         if (src_is_negative) {
   1769           // Simulate sign-extension.
   1770           usrc_val |= (~UINT64_C(0) << (64 - shift_val));
   1771         }
   1772       } else {
   1773         usrc_val <<= shift_val;
   1774       }
   1775       dst.SetUint(vform, i, usrc_val);
   1776     }
   1777   }
   1778   return dst;
   1779 }
   1780 
   1781 
   1782 LogicVRegister Simulator::ushl(VectorFormat vform,
   1783                                LogicVRegister dst,
   1784                                const LogicVRegister& src1,
   1785                                const LogicVRegister& src2) {
   1786   dst.ClearForWrite(vform);
   1787   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1788     int8_t shift_val = src2.Int(vform, i);
   1789     uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
   1790 
   1791     // Set saturation state.
   1792     if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
   1793       dst.SetUnsignedSat(i, true);
   1794     }
   1795 
   1796     uint64_t src_val = src1.Uint(vform, i);
   1797     if ((shift_val > 63) || (shift_val < -64)) {
   1798       dst.SetUint(vform, i, 0);
   1799     } else {
   1800       if (shift_val < 0) {
   1801         // Set rounding state. Rounding only needed on right shifts.
   1802         if (((src_val >> (-shift_val - 1)) & 1) == 1) {
   1803           dst.SetRounding(i, true);
   1804         }
   1805 
   1806         if (shift_val == -64) {
   1807           src_val = 0;
   1808         } else {
   1809           src_val >>= -shift_val;
   1810         }
   1811       } else {
   1812         src_val <<= shift_val;
   1813       }
   1814       dst.SetUint(vform, i, src_val);
   1815     }
   1816   }
   1817   return dst;
   1818 }
   1819 
   1820 
   1821 LogicVRegister Simulator::neg(VectorFormat vform,
   1822                               LogicVRegister dst,
   1823                               const LogicVRegister& src) {
   1824   dst.ClearForWrite(vform);
   1825   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1826     // Test for signed saturation.
   1827     int64_t sa = src.Int(vform, i);
   1828     if (sa == MinIntFromFormat(vform)) {
   1829       dst.SetSignedSat(i, true);
   1830     }
   1831     dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
   1832   }
   1833   return dst;
   1834 }
   1835 
   1836 
   1837 LogicVRegister Simulator::suqadd(VectorFormat vform,
   1838                                  LogicVRegister dst,
   1839                                  const LogicVRegister& src) {
   1840   dst.ClearForWrite(vform);
   1841   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1842     int64_t sa = dst.IntLeftJustified(vform, i);
   1843     uint64_t ub = src.UintLeftJustified(vform, i);
   1844     uint64_t ur = sa + ub;
   1845 
   1846     int64_t sr;
   1847     memcpy(&sr, &ur, sizeof(sr));
   1848     if (sr < sa) {  // Test for signed positive saturation.
   1849       dst.SetInt(vform, i, MaxIntFromFormat(vform));
   1850     } else {
   1851       dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
   1852     }
   1853   }
   1854   return dst;
   1855 }
   1856 
   1857 
   1858 LogicVRegister Simulator::usqadd(VectorFormat vform,
   1859                                  LogicVRegister dst,
   1860                                  const LogicVRegister& src) {
   1861   dst.ClearForWrite(vform);
   1862   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1863     uint64_t ua = dst.UintLeftJustified(vform, i);
   1864     int64_t sb = src.IntLeftJustified(vform, i);
   1865     uint64_t ur = ua + sb;
   1866 
   1867     if ((sb > 0) && (ur <= ua)) {
   1868       dst.SetUint(vform, i, MaxUintFromFormat(vform));  // Positive saturation.
   1869     } else if ((sb < 0) && (ur >= ua)) {
   1870       dst.SetUint(vform, i, 0);  // Negative saturation.
   1871     } else {
   1872       dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
   1873     }
   1874   }
   1875   return dst;
   1876 }
   1877 
   1878 
   1879 LogicVRegister Simulator::abs(VectorFormat vform,
   1880                               LogicVRegister dst,
   1881                               const LogicVRegister& src) {
   1882   dst.ClearForWrite(vform);
   1883   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   1884     // Test for signed saturation.
   1885     int64_t sa = src.Int(vform, i);
   1886     if (sa == MinIntFromFormat(vform)) {
   1887       dst.SetSignedSat(i, true);
   1888     }
   1889     if (sa < 0) {
   1890       dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
   1891     } else {
   1892       dst.SetInt(vform, i, sa);
   1893     }
   1894   }
   1895   return dst;
   1896 }
   1897 
   1898 
   1899 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
   1900                                         LogicVRegister dst,
   1901                                         bool dstIsSigned,
   1902                                         const LogicVRegister& src,
   1903                                         bool srcIsSigned) {
   1904   bool upperhalf = false;
   1905   VectorFormat srcform = kFormatUndefined;
   1906   int64_t ssrc[8];
   1907   uint64_t usrc[8];
   1908 
   1909   switch (dstform) {
   1910     case kFormat8B:
   1911       upperhalf = false;
   1912       srcform = kFormat8H;
   1913       break;
   1914     case kFormat16B:
   1915       upperhalf = true;
   1916       srcform = kFormat8H;
   1917       break;
   1918     case kFormat4H:
   1919       upperhalf = false;
   1920       srcform = kFormat4S;
   1921       break;
   1922     case kFormat8H:
   1923       upperhalf = true;
   1924       srcform = kFormat4S;
   1925       break;
   1926     case kFormat2S:
   1927       upperhalf = false;
   1928       srcform = kFormat2D;
   1929       break;
   1930     case kFormat4S:
   1931       upperhalf = true;
   1932       srcform = kFormat2D;
   1933       break;
   1934     case kFormatB:
   1935       upperhalf = false;
   1936       srcform = kFormatH;
   1937       break;
   1938     case kFormatH:
   1939       upperhalf = false;
   1940       srcform = kFormatS;
   1941       break;
   1942     case kFormatS:
   1943       upperhalf = false;
   1944       srcform = kFormatD;
   1945       break;
   1946     default:
   1947       VIXL_UNIMPLEMENTED();
   1948   }
   1949 
   1950   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   1951     ssrc[i] = src.Int(srcform, i);
   1952     usrc[i] = src.Uint(srcform, i);
   1953   }
   1954 
   1955   int offset;
   1956   if (upperhalf) {
   1957     offset = LaneCountFromFormat(dstform) / 2;
   1958   } else {
   1959     offset = 0;
   1960     dst.ClearForWrite(dstform);
   1961   }
   1962 
   1963   for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
   1964     // Test for signed saturation
   1965     if (ssrc[i] > MaxIntFromFormat(dstform)) {
   1966       dst.SetSignedSat(offset + i, true);
   1967     } else if (ssrc[i] < MinIntFromFormat(dstform)) {
   1968       dst.SetSignedSat(offset + i, false);
   1969     }
   1970 
   1971     // Test for unsigned saturation
   1972     if (srcIsSigned) {
   1973       if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
   1974         dst.SetUnsignedSat(offset + i, true);
   1975       } else if (ssrc[i] < 0) {
   1976         dst.SetUnsignedSat(offset + i, false);
   1977       }
   1978     } else {
   1979       if (usrc[i] > MaxUintFromFormat(dstform)) {
   1980         dst.SetUnsignedSat(offset + i, true);
   1981       }
   1982     }
   1983 
   1984     int64_t result;
   1985     if (srcIsSigned) {
   1986       result = ssrc[i] & MaxUintFromFormat(dstform);
   1987     } else {
   1988       result = usrc[i] & MaxUintFromFormat(dstform);
   1989     }
   1990 
   1991     if (dstIsSigned) {
   1992       dst.SetInt(dstform, offset + i, result);
   1993     } else {
   1994       dst.SetUint(dstform, offset + i, result);
   1995     }
   1996   }
   1997   return dst;
   1998 }
   1999 
   2000 
   2001 LogicVRegister Simulator::xtn(VectorFormat vform,
   2002                               LogicVRegister dst,
   2003                               const LogicVRegister& src) {
   2004   return extractnarrow(vform, dst, true, src, true);
   2005 }
   2006 
   2007 
   2008 LogicVRegister Simulator::sqxtn(VectorFormat vform,
   2009                                 LogicVRegister dst,
   2010                                 const LogicVRegister& src) {
   2011   return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
   2012 }
   2013 
   2014 
   2015 LogicVRegister Simulator::sqxtun(VectorFormat vform,
   2016                                  LogicVRegister dst,
   2017                                  const LogicVRegister& src) {
   2018   return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
   2019 }
   2020 
   2021 
   2022 LogicVRegister Simulator::uqxtn(VectorFormat vform,
   2023                                 LogicVRegister dst,
   2024                                 const LogicVRegister& src) {
   2025   return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
   2026 }
   2027 
   2028 
   2029 LogicVRegister Simulator::absdiff(VectorFormat vform,
   2030                                   LogicVRegister dst,
   2031                                   const LogicVRegister& src1,
   2032                                   const LogicVRegister& src2,
   2033                                   bool issigned) {
   2034   dst.ClearForWrite(vform);
   2035   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2036     if (issigned) {
   2037       int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
   2038       sr = sr > 0 ? sr : -sr;
   2039       dst.SetInt(vform, i, sr);
   2040     } else {
   2041       int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
   2042       sr = sr > 0 ? sr : -sr;
   2043       dst.SetUint(vform, i, sr);
   2044     }
   2045   }
   2046   return dst;
   2047 }
   2048 
   2049 
   2050 LogicVRegister Simulator::saba(VectorFormat vform,
   2051                                LogicVRegister dst,
   2052                                const LogicVRegister& src1,
   2053                                const LogicVRegister& src2) {
   2054   SimVRegister temp;
   2055   dst.ClearForWrite(vform);
   2056   absdiff(vform, temp, src1, src2, true);
   2057   add(vform, dst, dst, temp);
   2058   return dst;
   2059 }
   2060 
   2061 
   2062 LogicVRegister Simulator::uaba(VectorFormat vform,
   2063                                LogicVRegister dst,
   2064                                const LogicVRegister& src1,
   2065                                const LogicVRegister& src2) {
   2066   SimVRegister temp;
   2067   dst.ClearForWrite(vform);
   2068   absdiff(vform, temp, src1, src2, false);
   2069   add(vform, dst, dst, temp);
   2070   return dst;
   2071 }
   2072 
   2073 
   2074 LogicVRegister Simulator::not_(VectorFormat vform,
   2075                                LogicVRegister dst,
   2076                                const LogicVRegister& src) {
   2077   dst.ClearForWrite(vform);
   2078   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2079     dst.SetUint(vform, i, ~src.Uint(vform, i));
   2080   }
   2081   return dst;
   2082 }
   2083 
   2084 
   2085 LogicVRegister Simulator::rbit(VectorFormat vform,
   2086                                LogicVRegister dst,
   2087                                const LogicVRegister& src) {
   2088   uint64_t result[16];
   2089   int laneCount = LaneCountFromFormat(vform);
   2090   int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
   2091   uint64_t reversed_value;
   2092   uint64_t value;
   2093   for (int i = 0; i < laneCount; i++) {
   2094     value = src.Uint(vform, i);
   2095     reversed_value = 0;
   2096     for (int j = 0; j < laneSizeInBits; j++) {
   2097       reversed_value = (reversed_value << 1) | (value & 1);
   2098       value >>= 1;
   2099     }
   2100     result[i] = reversed_value;
   2101   }
   2102 
   2103   dst.ClearForWrite(vform);
   2104   for (int i = 0; i < laneCount; ++i) {
   2105     dst.SetUint(vform, i, result[i]);
   2106   }
   2107   return dst;
   2108 }
   2109 
   2110 
   2111 LogicVRegister Simulator::rev(VectorFormat vform,
   2112                               LogicVRegister dst,
   2113                               const LogicVRegister& src,
   2114                               int revSize) {
   2115   uint64_t result[16];
   2116   int laneCount = LaneCountFromFormat(vform);
   2117   int laneSize = LaneSizeInBytesFromFormat(vform);
   2118   int lanesPerLoop = revSize / laneSize;
   2119   for (int i = 0; i < laneCount; i += lanesPerLoop) {
   2120     for (int j = 0; j < lanesPerLoop; j++) {
   2121       result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
   2122     }
   2123   }
   2124   dst.ClearForWrite(vform);
   2125   for (int i = 0; i < laneCount; ++i) {
   2126     dst.SetUint(vform, i, result[i]);
   2127   }
   2128   return dst;
   2129 }
   2130 
   2131 
   2132 LogicVRegister Simulator::rev16(VectorFormat vform,
   2133                                 LogicVRegister dst,
   2134                                 const LogicVRegister& src) {
   2135   return rev(vform, dst, src, 2);
   2136 }
   2137 
   2138 
   2139 LogicVRegister Simulator::rev32(VectorFormat vform,
   2140                                 LogicVRegister dst,
   2141                                 const LogicVRegister& src) {
   2142   return rev(vform, dst, src, 4);
   2143 }
   2144 
   2145 
   2146 LogicVRegister Simulator::rev64(VectorFormat vform,
   2147                                 LogicVRegister dst,
   2148                                 const LogicVRegister& src) {
   2149   return rev(vform, dst, src, 8);
   2150 }
   2151 
   2152 
   2153 LogicVRegister Simulator::addlp(VectorFormat vform,
   2154                                 LogicVRegister dst,
   2155                                 const LogicVRegister& src,
   2156                                 bool is_signed,
   2157                                 bool do_accumulate) {
   2158   VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
   2159   VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
   2160   VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
   2161 
   2162   uint64_t result[8];
   2163   int lane_count = LaneCountFromFormat(vform);
   2164   for (int i = 0; i < lane_count; i++) {
   2165     if (is_signed) {
   2166       result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
   2167                                         src.Int(vformsrc, 2 * i + 1));
   2168     } else {
   2169       result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
   2170     }
   2171   }
   2172 
   2173   dst.ClearForWrite(vform);
   2174   for (int i = 0; i < lane_count; ++i) {
   2175     if (do_accumulate) {
   2176       result[i] += dst.Uint(vform, i);
   2177     }
   2178     dst.SetUint(vform, i, result[i]);
   2179   }
   2180 
   2181   return dst;
   2182 }
   2183 
   2184 
   2185 LogicVRegister Simulator::saddlp(VectorFormat vform,
   2186                                  LogicVRegister dst,
   2187                                  const LogicVRegister& src) {
   2188   return addlp(vform, dst, src, true, false);
   2189 }
   2190 
   2191 
   2192 LogicVRegister Simulator::uaddlp(VectorFormat vform,
   2193                                  LogicVRegister dst,
   2194                                  const LogicVRegister& src) {
   2195   return addlp(vform, dst, src, false, false);
   2196 }
   2197 
   2198 
   2199 LogicVRegister Simulator::sadalp(VectorFormat vform,
   2200                                  LogicVRegister dst,
   2201                                  const LogicVRegister& src) {
   2202   return addlp(vform, dst, src, true, true);
   2203 }
   2204 
   2205 
   2206 LogicVRegister Simulator::uadalp(VectorFormat vform,
   2207                                  LogicVRegister dst,
   2208                                  const LogicVRegister& src) {
   2209   return addlp(vform, dst, src, false, true);
   2210 }
   2211 
   2212 
   2213 LogicVRegister Simulator::ext(VectorFormat vform,
   2214                               LogicVRegister dst,
   2215                               const LogicVRegister& src1,
   2216                               const LogicVRegister& src2,
   2217                               int index) {
   2218   uint8_t result[16];
   2219   int laneCount = LaneCountFromFormat(vform);
   2220   for (int i = 0; i < laneCount - index; ++i) {
   2221     result[i] = src1.Uint(vform, i + index);
   2222   }
   2223   for (int i = 0; i < index; ++i) {
   2224     result[laneCount - index + i] = src2.Uint(vform, i);
   2225   }
   2226   dst.ClearForWrite(vform);
   2227   for (int i = 0; i < laneCount; ++i) {
   2228     dst.SetUint(vform, i, result[i]);
   2229   }
   2230   return dst;
   2231 }
   2232 
   2233 template <typename T>
   2234 LogicVRegister Simulator::fcadd(VectorFormat vform,
   2235                                 LogicVRegister dst,          // d
   2236                                 const LogicVRegister& src1,  // n
   2237                                 const LogicVRegister& src2,  // m
   2238                                 int rot) {
   2239   int elements = LaneCountFromFormat(vform);
   2240 
   2241   T element1, element3;
   2242   rot = (rot == 1) ? 270 : 90;
   2243 
   2244   // Loop example:
   2245   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
   2246   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
   2247 
   2248   for (int e = 0; e <= (elements / 2) - 1; e++) {
   2249     switch (rot) {
   2250       case 90:
   2251         element1 = FPNeg(src2.Float<T>(e * 2 + 1));
   2252         element3 = src2.Float<T>(e * 2);
   2253         break;
   2254       case 270:
   2255         element1 = src2.Float<T>(e * 2 + 1);
   2256         element3 = FPNeg(src2.Float<T>(e * 2));
   2257         break;
   2258       default:
   2259         VIXL_UNREACHABLE();
   2260         return dst;  // prevents "element(n) may be unintialized" errors
   2261     }
   2262     dst.ClearForWrite(vform);
   2263     dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
   2264     dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
   2265   }
   2266   return dst;
   2267 }
   2268 
   2269 
   2270 LogicVRegister Simulator::fcadd(VectorFormat vform,
   2271                                 LogicVRegister dst,          // d
   2272                                 const LogicVRegister& src1,  // n
   2273                                 const LogicVRegister& src2,  // m
   2274                                 int rot) {
   2275   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   2276     VIXL_UNIMPLEMENTED();
   2277   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   2278     fcadd<float>(vform, dst, src1, src2, rot);
   2279   } else {
   2280     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   2281     fcadd<double>(vform, dst, src1, src2, rot);
   2282   }
   2283   return dst;
   2284 }
   2285 
   2286 
   2287 template <typename T>
   2288 LogicVRegister Simulator::fcmla(VectorFormat vform,
   2289                                 LogicVRegister dst,          // d
   2290                                 const LogicVRegister& src1,  // n
   2291                                 const LogicVRegister& src2,  // m
   2292                                 int index,
   2293                                 int rot) {
   2294   int elements = LaneCountFromFormat(vform);
   2295 
   2296   T element1, element2, element3, element4;
   2297   rot *= 90;
   2298 
   2299   // Loop example:
   2300   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
   2301   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
   2302 
   2303   for (int e = 0; e <= (elements / 2) - 1; e++) {
   2304     switch (rot) {
   2305       case 0:
   2306         element1 = src2.Float<T>(index * 2);
   2307         element2 = src1.Float<T>(e * 2);
   2308         element3 = src2.Float<T>(index * 2 + 1);
   2309         element4 = src1.Float<T>(e * 2);
   2310         break;
   2311       case 90:
   2312         element1 = FPNeg(src2.Float<T>(index * 2 + 1));
   2313         element2 = src1.Float<T>(e * 2 + 1);
   2314         element3 = src2.Float<T>(index * 2);
   2315         element4 = src1.Float<T>(e * 2 + 1);
   2316         break;
   2317       case 180:
   2318         element1 = FPNeg(src2.Float<T>(index * 2));
   2319         element2 = src1.Float<T>(e * 2);
   2320         element3 = FPNeg(src2.Float<T>(index * 2 + 1));
   2321         element4 = src1.Float<T>(e * 2);
   2322         break;
   2323       case 270:
   2324         element1 = src2.Float<T>(index * 2 + 1);
   2325         element2 = src1.Float<T>(e * 2 + 1);
   2326         element3 = FPNeg(src2.Float<T>(index * 2));
   2327         element4 = src1.Float<T>(e * 2 + 1);
   2328         break;
   2329       default:
   2330         VIXL_UNREACHABLE();
   2331         return dst;  // prevents "element(n) may be unintialized" errors
   2332     }
   2333     dst.ClearForWrite(vform);
   2334     dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1));
   2335     dst.SetFloat<T>(e * 2 + 1,
   2336                     FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3));
   2337   }
   2338   return dst;
   2339 }
   2340 
   2341 
   2342 template <typename T>
   2343 LogicVRegister Simulator::fcmla(VectorFormat vform,
   2344                                 LogicVRegister dst,          // d
   2345                                 const LogicVRegister& src1,  // n
   2346                                 const LogicVRegister& src2,  // m
   2347                                 int rot) {
   2348   int elements = LaneCountFromFormat(vform);
   2349 
   2350   T element1, element2, element3, element4;
   2351   rot *= 90;
   2352 
   2353   // Loop example:
   2354   // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
   2355   // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
   2356 
   2357   for (int e = 0; e <= (elements / 2) - 1; e++) {
   2358     switch (rot) {
   2359       case 0:
   2360         element1 = src2.Float<T>(e * 2);
   2361         element2 = src1.Float<T>(e * 2);
   2362         element3 = src2.Float<T>(e * 2 + 1);
   2363         element4 = src1.Float<T>(e * 2);
   2364         break;
   2365       case 90:
   2366         element1 = FPNeg(src2.Float<T>(e * 2 + 1));
   2367         element2 = src1.Float<T>(e * 2 + 1);
   2368         element3 = src2.Float<T>(e * 2);
   2369         element4 = src1.Float<T>(e * 2 + 1);
   2370         break;
   2371       case 180:
   2372         element1 = FPNeg(src2.Float<T>(e * 2));
   2373         element2 = src1.Float<T>(e * 2);
   2374         element3 = FPNeg(src2.Float<T>(e * 2 + 1));
   2375         element4 = src1.Float<T>(e * 2);
   2376         break;
   2377       case 270:
   2378         element1 = src2.Float<T>(e * 2 + 1);
   2379         element2 = src1.Float<T>(e * 2 + 1);
   2380         element3 = FPNeg(src2.Float<T>(e * 2));
   2381         element4 = src1.Float<T>(e * 2 + 1);
   2382         break;
   2383       default:
   2384         VIXL_UNREACHABLE();
   2385         return dst;  // prevents "element(n) may be unintialized" errors
   2386     }
   2387     dst.ClearForWrite(vform);
   2388     dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1));
   2389     dst.SetFloat<T>(e * 2 + 1,
   2390                     FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3));
   2391   }
   2392   return dst;
   2393 }
   2394 
   2395 
   2396 LogicVRegister Simulator::fcmla(VectorFormat vform,
   2397                                 LogicVRegister dst,          // d
   2398                                 const LogicVRegister& src1,  // n
   2399                                 const LogicVRegister& src2,  // m
   2400                                 int rot) {
   2401   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   2402     VIXL_UNIMPLEMENTED();
   2403   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   2404     fcmla<float>(vform, dst, src1, src2, rot);
   2405   } else {
   2406     fcmla<double>(vform, dst, src1, src2, rot);
   2407   }
   2408   return dst;
   2409 }
   2410 
   2411 
   2412 LogicVRegister Simulator::fcmla(VectorFormat vform,
   2413                                 LogicVRegister dst,          // d
   2414                                 const LogicVRegister& src1,  // n
   2415                                 const LogicVRegister& src2,  // m
   2416                                 int index,
   2417                                 int rot) {
   2418   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   2419     VIXL_UNIMPLEMENTED();
   2420   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   2421     fcmla<float>(vform, dst, src1, src2, index, rot);
   2422   } else {
   2423     fcmla<double>(vform, dst, src1, src2, index, rot);
   2424   }
   2425   return dst;
   2426 }
   2427 
   2428 
   2429 LogicVRegister Simulator::dup_element(VectorFormat vform,
   2430                                       LogicVRegister dst,
   2431                                       const LogicVRegister& src,
   2432                                       int src_index) {
   2433   int laneCount = LaneCountFromFormat(vform);
   2434   uint64_t value = src.Uint(vform, src_index);
   2435   dst.ClearForWrite(vform);
   2436   for (int i = 0; i < laneCount; ++i) {
   2437     dst.SetUint(vform, i, value);
   2438   }
   2439   return dst;
   2440 }
   2441 
   2442 
   2443 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
   2444                                         LogicVRegister dst,
   2445                                         uint64_t imm) {
   2446   int laneCount = LaneCountFromFormat(vform);
   2447   uint64_t value = imm & MaxUintFromFormat(vform);
   2448   dst.ClearForWrite(vform);
   2449   for (int i = 0; i < laneCount; ++i) {
   2450     dst.SetUint(vform, i, value);
   2451   }
   2452   return dst;
   2453 }
   2454 
   2455 
   2456 LogicVRegister Simulator::ins_element(VectorFormat vform,
   2457                                       LogicVRegister dst,
   2458                                       int dst_index,
   2459                                       const LogicVRegister& src,
   2460                                       int src_index) {
   2461   dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
   2462   return dst;
   2463 }
   2464 
   2465 
   2466 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
   2467                                         LogicVRegister dst,
   2468                                         int dst_index,
   2469                                         uint64_t imm) {
   2470   uint64_t value = imm & MaxUintFromFormat(vform);
   2471   dst.SetUint(vform, dst_index, value);
   2472   return dst;
   2473 }
   2474 
   2475 
   2476 LogicVRegister Simulator::movi(VectorFormat vform,
   2477                                LogicVRegister dst,
   2478                                uint64_t imm) {
   2479   int laneCount = LaneCountFromFormat(vform);
   2480   dst.ClearForWrite(vform);
   2481   for (int i = 0; i < laneCount; ++i) {
   2482     dst.SetUint(vform, i, imm);
   2483   }
   2484   return dst;
   2485 }
   2486 
   2487 
   2488 LogicVRegister Simulator::mvni(VectorFormat vform,
   2489                                LogicVRegister dst,
   2490                                uint64_t imm) {
   2491   int laneCount = LaneCountFromFormat(vform);
   2492   dst.ClearForWrite(vform);
   2493   for (int i = 0; i < laneCount; ++i) {
   2494     dst.SetUint(vform, i, ~imm);
   2495   }
   2496   return dst;
   2497 }
   2498 
   2499 
   2500 LogicVRegister Simulator::orr(VectorFormat vform,
   2501                               LogicVRegister dst,
   2502                               const LogicVRegister& src,
   2503                               uint64_t imm) {
   2504   uint64_t result[16];
   2505   int laneCount = LaneCountFromFormat(vform);
   2506   for (int i = 0; i < laneCount; ++i) {
   2507     result[i] = src.Uint(vform, i) | imm;
   2508   }
   2509   dst.ClearForWrite(vform);
   2510   for (int i = 0; i < laneCount; ++i) {
   2511     dst.SetUint(vform, i, result[i]);
   2512   }
   2513   return dst;
   2514 }
   2515 
   2516 
   2517 LogicVRegister Simulator::uxtl(VectorFormat vform,
   2518                                LogicVRegister dst,
   2519                                const LogicVRegister& src) {
   2520   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2521 
   2522   dst.ClearForWrite(vform);
   2523   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2524     dst.SetUint(vform, i, src.Uint(vform_half, i));
   2525   }
   2526   return dst;
   2527 }
   2528 
   2529 
   2530 LogicVRegister Simulator::sxtl(VectorFormat vform,
   2531                                LogicVRegister dst,
   2532                                const LogicVRegister& src) {
   2533   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2534 
   2535   dst.ClearForWrite(vform);
   2536   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2537     dst.SetInt(vform, i, src.Int(vform_half, i));
   2538   }
   2539   return dst;
   2540 }
   2541 
   2542 
   2543 LogicVRegister Simulator::uxtl2(VectorFormat vform,
   2544                                 LogicVRegister dst,
   2545                                 const LogicVRegister& src) {
   2546   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2547   int lane_count = LaneCountFromFormat(vform);
   2548 
   2549   dst.ClearForWrite(vform);
   2550   for (int i = 0; i < lane_count; i++) {
   2551     dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
   2552   }
   2553   return dst;
   2554 }
   2555 
   2556 
   2557 LogicVRegister Simulator::sxtl2(VectorFormat vform,
   2558                                 LogicVRegister dst,
   2559                                 const LogicVRegister& src) {
   2560   VectorFormat vform_half = VectorFormatHalfWidth(vform);
   2561   int lane_count = LaneCountFromFormat(vform);
   2562 
   2563   dst.ClearForWrite(vform);
   2564   for (int i = 0; i < lane_count; i++) {
   2565     dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
   2566   }
   2567   return dst;
   2568 }
   2569 
   2570 
   2571 LogicVRegister Simulator::shrn(VectorFormat vform,
   2572                                LogicVRegister dst,
   2573                                const LogicVRegister& src,
   2574                                int shift) {
   2575   SimVRegister temp;
   2576   VectorFormat vform_src = VectorFormatDoubleWidth(vform);
   2577   VectorFormat vform_dst = vform;
   2578   LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
   2579   return extractnarrow(vform_dst, dst, false, shifted_src, false);
   2580 }
   2581 
   2582 
   2583 LogicVRegister Simulator::shrn2(VectorFormat vform,
   2584                                 LogicVRegister dst,
   2585                                 const LogicVRegister& src,
   2586                                 int shift) {
   2587   SimVRegister temp;
   2588   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2589   VectorFormat vformdst = vform;
   2590   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
   2591   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2592 }
   2593 
   2594 
   2595 LogicVRegister Simulator::rshrn(VectorFormat vform,
   2596                                 LogicVRegister dst,
   2597                                 const LogicVRegister& src,
   2598                                 int shift) {
   2599   SimVRegister temp;
   2600   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2601   VectorFormat vformdst = vform;
   2602   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2603   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2604 }
   2605 
   2606 
   2607 LogicVRegister Simulator::rshrn2(VectorFormat vform,
   2608                                  LogicVRegister dst,
   2609                                  const LogicVRegister& src,
   2610                                  int shift) {
   2611   SimVRegister temp;
   2612   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2613   VectorFormat vformdst = vform;
   2614   LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
   2615   return extractnarrow(vformdst, dst, false, shifted_src, false);
   2616 }
   2617 
   2618 
   2619 LogicVRegister Simulator::Table(VectorFormat vform,
   2620                                 LogicVRegister dst,
   2621                                 const LogicVRegister& ind,
   2622                                 bool zero_out_of_bounds,
   2623                                 const LogicVRegister* tab1,
   2624                                 const LogicVRegister* tab2,
   2625                                 const LogicVRegister* tab3,
   2626                                 const LogicVRegister* tab4) {
   2627   VIXL_ASSERT(tab1 != NULL);
   2628   const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
   2629   uint64_t result[kMaxLanesPerVector];
   2630   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2631     result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
   2632   }
   2633   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   2634     uint64_t j = ind.Uint(vform, i);
   2635     int tab_idx = static_cast<int>(j >> 4);
   2636     int j_idx = static_cast<int>(j & 15);
   2637     if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
   2638       result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
   2639     }
   2640   }
   2641   dst.SetUintArray(vform, result);
   2642   return dst;
   2643 }
   2644 
   2645 
   2646 LogicVRegister Simulator::tbl(VectorFormat vform,
   2647                               LogicVRegister dst,
   2648                               const LogicVRegister& tab,
   2649                               const LogicVRegister& ind) {
   2650   return Table(vform, dst, ind, true, &tab);
   2651 }
   2652 
   2653 
   2654 LogicVRegister Simulator::tbl(VectorFormat vform,
   2655                               LogicVRegister dst,
   2656                               const LogicVRegister& tab,
   2657                               const LogicVRegister& tab2,
   2658                               const LogicVRegister& ind) {
   2659   return Table(vform, dst, ind, true, &tab, &tab2);
   2660 }
   2661 
   2662 
   2663 LogicVRegister Simulator::tbl(VectorFormat vform,
   2664                               LogicVRegister dst,
   2665                               const LogicVRegister& tab,
   2666                               const LogicVRegister& tab2,
   2667                               const LogicVRegister& tab3,
   2668                               const LogicVRegister& ind) {
   2669   return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
   2670 }
   2671 
   2672 
   2673 LogicVRegister Simulator::tbl(VectorFormat vform,
   2674                               LogicVRegister dst,
   2675                               const LogicVRegister& tab,
   2676                               const LogicVRegister& tab2,
   2677                               const LogicVRegister& tab3,
   2678                               const LogicVRegister& tab4,
   2679                               const LogicVRegister& ind) {
   2680   return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
   2681 }
   2682 
   2683 
   2684 LogicVRegister Simulator::tbx(VectorFormat vform,
   2685                               LogicVRegister dst,
   2686                               const LogicVRegister& tab,
   2687                               const LogicVRegister& ind) {
   2688   return Table(vform, dst, ind, false, &tab);
   2689 }
   2690 
   2691 
   2692 LogicVRegister Simulator::tbx(VectorFormat vform,
   2693                               LogicVRegister dst,
   2694                               const LogicVRegister& tab,
   2695                               const LogicVRegister& tab2,
   2696                               const LogicVRegister& ind) {
   2697   return Table(vform, dst, ind, false, &tab, &tab2);
   2698 }
   2699 
   2700 
   2701 LogicVRegister Simulator::tbx(VectorFormat vform,
   2702                               LogicVRegister dst,
   2703                               const LogicVRegister& tab,
   2704                               const LogicVRegister& tab2,
   2705                               const LogicVRegister& tab3,
   2706                               const LogicVRegister& ind) {
   2707   return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
   2708 }
   2709 
   2710 
   2711 LogicVRegister Simulator::tbx(VectorFormat vform,
   2712                               LogicVRegister dst,
   2713                               const LogicVRegister& tab,
   2714                               const LogicVRegister& tab2,
   2715                               const LogicVRegister& tab3,
   2716                               const LogicVRegister& tab4,
   2717                               const LogicVRegister& ind) {
   2718   return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
   2719 }
   2720 
   2721 
   2722 LogicVRegister Simulator::uqshrn(VectorFormat vform,
   2723                                  LogicVRegister dst,
   2724                                  const LogicVRegister& src,
   2725                                  int shift) {
   2726   return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2727 }
   2728 
   2729 
   2730 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
   2731                                   LogicVRegister dst,
   2732                                   const LogicVRegister& src,
   2733                                   int shift) {
   2734   return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2735 }
   2736 
   2737 
   2738 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
   2739                                   LogicVRegister dst,
   2740                                   const LogicVRegister& src,
   2741                                   int shift) {
   2742   return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
   2743 }
   2744 
   2745 
   2746 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
   2747                                    LogicVRegister dst,
   2748                                    const LogicVRegister& src,
   2749                                    int shift) {
   2750   return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
   2751 }
   2752 
   2753 
   2754 LogicVRegister Simulator::sqshrn(VectorFormat vform,
   2755                                  LogicVRegister dst,
   2756                                  const LogicVRegister& src,
   2757                                  int shift) {
   2758   SimVRegister temp;
   2759   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2760   VectorFormat vformdst = vform;
   2761   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2762   return sqxtn(vformdst, dst, shifted_src);
   2763 }
   2764 
   2765 
   2766 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
   2767                                   LogicVRegister dst,
   2768                                   const LogicVRegister& src,
   2769                                   int shift) {
   2770   SimVRegister temp;
   2771   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2772   VectorFormat vformdst = vform;
   2773   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2774   return sqxtn(vformdst, dst, shifted_src);
   2775 }
   2776 
   2777 
   2778 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
   2779                                   LogicVRegister dst,
   2780                                   const LogicVRegister& src,
   2781                                   int shift) {
   2782   SimVRegister temp;
   2783   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2784   VectorFormat vformdst = vform;
   2785   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2786   return sqxtn(vformdst, dst, shifted_src);
   2787 }
   2788 
   2789 
   2790 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
   2791                                    LogicVRegister dst,
   2792                                    const LogicVRegister& src,
   2793                                    int shift) {
   2794   SimVRegister temp;
   2795   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2796   VectorFormat vformdst = vform;
   2797   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2798   return sqxtn(vformdst, dst, shifted_src);
   2799 }
   2800 
   2801 
   2802 LogicVRegister Simulator::sqshrun(VectorFormat vform,
   2803                                   LogicVRegister dst,
   2804                                   const LogicVRegister& src,
   2805                                   int shift) {
   2806   SimVRegister temp;
   2807   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2808   VectorFormat vformdst = vform;
   2809   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2810   return sqxtun(vformdst, dst, shifted_src);
   2811 }
   2812 
   2813 
   2814 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
   2815                                    LogicVRegister dst,
   2816                                    const LogicVRegister& src,
   2817                                    int shift) {
   2818   SimVRegister temp;
   2819   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2820   VectorFormat vformdst = vform;
   2821   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
   2822   return sqxtun(vformdst, dst, shifted_src);
   2823 }
   2824 
   2825 
   2826 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
   2827                                    LogicVRegister dst,
   2828                                    const LogicVRegister& src,
   2829                                    int shift) {
   2830   SimVRegister temp;
   2831   VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
   2832   VectorFormat vformdst = vform;
   2833   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2834   return sqxtun(vformdst, dst, shifted_src);
   2835 }
   2836 
   2837 
   2838 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
   2839                                     LogicVRegister dst,
   2840                                     const LogicVRegister& src,
   2841                                     int shift) {
   2842   SimVRegister temp;
   2843   VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
   2844   VectorFormat vformdst = vform;
   2845   LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
   2846   return sqxtun(vformdst, dst, shifted_src);
   2847 }
   2848 
   2849 
   2850 LogicVRegister Simulator::uaddl(VectorFormat vform,
   2851                                 LogicVRegister dst,
   2852                                 const LogicVRegister& src1,
   2853                                 const LogicVRegister& src2) {
   2854   SimVRegister temp1, temp2;
   2855   uxtl(vform, temp1, src1);
   2856   uxtl(vform, temp2, src2);
   2857   add(vform, dst, temp1, temp2);
   2858   return dst;
   2859 }
   2860 
   2861 
   2862 LogicVRegister Simulator::uaddl2(VectorFormat vform,
   2863                                  LogicVRegister dst,
   2864                                  const LogicVRegister& src1,
   2865                                  const LogicVRegister& src2) {
   2866   SimVRegister temp1, temp2;
   2867   uxtl2(vform, temp1, src1);
   2868   uxtl2(vform, temp2, src2);
   2869   add(vform, dst, temp1, temp2);
   2870   return dst;
   2871 }
   2872 
   2873 
   2874 LogicVRegister Simulator::uaddw(VectorFormat vform,
   2875                                 LogicVRegister dst,
   2876                                 const LogicVRegister& src1,
   2877                                 const LogicVRegister& src2) {
   2878   SimVRegister temp;
   2879   uxtl(vform, temp, src2);
   2880   add(vform, dst, src1, temp);
   2881   return dst;
   2882 }
   2883 
   2884 
   2885 LogicVRegister Simulator::uaddw2(VectorFormat vform,
   2886                                  LogicVRegister dst,
   2887                                  const LogicVRegister& src1,
   2888                                  const LogicVRegister& src2) {
   2889   SimVRegister temp;
   2890   uxtl2(vform, temp, src2);
   2891   add(vform, dst, src1, temp);
   2892   return dst;
   2893 }
   2894 
   2895 
   2896 LogicVRegister Simulator::saddl(VectorFormat vform,
   2897                                 LogicVRegister dst,
   2898                                 const LogicVRegister& src1,
   2899                                 const LogicVRegister& src2) {
   2900   SimVRegister temp1, temp2;
   2901   sxtl(vform, temp1, src1);
   2902   sxtl(vform, temp2, src2);
   2903   add(vform, dst, temp1, temp2);
   2904   return dst;
   2905 }
   2906 
   2907 
   2908 LogicVRegister Simulator::saddl2(VectorFormat vform,
   2909                                  LogicVRegister dst,
   2910                                  const LogicVRegister& src1,
   2911                                  const LogicVRegister& src2) {
   2912   SimVRegister temp1, temp2;
   2913   sxtl2(vform, temp1, src1);
   2914   sxtl2(vform, temp2, src2);
   2915   add(vform, dst, temp1, temp2);
   2916   return dst;
   2917 }
   2918 
   2919 
   2920 LogicVRegister Simulator::saddw(VectorFormat vform,
   2921                                 LogicVRegister dst,
   2922                                 const LogicVRegister& src1,
   2923                                 const LogicVRegister& src2) {
   2924   SimVRegister temp;
   2925   sxtl(vform, temp, src2);
   2926   add(vform, dst, src1, temp);
   2927   return dst;
   2928 }
   2929 
   2930 
   2931 LogicVRegister Simulator::saddw2(VectorFormat vform,
   2932                                  LogicVRegister dst,
   2933                                  const LogicVRegister& src1,
   2934                                  const LogicVRegister& src2) {
   2935   SimVRegister temp;
   2936   sxtl2(vform, temp, src2);
   2937   add(vform, dst, src1, temp);
   2938   return dst;
   2939 }
   2940 
   2941 
   2942 LogicVRegister Simulator::usubl(VectorFormat vform,
   2943                                 LogicVRegister dst,
   2944                                 const LogicVRegister& src1,
   2945                                 const LogicVRegister& src2) {
   2946   SimVRegister temp1, temp2;
   2947   uxtl(vform, temp1, src1);
   2948   uxtl(vform, temp2, src2);
   2949   sub(vform, dst, temp1, temp2);
   2950   return dst;
   2951 }
   2952 
   2953 
   2954 LogicVRegister Simulator::usubl2(VectorFormat vform,
   2955                                  LogicVRegister dst,
   2956                                  const LogicVRegister& src1,
   2957                                  const LogicVRegister& src2) {
   2958   SimVRegister temp1, temp2;
   2959   uxtl2(vform, temp1, src1);
   2960   uxtl2(vform, temp2, src2);
   2961   sub(vform, dst, temp1, temp2);
   2962   return dst;
   2963 }
   2964 
   2965 
   2966 LogicVRegister Simulator::usubw(VectorFormat vform,
   2967                                 LogicVRegister dst,
   2968                                 const LogicVRegister& src1,
   2969                                 const LogicVRegister& src2) {
   2970   SimVRegister temp;
   2971   uxtl(vform, temp, src2);
   2972   sub(vform, dst, src1, temp);
   2973   return dst;
   2974 }
   2975 
   2976 
   2977 LogicVRegister Simulator::usubw2(VectorFormat vform,
   2978                                  LogicVRegister dst,
   2979                                  const LogicVRegister& src1,
   2980                                  const LogicVRegister& src2) {
   2981   SimVRegister temp;
   2982   uxtl2(vform, temp, src2);
   2983   sub(vform, dst, src1, temp);
   2984   return dst;
   2985 }
   2986 
   2987 
   2988 LogicVRegister Simulator::ssubl(VectorFormat vform,
   2989                                 LogicVRegister dst,
   2990                                 const LogicVRegister& src1,
   2991                                 const LogicVRegister& src2) {
   2992   SimVRegister temp1, temp2;
   2993   sxtl(vform, temp1, src1);
   2994   sxtl(vform, temp2, src2);
   2995   sub(vform, dst, temp1, temp2);
   2996   return dst;
   2997 }
   2998 
   2999 
   3000 LogicVRegister Simulator::ssubl2(VectorFormat vform,
   3001                                  LogicVRegister dst,
   3002                                  const LogicVRegister& src1,
   3003                                  const LogicVRegister& src2) {
   3004   SimVRegister temp1, temp2;
   3005   sxtl2(vform, temp1, src1);
   3006   sxtl2(vform, temp2, src2);
   3007   sub(vform, dst, temp1, temp2);
   3008   return dst;
   3009 }
   3010 
   3011 
   3012 LogicVRegister Simulator::ssubw(VectorFormat vform,
   3013                                 LogicVRegister dst,
   3014                                 const LogicVRegister& src1,
   3015                                 const LogicVRegister& src2) {
   3016   SimVRegister temp;
   3017   sxtl(vform, temp, src2);
   3018   sub(vform, dst, src1, temp);
   3019   return dst;
   3020 }
   3021 
   3022 
   3023 LogicVRegister Simulator::ssubw2(VectorFormat vform,
   3024                                  LogicVRegister dst,
   3025                                  const LogicVRegister& src1,
   3026                                  const LogicVRegister& src2) {
   3027   SimVRegister temp;
   3028   sxtl2(vform, temp, src2);
   3029   sub(vform, dst, src1, temp);
   3030   return dst;
   3031 }
   3032 
   3033 
   3034 LogicVRegister Simulator::uabal(VectorFormat vform,
   3035                                 LogicVRegister dst,
   3036                                 const LogicVRegister& src1,
   3037                                 const LogicVRegister& src2) {
   3038   SimVRegister temp1, temp2;
   3039   uxtl(vform, temp1, src1);
   3040   uxtl(vform, temp2, src2);
   3041   uaba(vform, dst, temp1, temp2);
   3042   return dst;
   3043 }
   3044 
   3045 
   3046 LogicVRegister Simulator::uabal2(VectorFormat vform,
   3047                                  LogicVRegister dst,
   3048                                  const LogicVRegister& src1,
   3049                                  const LogicVRegister& src2) {
   3050   SimVRegister temp1, temp2;
   3051   uxtl2(vform, temp1, src1);
   3052   uxtl2(vform, temp2, src2);
   3053   uaba(vform, dst, temp1, temp2);
   3054   return dst;
   3055 }
   3056 
   3057 
   3058 LogicVRegister Simulator::sabal(VectorFormat vform,
   3059                                 LogicVRegister dst,
   3060                                 const LogicVRegister& src1,
   3061                                 const LogicVRegister& src2) {
   3062   SimVRegister temp1, temp2;
   3063   sxtl(vform, temp1, src1);
   3064   sxtl(vform, temp2, src2);
   3065   saba(vform, dst, temp1, temp2);
   3066   return dst;
   3067 }
   3068 
   3069 
   3070 LogicVRegister Simulator::sabal2(VectorFormat vform,
   3071                                  LogicVRegister dst,
   3072                                  const LogicVRegister& src1,
   3073                                  const LogicVRegister& src2) {
   3074   SimVRegister temp1, temp2;
   3075   sxtl2(vform, temp1, src1);
   3076   sxtl2(vform, temp2, src2);
   3077   saba(vform, dst, temp1, temp2);
   3078   return dst;
   3079 }
   3080 
   3081 
   3082 LogicVRegister Simulator::uabdl(VectorFormat vform,
   3083                                 LogicVRegister dst,
   3084                                 const LogicVRegister& src1,
   3085                                 const LogicVRegister& src2) {
   3086   SimVRegister temp1, temp2;
   3087   uxtl(vform, temp1, src1);
   3088   uxtl(vform, temp2, src2);
   3089   absdiff(vform, dst, temp1, temp2, false);
   3090   return dst;
   3091 }
   3092 
   3093 
   3094 LogicVRegister Simulator::uabdl2(VectorFormat vform,
   3095                                  LogicVRegister dst,
   3096                                  const LogicVRegister& src1,
   3097                                  const LogicVRegister& src2) {
   3098   SimVRegister temp1, temp2;
   3099   uxtl2(vform, temp1, src1);
   3100   uxtl2(vform, temp2, src2);
   3101   absdiff(vform, dst, temp1, temp2, false);
   3102   return dst;
   3103 }
   3104 
   3105 
   3106 LogicVRegister Simulator::sabdl(VectorFormat vform,
   3107                                 LogicVRegister dst,
   3108                                 const LogicVRegister& src1,
   3109                                 const LogicVRegister& src2) {
   3110   SimVRegister temp1, temp2;
   3111   sxtl(vform, temp1, src1);
   3112   sxtl(vform, temp2, src2);
   3113   absdiff(vform, dst, temp1, temp2, true);
   3114   return dst;
   3115 }
   3116 
   3117 
   3118 LogicVRegister Simulator::sabdl2(VectorFormat vform,
   3119                                  LogicVRegister dst,
   3120                                  const LogicVRegister& src1,
   3121                                  const LogicVRegister& src2) {
   3122   SimVRegister temp1, temp2;
   3123   sxtl2(vform, temp1, src1);
   3124   sxtl2(vform, temp2, src2);
   3125   absdiff(vform, dst, temp1, temp2, true);
   3126   return dst;
   3127 }
   3128 
   3129 
   3130 LogicVRegister Simulator::umull(VectorFormat vform,
   3131                                 LogicVRegister dst,
   3132                                 const LogicVRegister& src1,
   3133                                 const LogicVRegister& src2) {
   3134   SimVRegister temp1, temp2;
   3135   uxtl(vform, temp1, src1);
   3136   uxtl(vform, temp2, src2);
   3137   mul(vform, dst, temp1, temp2);
   3138   return dst;
   3139 }
   3140 
   3141 
   3142 LogicVRegister Simulator::umull2(VectorFormat vform,
   3143                                  LogicVRegister dst,
   3144                                  const LogicVRegister& src1,
   3145                                  const LogicVRegister& src2) {
   3146   SimVRegister temp1, temp2;
   3147   uxtl2(vform, temp1, src1);
   3148   uxtl2(vform, temp2, src2);
   3149   mul(vform, dst, temp1, temp2);
   3150   return dst;
   3151 }
   3152 
   3153 
   3154 LogicVRegister Simulator::smull(VectorFormat vform,
   3155                                 LogicVRegister dst,
   3156                                 const LogicVRegister& src1,
   3157                                 const LogicVRegister& src2) {
   3158   SimVRegister temp1, temp2;
   3159   sxtl(vform, temp1, src1);
   3160   sxtl(vform, temp2, src2);
   3161   mul(vform, dst, temp1, temp2);
   3162   return dst;
   3163 }
   3164 
   3165 
   3166 LogicVRegister Simulator::smull2(VectorFormat vform,
   3167                                  LogicVRegister dst,
   3168                                  const LogicVRegister& src1,
   3169                                  const LogicVRegister& src2) {
   3170   SimVRegister temp1, temp2;
   3171   sxtl2(vform, temp1, src1);
   3172   sxtl2(vform, temp2, src2);
   3173   mul(vform, dst, temp1, temp2);
   3174   return dst;
   3175 }
   3176 
   3177 
   3178 LogicVRegister Simulator::umlsl(VectorFormat vform,
   3179                                 LogicVRegister dst,
   3180                                 const LogicVRegister& src1,
   3181                                 const LogicVRegister& src2) {
   3182   SimVRegister temp1, temp2;
   3183   uxtl(vform, temp1, src1);
   3184   uxtl(vform, temp2, src2);
   3185   mls(vform, dst, temp1, temp2);
   3186   return dst;
   3187 }
   3188 
   3189 
   3190 LogicVRegister Simulator::umlsl2(VectorFormat vform,
   3191                                  LogicVRegister dst,
   3192                                  const LogicVRegister& src1,
   3193                                  const LogicVRegister& src2) {
   3194   SimVRegister temp1, temp2;
   3195   uxtl2(vform, temp1, src1);
   3196   uxtl2(vform, temp2, src2);
   3197   mls(vform, dst, temp1, temp2);
   3198   return dst;
   3199 }
   3200 
   3201 
   3202 LogicVRegister Simulator::smlsl(VectorFormat vform,
   3203                                 LogicVRegister dst,
   3204                                 const LogicVRegister& src1,
   3205                                 const LogicVRegister& src2) {
   3206   SimVRegister temp1, temp2;
   3207   sxtl(vform, temp1, src1);
   3208   sxtl(vform, temp2, src2);
   3209   mls(vform, dst, temp1, temp2);
   3210   return dst;
   3211 }
   3212 
   3213 
   3214 LogicVRegister Simulator::smlsl2(VectorFormat vform,
   3215                                  LogicVRegister dst,
   3216                                  const LogicVRegister& src1,
   3217                                  const LogicVRegister& src2) {
   3218   SimVRegister temp1, temp2;
   3219   sxtl2(vform, temp1, src1);
   3220   sxtl2(vform, temp2, src2);
   3221   mls(vform, dst, temp1, temp2);
   3222   return dst;
   3223 }
   3224 
   3225 
   3226 LogicVRegister Simulator::umlal(VectorFormat vform,
   3227                                 LogicVRegister dst,
   3228                                 const LogicVRegister& src1,
   3229                                 const LogicVRegister& src2) {
   3230   SimVRegister temp1, temp2;
   3231   uxtl(vform, temp1, src1);
   3232   uxtl(vform, temp2, src2);
   3233   mla(vform, dst, temp1, temp2);
   3234   return dst;
   3235 }
   3236 
   3237 
   3238 LogicVRegister Simulator::umlal2(VectorFormat vform,
   3239                                  LogicVRegister dst,
   3240                                  const LogicVRegister& src1,
   3241                                  const LogicVRegister& src2) {
   3242   SimVRegister temp1, temp2;
   3243   uxtl2(vform, temp1, src1);
   3244   uxtl2(vform, temp2, src2);
   3245   mla(vform, dst, temp1, temp2);
   3246   return dst;
   3247 }
   3248 
   3249 
   3250 LogicVRegister Simulator::smlal(VectorFormat vform,
   3251                                 LogicVRegister dst,
   3252                                 const LogicVRegister& src1,
   3253                                 const LogicVRegister& src2) {
   3254   SimVRegister temp1, temp2;
   3255   sxtl(vform, temp1, src1);
   3256   sxtl(vform, temp2, src2);
   3257   mla(vform, dst, temp1, temp2);
   3258   return dst;
   3259 }
   3260 
   3261 
   3262 LogicVRegister Simulator::smlal2(VectorFormat vform,
   3263                                  LogicVRegister dst,
   3264                                  const LogicVRegister& src1,
   3265                                  const LogicVRegister& src2) {
   3266   SimVRegister temp1, temp2;
   3267   sxtl2(vform, temp1, src1);
   3268   sxtl2(vform, temp2, src2);
   3269   mla(vform, dst, temp1, temp2);
   3270   return dst;
   3271 }
   3272 
   3273 
   3274 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
   3275                                   LogicVRegister dst,
   3276                                   const LogicVRegister& src1,
   3277                                   const LogicVRegister& src2) {
   3278   SimVRegister temp;
   3279   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3280   return add(vform, dst, dst, product).SignedSaturate(vform);
   3281 }
   3282 
   3283 
   3284 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
   3285                                    LogicVRegister dst,
   3286                                    const LogicVRegister& src1,
   3287                                    const LogicVRegister& src2) {
   3288   SimVRegister temp;
   3289   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3290   return add(vform, dst, dst, product).SignedSaturate(vform);
   3291 }
   3292 
   3293 
   3294 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
   3295                                   LogicVRegister dst,
   3296                                   const LogicVRegister& src1,
   3297                                   const LogicVRegister& src2) {
   3298   SimVRegister temp;
   3299   LogicVRegister product = sqdmull(vform, temp, src1, src2);
   3300   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3301 }
   3302 
   3303 
   3304 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
   3305                                    LogicVRegister dst,
   3306                                    const LogicVRegister& src1,
   3307                                    const LogicVRegister& src2) {
   3308   SimVRegister temp;
   3309   LogicVRegister product = sqdmull2(vform, temp, src1, src2);
   3310   return sub(vform, dst, dst, product).SignedSaturate(vform);
   3311 }
   3312 
   3313 
   3314 LogicVRegister Simulator::sqdmull(VectorFormat vform,
   3315                                   LogicVRegister dst,
   3316                                   const LogicVRegister& src1,
   3317                                   const LogicVRegister& src2) {
   3318   SimVRegister temp;
   3319   LogicVRegister product = smull(vform, temp, src1, src2);
   3320   return add(vform, dst, product, product).SignedSaturate(vform);
   3321 }
   3322 
   3323 
   3324 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
   3325                                    LogicVRegister dst,
   3326                                    const LogicVRegister& src1,
   3327                                    const LogicVRegister& src2) {
   3328   SimVRegister temp;
   3329   LogicVRegister product = smull2(vform, temp, src1, src2);
   3330   return add(vform, dst, product, product).SignedSaturate(vform);
   3331 }
   3332 
   3333 
   3334 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
   3335                                    LogicVRegister dst,
   3336                                    const LogicVRegister& src1,
   3337                                    const LogicVRegister& src2,
   3338                                    bool round) {
   3339   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
   3340   // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
   3341   // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
   3342 
   3343   int esize = LaneSizeInBitsFromFormat(vform);
   3344   int round_const = round ? (1 << (esize - 2)) : 0;
   3345   int64_t product;
   3346 
   3347   dst.ClearForWrite(vform);
   3348   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3349     product = src1.Int(vform, i) * src2.Int(vform, i);
   3350     product += round_const;
   3351     product = product >> (esize - 1);
   3352 
   3353     if (product > MaxIntFromFormat(vform)) {
   3354       product = MaxIntFromFormat(vform);
   3355     } else if (product < MinIntFromFormat(vform)) {
   3356       product = MinIntFromFormat(vform);
   3357     }
   3358     dst.SetInt(vform, i, product);
   3359   }
   3360   return dst;
   3361 }
   3362 
   3363 
   3364 LogicVRegister Simulator::dot(VectorFormat vform,
   3365                               LogicVRegister dst,
   3366                               const LogicVRegister& src1,
   3367                               const LogicVRegister& src2,
   3368                               bool is_signed) {
   3369   VectorFormat quarter_vform =
   3370       VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
   3371 
   3372   dst.ClearForWrite(vform);
   3373   for (int e = 0; e < LaneCountFromFormat(vform); e++) {
   3374     int64_t result = 0;
   3375     int64_t element1, element2;
   3376     for (int i = 0; i < 4; i++) {
   3377       int index = 4 * e + i;
   3378       if (is_signed) {
   3379         element1 = src1.Int(quarter_vform, index);
   3380         element2 = src2.Int(quarter_vform, index);
   3381       } else {
   3382         element1 = src1.Uint(quarter_vform, index);
   3383         element2 = src2.Uint(quarter_vform, index);
   3384       }
   3385       result += element1 * element2;
   3386     }
   3387 
   3388     result += dst.Int(vform, e);
   3389     dst.SetInt(vform, e, result);
   3390   }
   3391   return dst;
   3392 }
   3393 
   3394 
   3395 LogicVRegister Simulator::sdot(VectorFormat vform,
   3396                                LogicVRegister dst,
   3397                                const LogicVRegister& src1,
   3398                                const LogicVRegister& src2) {
   3399   return dot(vform, dst, src1, src2, true);
   3400 }
   3401 
   3402 
   3403 LogicVRegister Simulator::udot(VectorFormat vform,
   3404                                LogicVRegister dst,
   3405                                const LogicVRegister& src1,
   3406                                const LogicVRegister& src2) {
   3407   return dot(vform, dst, src1, src2, false);
   3408 }
   3409 
   3410 
   3411 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
   3412                                     LogicVRegister dst,
   3413                                     const LogicVRegister& src1,
   3414                                     const LogicVRegister& src2,
   3415                                     bool round,
   3416                                     bool sub_op) {
   3417   // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
   3418   // To avoid this, we use:
   3419   //     (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
   3420   // which is same as:
   3421   //     (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
   3422 
   3423   int esize = LaneSizeInBitsFromFormat(vform);
   3424   int round_const = round ? (1 << (esize - 2)) : 0;
   3425   int64_t accum;
   3426 
   3427   dst.ClearForWrite(vform);
   3428   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   3429     accum = dst.Int(vform, i) << (esize - 1);
   3430     if (sub_op) {
   3431       accum -= src1.Int(vform, i) * src2.Int(vform, i);
   3432     } else {
   3433       accum += src1.Int(vform, i) * src2.Int(vform, i);
   3434     }
   3435     accum += round_const;
   3436     accum = accum >> (esize - 1);
   3437 
   3438     if (accum > MaxIntFromFormat(vform)) {
   3439       accum = MaxIntFromFormat(vform);
   3440     } else if (accum < MinIntFromFormat(vform)) {
   3441       accum = MinIntFromFormat(vform);
   3442     }
   3443     dst.SetInt(vform, i, accum);
   3444   }
   3445   return dst;
   3446 }
   3447 
   3448 
   3449 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
   3450                                    LogicVRegister dst,
   3451                                    const LogicVRegister& src1,
   3452                                    const LogicVRegister& src2,
   3453                                    bool round) {
   3454   return sqrdmlash(vform, dst, src1, src2, round, false);
   3455 }
   3456 
   3457 
   3458 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
   3459                                    LogicVRegister dst,
   3460                                    const LogicVRegister& src1,
   3461                                    const LogicVRegister& src2,
   3462                                    bool round) {
   3463   return sqrdmlash(vform, dst, src1, src2, round, true);
   3464 }
   3465 
   3466 
   3467 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
   3468                                   LogicVRegister dst,
   3469                                   const LogicVRegister& src1,
   3470                                   const LogicVRegister& src2) {
   3471   return sqrdmulh(vform, dst, src1, src2, false);
   3472 }
   3473 
   3474 
   3475 LogicVRegister Simulator::addhn(VectorFormat vform,
   3476                                 LogicVRegister dst,
   3477                                 const LogicVRegister& src1,
   3478                                 const LogicVRegister& src2) {
   3479   SimVRegister temp;
   3480   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3481   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3482   return dst;
   3483 }
   3484 
   3485 
   3486 LogicVRegister Simulator::addhn2(VectorFormat vform,
   3487                                  LogicVRegister dst,
   3488                                  const LogicVRegister& src1,
   3489                                  const LogicVRegister& src2) {
   3490   SimVRegister temp;
   3491   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3492   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3493   return dst;
   3494 }
   3495 
   3496 
   3497 LogicVRegister Simulator::raddhn(VectorFormat vform,
   3498                                  LogicVRegister dst,
   3499                                  const LogicVRegister& src1,
   3500                                  const LogicVRegister& src2) {
   3501   SimVRegister temp;
   3502   add(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3503   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3504   return dst;
   3505 }
   3506 
   3507 
   3508 LogicVRegister Simulator::raddhn2(VectorFormat vform,
   3509                                   LogicVRegister dst,
   3510                                   const LogicVRegister& src1,
   3511                                   const LogicVRegister& src2) {
   3512   SimVRegister temp;
   3513   add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3514   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3515   return dst;
   3516 }
   3517 
   3518 
   3519 LogicVRegister Simulator::subhn(VectorFormat vform,
   3520                                 LogicVRegister dst,
   3521                                 const LogicVRegister& src1,
   3522                                 const LogicVRegister& src2) {
   3523   SimVRegister temp;
   3524   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3525   shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3526   return dst;
   3527 }
   3528 
   3529 
   3530 LogicVRegister Simulator::subhn2(VectorFormat vform,
   3531                                  LogicVRegister dst,
   3532                                  const LogicVRegister& src1,
   3533                                  const LogicVRegister& src2) {
   3534   SimVRegister temp;
   3535   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3536   shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3537   return dst;
   3538 }
   3539 
   3540 
   3541 LogicVRegister Simulator::rsubhn(VectorFormat vform,
   3542                                  LogicVRegister dst,
   3543                                  const LogicVRegister& src1,
   3544                                  const LogicVRegister& src2) {
   3545   SimVRegister temp;
   3546   sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
   3547   rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3548   return dst;
   3549 }
   3550 
   3551 
   3552 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
   3553                                   LogicVRegister dst,
   3554                                   const LogicVRegister& src1,
   3555                                   const LogicVRegister& src2) {
   3556   SimVRegister temp;
   3557   sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
   3558   rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
   3559   return dst;
   3560 }
   3561 
   3562 
   3563 LogicVRegister Simulator::trn1(VectorFormat vform,
   3564                                LogicVRegister dst,
   3565                                const LogicVRegister& src1,
   3566                                const LogicVRegister& src2) {
   3567   uint64_t result[16];
   3568   int laneCount = LaneCountFromFormat(vform);
   3569   int pairs = laneCount / 2;
   3570   for (int i = 0; i < pairs; ++i) {
   3571     result[2 * i] = src1.Uint(vform, 2 * i);
   3572     result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
   3573   }
   3574 
   3575   dst.ClearForWrite(vform);
   3576   for (int i = 0; i < laneCount; ++i) {
   3577     dst.SetUint(vform, i, result[i]);
   3578   }
   3579   return dst;
   3580 }
   3581 
   3582 
   3583 LogicVRegister Simulator::trn2(VectorFormat vform,
   3584                                LogicVRegister dst,
   3585                                const LogicVRegister& src1,
   3586                                const LogicVRegister& src2) {
   3587   uint64_t result[16];
   3588   int laneCount = LaneCountFromFormat(vform);
   3589   int pairs = laneCount / 2;
   3590   for (int i = 0; i < pairs; ++i) {
   3591     result[2 * i] = src1.Uint(vform, (2 * i) + 1);
   3592     result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
   3593   }
   3594 
   3595   dst.ClearForWrite(vform);
   3596   for (int i = 0; i < laneCount; ++i) {
   3597     dst.SetUint(vform, i, result[i]);
   3598   }
   3599   return dst;
   3600 }
   3601 
   3602 
   3603 LogicVRegister Simulator::zip1(VectorFormat vform,
   3604                                LogicVRegister dst,
   3605                                const LogicVRegister& src1,
   3606                                const LogicVRegister& src2) {
   3607   uint64_t result[16];
   3608   int laneCount = LaneCountFromFormat(vform);
   3609   int pairs = laneCount / 2;
   3610   for (int i = 0; i < pairs; ++i) {
   3611     result[2 * i] = src1.Uint(vform, i);
   3612     result[(2 * i) + 1] = src2.Uint(vform, i);
   3613   }
   3614 
   3615   dst.ClearForWrite(vform);
   3616   for (int i = 0; i < laneCount; ++i) {
   3617     dst.SetUint(vform, i, result[i]);
   3618   }
   3619   return dst;
   3620 }
   3621 
   3622 
   3623 LogicVRegister Simulator::zip2(VectorFormat vform,
   3624                                LogicVRegister dst,
   3625                                const LogicVRegister& src1,
   3626                                const LogicVRegister& src2) {
   3627   uint64_t result[16];
   3628   int laneCount = LaneCountFromFormat(vform);
   3629   int pairs = laneCount / 2;
   3630   for (int i = 0; i < pairs; ++i) {
   3631     result[2 * i] = src1.Uint(vform, pairs + i);
   3632     result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
   3633   }
   3634 
   3635   dst.ClearForWrite(vform);
   3636   for (int i = 0; i < laneCount; ++i) {
   3637     dst.SetUint(vform, i, result[i]);
   3638   }
   3639   return dst;
   3640 }
   3641 
   3642 
   3643 LogicVRegister Simulator::uzp1(VectorFormat vform,
   3644                                LogicVRegister dst,
   3645                                const LogicVRegister& src1,
   3646                                const LogicVRegister& src2) {
   3647   uint64_t result[32];
   3648   int laneCount = LaneCountFromFormat(vform);
   3649   for (int i = 0; i < laneCount; ++i) {
   3650     result[i] = src1.Uint(vform, i);
   3651     result[laneCount + i] = src2.Uint(vform, i);
   3652   }
   3653 
   3654   dst.ClearForWrite(vform);
   3655   for (int i = 0; i < laneCount; ++i) {
   3656     dst.SetUint(vform, i, result[2 * i]);
   3657   }
   3658   return dst;
   3659 }
   3660 
   3661 
   3662 LogicVRegister Simulator::uzp2(VectorFormat vform,
   3663                                LogicVRegister dst,
   3664                                const LogicVRegister& src1,
   3665                                const LogicVRegister& src2) {
   3666   uint64_t result[32];
   3667   int laneCount = LaneCountFromFormat(vform);
   3668   for (int i = 0; i < laneCount; ++i) {
   3669     result[i] = src1.Uint(vform, i);
   3670     result[laneCount + i] = src2.Uint(vform, i);
   3671   }
   3672 
   3673   dst.ClearForWrite(vform);
   3674   for (int i = 0; i < laneCount; ++i) {
   3675     dst.SetUint(vform, i, result[(2 * i) + 1]);
   3676   }
   3677   return dst;
   3678 }
   3679 
   3680 
   3681 template <typename T>
   3682 T Simulator::FPNeg(T op) {
   3683   return -op;
   3684 }
   3685 
   3686 template <typename T>
   3687 T Simulator::FPAdd(T op1, T op2) {
   3688   T result = FPProcessNaNs(op1, op2);
   3689   if (IsNaN(result)) {
   3690     return result;
   3691   }
   3692 
   3693   if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
   3694     // inf + -inf returns the default NaN.
   3695     FPProcessException();
   3696     return FPDefaultNaN<T>();
   3697   } else {
   3698     // Other cases should be handled by standard arithmetic.
   3699     return op1 + op2;
   3700   }
   3701 }
   3702 
   3703 
   3704 template <typename T>
   3705 T Simulator::FPSub(T op1, T op2) {
   3706   // NaNs should be handled elsewhere.
   3707   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
   3708 
   3709   if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
   3710     // inf - inf returns the default NaN.
   3711     FPProcessException();
   3712     return FPDefaultNaN<T>();
   3713   } else {
   3714     // Other cases should be handled by standard arithmetic.
   3715     return op1 - op2;
   3716   }
   3717 }
   3718 
   3719 
   3720 template <typename T>
   3721 T Simulator::FPMul(T op1, T op2) {
   3722   // NaNs should be handled elsewhere.
   3723   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
   3724 
   3725   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
   3726     // inf * 0.0 returns the default NaN.
   3727     FPProcessException();
   3728     return FPDefaultNaN<T>();
   3729   } else {
   3730     // Other cases should be handled by standard arithmetic.
   3731     return op1 * op2;
   3732   }
   3733 }
   3734 
   3735 
   3736 template <typename T>
   3737 T Simulator::FPMulx(T op1, T op2) {
   3738   if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
   3739     // inf * 0.0 returns +/-2.0.
   3740     T two = 2.0;
   3741     return copysign(1.0, op1) * copysign(1.0, op2) * two;
   3742   }
   3743   return FPMul(op1, op2);
   3744 }
   3745 
   3746 
   3747 template <typename T>
   3748 T Simulator::FPMulAdd(T a, T op1, T op2) {
   3749   T result = FPProcessNaNs3(a, op1, op2);
   3750 
   3751   T sign_a = copysign(1.0, a);
   3752   T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
   3753   bool isinf_prod = IsInf(op1) || IsInf(op2);
   3754   bool operation_generates_nan =
   3755       (IsInf(op1) && (op2 == 0.0)) ||                     // inf * 0.0
   3756       (IsInf(op2) && (op1 == 0.0)) ||                     // 0.0 * inf
   3757       (IsInf(a) && isinf_prod && (sign_a != sign_prod));  // inf - inf
   3758 
   3759   if (IsNaN(result)) {
   3760     // Generated NaNs override quiet NaNs propagated from a.
   3761     if (operation_generates_nan && IsQuietNaN(a)) {
   3762       FPProcessException();
   3763       return FPDefaultNaN<T>();
   3764     } else {
   3765       return result;
   3766     }
   3767   }
   3768 
   3769   // If the operation would produce a NaN, return the default NaN.
   3770   if (operation_generates_nan) {
   3771     FPProcessException();
   3772     return FPDefaultNaN<T>();
   3773   }
   3774 
   3775   // Work around broken fma implementations for exact zero results: The sign of
   3776   // exact 0.0 results is positive unless both a and op1 * op2 are negative.
   3777   if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
   3778     return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
   3779   }
   3780 
   3781   result = FusedMultiplyAdd(op1, op2, a);
   3782   VIXL_ASSERT(!IsNaN(result));
   3783 
   3784   // Work around broken fma implementations for rounded zero results: If a is
   3785   // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
   3786   if ((a == 0.0) && (result == 0.0)) {
   3787     return copysign(0.0, sign_prod);
   3788   }
   3789 
   3790   return result;
   3791 }
   3792 
   3793 
   3794 template <typename T>
   3795 T Simulator::FPDiv(T op1, T op2) {
   3796   // NaNs should be handled elsewhere.
   3797   VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
   3798 
   3799   if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
   3800     // inf / inf and 0.0 / 0.0 return the default NaN.
   3801     FPProcessException();
   3802     return FPDefaultNaN<T>();
   3803   } else {
   3804     if (op2 == 0.0) {
   3805       FPProcessException();
   3806       if (!IsNaN(op1)) {
   3807         double op1_sign = copysign(1.0, op1);
   3808         double op2_sign = copysign(1.0, op2);
   3809         return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
   3810       }
   3811     }
   3812 
   3813     // Other cases should be handled by standard arithmetic.
   3814     return op1 / op2;
   3815   }
   3816 }
   3817 
   3818 
   3819 template <typename T>
   3820 T Simulator::FPSqrt(T op) {
   3821   if (IsNaN(op)) {
   3822     return FPProcessNaN(op);
   3823   } else if (op < T(0.0)) {
   3824     FPProcessException();
   3825     return FPDefaultNaN<T>();
   3826   } else {
   3827     return sqrt(op);
   3828   }
   3829 }
   3830 
   3831 
   3832 template <typename T>
   3833 T Simulator::FPMax(T a, T b) {
   3834   T result = FPProcessNaNs(a, b);
   3835   if (IsNaN(result)) return result;
   3836 
   3837   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
   3838     // a and b are zero, and the sign differs: return +0.0.
   3839     return 0.0;
   3840   } else {
   3841     return (a > b) ? a : b;
   3842   }
   3843 }
   3844 
   3845 
   3846 template <typename T>
   3847 T Simulator::FPMaxNM(T a, T b) {
   3848   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3849     a = kFP64NegativeInfinity;
   3850   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3851     b = kFP64NegativeInfinity;
   3852   }
   3853 
   3854   T result = FPProcessNaNs(a, b);
   3855   return IsNaN(result) ? result : FPMax(a, b);
   3856 }
   3857 
   3858 
   3859 template <typename T>
   3860 T Simulator::FPMin(T a, T b) {
   3861   T result = FPProcessNaNs(a, b);
   3862   if (IsNaN(result)) return result;
   3863 
   3864   if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
   3865     // a and b are zero, and the sign differs: return -0.0.
   3866     return -0.0;
   3867   } else {
   3868     return (a < b) ? a : b;
   3869   }
   3870 }
   3871 
   3872 
   3873 template <typename T>
   3874 T Simulator::FPMinNM(T a, T b) {
   3875   if (IsQuietNaN(a) && !IsQuietNaN(b)) {
   3876     a = kFP64PositiveInfinity;
   3877   } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
   3878     b = kFP64PositiveInfinity;
   3879   }
   3880 
   3881   T result = FPProcessNaNs(a, b);
   3882   return IsNaN(result) ? result : FPMin(a, b);
   3883 }
   3884 
   3885 
   3886 template <typename T>
   3887 T Simulator::FPRecipStepFused(T op1, T op2) {
   3888   const T two = 2.0;
   3889   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
   3890     return two;
   3891   } else if (IsInf(op1) || IsInf(op2)) {
   3892     // Return +inf if signs match, otherwise -inf.
   3893     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3894                                           : kFP64NegativeInfinity;
   3895   } else {
   3896     return FusedMultiplyAdd(op1, op2, two);
   3897   }
   3898 }
   3899 
   3900 template <typename T>
   3901 bool IsNormal(T value) {
   3902   return std::isnormal(value);
   3903 }
   3904 
   3905 template <>
   3906 bool IsNormal(SimFloat16 value) {
   3907   uint16_t rawbits = Float16ToRawbits(value);
   3908   uint16_t exp_mask = 0x7c00;
   3909   // Check that the exponent is neither all zeroes or all ones.
   3910   return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
   3911 }
   3912 
   3913 
   3914 template <typename T>
   3915 T Simulator::FPRSqrtStepFused(T op1, T op2) {
   3916   const T one_point_five = 1.5;
   3917   const T two = 2.0;
   3918 
   3919   if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
   3920     return one_point_five;
   3921   } else if (IsInf(op1) || IsInf(op2)) {
   3922     // Return +inf if signs match, otherwise -inf.
   3923     return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
   3924                                           : kFP64NegativeInfinity;
   3925   } else {
   3926     // The multiply-add-halve operation must be fully fused, so avoid interim
   3927     // rounding by checking which operand can be losslessly divided by two
   3928     // before doing the multiply-add.
   3929     if (IsNormal(op1 / two)) {
   3930       return FusedMultiplyAdd(op1 / two, op2, one_point_five);
   3931     } else if (IsNormal(op2 / two)) {
   3932       return FusedMultiplyAdd(op1, op2 / two, one_point_five);
   3933     } else {
   3934       // Neither operand is normal after halving: the result is dominated by
   3935       // the addition term, so just return that.
   3936       return one_point_five;
   3937     }
   3938   }
   3939 }
   3940 
   3941 int32_t Simulator::FPToFixedJS(double value) {
   3942   // The Z-flag is set when the conversion from double precision floating-point
   3943   // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
   3944   // outside the bounds of a 32-bit integer, or isn't an exact integer then the
   3945   // Z-flag is unset.
   3946   int Z = 1;
   3947   int32_t result;
   3948 
   3949   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
   3950       (value == kFP64NegativeInfinity)) {
   3951     // +/- zero and infinity all return zero, however -0 and +/- Infinity also
   3952     // unset the Z-flag.
   3953     result = 0.0;
   3954     if ((value != 0.0) || std::signbit(value)) {
   3955       Z = 0;
   3956     }
   3957   } else if (std::isnan(value)) {
   3958     // NaN values unset the Z-flag and set the result to 0.
   3959     FPProcessNaN(value);
   3960     result = 0;
   3961     Z = 0;
   3962   } else {
   3963     // All other values are converted to an integer representation, rounded
   3964     // toward zero.
   3965     double int_result = std::floor(value);
   3966     double error = value - int_result;
   3967 
   3968     if ((error != 0.0) && (int_result < 0.0)) {
   3969       int_result++;
   3970     }
   3971 
   3972     // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
   3973     // write a one-liner with std::round, but the behaviour on ties is incorrect
   3974     // for our purposes.
   3975     double mod_const = static_cast<double>(UINT64_C(1) << 32);
   3976     double mod_error =
   3977         (int_result / mod_const) - std::floor(int_result / mod_const);
   3978     double constrained;
   3979     if (mod_error == 0.5) {
   3980       constrained = INT32_MIN;
   3981     } else {
   3982       constrained = int_result - mod_const * round(int_result / mod_const);
   3983     }
   3984 
   3985     VIXL_ASSERT(std::floor(constrained) == constrained);
   3986     VIXL_ASSERT(constrained >= INT32_MIN);
   3987     VIXL_ASSERT(constrained <= INT32_MAX);
   3988 
   3989     // Take the bottom 32 bits of the result as a 32-bit integer.
   3990     result = static_cast<int32_t>(constrained);
   3991 
   3992     if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
   3993         (error != 0.0)) {
   3994       // If the integer result is out of range or the conversion isn't exact,
   3995       // take exception and unset the Z-flag.
   3996       FPProcessException();
   3997       Z = 0;
   3998     }
   3999   }
   4000 
   4001   ReadNzcv().SetN(0);
   4002   ReadNzcv().SetZ(Z);
   4003   ReadNzcv().SetC(0);
   4004   ReadNzcv().SetV(0);
   4005 
   4006   return result;
   4007 }
   4008 
   4009 
   4010 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
   4011   if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
   4012       (value == kFP64NegativeInfinity)) {
   4013     return value;
   4014   } else if (IsNaN(value)) {
   4015     return FPProcessNaN(value);
   4016   }
   4017 
   4018   double int_result = std::floor(value);
   4019   double error = value - int_result;
   4020   switch (round_mode) {
   4021     case FPTieAway: {
   4022       // Take care of correctly handling the range ]-0.5, -0.0], which must
   4023       // yield -0.0.
   4024       if ((-0.5 < value) && (value < 0.0)) {
   4025         int_result = -0.0;
   4026 
   4027       } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
   4028         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   4029         // result is positive, round up.
   4030         int_result++;
   4031       }
   4032       break;
   4033     }
   4034     case FPTieEven: {
   4035       // Take care of correctly handling the range [-0.5, -0.0], which must
   4036       // yield -0.0.
   4037       if ((-0.5 <= value) && (value < 0.0)) {
   4038         int_result = -0.0;
   4039 
   4040         // If the error is greater than 0.5, or is equal to 0.5 and the integer
   4041         // result is odd, round up.
   4042       } else if ((error > 0.5) ||
   4043                  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
   4044         int_result++;
   4045       }
   4046       break;
   4047     }
   4048     case FPZero: {
   4049       // If value>0 then we take floor(value)
   4050       // otherwise, ceil(value).
   4051       if (value < 0) {
   4052         int_result = ceil(value);
   4053       }
   4054       break;
   4055     }
   4056     case FPNegativeInfinity: {
   4057       // We always use floor(value).
   4058       break;
   4059     }
   4060     case FPPositiveInfinity: {
   4061       // Take care of correctly handling the range ]-1.0, -0.0], which must
   4062       // yield -0.0.
   4063       if ((-1.0 < value) && (value < 0.0)) {
   4064         int_result = -0.0;
   4065 
   4066         // If the error is non-zero, round up.
   4067       } else if (error > 0.0) {
   4068         int_result++;
   4069       }
   4070       break;
   4071     }
   4072     default:
   4073       VIXL_UNIMPLEMENTED();
   4074   }
   4075   return int_result;
   4076 }
   4077 
   4078 
   4079 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
   4080   value = FPRoundInt(value, rmode);
   4081   if (value >= kHMaxInt) {
   4082     return kHMaxInt;
   4083   } else if (value < kHMinInt) {
   4084     return kHMinInt;
   4085   }
   4086   return IsNaN(value) ? 0 : static_cast<int16_t>(value);
   4087 }
   4088 
   4089 
   4090 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
   4091   value = FPRoundInt(value, rmode);
   4092   if (value >= kWMaxInt) {
   4093     return kWMaxInt;
   4094   } else if (value < kWMinInt) {
   4095     return kWMinInt;
   4096   }
   4097   return IsNaN(value) ? 0 : static_cast<int32_t>(value);
   4098 }
   4099 
   4100 
   4101 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
   4102   value = FPRoundInt(value, rmode);
   4103   if (value >= kXMaxInt) {
   4104     return kXMaxInt;
   4105   } else if (value < kXMinInt) {
   4106     return kXMinInt;
   4107   }
   4108   return IsNaN(value) ? 0 : static_cast<int64_t>(value);
   4109 }
   4110 
   4111 
   4112 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
   4113   value = FPRoundInt(value, rmode);
   4114   if (value >= kHMaxUInt) {
   4115     return kHMaxUInt;
   4116   } else if (value < 0.0) {
   4117     return 0;
   4118   }
   4119   return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
   4120 }
   4121 
   4122 
   4123 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
   4124   value = FPRoundInt(value, rmode);
   4125   if (value >= kWMaxUInt) {
   4126     return kWMaxUInt;
   4127   } else if (value < 0.0) {
   4128     return 0;
   4129   }
   4130   return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
   4131 }
   4132 
   4133 
   4134 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
   4135   value = FPRoundInt(value, rmode);
   4136   if (value >= kXMaxUInt) {
   4137     return kXMaxUInt;
   4138   } else if (value < 0.0) {
   4139     return 0;
   4140   }
   4141   return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
   4142 }
   4143 
   4144 
   4145 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                \
   4146   template <typename T>                                          \
   4147   LogicVRegister Simulator::FN(VectorFormat vform,               \
   4148                                LogicVRegister dst,               \
   4149                                const LogicVRegister& src1,       \
   4150                                const LogicVRegister& src2) {     \
   4151     dst.ClearForWrite(vform);                                    \
   4152     for (int i = 0; i < LaneCountFromFormat(vform); i++) {       \
   4153       T op1 = src1.Float<T>(i);                                  \
   4154       T op2 = src2.Float<T>(i);                                  \
   4155       T result;                                                  \
   4156       if (PROCNAN) {                                             \
   4157         result = FPProcessNaNs(op1, op2);                        \
   4158         if (!IsNaN(result)) {                                    \
   4159           result = OP(op1, op2);                                 \
   4160         }                                                        \
   4161       } else {                                                   \
   4162         result = OP(op1, op2);                                   \
   4163       }                                                          \
   4164       dst.SetFloat(i, result);                                   \
   4165     }                                                            \
   4166     return dst;                                                  \
   4167   }                                                              \
   4168                                                                  \
   4169   LogicVRegister Simulator::FN(VectorFormat vform,               \
   4170                                LogicVRegister dst,               \
   4171                                const LogicVRegister& src1,       \
   4172                                const LogicVRegister& src2) {     \
   4173     if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {          \
   4174       FN<SimFloat16>(vform, dst, src1, src2);                    \
   4175     } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {   \
   4176       FN<float>(vform, dst, src1, src2);                         \
   4177     } else {                                                     \
   4178       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
   4179       FN<double>(vform, dst, src1, src2);                        \
   4180     }                                                            \
   4181     return dst;                                                  \
   4182   }
   4183 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
   4184 #undef DEFINE_NEON_FP_VECTOR_OP
   4185 
   4186 
   4187 LogicVRegister Simulator::fnmul(VectorFormat vform,
   4188                                 LogicVRegister dst,
   4189                                 const LogicVRegister& src1,
   4190                                 const LogicVRegister& src2) {
   4191   SimVRegister temp;
   4192   LogicVRegister product = fmul(vform, temp, src1, src2);
   4193   return fneg(vform, dst, product);
   4194 }
   4195 
   4196 
   4197 template <typename T>
   4198 LogicVRegister Simulator::frecps(VectorFormat vform,
   4199                                  LogicVRegister dst,
   4200                                  const LogicVRegister& src1,
   4201                                  const LogicVRegister& src2) {
   4202   dst.ClearForWrite(vform);
   4203   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4204     T op1 = -src1.Float<T>(i);
   4205     T op2 = src2.Float<T>(i);
   4206     T result = FPProcessNaNs(op1, op2);
   4207     dst.SetFloat(i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
   4208   }
   4209   return dst;
   4210 }
   4211 
   4212 
   4213 LogicVRegister Simulator::frecps(VectorFormat vform,
   4214                                  LogicVRegister dst,
   4215                                  const LogicVRegister& src1,
   4216                                  const LogicVRegister& src2) {
   4217   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4218     frecps<SimFloat16>(vform, dst, src1, src2);
   4219   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4220     frecps<float>(vform, dst, src1, src2);
   4221   } else {
   4222     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4223     frecps<double>(vform, dst, src1, src2);
   4224   }
   4225   return dst;
   4226 }
   4227 
   4228 
   4229 template <typename T>
   4230 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   4231                                   LogicVRegister dst,
   4232                                   const LogicVRegister& src1,
   4233                                   const LogicVRegister& src2) {
   4234   dst.ClearForWrite(vform);
   4235   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4236     T op1 = -src1.Float<T>(i);
   4237     T op2 = src2.Float<T>(i);
   4238     T result = FPProcessNaNs(op1, op2);
   4239     dst.SetFloat(i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
   4240   }
   4241   return dst;
   4242 }
   4243 
   4244 
   4245 LogicVRegister Simulator::frsqrts(VectorFormat vform,
   4246                                   LogicVRegister dst,
   4247                                   const LogicVRegister& src1,
   4248                                   const LogicVRegister& src2) {
   4249   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4250     frsqrts<SimFloat16>(vform, dst, src1, src2);
   4251   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4252     frsqrts<float>(vform, dst, src1, src2);
   4253   } else {
   4254     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4255     frsqrts<double>(vform, dst, src1, src2);
   4256   }
   4257   return dst;
   4258 }
   4259 
   4260 
   4261 template <typename T>
   4262 LogicVRegister Simulator::fcmp(VectorFormat vform,
   4263                                LogicVRegister dst,
   4264                                const LogicVRegister& src1,
   4265                                const LogicVRegister& src2,
   4266                                Condition cond) {
   4267   dst.ClearForWrite(vform);
   4268   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4269     bool result = false;
   4270     T op1 = src1.Float<T>(i);
   4271     T op2 = src2.Float<T>(i);
   4272     T nan_result = FPProcessNaNs(op1, op2);
   4273     if (!IsNaN(nan_result)) {
   4274       switch (cond) {
   4275         case eq:
   4276           result = (op1 == op2);
   4277           break;
   4278         case ge:
   4279           result = (op1 >= op2);
   4280           break;
   4281         case gt:
   4282           result = (op1 > op2);
   4283           break;
   4284         case le:
   4285           result = (op1 <= op2);
   4286           break;
   4287         case lt:
   4288           result = (op1 < op2);
   4289           break;
   4290         default:
   4291           VIXL_UNREACHABLE();
   4292           break;
   4293       }
   4294     }
   4295     dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
   4296   }
   4297   return dst;
   4298 }
   4299 
   4300 
   4301 LogicVRegister Simulator::fcmp(VectorFormat vform,
   4302                                LogicVRegister dst,
   4303                                const LogicVRegister& src1,
   4304                                const LogicVRegister& src2,
   4305                                Condition cond) {
   4306   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4307     fcmp<SimFloat16>(vform, dst, src1, src2, cond);
   4308   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4309     fcmp<float>(vform, dst, src1, src2, cond);
   4310   } else {
   4311     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4312     fcmp<double>(vform, dst, src1, src2, cond);
   4313   }
   4314   return dst;
   4315 }
   4316 
   4317 
   4318 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
   4319                                     LogicVRegister dst,
   4320                                     const LogicVRegister& src,
   4321                                     Condition cond) {
   4322   SimVRegister temp;
   4323   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4324     LogicVRegister zero_reg =
   4325         dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
   4326     fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
   4327   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4328     LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
   4329     fcmp<float>(vform, dst, src, zero_reg, cond);
   4330   } else {
   4331     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4332     LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
   4333     fcmp<double>(vform, dst, src, zero_reg, cond);
   4334   }
   4335   return dst;
   4336 }
   4337 
   4338 
   4339 LogicVRegister Simulator::fabscmp(VectorFormat vform,
   4340                                   LogicVRegister dst,
   4341                                   const LogicVRegister& src1,
   4342                                   const LogicVRegister& src2,
   4343                                   Condition cond) {
   4344   SimVRegister temp1, temp2;
   4345   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4346     LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
   4347     LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
   4348     fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
   4349   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4350     LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
   4351     LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
   4352     fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
   4353   } else {
   4354     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4355     LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
   4356     LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
   4357     fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
   4358   }
   4359   return dst;
   4360 }
   4361 
   4362 
   4363 template <typename T>
   4364 LogicVRegister Simulator::fmla(VectorFormat vform,
   4365                                LogicVRegister dst,
   4366                                const LogicVRegister& src1,
   4367                                const LogicVRegister& src2) {
   4368   dst.ClearForWrite(vform);
   4369   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4370     T op1 = src1.Float<T>(i);
   4371     T op2 = src2.Float<T>(i);
   4372     T acc = dst.Float<T>(i);
   4373     T result = FPMulAdd(acc, op1, op2);
   4374     dst.SetFloat(i, result);
   4375   }
   4376   return dst;
   4377 }
   4378 
   4379 
   4380 LogicVRegister Simulator::fmla(VectorFormat vform,
   4381                                LogicVRegister dst,
   4382                                const LogicVRegister& src1,
   4383                                const LogicVRegister& src2) {
   4384   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4385     fmla<SimFloat16>(vform, dst, src1, src2);
   4386   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4387     fmla<float>(vform, dst, src1, src2);
   4388   } else {
   4389     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4390     fmla<double>(vform, dst, src1, src2);
   4391   }
   4392   return dst;
   4393 }
   4394 
   4395 
   4396 template <typename T>
   4397 LogicVRegister Simulator::fmls(VectorFormat vform,
   4398                                LogicVRegister dst,
   4399                                const LogicVRegister& src1,
   4400                                const LogicVRegister& src2) {
   4401   dst.ClearForWrite(vform);
   4402   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4403     T op1 = -src1.Float<T>(i);
   4404     T op2 = src2.Float<T>(i);
   4405     T acc = dst.Float<T>(i);
   4406     T result = FPMulAdd(acc, op1, op2);
   4407     dst.SetFloat(i, result);
   4408   }
   4409   return dst;
   4410 }
   4411 
   4412 
   4413 LogicVRegister Simulator::fmls(VectorFormat vform,
   4414                                LogicVRegister dst,
   4415                                const LogicVRegister& src1,
   4416                                const LogicVRegister& src2) {
   4417   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4418     fmls<SimFloat16>(vform, dst, src1, src2);
   4419   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4420     fmls<float>(vform, dst, src1, src2);
   4421   } else {
   4422     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4423     fmls<double>(vform, dst, src1, src2);
   4424   }
   4425   return dst;
   4426 }
   4427 
   4428 
   4429 template <typename T>
   4430 LogicVRegister Simulator::fneg(VectorFormat vform,
   4431                                LogicVRegister dst,
   4432                                const LogicVRegister& src) {
   4433   dst.ClearForWrite(vform);
   4434   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4435     T op = src.Float<T>(i);
   4436     op = -op;
   4437     dst.SetFloat(i, op);
   4438   }
   4439   return dst;
   4440 }
   4441 
   4442 
   4443 LogicVRegister Simulator::fneg(VectorFormat vform,
   4444                                LogicVRegister dst,
   4445                                const LogicVRegister& src) {
   4446   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4447     fneg<SimFloat16>(vform, dst, src);
   4448   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4449     fneg<float>(vform, dst, src);
   4450   } else {
   4451     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4452     fneg<double>(vform, dst, src);
   4453   }
   4454   return dst;
   4455 }
   4456 
   4457 
   4458 template <typename T>
   4459 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4460                                 LogicVRegister dst,
   4461                                 const LogicVRegister& src) {
   4462   dst.ClearForWrite(vform);
   4463   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4464     T op = src.Float<T>(i);
   4465     if (copysign(1.0, op) < 0.0) {
   4466       op = -op;
   4467     }
   4468     dst.SetFloat(i, op);
   4469   }
   4470   return dst;
   4471 }
   4472 
   4473 
   4474 LogicVRegister Simulator::fabs_(VectorFormat vform,
   4475                                 LogicVRegister dst,
   4476                                 const LogicVRegister& src) {
   4477   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4478     fabs_<SimFloat16>(vform, dst, src);
   4479   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4480     fabs_<float>(vform, dst, src);
   4481   } else {
   4482     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4483     fabs_<double>(vform, dst, src);
   4484   }
   4485   return dst;
   4486 }
   4487 
   4488 
   4489 LogicVRegister Simulator::fabd(VectorFormat vform,
   4490                                LogicVRegister dst,
   4491                                const LogicVRegister& src1,
   4492                                const LogicVRegister& src2) {
   4493   SimVRegister temp;
   4494   fsub(vform, temp, src1, src2);
   4495   fabs_(vform, dst, temp);
   4496   return dst;
   4497 }
   4498 
   4499 
   4500 LogicVRegister Simulator::fsqrt(VectorFormat vform,
   4501                                 LogicVRegister dst,
   4502                                 const LogicVRegister& src) {
   4503   dst.ClearForWrite(vform);
   4504   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4505     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4506       SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
   4507       dst.SetFloat(i, result);
   4508     }
   4509   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4510     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4511       float result = FPSqrt(src.Float<float>(i));
   4512       dst.SetFloat(i, result);
   4513     }
   4514   } else {
   4515     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4516     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4517       double result = FPSqrt(src.Float<double>(i));
   4518       dst.SetFloat(i, result);
   4519     }
   4520   }
   4521   return dst;
   4522 }
   4523 
   4524 
   4525 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
   4526   LogicVRegister Simulator::FNP(VectorFormat vform,                            \
   4527                                 LogicVRegister dst,                            \
   4528                                 const LogicVRegister& src1,                    \
   4529                                 const LogicVRegister& src2) {                  \
   4530     SimVRegister temp1, temp2;                                                 \
   4531     uzp1(vform, temp1, src1, src2);                                            \
   4532     uzp2(vform, temp2, src1, src2);                                            \
   4533     FN(vform, dst, temp1, temp2);                                              \
   4534     return dst;                                                                \
   4535   }                                                                            \
   4536                                                                                \
   4537   LogicVRegister Simulator::FNP(VectorFormat vform,                            \
   4538                                 LogicVRegister dst,                            \
   4539                                 const LogicVRegister& src) {                   \
   4540     if (vform == kFormatH) {                                                   \
   4541       SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))),   \
   4542                            SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
   4543       dst.SetUint(vform, 0, Float16ToRawbits(result));                         \
   4544     } else if (vform == kFormatS) {                                            \
   4545       float result = OP(src.Float<float>(0), src.Float<float>(1));             \
   4546       dst.SetFloat(0, result);                                                 \
   4547     } else {                                                                   \
   4548       VIXL_ASSERT(vform == kFormatD);                                          \
   4549       double result = OP(src.Float<double>(0), src.Float<double>(1));          \
   4550       dst.SetFloat(0, result);                                                 \
   4551     }                                                                          \
   4552     dst.ClearForWrite(vform);                                                  \
   4553     return dst;                                                                \
   4554   }
   4555 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
   4556 #undef DEFINE_NEON_FP_PAIR_OP
   4557 
   4558 template <typename T>
   4559 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
   4560                                    LogicVRegister dst,
   4561                                    const LogicVRegister& src,
   4562                                    typename TFPMinMaxOp<T>::type Op) {
   4563   VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) ||
   4564               (vform == kFormat4S));
   4565   USE(vform);
   4566   T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1));
   4567   T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3));
   4568   if (vform == kFormat8H) {
   4569     T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5));
   4570     T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7));
   4571     result1 = (this->*Op)(result1, result3);
   4572     result2 = (this->*Op)(result2, result4);
   4573   }
   4574   T result = (this->*Op)(result1, result2);
   4575   dst.ClearForWrite(ScalarFormatFromFormat(vform));
   4576   dst.SetFloat<T>(0, result);
   4577   return dst;
   4578 }
   4579 
   4580 
   4581 LogicVRegister Simulator::fmaxv(VectorFormat vform,
   4582                                 LogicVRegister dst,
   4583                                 const LogicVRegister& src) {
   4584   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4585     return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>);
   4586   } else {
   4587     return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>);
   4588   }
   4589 }
   4590 
   4591 
   4592 LogicVRegister Simulator::fminv(VectorFormat vform,
   4593                                 LogicVRegister dst,
   4594                                 const LogicVRegister& src) {
   4595   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4596     return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>);
   4597   } else {
   4598     return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>);
   4599   }
   4600 }
   4601 
   4602 
   4603 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
   4604                                   LogicVRegister dst,
   4605                                   const LogicVRegister& src) {
   4606   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4607     return fminmaxv<SimFloat16>(vform,
   4608                                 dst,
   4609                                 src,
   4610                                 &Simulator::FPMaxNM<SimFloat16>);
   4611   } else {
   4612     return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>);
   4613   }
   4614 }
   4615 
   4616 
   4617 LogicVRegister Simulator::fminnmv(VectorFormat vform,
   4618                                   LogicVRegister dst,
   4619                                   const LogicVRegister& src) {
   4620   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4621     return fminmaxv<SimFloat16>(vform,
   4622                                 dst,
   4623                                 src,
   4624                                 &Simulator::FPMinNM<SimFloat16>);
   4625   } else {
   4626     return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>);
   4627   }
   4628 }
   4629 
   4630 
   4631 LogicVRegister Simulator::fmul(VectorFormat vform,
   4632                                LogicVRegister dst,
   4633                                const LogicVRegister& src1,
   4634                                const LogicVRegister& src2,
   4635                                int index) {
   4636   dst.ClearForWrite(vform);
   4637   SimVRegister temp;
   4638   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4639     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
   4640     fmul<SimFloat16>(vform, dst, src1, index_reg);
   4641   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4642     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4643     fmul<float>(vform, dst, src1, index_reg);
   4644   } else {
   4645     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4646     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4647     fmul<double>(vform, dst, src1, index_reg);
   4648   }
   4649   return dst;
   4650 }
   4651 
   4652 
   4653 LogicVRegister Simulator::fmla(VectorFormat vform,
   4654                                LogicVRegister dst,
   4655                                const LogicVRegister& src1,
   4656                                const LogicVRegister& src2,
   4657                                int index) {
   4658   dst.ClearForWrite(vform);
   4659   SimVRegister temp;
   4660   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4661     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
   4662     fmla<SimFloat16>(vform, dst, src1, index_reg);
   4663   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4664     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4665     fmla<float>(vform, dst, src1, index_reg);
   4666   } else {
   4667     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4668     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4669     fmla<double>(vform, dst, src1, index_reg);
   4670   }
   4671   return dst;
   4672 }
   4673 
   4674 
   4675 LogicVRegister Simulator::fmls(VectorFormat vform,
   4676                                LogicVRegister dst,
   4677                                const LogicVRegister& src1,
   4678                                const LogicVRegister& src2,
   4679                                int index) {
   4680   dst.ClearForWrite(vform);
   4681   SimVRegister temp;
   4682   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4683     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
   4684     fmls<SimFloat16>(vform, dst, src1, index_reg);
   4685   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4686     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4687     fmls<float>(vform, dst, src1, index_reg);
   4688   } else {
   4689     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4690     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4691     fmls<double>(vform, dst, src1, index_reg);
   4692   }
   4693   return dst;
   4694 }
   4695 
   4696 
   4697 LogicVRegister Simulator::fmulx(VectorFormat vform,
   4698                                 LogicVRegister dst,
   4699                                 const LogicVRegister& src1,
   4700                                 const LogicVRegister& src2,
   4701                                 int index) {
   4702   dst.ClearForWrite(vform);
   4703   SimVRegister temp;
   4704   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4705     LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
   4706     fmulx<SimFloat16>(vform, dst, src1, index_reg);
   4707   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4708     LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
   4709     fmulx<float>(vform, dst, src1, index_reg);
   4710   } else {
   4711     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4712     LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
   4713     fmulx<double>(vform, dst, src1, index_reg);
   4714   }
   4715   return dst;
   4716 }
   4717 
   4718 
   4719 LogicVRegister Simulator::frint(VectorFormat vform,
   4720                                 LogicVRegister dst,
   4721                                 const LogicVRegister& src,
   4722                                 FPRounding rounding_mode,
   4723                                 bool inexact_exception) {
   4724   dst.ClearForWrite(vform);
   4725   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4726     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4727       SimFloat16 input = src.Float<SimFloat16>(i);
   4728       SimFloat16 rounded = FPRoundInt(input, rounding_mode);
   4729       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
   4730         FPProcessException();
   4731       }
   4732       dst.SetFloat<SimFloat16>(i, rounded);
   4733     }
   4734   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4735     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4736       float input = src.Float<float>(i);
   4737       float rounded = FPRoundInt(input, rounding_mode);
   4738       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
   4739         FPProcessException();
   4740       }
   4741       dst.SetFloat<float>(i, rounded);
   4742     }
   4743   } else {
   4744     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4745     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4746       double input = src.Float<double>(i);
   4747       double rounded = FPRoundInt(input, rounding_mode);
   4748       if (inexact_exception && !IsNaN(input) && (input != rounded)) {
   4749         FPProcessException();
   4750       }
   4751       dst.SetFloat<double>(i, rounded);
   4752     }
   4753   }
   4754   return dst;
   4755 }
   4756 
   4757 
   4758 LogicVRegister Simulator::fcvts(VectorFormat vform,
   4759                                 LogicVRegister dst,
   4760                                 const LogicVRegister& src,
   4761                                 FPRounding rounding_mode,
   4762                                 int fbits) {
   4763   dst.ClearForWrite(vform);
   4764   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4765     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4766       SimFloat16 op =
   4767           static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
   4768       dst.SetInt(vform, i, FPToInt16(op, rounding_mode));
   4769     }
   4770   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4771     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4772       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4773       dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
   4774     }
   4775   } else {
   4776     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4777     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4778       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4779       dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
   4780     }
   4781   }
   4782   return dst;
   4783 }
   4784 
   4785 
   4786 LogicVRegister Simulator::fcvtu(VectorFormat vform,
   4787                                 LogicVRegister dst,
   4788                                 const LogicVRegister& src,
   4789                                 FPRounding rounding_mode,
   4790                                 int fbits) {
   4791   dst.ClearForWrite(vform);
   4792   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4793     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4794       SimFloat16 op =
   4795           static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
   4796       dst.SetUint(vform, i, FPToUInt16(op, rounding_mode));
   4797     }
   4798   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4799     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4800       float op = src.Float<float>(i) * std::pow(2.0f, fbits);
   4801       dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
   4802     }
   4803   } else {
   4804     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4805     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4806       double op = src.Float<double>(i) * std::pow(2.0, fbits);
   4807       dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
   4808     }
   4809   }
   4810   return dst;
   4811 }
   4812 
   4813 
   4814 LogicVRegister Simulator::fcvtl(VectorFormat vform,
   4815                                 LogicVRegister dst,
   4816                                 const LogicVRegister& src) {
   4817   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4818     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4819       // TODO: Full support for SimFloat16 in SimRegister(s).
   4820       dst.SetFloat(i,
   4821                    FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
   4822                              ReadDN()));
   4823     }
   4824   } else {
   4825     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4826     for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
   4827       dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
   4828     }
   4829   }
   4830   return dst;
   4831 }
   4832 
   4833 
   4834 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
   4835                                  LogicVRegister dst,
   4836                                  const LogicVRegister& src) {
   4837   int lane_count = LaneCountFromFormat(vform);
   4838   if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   4839     for (int i = 0; i < lane_count; i++) {
   4840       // TODO: Full support for SimFloat16 in SimRegister(s).
   4841       dst.SetFloat(i,
   4842                    FPToFloat(RawbitsToFloat16(
   4843                                  src.Float<uint16_t>(i + lane_count)),
   4844                              ReadDN()));
   4845     }
   4846   } else {
   4847     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   4848     for (int i = 0; i < lane_count; i++) {
   4849       dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
   4850     }
   4851   }
   4852   return dst;
   4853 }
   4854 
   4855 
   4856 LogicVRegister Simulator::fcvtn(VectorFormat vform,
   4857                                 LogicVRegister dst,
   4858                                 const LogicVRegister& src) {
   4859   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4860     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4861       dst.SetFloat(i,
   4862                    Float16ToRawbits(
   4863                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
   4864     }
   4865   } else {
   4866     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4867     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4868       dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
   4869     }
   4870   }
   4871   return dst;
   4872 }
   4873 
   4874 
   4875 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
   4876                                  LogicVRegister dst,
   4877                                  const LogicVRegister& src) {
   4878   int lane_count = LaneCountFromFormat(vform) / 2;
   4879   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   4880     for (int i = lane_count - 1; i >= 0; i--) {
   4881       dst.SetFloat(i + lane_count,
   4882                    Float16ToRawbits(
   4883                        FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
   4884     }
   4885   } else {
   4886     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4887     for (int i = lane_count - 1; i >= 0; i--) {
   4888       dst.SetFloat(i + lane_count,
   4889                    FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
   4890     }
   4891   }
   4892   return dst;
   4893 }
   4894 
   4895 
   4896 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
   4897                                  LogicVRegister dst,
   4898                                  const LogicVRegister& src) {
   4899   dst.ClearForWrite(vform);
   4900   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4901   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   4902     dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
   4903   }
   4904   return dst;
   4905 }
   4906 
   4907 
   4908 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
   4909                                   LogicVRegister dst,
   4910                                   const LogicVRegister& src) {
   4911   VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
   4912   int lane_count = LaneCountFromFormat(vform) / 2;
   4913   for (int i = lane_count - 1; i >= 0; i--) {
   4914     dst.SetFloat(i + lane_count,
   4915                  FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
   4916   }
   4917   return dst;
   4918 }
   4919 
   4920 
   4921 // Based on reference C function recip_sqrt_estimate from ARM ARM.
   4922 double Simulator::recip_sqrt_estimate(double a) {
   4923   int q0, q1, s;
   4924   double r;
   4925   if (a < 0.5) {
   4926     q0 = static_cast<int>(a * 512.0);
   4927     r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
   4928   } else {
   4929     q1 = static_cast<int>(a * 256.0);
   4930     r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
   4931   }
   4932   s = static_cast<int>(256.0 * r + 0.5);
   4933   return static_cast<double>(s) / 256.0;
   4934 }
   4935 
   4936 
   4937 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
   4938   return ExtractUnsignedBitfield64(start_bit, end_bit, val);
   4939 }
   4940 
   4941 
   4942 template <typename T>
   4943 T Simulator::FPRecipSqrtEstimate(T op) {
   4944   if (IsNaN(op)) {
   4945     return FPProcessNaN(op);
   4946   } else if (op == 0.0) {
   4947     if (copysign(1.0, op) < 0.0) {
   4948       return kFP64NegativeInfinity;
   4949     } else {
   4950       return kFP64PositiveInfinity;
   4951     }
   4952   } else if (copysign(1.0, op) < 0.0) {
   4953     FPProcessException();
   4954     return FPDefaultNaN<T>();
   4955   } else if (IsInf(op)) {
   4956     return 0.0;
   4957   } else {
   4958     uint64_t fraction;
   4959     int exp, result_exp;
   4960 
   4961     if (IsFloat16<T>()) {
   4962       exp = Float16Exp(op);
   4963       fraction = Float16Mantissa(op);
   4964       fraction <<= 42;
   4965     } else if (IsFloat32<T>()) {
   4966       exp = FloatExp(op);
   4967       fraction = FloatMantissa(op);
   4968       fraction <<= 29;
   4969     } else {
   4970       VIXL_ASSERT(IsFloat64<T>());
   4971       exp = DoubleExp(op);
   4972       fraction = DoubleMantissa(op);
   4973     }
   4974 
   4975     if (exp == 0) {
   4976       while (Bits(fraction, 51, 51) == 0) {
   4977         fraction = Bits(fraction, 50, 0) << 1;
   4978         exp -= 1;
   4979       }
   4980       fraction = Bits(fraction, 50, 0) << 1;
   4981     }
   4982 
   4983     double scaled;
   4984     if (Bits(exp, 0, 0) == 0) {
   4985       scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
   4986     } else {
   4987       scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
   4988     }
   4989 
   4990     if (IsFloat16<T>()) {
   4991       result_exp = (44 - exp) / 2;
   4992     } else if (IsFloat32<T>()) {
   4993       result_exp = (380 - exp) / 2;
   4994     } else {
   4995       VIXL_ASSERT(IsFloat64<T>());
   4996       result_exp = (3068 - exp) / 2;
   4997     }
   4998 
   4999     uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
   5000 
   5001     if (IsFloat16<T>()) {
   5002       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
   5003       uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
   5004       return Float16Pack(0, exp_bits, est_bits);
   5005     } else if (IsFloat32<T>()) {
   5006       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   5007       uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
   5008       return FloatPack(0, exp_bits, est_bits);
   5009     } else {
   5010       VIXL_ASSERT(IsFloat64<T>());
   5011       return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
   5012     }
   5013   }
   5014 }
   5015 
   5016 
   5017 LogicVRegister Simulator::frsqrte(VectorFormat vform,
   5018                                   LogicVRegister dst,
   5019                                   const LogicVRegister& src) {
   5020   dst.ClearForWrite(vform);
   5021   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   5022     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5023       SimFloat16 input = src.Float<SimFloat16>(i);
   5024       dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input));
   5025     }
   5026   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   5027     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5028       float input = src.Float<float>(i);
   5029       dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
   5030     }
   5031   } else {
   5032     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   5033     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5034       double input = src.Float<double>(i);
   5035       dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
   5036     }
   5037   }
   5038   return dst;
   5039 }
   5040 
   5041 template <typename T>
   5042 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
   5043   uint32_t sign;
   5044 
   5045   if (IsFloat16<T>()) {
   5046     sign = Float16Sign(op);
   5047   } else if (IsFloat32<T>()) {
   5048     sign = FloatSign(op);
   5049   } else {
   5050     VIXL_ASSERT(IsFloat64<T>());
   5051     sign = DoubleSign(op);
   5052   }
   5053 
   5054   if (IsNaN(op)) {
   5055     return FPProcessNaN(op);
   5056   } else if (IsInf(op)) {
   5057     return (sign == 1) ? -0.0 : 0.0;
   5058   } else if (op == 0.0) {
   5059     FPProcessException();  // FPExc_DivideByZero exception.
   5060     return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   5061   } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
   5062              (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
   5063              (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
   5064     bool overflow_to_inf = false;
   5065     switch (rounding) {
   5066       case FPTieEven:
   5067         overflow_to_inf = true;
   5068         break;
   5069       case FPPositiveInfinity:
   5070         overflow_to_inf = (sign == 0);
   5071         break;
   5072       case FPNegativeInfinity:
   5073         overflow_to_inf = (sign == 1);
   5074         break;
   5075       case FPZero:
   5076         overflow_to_inf = false;
   5077         break;
   5078       default:
   5079         break;
   5080     }
   5081     FPProcessException();  // FPExc_Overflow and FPExc_Inexact.
   5082     if (overflow_to_inf) {
   5083       return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
   5084     } else {
   5085       // Return FPMaxNormal(sign).
   5086       if (IsFloat16<T>()) {
   5087         return Float16Pack(sign, 0x1f, 0x3ff);
   5088       } else if (IsFloat32<T>()) {
   5089         return FloatPack(sign, 0xfe, 0x07fffff);
   5090       } else {
   5091         VIXL_ASSERT(IsFloat64<T>());
   5092         return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
   5093       }
   5094     }
   5095   } else {
   5096     uint64_t fraction;
   5097     int exp, result_exp;
   5098     uint32_t sign;
   5099 
   5100     if (IsFloat16<T>()) {
   5101       sign = Float16Sign(op);
   5102       exp = Float16Exp(op);
   5103       fraction = Float16Mantissa(op);
   5104       fraction <<= 42;
   5105     } else if (IsFloat32<T>()) {
   5106       sign = FloatSign(op);
   5107       exp = FloatExp(op);
   5108       fraction = FloatMantissa(op);
   5109       fraction <<= 29;
   5110     } else {
   5111       VIXL_ASSERT(IsFloat64<T>());
   5112       sign = DoubleSign(op);
   5113       exp = DoubleExp(op);
   5114       fraction = DoubleMantissa(op);
   5115     }
   5116 
   5117     if (exp == 0) {
   5118       if (Bits(fraction, 51, 51) == 0) {
   5119         exp -= 1;
   5120         fraction = Bits(fraction, 49, 0) << 2;
   5121       } else {
   5122         fraction = Bits(fraction, 50, 0) << 1;
   5123       }
   5124     }
   5125 
   5126     double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
   5127 
   5128     if (IsFloat16<T>()) {
   5129       result_exp = (29 - exp);  // In range 29-30 = -1 to 29+1 = 30.
   5130     } else if (IsFloat32<T>()) {
   5131       result_exp = (253 - exp);  // In range 253-254 = -1 to 253+1 = 254.
   5132     } else {
   5133       VIXL_ASSERT(IsFloat64<T>());
   5134       result_exp = (2045 - exp);  // In range 2045-2046 = -1 to 2045+1 = 2046.
   5135     }
   5136 
   5137     double estimate = recip_estimate(scaled);
   5138 
   5139     fraction = DoubleMantissa(estimate);
   5140     if (result_exp == 0) {
   5141       fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
   5142     } else if (result_exp == -1) {
   5143       fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
   5144       result_exp = 0;
   5145     }
   5146     if (IsFloat16<T>()) {
   5147       uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
   5148       uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
   5149       return Float16Pack(sign, exp_bits, frac_bits);
   5150     } else if (IsFloat32<T>()) {
   5151       uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
   5152       uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
   5153       return FloatPack(sign, exp_bits, frac_bits);
   5154     } else {
   5155       VIXL_ASSERT(IsFloat64<T>());
   5156       return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
   5157     }
   5158   }
   5159 }
   5160 
   5161 
   5162 LogicVRegister Simulator::frecpe(VectorFormat vform,
   5163                                  LogicVRegister dst,
   5164                                  const LogicVRegister& src,
   5165                                  FPRounding round) {
   5166   dst.ClearForWrite(vform);
   5167   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   5168     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5169       SimFloat16 input = src.Float<SimFloat16>(i);
   5170       dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round));
   5171     }
   5172   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   5173     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5174       float input = src.Float<float>(i);
   5175       dst.SetFloat(i, FPRecipEstimate<float>(input, round));
   5176     }
   5177   } else {
   5178     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   5179     for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5180       double input = src.Float<double>(i);
   5181       dst.SetFloat(i, FPRecipEstimate<double>(input, round));
   5182     }
   5183   }
   5184   return dst;
   5185 }
   5186 
   5187 
   5188 LogicVRegister Simulator::ursqrte(VectorFormat vform,
   5189                                   LogicVRegister dst,
   5190                                   const LogicVRegister& src) {
   5191   dst.ClearForWrite(vform);
   5192   uint64_t operand;
   5193   uint32_t result;
   5194   double dp_operand, dp_result;
   5195   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5196     operand = src.Uint(vform, i);
   5197     if (operand <= 0x3FFFFFFF) {
   5198       result = 0xFFFFFFFF;
   5199     } else {
   5200       dp_operand = operand * std::pow(2.0, -32);
   5201       dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
   5202       result = static_cast<uint32_t>(dp_result);
   5203     }
   5204     dst.SetUint(vform, i, result);
   5205   }
   5206   return dst;
   5207 }
   5208 
   5209 
   5210 // Based on reference C function recip_estimate from ARM ARM.
   5211 double Simulator::recip_estimate(double a) {
   5212   int q, s;
   5213   double r;
   5214   q = static_cast<int>(a * 512.0);
   5215   r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
   5216   s = static_cast<int>(256.0 * r + 0.5);
   5217   return static_cast<double>(s) / 256.0;
   5218 }
   5219 
   5220 
   5221 LogicVRegister Simulator::urecpe(VectorFormat vform,
   5222                                  LogicVRegister dst,
   5223                                  const LogicVRegister& src) {
   5224   dst.ClearForWrite(vform);
   5225   uint64_t operand;
   5226   uint32_t result;
   5227   double dp_operand, dp_result;
   5228   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5229     operand = src.Uint(vform, i);
   5230     if (operand <= 0x7FFFFFFF) {
   5231       result = 0xFFFFFFFF;
   5232     } else {
   5233       dp_operand = operand * std::pow(2.0, -32);
   5234       dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
   5235       result = static_cast<uint32_t>(dp_result);
   5236     }
   5237     dst.SetUint(vform, i, result);
   5238   }
   5239   return dst;
   5240 }
   5241 
   5242 template <typename T>
   5243 LogicVRegister Simulator::frecpx(VectorFormat vform,
   5244                                  LogicVRegister dst,
   5245                                  const LogicVRegister& src) {
   5246   dst.ClearForWrite(vform);
   5247   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5248     T op = src.Float<T>(i);
   5249     T result;
   5250     if (IsNaN(op)) {
   5251       result = FPProcessNaN(op);
   5252     } else {
   5253       int exp;
   5254       uint32_t sign;
   5255       if (IsFloat16<T>()) {
   5256         sign = Float16Sign(op);
   5257         exp = Float16Exp(op);
   5258         exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
   5259         result = Float16Pack(sign, exp, 0);
   5260       } else if (IsFloat32<T>()) {
   5261         sign = FloatSign(op);
   5262         exp = FloatExp(op);
   5263         exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
   5264         result = FloatPack(sign, exp, 0);
   5265       } else {
   5266         VIXL_ASSERT(IsFloat64<T>());
   5267         sign = DoubleSign(op);
   5268         exp = DoubleExp(op);
   5269         exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
   5270         result = DoublePack(sign, exp, 0);
   5271       }
   5272     }
   5273     dst.SetFloat(i, result);
   5274   }
   5275   return dst;
   5276 }
   5277 
   5278 
   5279 LogicVRegister Simulator::frecpx(VectorFormat vform,
   5280                                  LogicVRegister dst,
   5281                                  const LogicVRegister& src) {
   5282   if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   5283     frecpx<SimFloat16>(vform, dst, src);
   5284   } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   5285     frecpx<float>(vform, dst, src);
   5286   } else {
   5287     VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   5288     frecpx<double>(vform, dst, src);
   5289   }
   5290   return dst;
   5291 }
   5292 
   5293 LogicVRegister Simulator::scvtf(VectorFormat vform,
   5294                                 LogicVRegister dst,
   5295                                 const LogicVRegister& src,
   5296                                 int fbits,
   5297                                 FPRounding round) {
   5298   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5299     if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   5300       SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round);
   5301       dst.SetFloat<SimFloat16>(i, result);
   5302     } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   5303       float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
   5304       dst.SetFloat<float>(i, result);
   5305     } else {
   5306       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   5307       double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
   5308       dst.SetFloat<double>(i, result);
   5309     }
   5310   }
   5311   return dst;
   5312 }
   5313 
   5314 
   5315 LogicVRegister Simulator::ucvtf(VectorFormat vform,
   5316                                 LogicVRegister dst,
   5317                                 const LogicVRegister& src,
   5318                                 int fbits,
   5319                                 FPRounding round) {
   5320   for (int i = 0; i < LaneCountFromFormat(vform); i++) {
   5321     if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
   5322       SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round);
   5323       dst.SetFloat<SimFloat16>(i, result);
   5324     } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
   5325       float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
   5326       dst.SetFloat<float>(i, result);
   5327     } else {
   5328       VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
   5329       double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
   5330       dst.SetFloat<double>(i, result);
   5331     }
   5332   }
   5333   return dst;
   5334 }
   5335 
   5336 
   5337 }  // namespace aarch64
   5338 }  // namespace vixl
   5339 
   5340 #endif  // VIXL_INCLUDE_SIMULATOR_AARCH64
   5341