Home | History | Annotate | Download | only in arm64
      1 
      2 #include <stdio.h>
      3 #include <assert.h>
      4 #include <malloc.h>  // memalign
      5 #include <string.h>  // memset
      6 #include <math.h>    // isnormal
      7 
      8 typedef  unsigned char           UChar;
      9 typedef  unsigned short int      UShort;
     10 typedef  unsigned int            UInt;
     11 typedef  signed int              Int;
     12 typedef  unsigned char           UChar;
     13 typedef  unsigned long long int  ULong;
     14 typedef  signed long long int    Long;
     15 typedef  double                  Double;
     16 typedef  float                   Float;
     17 
     18 typedef  unsigned char           Bool;
     19 #define False ((Bool)0)
     20 #define True  ((Bool)1)
     21 
     22 
/* Loop bound used by the functions that the GEN_*_TEST macros generate:
   each generated test repeats its randomise/execute/print cycle ITERS
   times. */
#define ITERS 1
     24 
     25 typedef
     26   enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
     27   LaneTy;
     28 
     29 union _V128 {
     30    UChar  u8[16];
     31    UShort u16[8];
     32    UInt   u32[4];
     33    ULong  u64[2];
     34    Float  f32[4];
     35    Double f64[2];
     36 };
     37 typedef  union _V128   V128;
     38 
     39 static inline UChar randUChar ( void )
     40 {
     41    static UInt seed = 80021;
     42    seed = 1103515245 * seed + 12345;
     43    return (seed >> 17) & 0xFF;
     44 }
     45 
     46 static ULong randULong ( LaneTy ty )
     47 {
     48    Int i;
     49    ULong r = 0;
     50    for (i = 0; i < 8; i++) {
     51       r = (r << 8) | (ULong)(0xFF & randUChar());
     52    }
     53    return r;
     54 }
     55 
     56 /* Generates a random V128.  Ensures that that it contains normalised
     57    FP numbers when viewed as either F32x4 or F64x2, so that it is
     58    reasonable to use in FP test cases. */
     59 static void randV128 ( /*OUT*/V128* v, LaneTy ty )
     60 {
     61    static UInt nCalls = 0, nIters = 0;
     62    Int i;
     63    nCalls++;
     64    while (1) {
     65       nIters++;
     66       for (i = 0; i < 16; i++) {
     67          v->u8[i] = randUChar();
     68       }
     69       if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
     70           && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
     71         break;
     72    }
     73    if (0 == (nCalls & 0xFF))
     74       printf("randV128: %u calls, %u iters\n", nCalls, nIters);
     75 }
     76 
     77 static void showV128 ( V128* v )
     78 {
     79    Int i;
     80    for (i = 15; i >= 0; i--)
     81       printf("%02x", (Int)v->u8[i]);
     82 }
     83 
     84 static void showBlock ( const char* msg, V128* block, Int nBlock )
     85 {
     86    Int i;
     87    printf("%s\n", msg);
     88    for (i = 0; i < nBlock; i++) {
     89       printf("  ");
     90       showV128(&block[i]);
     91       printf("\n");
     92    }
     93 }
     94 
/* Returns szB bytes obtained from memalign() with a requested alignment
   of 16.  Both the allocation and the alignment are checked with
   assert(), so a failure aborts the program (unless NDEBUG disables the
   checks).  Marked unused so the compiler stays quiet when nothing in
   the file calls it; no matching free is visible in this section. */
__attribute__((unused))
static void* memalign16(size_t szB)
{
   void* x;
   x = memalign(16, szB);
   assert(x);
   /* The low four address bits must all be zero. */
   assert(0 == ((16-1) & (unsigned long)x));
   return x;
}
    104 
    105 static ULong dup4x16 ( UInt x )
    106 {
    107    ULong r = x & 0xF;
    108    r |= (r << 4);
    109    r |= (r << 8);
    110    r |= (r << 16);
    111    r |= (r << 32);
    112    return r;
    113 }
    114 
// Support for generating random floating-point numbers (see randDouble
// and randFloat below).  About 1 time in 2 those functions return,
// instead of an ordinary number, one of the special values held in this
// table (zeroes, ones, +/- Inf, +/- NaN, denormals).  This ensures that
// many of the groups of 4 calls will include a special value.
//
// The table is filled in on first use by ensure_special_values_initted,
// which then sets special_values_initted so the work is done only once.

static Double special_values[10];
static Bool   special_values_initted = False;
    122 
/* Small generators for the entries of special_values[].  All of them are
   marked noinline, presumably so that the negations and the division are
   really performed by called code rather than folded away at the call
   site (NOTE(review): confirm intent). */

/* Returns -d. */
static __attribute__((noinline))
Double negate ( Double d ) { return -d; }
/* Returns x divided by y. */
static __attribute__((noinline))
Double divf64 ( Double x, Double y ) { return x/y; }

/* +0.0, and its negation. */
static __attribute__((noinline))
Double plusZero  ( void ) { return 0.0; }
static __attribute__((noinline))
Double minusZero ( void ) { return negate(plusZero()); }

/* +1.0, and its negation. */
static __attribute__((noinline))
Double plusOne  ( void ) { return 1.0; }
static __attribute__((noinline))
Double minusOne ( void ) { return negate(plusOne()); }

/* 1.0 / 0.0 (positive infinity), and its negation. */
static __attribute__((noinline))
Double plusInf   ( void ) { return 1.0 / 0.0; }
static __attribute__((noinline))
Double minusInf  ( void ) { return negate(plusInf()); }

/* A NaN produced by dividing infinity by infinity, and its negation. */
static __attribute__((noinline))
Double plusNaN  ( void ) { return divf64(plusInf(),plusInf()); }
static __attribute__((noinline))
Double minusNaN ( void ) { return negate(plusNaN()); }

/* 1.23e-315 / 1e3: a magnitude below the smallest normal double, i.e. a
   denormal, and its negation. */
static __attribute__((noinline))
Double plusDenorm  ( void ) { return 1.23e-315 / 1e3; }
static __attribute__((noinline))
Double minusDenorm ( void ) { return negate(plusDenorm()); }
    152 
    153 
    154 static void ensure_special_values_initted ( void )
    155 {
    156    if (special_values_initted) return;
    157    special_values[0] = plusZero();
    158    special_values[1] = minusZero();
    159    special_values[2] = plusOne();
    160    special_values[3] = minusOne();
    161    special_values[4] = plusInf();
    162    special_values[5] = minusInf();
    163    special_values[6] = plusNaN();
    164    special_values[7] = minusNaN();
    165    special_values[8] = plusDenorm();
    166    special_values[9] = minusDenorm();
    167    special_values_initted = True;
    168    int i;
    169    printf("\n");
    170    for (i = 0; i < 10; i++) {
    171       printf("special value %d = %e\n", i, special_values[i]);
    172    }
    173    printf("\n");
    174 }
    175 
    176 static Double randDouble ( void )
    177 {
    178    ensure_special_values_initted();
    179    UChar c = randUChar();
    180    if (c >= 128) {
    181       // return a normal number most of the time.
    182       // 0 .. 2^63-1
    183       ULong u64 = randULong(TyDF);
    184       // -2^62 .. 2^62-1
    185       Long s64 = (Long)u64;
    186       // -2^55 .. 2^55-1
    187       s64 >>= (62-55);
    188       // and now as a float
    189       return (Double)s64;
    190    }
    191    c = randUChar() % 10;
    192    return special_values[c];
    193 }
    194 
    195 static Float randFloat ( void )
    196 {
    197    ensure_special_values_initted();
    198    UChar c = randUChar();
    199    if (c >= 128) {
    200       // return a normal number most of the time.
    201       // 0 .. 2^63-1
    202       ULong u64 = randULong(TyDF);
    203       // -2^62 .. 2^62-1
    204       Long s64 = (Long)u64;
    205       // -2^25 .. 2^25-1
    206       s64 >>= (62-25);
    207       // and now as a float
    208       return (Float)s64;
    209    }
    210    c = randUChar() % 10;
    211    return special_values[c];
    212 }
    213 
    214 void randBlock_Doubles ( V128* block, Int nBlock )
    215 {
    216    Int i;
    217    for (i = 0; i < nBlock; i++) {
    218       block[i].f64[0] = randDouble();
    219       block[i].f64[1] = randDouble();
    220    }
    221 }
    222 
    223 void randBlock_Floats ( V128* block, Int nBlock )
    224 {
    225    Int i;
    226    for (i = 0; i < nBlock; i++) {
    227       block[i].f32[0] = randFloat();
    228       block[i].f32[1] = randFloat();
    229       block[i].f32[2] = randFloat();
    230       block[i].f32[3] = randFloat();
    231    }
    232 }
    233 
    234 
    235 /* ---------------------------------------------------------------- */
    236 /* -- Parameterisable test macros                                -- */
    237 /* ---------------------------------------------------------------- */
    238 
/* Executes _action fifty times.  The loop counter _qq lives inside the
   do/while block, so it is visible to _action but not to code following
   the macro; the do { } while (0) wrapper lets the macro be used as a
   single statement. */
#define DO50(_action) \
   do { \
      Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
   } while (0)
    243 
    244 
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>(ty), which runs
   "INSN v8.SUFFIXD, v7.SUFFIXN" ITERS times.

   Per iteration a three-slot block is used:
     block[0]  random source, loaded into q7
     block[1]  random initial contents of the destination q8 (it can
               sometimes be an input to the instruction too); overwritten
               with q8 after the instruction
     block[2]  receives the FPSR value read after the instruction
   The whole block is first filled with 0x55, but block[0] and block[1]
   are then randomised, so only block[2] still holds 0x55 at the load.
   FPSR is zeroed beforehand, using x30 as scratch.

   Output: the instruction text, block[0], block[1] and the FPSR value
   ANDed with 0xFFFFFF60 (which clears bits 0..4 and 7 -- presumably the
   cumulative exception flags; confirm against the architecture manual).
   NOTE(review): the instruction text is printed with no trailing
   separator before the first hex dump, unlike the other generators. */
#define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[2+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q7, [%0, #0]   ; " \
           "ldr   q8, [%0, #16]   ; " \
           #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
           "str   q8, [%0, #16] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #32] " \
           : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
        ); \
        printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN); \
        UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
    271 
    272 
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>_<SUFFIXM>(ty), which runs
   "INSN v9.SUFFIXD, v7.SUFFIXN, v8.SUFFIXM" ITERS times.

   Per iteration a four-slot block is used:
     block[0]  random first source, loaded into q7
     block[1]  random second source, loaded into q8
     block[2]  random initial contents of the destination q9 (it can
               sometimes be an input to the instruction too); overwritten
               with q9 after the instruction
     block[3]  receives the FPSR value read after the instruction
   The whole block is first filled with 0x55, but slots 0..2 are then
   randomised.  FPSR is zeroed beforehand, using x30 as scratch.

   Output: the instruction text, block[0], block[1], block[2] and the
   FPSR value ANDed with 0xFFFFFF60 (clears bits 0..4 and 7 --
   presumably the cumulative exception flags; confirm). */
#define GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM)  \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##SUFFIXM ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[3+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q7, [%0, #0]   ; " \
           "ldr   q8, [%0, #16]   ; " \
           "ldr   q9, [%0, #32]   ; " \
           #INSN " v9." #SUFFIXD ", v7." #SUFFIXN ", v8." #SUFFIXM " ; " \
           "str   q9, [%0, #32] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #48] " \
           : : "r"(&block[0]) : "memory", "v7", "v8", "v9", "x30" \
        ); \
        printf(#INSN   " v9." #SUFFIXD \
               ", v7." #SUFFIXN ", v8." #SUFFIXM "  ");   \
        UInt fpsr = 0xFFFFFF60 & block[3].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
    303 
    304 
/* Defines test_<INSN>_<SUFFIXD>_<SUFFIXN>_<AMOUNT>(ty), which runs
   "INSN v8.SUFFIXD, v7.SUFFIXN, #AMOUNT" ITERS times.

   Per iteration a three-slot block is used:
     block[0]  random source, loaded into q7
     block[1]  random initial contents of the destination q8 (it can
               sometimes be an input to the instruction too); overwritten
               with q8 after the instruction
     block[2]  receives the FPSR value read after the instruction
   The whole block is first filled with 0x55, but block[0] and block[1]
   are then randomised.  FPSR is zeroed beforehand, using x30 as scratch.

   Output: the instruction text, block[0], block[1] and the FPSR value
   ANDed with 0xFFFFFF60 (clears bits 0..4 and 7 -- presumably the
   cumulative exception flags; confirm). */
#define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
  __attribute__((noinline)) \
  static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[2+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q7, [%0, #0]   ; " \
           "ldr   q8, [%0, #16]   ; " \
           #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
           "str   q8, [%0, #16] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #32] " \
           : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
        ); \
        printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT "  "); \
        UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
    331 
    332 
/* Generate a test that involves one integer reg and one vector reg,
   with no bias towards which is input and which is output.

   Defines test_<TESTNAME>(ty).  INSN is the complete instruction text
   (a string literal); INTREGNO and VECREGNO are the numbers of the
   x-register and q/v-register it uses.  INTREGNO must not be 30, since
   x30 serves as scratch for zeroing and reading FPSR.

   Per iteration a five-slot block is used:
     block[0]  random value loaded into the vector register
     block[1]  its low 8 bytes are loaded into the integer register
     block[2]  receives the vector register after the instruction
     block[3]  its low 8 bytes receive the integer register afterwards;
               the upper 8 bytes keep their random contents
     block[4]  receives the FPSR value read after the instruction
   Output: the instruction text, block[0..3] and the FPSR value ANDed
   with 0xFFFFFF60 (clears bits 0..4 and 7 -- presumably the cumulative
   exception flags; confirm). */
#define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     assert(INTREGNO != 30); \
     for (i = 0; i < ITERS; i++) { \
        V128 block[4+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREGNO", [%0, #0]  ; " \
           "ldr   x"#INTREGNO", [%0, #16] ; " \
           INSN " ; " \
           "str   q"#VECREGNO", [%0, #32] ; " \
           "str   x"#INTREGNO", [%0, #48] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #64] " \
           : : "r"(&block[0]) : "memory", "v"#VECREGNO, "x"#INTREGNO, "x30" \
        ); \
        printf(INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
    365 
    366 
/* Generate a test that involves two vector regs,
   with no bias towards which is input and which is output.
   It's OK for INSN to use x10 as scratch (it is declared clobbered).

   Defines test_<TESTNAME>(ty).  INSN is the complete instruction text
   (a string literal); VECREG1NO and VECREG2NO are the numbers of the
   two q/v-registers it uses.

   Per iteration a five-slot block is used:
     block[0], block[1]  random values loaded into registers 1 and 2
     block[2], block[3]  receive registers 1 and 2 after the instruction
     block[4]            receives the FPSR value read afterwards
   FPSR is zeroed beforehand, using x30 as scratch.
   Output: the instruction text, block[0..3] and the FPSR value ANDed
   with 0xFFFFFF60 (clears bits 0..4 and 7 -- presumably the cumulative
   exception flags; confirm). */
#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[4+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREG1NO", [%0, #0]  ; " \
           "ldr   q"#VECREG2NO", [%0, #16] ; " \
           INSN " ; " \
           "str   q"#VECREG1NO", [%0, #32] ; " \
           "str   q"#VECREG2NO", [%0, #48] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #64] " \
           : : "r"(&block[0]) \
             : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "x10", "x30" \
        ); \
        printf(INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
    400 
    401 
/* Generate a test that involves three vector regs,
   with no bias towards which is input and which is output.  It's also
   OK for INSN to use v16, v17, v18 as scratch (they are declared
   clobbered).

   Defines test_<TESTNAME>(ty).  INSN is the complete instruction text
   (a string literal); VECREG1NO..VECREG3NO are the numbers of the three
   q/v-registers it uses.

   Per iteration a seven-slot block is used:
     block[0..2]  random values loaded into registers 1, 2 and 3
     block[3..5]  receive registers 1, 2 and 3 after the instruction
     block[6]     receives the FPSR value read afterwards
   FPSR is zeroed beforehand, using x30 as scratch.
   Output: the instruction text, block[0..5] and the FPSR value ANDed
   with 0xFFFFFF60 (clears bits 0..4 and 7 -- presumably the cumulative
   exception flags; confirm). */
#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO)  \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[6+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREG1NO", [%0, #0]  ; " \
           "ldr   q"#VECREG2NO", [%0, #16] ; " \
           "ldr   q"#VECREG3NO", [%0, #32] ; " \
           INSN " ; " \
           "str   q"#VECREG1NO", [%0, #48] ; " \
           "str   q"#VECREG2NO", [%0, #64] ; " \
           "str   q"#VECREG3NO", [%0, #80] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #96] " \
           : : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
             "v16", "v17", "v18", "x30" \
        ); \
        printf(INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf("  "); \
        showV128(&block[4]); printf("  "); \
        showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
    442 
    443 
/* Generate a test that involves four vector regs,
   with no bias towards which is input and which is output.  It's also
   OK for INSN to use v16, v17, v18 as scratch (they are declared
   clobbered).

   Defines test_<TESTNAME>(ty).  INSN is the complete instruction text
   (a string literal); VECREG1NO..VECREG4NO are the numbers of the four
   q/v-registers it uses.

   Per iteration a nine-slot block is used:
     block[0..3]  random values loaded into registers 1, 2, 3 and 4
     block[4..7]  receive registers 1, 2, 3 and 4 after the instruction
     block[8]     receives the FPSR value read afterwards
   FPSR is zeroed beforehand, using x30 as scratch.
   Output: the instruction text, block[0..7] and the FPSR value ANDed
   with 0xFFFFFF60 (clears bits 0..4 and 7 -- presumably the cumulative
   exception flags; confirm). */
#define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
                                       VECREG3NO,VECREG4NO)  \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[8+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        randV128(&block[6], ty); \
        randV128(&block[7], ty); \
        __asm__ __volatile__( \
           "mov   x30, #0 ; msr fpsr, x30 ; " \
           "ldr   q"#VECREG1NO", [%0, #0]  ; " \
           "ldr   q"#VECREG2NO", [%0, #16] ; " \
           "ldr   q"#VECREG3NO", [%0, #32] ; " \
           "ldr   q"#VECREG4NO", [%0, #48] ; " \
           INSN " ; " \
           "str   q"#VECREG1NO", [%0, #64] ; " \
           "str   q"#VECREG2NO", [%0, #80] ; " \
           "str   q"#VECREG3NO", [%0, #96] ; " \
           "str   q"#VECREG4NO", [%0, #112] ; " \
           "mrs   x30, fpsr ; str x30, [%0, #128] " \
           : : "r"(&block[0]) \
           : "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
                       "v"#VECREG3NO, "v"#VECREG4NO, \
             "v16", "v17", "v18", "x30" \
        ); \
        printf(INSN   "   "); \
        UInt fpsr = 0xFFFFFF60 & block[8].u32[0]; \
        showV128(&block[0]); printf("  "); \
        showV128(&block[1]); printf("  "); \
        showV128(&block[2]); printf("  "); \
        showV128(&block[3]); printf("  "); \
        showV128(&block[4]); printf("  "); \
        showV128(&block[5]); printf("  "); \
        showV128(&block[6]); printf("  "); \
        showV128(&block[7]); printf(" fpsr=%08x\n", fpsr); \
     } \
  }
    492 
    493 
    494 /* ---------------------------------------------------------------- */
    495 /* -- Test functions and non-parameterisable test macros         -- */
    496 /* ---------------------------------------------------------------- */
    497 
    498 void test_UMINV ( void )
    499 {
    500   int i;
    501   V128 block[2];
    502 
    503   /* -- 4s -- */
    504 
    505   for (i = 0; i < 10; i++) {
    506     memset(&block, 0x55, sizeof(block));
    507     randV128(&block[0], TyS);
    508     randV128(&block[1], TyS);
    509     __asm__ __volatile__(
    510        "ldr   q7, [%0, #0]   ; "
    511        "uminv s8, v7.4s   ; "
    512        "str   q8, [%0, #16] "
    513        : : "r"(&block[0]) : "memory", "v7", "v8"
    514                          );
    515     printf("UMINV v8, v7.4s  ");
    516     showV128(&block[0]); printf("  ");
    517     showV128(&block[1]); printf("\n");
    518   }
    519 
    520   /* -- 8h -- */
    521 
    522   for (i = 0; i < 10; i++) {
    523     memset(&block, 0x55, sizeof(block));
    524     randV128(&block[0], TyH);
    525     randV128(&block[1], TyH);
    526     __asm__ __volatile__(
    527        "ldr   q7, [%0, #0]   ; "
    528        "uminv h8, v7.8h   ; "
    529        "str   q8, [%0, #16] "
    530        : : "r"(&block[0]) : "memory", "v7", "v8"
    531                          );
    532     printf("UMINV h8, v7.8h  ");
    533     showV128(&block[0]); printf("  ");
    534     showV128(&block[1]); printf("\n");
    535   }
    536 
    537   /* -- 4h -- */
    538 
    539   for (i = 0; i < 10; i++) {
    540     memset(&block, 0x55, sizeof(block));
    541     randV128(&block[0], TyH);
    542     randV128(&block[1], TyH);
    543     __asm__ __volatile__(
    544        "ldr   q7, [%0, #0]   ; "
    545        "uminv h8, v7.4h   ; "
    546        "str   q8, [%0, #16] "
    547        : : "r"(&block[0]) : "memory", "v7", "v8"
    548                          );
    549     printf("UMINV h8, v7.4h  ");
    550     showV128(&block[0]); printf("  ");
    551     showV128(&block[1]); printf("\n");
    552   }
    553 
    554   /* -- 16b -- */
    555 
    556   for (i = 0; i < 10; i++) {
    557     memset(&block, 0x55, sizeof(block));
    558     randV128(&block[0], TyB);
    559     randV128(&block[1], TyB);
    560     __asm__ __volatile__(
    561        "ldr   q7, [%0, #0]   ; "
    562        "uminv b8, v7.16b   ; "
    563        "str   q8, [%0, #16] "
    564        : : "r"(&block[0]) : "memory", "v7", "v8"
    565                          );
    566     printf("UMINV b8, v7.16b  ");
    567     showV128(&block[0]); printf("  ");
    568     showV128(&block[1]); printf("\n");
    569   }
    570 
    571   /* -- 8b -- */
    572 
    573   for (i = 0; i < 10; i++) {
    574     memset(&block, 0x55, sizeof(block));
    575     randV128(&block[0], TyB);
    576     randV128(&block[1], TyB);
    577     __asm__ __volatile__(
    578        "ldr   q7, [%0, #0]   ; "
    579        "uminv b8, v7.8b   ; "
    580        "str   q8, [%0, #16] "
    581        : : "r"(&block[0]) : "memory", "v7", "v8"
    582                          );
    583     printf("UMINV b8, v7.8b  ");
    584     showV128(&block[0]); printf("  ");
    585     showV128(&block[1]); printf("\n");
    586   }
    587 
    588 }
    589 
    590 
    591 void test_UMAXV ( void )
    592 {
    593   int i;
    594   V128 block[2];
    595 
    596   /* -- 4s -- */
    597 
    598   for (i = 0; i < 10; i++) {
    599     memset(&block, 0x55, sizeof(block));
    600     randV128(&block[0], TyS);
    601     randV128(&block[1], TyS);
    602     __asm__ __volatile__(
    603        "ldr   q7, [%0, #0]   ; "
    604        "umaxv s8, v7.4s   ; "
    605        "str   q8, [%0, #16] "
    606        : : "r"(&block[0]) : "memory", "v7", "v8"
    607                          );
    608     printf("UMAXV v8, v7.4s  ");
    609     showV128(&block[0]); printf("  ");
    610     showV128(&block[1]); printf("\n");
    611   }
    612 
    613   /* -- 8h -- */
    614 
    615   for (i = 0; i < 10; i++) {
    616     memset(&block, 0x55, sizeof(block));
    617     randV128(&block[0], TyH);
    618     randV128(&block[1], TyH);
    619     __asm__ __volatile__(
    620        "ldr   q7, [%0, #0]   ; "
    621        "umaxv h8, v7.8h   ; "
    622        "str   q8, [%0, #16] "
    623        : : "r"(&block[0]) : "memory", "v7", "v8"
    624                          );
    625     printf("UMAXV h8, v7.8h  ");
    626     showV128(&block[0]); printf("  ");
    627     showV128(&block[1]); printf("\n");
    628   }
    629 
    630   /* -- 4h -- */
    631 
    632   for (i = 0; i < 10; i++) {
    633     memset(&block, 0x55, sizeof(block));
    634     randV128(&block[0], TyH);
    635     randV128(&block[1], TyH);
    636     __asm__ __volatile__(
    637        "ldr   q7, [%0, #0]   ; "
    638        "umaxv h8, v7.4h   ; "
    639        "str   q8, [%0, #16] "
    640        : : "r"(&block[0]) : "memory", "v7", "v8"
    641                          );
    642     printf("UMAXV h8, v7.4h  ");
    643     showV128(&block[0]); printf("  ");
    644     showV128(&block[1]); printf("\n");
    645   }
    646 
    647   /* -- 16b -- */
    648 
    649   for (i = 0; i < 10; i++) {
    650     memset(&block, 0x55, sizeof(block));
    651     randV128(&block[0], TyB);
    652     randV128(&block[1], TyB);
    653     __asm__ __volatile__(
    654        "ldr   q7, [%0, #0]   ; "
    655        "umaxv b8, v7.16b   ; "
    656        "str   q8, [%0, #16] "
    657        : : "r"(&block[0]) : "memory", "v7", "v8"
    658                          );
    659     printf("UMAXV b8, v7.16b  ");
    660     showV128(&block[0]); printf("  ");
    661     showV128(&block[1]); printf("\n");
    662   }
    663 
    664   /* -- 8b -- */
    665 
    666   for (i = 0; i < 10; i++) {
    667     memset(&block, 0x55, sizeof(block));
    668     randV128(&block[0], TyB);
    669     randV128(&block[1], TyB);
    670     __asm__ __volatile__(
    671        "ldr   q7, [%0, #0]   ; "
    672        "umaxv b8, v7.8b   ; "
    673        "str   q8, [%0, #16] "
    674        : : "r"(&block[0]) : "memory", "v7", "v8"
    675                          );
    676     printf("UMAXV b8, v7.8b  ");
    677     showV128(&block[0]); printf("  ");
    678     showV128(&block[1]); printf("\n");
    679   }
    680 
    681 }
    682 
    683 
    684 void test_INS_general ( void )
    685 {
    686   V128 block[3];
    687 
    688   /* -- D[0..1] -- */
    689 
    690   memset(&block, 0x55, sizeof(block));
    691   block[1].u64[0] = randULong(TyD);
    692   __asm__ __volatile__(
    693      "ldr q7, [%0, #0]   ; "
    694      "ldr x19, [%0, #16] ; "
    695      "ins v7.d[0], x19   ; "
    696      "str q7, [%0, #32] "
    697      : : "r"(&block[0]) : "memory", "x19", "v7"
    698   );
    699   printf("INS v7.u64[0],x19  ");
    700   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    701   showV128(&block[2]); printf("\n");
    702 
    703   memset(&block, 0x55, sizeof(block));
    704   block[1].u64[0] = randULong(TyD);
    705   __asm__ __volatile__(
    706      "ldr q7, [%0, #0]   ; "
    707      "ldr x19, [%0, #16] ; "
    708      "ins v7.d[1], x19   ; "
    709      "str q7, [%0, #32] "
    710      : : "r"(&block[0]) : "memory", "x19", "v7"
    711   );
    712   printf("INS v7.d[1],x19  ");
    713   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    714   showV128(&block[2]); printf("\n");
    715 
    716   /* -- S[0..3] -- */
    717 
    718   memset(&block, 0x55, sizeof(block));
    719   block[1].u64[0] = randULong(TyS);
    720   __asm__ __volatile__(
    721      "ldr q7, [%0, #0]   ; "
    722      "ldr x19, [%0, #16] ; "
    723      "ins v7.s[0], w19   ; "
    724      "str q7, [%0, #32] "
    725      : : "r"(&block[0]) : "memory", "x19", "v7"
    726   );
    727   printf("INS v7.s[0],x19  ");
    728   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    729   showV128(&block[2]); printf("\n");
    730 
    731   memset(&block, 0x55, sizeof(block));
    732   block[1].u64[0] = randULong(TyS);
    733   __asm__ __volatile__(
    734      "ldr q7, [%0, #0]   ; "
    735      "ldr x19, [%0, #16] ; "
    736      "ins v7.s[1], w19   ; "
    737      "str q7, [%0, #32] "
    738      : : "r"(&block[0]) : "memory", "x19", "v7"
    739   );
    740   printf("INS v7.s[1],x19  ");
    741   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    742   showV128(&block[2]); printf("\n");
    743 
    744   memset(&block, 0x55, sizeof(block));
    745   block[1].u64[0] = randULong(TyS);
    746   __asm__ __volatile__(
    747      "ldr q7, [%0, #0]   ; "
    748      "ldr x19, [%0, #16] ; "
    749      "ins v7.s[2], w19   ; "
    750      "str q7, [%0, #32] "
    751      : : "r"(&block[0]) : "memory", "x19", "v7"
    752   );
    753   printf("INS v7.s[2],x19  ");
    754   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    755   showV128(&block[2]); printf("\n");
    756 
    757   memset(&block, 0x55, sizeof(block));
    758   block[1].u64[0] = randULong(TyS);
    759   __asm__ __volatile__(
    760      "ldr q7, [%0, #0]   ; "
    761      "ldr x19, [%0, #16] ; "
    762      "ins v7.s[3], w19   ; "
    763      "str q7, [%0, #32] "
    764      : : "r"(&block[0]) : "memory", "x19", "v7"
    765   );
    766   printf("INS v7.s[3],x19  ");
    767   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    768   showV128(&block[2]); printf("\n");
    769 
    770   /* -- H[0..7] -- */
    771 
    772   memset(&block, 0x55, sizeof(block));
    773   block[1].u64[0] = randULong(TyH);
    774   __asm__ __volatile__(
    775      "ldr q7, [%0, #0]   ; "
    776      "ldr x19, [%0, #16] ; "
    777      "ins v7.h[0], w19   ; "
    778      "str q7, [%0, #32] "
    779      : : "r"(&block[0]) : "memory", "x19", "v7"
    780   );
    781   printf("INS v7.h[0],x19  ");
    782   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    783   showV128(&block[2]); printf("\n");
    784 
    785   memset(&block, 0x55, sizeof(block));
    786   block[1].u64[0] = randULong(TyH);
    787   __asm__ __volatile__(
    788      "ldr q7, [%0, #0]   ; "
    789      "ldr x19, [%0, #16] ; "
    790      "ins v7.h[1], w19   ; "
    791      "str q7, [%0, #32] "
    792      : : "r"(&block[0]) : "memory", "x19", "v7"
    793   );
    794   printf("INS v7.h[1],x19  ");
    795   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    796   showV128(&block[2]); printf("\n");
    797 
    798   memset(&block, 0x55, sizeof(block));
    799   block[1].u64[0] = randULong(TyH);
    800   __asm__ __volatile__(
    801      "ldr q7, [%0, #0]   ; "
    802      "ldr x19, [%0, #16] ; "
    803      "ins v7.h[2], w19   ; "
    804      "str q7, [%0, #32] "
    805      : : "r"(&block[0]) : "memory", "x19", "v7"
    806   );
    807   printf("INS v7.h[2],x19  ");
    808   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    809   showV128(&block[2]); printf("\n");
    810 
    811   memset(&block, 0x55, sizeof(block));
    812   block[1].u64[0] = randULong(TyH);
    813   __asm__ __volatile__(
    814      "ldr q7, [%0, #0]   ; "
    815      "ldr x19, [%0, #16] ; "
    816      "ins v7.h[3], w19   ; "
    817      "str q7, [%0, #32] "
    818      : : "r"(&block[0]) : "memory", "x19", "v7"
    819   );
    820   printf("INS v7.h[3],x19  ");
    821   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    822   showV128(&block[2]); printf("\n");
    823 
    824   memset(&block, 0x55, sizeof(block));
    825   block[1].u64[0] = randULong(TyH);
    826   __asm__ __volatile__(
    827      "ldr q7, [%0, #0]   ; "
    828      "ldr x19, [%0, #16] ; "
    829      "ins v7.h[4], w19   ; "
    830      "str q7, [%0, #32] "
    831      : : "r"(&block[0]) : "memory", "x19", "v7"
    832   );
    833   printf("INS v7.h[4],x19  ");
    834   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    835   showV128(&block[2]); printf("\n");
    836 
    837   memset(&block, 0x55, sizeof(block));
    838   block[1].u64[0] = randULong(TyH);
    839   __asm__ __volatile__(
    840      "ldr q7, [%0, #0]   ; "
    841      "ldr x19, [%0, #16] ; "
    842      "ins v7.h[5], w19   ; "
    843      "str q7, [%0, #32] "
    844      : : "r"(&block[0]) : "memory", "x19", "v7"
    845   );
    846   printf("INS v7.h[5],x19  ");
    847   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    848   showV128(&block[2]); printf("\n");
    849 
    850   memset(&block, 0x55, sizeof(block));
    851   block[1].u64[0] = randULong(TyH);
    852   __asm__ __volatile__(
    853      "ldr q7, [%0, #0]   ; "
    854      "ldr x19, [%0, #16] ; "
    855      "ins v7.h[6], w19   ; "
    856      "str q7, [%0, #32] "
    857      : : "r"(&block[0]) : "memory", "x19", "v7"
    858   );
    859   printf("INS v7.h[6],x19  ");
    860   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    861   showV128(&block[2]); printf("\n");
    862 
    863   memset(&block, 0x55, sizeof(block));
    864   block[1].u64[0] = randULong(TyH);
    865   __asm__ __volatile__(
    866      "ldr q7, [%0, #0]   ; "
    867      "ldr x19, [%0, #16] ; "
    868      "ins v7.h[7], w19   ; "
    869      "str q7, [%0, #32] "
    870      : : "r"(&block[0]) : "memory", "x19", "v7"
    871   );
    872   printf("INS v7.h[7],x19  ");
    873   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    874   showV128(&block[2]); printf("\n");
    875 
    876   /* -- B[0,15] -- */
    877 
    878   memset(&block, 0x55, sizeof(block));
    879   block[1].u64[0] = randULong(TyB);
    880   __asm__ __volatile__(
    881      "ldr q7, [%0, #0]   ; "
    882      "ldr x19, [%0, #16] ; "
    883      "ins v7.b[0], w19   ; "
    884      "str q7, [%0, #32] "
    885      : : "r"(&block[0]) : "memory", "x19", "v7"
    886   );
    887   printf("INS v7.b[0],x19  ");
    888   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    889   showV128(&block[2]); printf("\n");
    890 
    891   memset(&block, 0x55, sizeof(block));
    892   block[1].u64[0] = randULong(TyB);
    893   __asm__ __volatile__(
    894      "ldr q7, [%0, #0]   ; "
    895      "ldr x19, [%0, #16] ; "
    896      "ins v7.b[15], w19   ; "
    897      "str q7, [%0, #32] "
    898      : : "r"(&block[0]) : "memory", "x19", "v7"
    899   );
    900   printf("INS v7.b[15],x19 ");
    901   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    902   showV128(&block[2]); printf("\n");
    903 }
    904 
    905 
    906 
    907 void test_SMINV ( void )
    908 {
    909   int i;
    910   V128 block[2];
    911 
    912   /* -- 4s -- */
    913 
    914   for (i = 0; i < 10; i++) {
    915     memset(&block, 0x55, sizeof(block));
    916     randV128(&block[0], TyS);
    917     randV128(&block[1], TyS);
    918     __asm__ __volatile__(
    919        "ldr   q7, [%0, #0]   ; "
    920        "sminv s8, v7.4s   ; "
    921        "str   q8, [%0, #16] "
    922        : : "r"(&block[0]) : "memory", "v7", "v8"
    923                          );
    924     printf("SMINV v8, v7.4s  ");
    925     showV128(&block[0]); printf("  ");
    926     showV128(&block[1]); printf("\n");
    927   }
    928 
    929   /* -- 8h -- */
    930 
    931   for (i = 0; i < 10; i++) {
    932     memset(&block, 0x55, sizeof(block));
    933     randV128(&block[0], TyH);
    934     randV128(&block[1], TyH);
    935     __asm__ __volatile__(
    936        "ldr   q7, [%0, #0]   ; "
    937        "sminv h8, v7.8h   ; "
    938        "str   q8, [%0, #16] "
    939        : : "r"(&block[0]) : "memory", "v7", "v8"
    940                          );
    941     printf("SMINV h8, v7.8h  ");
    942     showV128(&block[0]); printf("  ");
    943     showV128(&block[1]); printf("\n");
    944   }
    945 
    946   /* -- 4h -- */
    947 
    948   for (i = 0; i < 10; i++) {
    949     memset(&block, 0x55, sizeof(block));
    950     randV128(&block[0], TyH);
    951     randV128(&block[1], TyH);
    952     __asm__ __volatile__(
    953        "ldr   q7, [%0, #0]   ; "
    954        "sminv h8, v7.4h   ; "
    955        "str   q8, [%0, #16] "
    956        : : "r"(&block[0]) : "memory", "v7", "v8"
    957                          );
    958     printf("SMINV h8, v7.4h  ");
    959     showV128(&block[0]); printf("  ");
    960     showV128(&block[1]); printf("\n");
    961   }
    962 
    963   /* -- 16b -- */
    964 
    965   for (i = 0; i < 10; i++) {
    966     memset(&block, 0x55, sizeof(block));
    967     randV128(&block[0], TyB);
    968     randV128(&block[1], TyB);
    969     __asm__ __volatile__(
    970        "ldr   q7, [%0, #0]   ; "
    971        "sminv b8, v7.16b   ; "
    972        "str   q8, [%0, #16] "
    973        : : "r"(&block[0]) : "memory", "v7", "v8"
    974                          );
    975     printf("SMINV b8, v7.16b  ");
    976     showV128(&block[0]); printf("  ");
    977     showV128(&block[1]); printf("\n");
    978   }
    979 
    980   /* -- 8b -- */
    981 
    982   for (i = 0; i < 10; i++) {
    983     memset(&block, 0x55, sizeof(block));
    984     randV128(&block[0], TyB);
    985     randV128(&block[1], TyB);
    986     __asm__ __volatile__(
    987        "ldr   q7, [%0, #0]   ; "
    988        "sminv b8, v7.8b   ; "
    989        "str   q8, [%0, #16] "
    990        : : "r"(&block[0]) : "memory", "v7", "v8"
    991                          );
    992     printf("SMINV b8, v7.8b  ");
    993     showV128(&block[0]); printf("  ");
    994     showV128(&block[1]); printf("\n");
    995   }
    996 
    997 }
    998 
    999 
   1000 void test_SMAXV ( void )
   1001 {
   1002   int i;
   1003   V128 block[2];
   1004 
   1005   /* -- 4s -- */
   1006 
   1007   for (i = 0; i < 10; i++) {
   1008     memset(&block, 0x55, sizeof(block));
   1009     randV128(&block[0], TyS);
   1010     randV128(&block[1], TyS);
   1011     __asm__ __volatile__(
   1012        "ldr   q7, [%0, #0]   ; "
   1013        "smaxv s8, v7.4s   ; "
   1014        "str   q8, [%0, #16] "
   1015        : : "r"(&block[0]) : "memory", "v7", "v8"
   1016                          );
   1017     printf("SMAXV v8, v7.4s  ");
   1018     showV128(&block[0]); printf("  ");
   1019     showV128(&block[1]); printf("\n");
   1020   }
   1021 
   1022   /* -- 8h -- */
   1023 
   1024   for (i = 0; i < 10; i++) {
   1025     memset(&block, 0x55, sizeof(block));
   1026     randV128(&block[0], TyH);
   1027     randV128(&block[1], TyH);
   1028     __asm__ __volatile__(
   1029        "ldr   q7, [%0, #0]   ; "
   1030        "smaxv h8, v7.8h   ; "
   1031        "str   q8, [%0, #16] "
   1032        : : "r"(&block[0]) : "memory", "v7", "v8"
   1033                          );
   1034     printf("SMAXV h8, v7.8h  ");
   1035     showV128(&block[0]); printf("  ");
   1036     showV128(&block[1]); printf("\n");
   1037   }
   1038 
   1039   /* -- 4h -- */
   1040 
   1041   for (i = 0; i < 10; i++) {
   1042     memset(&block, 0x55, sizeof(block));
   1043     randV128(&block[0], TyH);
   1044     randV128(&block[1], TyH);
   1045     __asm__ __volatile__(
   1046        "ldr   q7, [%0, #0]   ; "
   1047        "smaxv h8, v7.4h   ; "
   1048        "str   q8, [%0, #16] "
   1049        : : "r"(&block[0]) : "memory", "v7", "v8"
   1050                          );
   1051     printf("SMAXV h8, v7.4h  ");
   1052     showV128(&block[0]); printf("  ");
   1053     showV128(&block[1]); printf("\n");
   1054   }
   1055 
   1056   /* -- 16b -- */
   1057 
   1058   for (i = 0; i < 10; i++) {
   1059     memset(&block, 0x55, sizeof(block));
   1060     randV128(&block[0], TyB);
   1061     randV128(&block[1], TyB);
   1062     __asm__ __volatile__(
   1063        "ldr   q7, [%0, #0]   ; "
   1064        "smaxv b8, v7.16b   ; "
   1065        "str   q8, [%0, #16] "
   1066        : : "r"(&block[0]) : "memory", "v7", "v8"
   1067                          );
   1068     printf("SMAXV b8, v7.16b  ");
   1069     showV128(&block[0]); printf("  ");
   1070     showV128(&block[1]); printf("\n");
   1071   }
   1072 
   1073   /* -- 8b -- */
   1074 
   1075   for (i = 0; i < 10; i++) {
   1076     memset(&block, 0x55, sizeof(block));
   1077     randV128(&block[0], TyB);
   1078     randV128(&block[1], TyB);
   1079     __asm__ __volatile__(
   1080        "ldr   q7, [%0, #0]   ; "
   1081        "smaxv b8, v7.8b   ; "
   1082        "str   q8, [%0, #16] "
   1083        : : "r"(&block[0]) : "memory", "v7", "v8"
   1084                          );
   1085     printf("SMAXV b8, v7.8b  ");
   1086     showV128(&block[0]); printf("  ");
   1087     showV128(&block[1]); printf("\n");
   1088   }
   1089 
   1090 }
   1091 
   1092 
   1093 //======== FCCMP_D ========//
   1094 
   1095 #define GEN_test_FCCMP_D_D_0xF_EQ \
   1096   __attribute__((noinline)) static void test_FCCMP_D_D_0xF_EQ ( void ) \
   1097   { \
   1098      V128 block[4]; \
   1099      randBlock_Doubles(&block[0], 3); \
   1100      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1101      showBlock("FCCMP_D_D_0xF_EQ before", &block[0], 4); \
   1102      __asm__ __volatile__( \
   1103         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1104         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1105         "fccmp d29, d11, #0xf, eq; " \
   1106         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1107         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1108         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1109      ); \
   1110      showBlock("FCCMP_D_D_0xF_EQ after", &block[0], 4); \
   1111      printf("\n"); \
   1112   }
   1113 
   1114 #define GEN_test_FCCMP_D_D_0xF_NE \
   1115   __attribute__((noinline)) static void test_FCCMP_D_D_0xF_NE ( void ) \
   1116   { \
   1117      V128 block[4]; \
   1118      randBlock_Doubles(&block[0], 3); \
   1119      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1120      showBlock("FCCMP_D_D_0xF_NE before", &block[0], 4); \
   1121      __asm__ __volatile__( \
   1122         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1123         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1124         "fccmp d29, d11, #0xf, ne; " \
   1125         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1126         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1127         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1128      ); \
   1129      showBlock("FCCMP_D_D_0xF_NE after", &block[0], 4); \
   1130      printf("\n"); \
   1131   }
   1132 
   1133 #define GEN_test_FCCMP_D_D_0x0_EQ \
   1134   __attribute__((noinline)) static void test_FCCMP_D_D_0x0_EQ ( void ) \
   1135   { \
   1136      V128 block[4]; \
   1137      randBlock_Doubles(&block[0], 3); \
   1138      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1139      showBlock("FCCMP_D_D_0x0_EQ before", &block[0], 4); \
   1140      __asm__ __volatile__( \
   1141         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1142         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1143         "fccmp d29, d11, #0x0, eq; " \
   1144         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1145         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1146         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1147      ); \
   1148      showBlock("FCCMP_D_D_0x0_EQ after", &block[0], 4); \
   1149      printf("\n"); \
   1150   }
   1151 
   1152 #define GEN_test_FCCMP_D_D_0x0_NE \
   1153   __attribute__((noinline)) static void test_FCCMP_D_D_0x0_NE ( void ) \
   1154   { \
   1155      V128 block[4]; \
   1156      randBlock_Doubles(&block[0], 3); \
   1157      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1158      showBlock("FCCMP_D_D_0x0_NE before", &block[0], 4); \
   1159      __asm__ __volatile__( \
   1160         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1161         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1162         "fccmp d29, d11, #0x0, ne; " \
   1163         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1164         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1165         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1166      ); \
   1167      showBlock("FCCMP_D_D_0x0_NE after", &block[0], 4); \
   1168      printf("\n"); \
   1169   }
   1170 
   1171 //======== FCCMP_S ========//
   1172 
   1173 #define GEN_test_FCCMP_S_S_0xF_EQ \
   1174   __attribute__((noinline)) static void test_FCCMP_S_S_0xF_EQ ( void ) \
   1175   { \
   1176      V128 block[4]; \
   1177      randBlock_Floats(&block[0], 3); \
   1178      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1179      showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
   1180      __asm__ __volatile__( \
   1181         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1182         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1183         "fccmp s29, s11, #0xf, eq; " \
   1184         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1185         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1186         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1187      ); \
   1188      showBlock("FCCMP_S_S_0xF_EQ after", &block[0], 4); \
   1189      printf("\n"); \
   1190   }
   1191 
   1192 #define GEN_test_FCCMP_S_S_0xF_NE \
   1193   __attribute__((noinline)) static void test_FCCMP_S_S_0xF_NE ( void ) \
   1194   { \
   1195      V128 block[4]; \
   1196      randBlock_Floats(&block[0], 3); \
   1197      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1198      showBlock("FCCMP_S_S_0xF_NE before", &block[0], 4); \
   1199      __asm__ __volatile__( \
   1200         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1201         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1202         "fccmp s29, s11, #0xf, ne; " \
   1203         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1204         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1205         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1206      ); \
   1207      showBlock("FCCMP_S_S_0xF_NE after", &block[0], 4); \
   1208      printf("\n"); \
   1209   }
   1210 
   1211 #define GEN_test_FCCMP_S_S_0x0_EQ \
   1212   __attribute__((noinline)) static void test_FCCMP_S_S_0x0_EQ ( void ) \
   1213   { \
   1214      V128 block[4]; \
   1215      randBlock_Floats(&block[0], 3); \
   1216      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1217      showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
   1218      __asm__ __volatile__( \
   1219         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1220         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1221         "fccmp s29, s11, #0x0, eq; " \
   1222         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1223         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1224         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1225      ); \
   1226      showBlock("FCCMP_S_S_0x0_EQ after", &block[0], 4); \
   1227      printf("\n"); \
   1228   }
   1229 
   1230 #define GEN_test_FCCMP_S_S_0x0_NE \
   1231   __attribute__((noinline)) static void test_FCCMP_S_S_0x0_NE ( void ) \
   1232   { \
   1233      V128 block[4]; \
   1234      randBlock_Floats(&block[0], 3); \
   1235      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1236      showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
   1237      __asm__ __volatile__( \
   1238         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1239         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1240         "fccmp s29, s11, #0x0, ne; " \
   1241         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1242         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1243         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1244      ); \
   1245      showBlock("FCCMP_S_S_0x0_NE after", &block[0], 4); \
   1246      printf("\n"); \
   1247   }
   1248 
   1249 //======== FCCMPE_D ========//
   1250 
   1251 #define GEN_test_FCCMPE_D_D_0xF_EQ \
   1252   __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_EQ ( void ) \
   1253   { \
   1254      V128 block[4]; \
   1255      randBlock_Doubles(&block[0], 3); \
   1256      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1257      showBlock("FCCMPE_D_D_0xF_EQ before", &block[0], 4); \
   1258      __asm__ __volatile__( \
   1259         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1260         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1261         "fccmpe d29, d11, #0xf, eq; " \
   1262         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1263         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1264         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1265      ); \
   1266      showBlock("FCCMPE_D_D_0xF_EQ after", &block[0], 4); \
   1267      printf("\n"); \
   1268   }
   1269 
   1270 #define GEN_test_FCCMPE_D_D_0xF_NE \
   1271   __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_NE ( void ) \
   1272   { \
   1273      V128 block[4]; \
   1274      randBlock_Doubles(&block[0], 3); \
   1275      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1276      showBlock("FCCMPE_D_D_0xF_NE before", &block[0], 4); \
   1277      __asm__ __volatile__( \
   1278         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1279         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1280         "fccmpe d29, d11, #0xf, ne; " \
   1281         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1282         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1283         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1284      ); \
   1285      showBlock("FCCMPE_D_D_0xF_NE after", &block[0], 4); \
   1286      printf("\n"); \
   1287   }
   1288 
   1289 #define GEN_test_FCCMPE_D_D_0x0_EQ \
   1290   __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_EQ ( void ) \
   1291   { \
   1292      V128 block[4]; \
   1293      randBlock_Doubles(&block[0], 3); \
   1294      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1295      showBlock("FCCMPE_D_D_0x0_EQ before", &block[0], 4); \
   1296      __asm__ __volatile__( \
   1297         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1298         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1299         "fccmpe d29, d11, #0x0, eq; " \
   1300         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1301         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1302         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1303      ); \
   1304      showBlock("FCCMPE_D_D_0x0_EQ after", &block[0], 4); \
   1305      printf("\n"); \
   1306   }
   1307 
   1308 #define GEN_test_FCCMPE_D_D_0x0_NE \
   1309   __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_NE ( void ) \
   1310   { \
   1311      V128 block[4]; \
   1312      randBlock_Doubles(&block[0], 3); \
   1313      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1314      showBlock("FCCMPE_D_D_0x0_NE before", &block[0], 4); \
   1315      __asm__ __volatile__( \
   1316         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1317         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1318         "fccmpe d29, d11, #0x0, ne; " \
   1319         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1320         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1321         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1322      ); \
   1323      showBlock("FCCMPE_D_D_0x0_NE after", &block[0], 4); \
   1324      printf("\n"); \
   1325   }
   1326 
   1327 //======== FCCMPE_S ========//
   1328 
   1329 #define GEN_test_FCCMPE_S_S_0xF_EQ \
   1330   __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_EQ ( void ) \
   1331   { \
   1332      V128 block[4]; \
   1333      randBlock_Floats(&block[0], 3); \
   1334      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1335      showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
   1336      __asm__ __volatile__( \
   1337         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1338         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1339         "fccmpe s29, s11, #0xf, eq; " \
   1340         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1341         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1342         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1343      ); \
   1344      showBlock("FCCMPE_S_S_0xF_EQ after", &block[0], 4); \
   1345      printf("\n"); \
   1346   }
   1347 
   1348 #define GEN_test_FCCMPE_S_S_0xF_NE \
   1349   __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_NE ( void ) \
   1350   { \
   1351      V128 block[4]; \
   1352      randBlock_Floats(&block[0], 3); \
   1353      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1354      showBlock("FCCMPE_S_S_0xF_NE before", &block[0], 4); \
   1355      __asm__ __volatile__( \
   1356         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1357         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1358         "fccmpe s29, s11, #0xf, ne; " \
   1359         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1360         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1361         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1362      ); \
   1363      showBlock("FCCMPE_S_S_0xF_NE after", &block[0], 4); \
   1364      printf("\n"); \
   1365   }
   1366 
   1367 #define GEN_test_FCCMPE_S_S_0x0_EQ \
   1368   __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_EQ ( void ) \
   1369   { \
   1370      V128 block[4]; \
   1371      randBlock_Floats(&block[0], 3); \
   1372      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1373      showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
   1374      __asm__ __volatile__( \
   1375         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1376         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1377         "fccmpe s29, s11, #0x0, eq; " \
   1378         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1379         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1380         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1381      ); \
   1382      showBlock("FCCMPE_S_S_0x0_EQ after", &block[0], 4); \
   1383      printf("\n"); \
   1384   }
   1385 
   1386 #define GEN_test_FCCMPE_S_S_0x0_NE \
   1387   __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_NE ( void ) \
   1388   { \
   1389      V128 block[4]; \
   1390      randBlock_Floats(&block[0], 3); \
   1391      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1392      showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
   1393      __asm__ __volatile__( \
   1394         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1395         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1396         "fccmpe s29, s11, #0x0, ne; " \
   1397         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1398         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1399         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1400      ); \
   1401      showBlock("FCCMPE_S_S_0x0_NE after", &block[0], 4); \
   1402      printf("\n"); \
   1403   }
   1404 
   1405 //======== FCMEQ_D_D_D ========//
   1406 
   1407 #define GEN_test_FCMEQ_D_D_D \
   1408   __attribute__((noinline)) static void test_FCMEQ_D_D_D ( void ) \
   1409   { \
   1410      V128 block[4]; \
   1411      randBlock_Doubles(&block[0], 3); \
   1412      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1413      showBlock("FCMEQ_D_D_D before", &block[0], 4); \
   1414      __asm__ __volatile__( \
   1415         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1416         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1417         "fcmeq d29, d11, d9; " \
   1418         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1419         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1420         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1421      ); \
   1422      showBlock("FCMEQ_D_D_D after", &block[0], 4); \
   1423      printf("\n"); \
   1424   }
   1425 
   1426 //======== FCMEQ_S_S_S ========//
   1427 
   1428 #define GEN_test_FCMEQ_S_S_S \
   1429   __attribute__((noinline)) static void test_FCMEQ_S_S_S ( void ) \
   1430   { \
   1431      V128 block[4]; \
   1432      randBlock_Floats(&block[0], 3); \
   1433      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1434      showBlock("FCMEQ_S_S_S before", &block[0], 4); \
   1435      __asm__ __volatile__( \
   1436         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1437         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1438         "fcmeq s29, s11, s9; " \
   1439         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1440         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1441         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1442      ); \
   1443      showBlock("FCMEQ_S_S_S after", &block[0], 4); \
   1444      printf("\n"); \
   1445   }
   1446 
   1447 //======== FCMGE_D_D_D ========//
   1448 
   1449 #define GEN_test_FCMGE_D_D_D \
   1450   __attribute__((noinline)) static void test_FCMGE_D_D_D ( void ) \
   1451   { \
   1452      V128 block[4]; \
   1453      randBlock_Doubles(&block[0], 3); \
   1454      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1455      showBlock("FCMGE_D_D_D before", &block[0], 4); \
   1456      __asm__ __volatile__( \
   1457         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1458         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1459         "fcmge d29, d11, d9; " \
   1460         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1461         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1462         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1463      ); \
   1464      showBlock("FCMGE_D_D_D after", &block[0], 4); \
   1465      printf("\n"); \
   1466   }
   1467 
   1468 //======== FCMGE_S_S_S ========//
   1469 
   1470 #define GEN_test_FCMGE_S_S_S \
   1471   __attribute__((noinline)) static void test_FCMGE_S_S_S ( void ) \
   1472   { \
   1473      V128 block[4]; \
   1474      randBlock_Floats(&block[0], 3); \
   1475      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1476      showBlock("FCMGE_S_S_S before", &block[0], 4); \
   1477      __asm__ __volatile__( \
   1478         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1479         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1480         "fcmge s29, s11, s9; " \
   1481         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1482         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1483         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1484      ); \
   1485      showBlock("FCMGE_S_S_S after", &block[0], 4); \
   1486      printf("\n"); \
   1487   }
   1488 
   1489 //======== FCMGT_D_D_D ========//
   1490 
   1491 #define GEN_test_FCMGT_D_D_D \
   1492   __attribute__((noinline)) static void test_FCMGT_D_D_D ( void ) \
   1493   { \
   1494      V128 block[4]; \
   1495      randBlock_Doubles(&block[0], 3); \
   1496      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1497      showBlock("FCMGT_D_D_D before", &block[0], 4); \
   1498      __asm__ __volatile__( \
   1499         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1500         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1501         "fcmgt d29, d11, d9; " \
   1502         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1503         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1504         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1505      ); \
   1506      showBlock("FCMGT_D_D_D after", &block[0], 4); \
   1507      printf("\n"); \
   1508   }
   1509 
   1510 //======== FCMGT_S_S_S ========//
   1511 
   1512 #define GEN_test_FCMGT_S_S_S \
   1513   __attribute__((noinline)) static void test_FCMGT_S_S_S ( void ) \
   1514   { \
   1515      V128 block[4]; \
   1516      randBlock_Floats(&block[0], 3); \
   1517      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1518      showBlock("FCMGT_S_S_S before", &block[0], 4); \
   1519      __asm__ __volatile__( \
   1520         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1521         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1522         "fcmgt s29, s11, s9; " \
   1523         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1524         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1525         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1526      ); \
   1527      showBlock("FCMGT_S_S_S after", &block[0], 4); \
   1528      printf("\n"); \
   1529   }
   1530 
   1531 //======== FACGT_D_D_D ========//
   1532 
   1533 #define GEN_test_FACGT_D_D_D \
   1534   __attribute__((noinline)) static void test_FACGT_D_D_D ( void ) \
   1535   { \
   1536      V128 block[4]; \
   1537      randBlock_Doubles(&block[0], 3); \
   1538      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1539      showBlock("FACGT_D_D_D before", &block[0], 4); \
   1540      __asm__ __volatile__( \
   1541         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1542         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1543         "facgt d29, d11, d9; " \
   1544         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1545         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1546         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1547      ); \
   1548      showBlock("FACGT_D_D_D after", &block[0], 4); \
   1549      printf("\n"); \
   1550   }
   1551 
   1552 //======== FACGT_S_S_S ========//
   1553 
   1554 #define GEN_test_FACGT_S_S_S \
   1555   __attribute__((noinline)) static void test_FACGT_S_S_S ( void ) \
   1556   { \
   1557      V128 block[4]; \
   1558      randBlock_Floats(&block[0], 3); \
   1559      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1560      showBlock("FACGT_S_S_S before", &block[0], 4); \
   1561      __asm__ __volatile__( \
   1562         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1563         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1564         "facgt s29, s11, s9; " \
   1565         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1566         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1567         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1568      ); \
   1569      showBlock("FACGT_S_S_S after", &block[0], 4); \
   1570      printf("\n"); \
   1571   }
   1572 
   1573 //======== FACGE_D_D_D ========//
   1574 // Expands to test_FACGE_D_D_D(): runs "facge d29, d11, d9" (scalar double-precision absolute compare greater-than-or-equal) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1575 #define GEN_test_FACGE_D_D_D \
   1576   __attribute__((noinline)) static void test_FACGE_D_D_D ( void ) \
   1577   { \
   1578      V128 block[4]; \
   1579      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1580      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1581      showBlock("FACGE_D_D_D before", &block[0], 4); \
   1582      __asm__ __volatile__( \
   1583         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1584         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1585         "facge d29, d11, d9; " /* instruction under test: destination d29, sources d11 and d9 */ \
   1586         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1587         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1588         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1589      ); \
   1590      showBlock("FACGE_D_D_D after", &block[0], 4); \
   1591      printf("\n"); \
   1592   }
   1593 
   1594 //======== FACGE_S_S_S ========//
   1595 // Expands to test_FACGE_S_S_S(): runs "facge s29, s11, s9" (scalar single-precision absolute compare greater-than-or-equal) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1596 #define GEN_test_FACGE_S_S_S \
   1597   __attribute__((noinline)) static void test_FACGE_S_S_S ( void ) \
   1598   { \
   1599      V128 block[4]; \
   1600      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1601      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1602      showBlock("FACGE_S_S_S before", &block[0], 4); \
   1603      __asm__ __volatile__( \
   1604         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1605         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1606         "facge s29, s11, s9; " /* instruction under test: destination s29, sources s11 and s9 */ \
   1607         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1608         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1609         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1610      ); \
   1611      showBlock("FACGE_S_S_S after", &block[0], 4); \
   1612      printf("\n"); \
   1613   }
   1614 
   1615 //======== FCMEQ_D_D_Z ========//
   1616 // Expands to test_FCMEQ_D_D_Z(): runs "fcmeq d29, d11, #0" (scalar double-precision compare equal to zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1617 #define GEN_test_FCMEQ_D_D_Z \
   1618   __attribute__((noinline)) static void test_FCMEQ_D_D_Z ( void ) \
   1619   { \
   1620      V128 block[4]; \
   1621      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1622      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1623      showBlock("FCMEQ_D_D_Z before", &block[0], 4); \
   1624      __asm__ __volatile__( \
   1625         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1626         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1627         "fcmeq d29, d11, #0; " /* instruction under test: destination d29, source d11; q9 is not an operand */ \
   1628         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1629         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1630         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1631      ); \
   1632      showBlock("FCMEQ_D_D_Z after", &block[0], 4); \
   1633      printf("\n"); \
   1634   }
   1635 
   1636 //======== FCMEQ_S_S_Z ========//
   1637 // Expands to test_FCMEQ_S_S_Z(): runs "fcmeq s29, s11, #0" (scalar single-precision compare equal to zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1638 #define GEN_test_FCMEQ_S_S_Z \
   1639   __attribute__((noinline)) static void test_FCMEQ_S_S_Z ( void ) \
   1640   { \
   1641      V128 block[4]; \
   1642      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1643      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1644      showBlock("FCMEQ_S_S_Z before", &block[0], 4); \
   1645      __asm__ __volatile__( \
   1646         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1647         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1648         "fcmeq s29, s11, #0; " /* instruction under test: destination s29, source s11; q9 is not an operand */ \
   1649         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1650         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1651         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1652      ); \
   1653      showBlock("FCMEQ_S_S_Z after", &block[0], 4); \
   1654      printf("\n"); \
   1655   }
   1656 
   1657 //======== FCMGE_D_D_Z ========//
   1658 // Expands to test_FCMGE_D_D_Z(): runs "fcmge d29, d11, #0" (scalar double-precision compare greater-than-or-equal to zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1659 #define GEN_test_FCMGE_D_D_Z \
   1660   __attribute__((noinline)) static void test_FCMGE_D_D_Z ( void ) \
   1661   { \
   1662      V128 block[4]; \
   1663      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1664      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1665      showBlock("FCMGE_D_D_Z before", &block[0], 4); \
   1666      __asm__ __volatile__( \
   1667         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1668         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1669         "fcmge d29, d11, #0; " /* instruction under test: destination d29, source d11; q9 is not an operand */ \
   1670         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1671         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1672         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1673      ); \
   1674      showBlock("FCMGE_D_D_Z after", &block[0], 4); \
   1675      printf("\n"); \
   1676   }
   1677 
   1678 //======== FCMGE_S_S_Z ========//
   1679 // Expands to test_FCMGE_S_S_Z(): runs "fcmge s29, s11, #0" (scalar single-precision compare greater-than-or-equal to zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1680 #define GEN_test_FCMGE_S_S_Z \
   1681   __attribute__((noinline)) static void test_FCMGE_S_S_Z ( void ) \
   1682   { \
   1683      V128 block[4]; \
   1684      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1685      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1686      showBlock("FCMGE_S_S_Z before", &block[0], 4); \
   1687      __asm__ __volatile__( \
   1688         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1689         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1690         "fcmge s29, s11, #0; " /* instruction under test: destination s29, source s11; q9 is not an operand */ \
   1691         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1692         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1693         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1694      ); \
   1695      showBlock("FCMGE_S_S_Z after", &block[0], 4); \
   1696      printf("\n"); \
   1697   }
   1698 
   1699 //======== FCMGT_D_D_Z ========//
   1700 // Expands to test_FCMGT_D_D_Z(): runs "fcmgt d29, d11, #0" (scalar double-precision compare greater-than zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1701 #define GEN_test_FCMGT_D_D_Z \
   1702   __attribute__((noinline)) static void test_FCMGT_D_D_Z ( void ) \
   1703   { \
   1704      V128 block[4]; \
   1705      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1706      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1707      showBlock("FCMGT_D_D_Z before", &block[0], 4); \
   1708      __asm__ __volatile__( \
   1709         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1710         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1711         "fcmgt d29, d11, #0; " /* instruction under test: destination d29, source d11; q9 is not an operand */ \
   1712         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1713         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1714         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1715      ); \
   1716      showBlock("FCMGT_D_D_Z after", &block[0], 4); \
   1717      printf("\n"); \
   1718   }
   1719 
   1720 //======== FCMGT_S_S_Z ========//
   1721 // Expands to test_FCMGT_S_S_Z(): runs "fcmgt s29, s11, #0" (scalar single-precision compare greater-than zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1722 #define GEN_test_FCMGT_S_S_Z \
   1723   __attribute__((noinline)) static void test_FCMGT_S_S_Z ( void ) \
   1724   { \
   1725      V128 block[4]; \
   1726      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1727      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1728      showBlock("FCMGT_S_S_Z before", &block[0], 4); \
   1729      __asm__ __volatile__( \
   1730         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1731         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1732         "fcmgt s29, s11, #0; " /* instruction under test: destination s29, source s11; q9 is not an operand */ \
   1733         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1734         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1735         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1736      ); \
   1737      showBlock("FCMGT_S_S_Z after", &block[0], 4); \
   1738      printf("\n"); \
   1739   }
   1740 
   1741 //======== FCMLE_D_D_Z ========//
   1742 // Expands to test_FCMLE_D_D_Z(): runs "fcmle d29, d11, #0" (scalar double-precision compare less-than-or-equal to zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1743 #define GEN_test_FCMLE_D_D_Z \
   1744   __attribute__((noinline)) static void test_FCMLE_D_D_Z ( void ) \
   1745   { \
   1746      V128 block[4]; \
   1747      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1748      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1749      showBlock("FCMLE_D_D_Z before", &block[0], 4); \
   1750      __asm__ __volatile__( \
   1751         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1752         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1753         "fcmle d29, d11, #0; " /* instruction under test: destination d29, source d11; q9 is not an operand */ \
   1754         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1755         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1756         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1757      ); \
   1758      showBlock("FCMLE_D_D_Z after", &block[0], 4); \
   1759      printf("\n"); \
   1760   }
   1761 
   1762 //======== FCMLE_S_S_Z ========//
   1763 // Expands to test_FCMLE_S_S_Z(): runs "fcmle s29, s11, #0" (scalar single-precision compare less-than-or-equal to zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1764 #define GEN_test_FCMLE_S_S_Z \
   1765   __attribute__((noinline)) static void test_FCMLE_S_S_Z ( void ) \
   1766   { \
   1767      V128 block[4]; \
   1768      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1769      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1770      showBlock("FCMLE_S_S_Z before", &block[0], 4); \
   1771      __asm__ __volatile__( \
   1772         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1773         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1774         "fcmle s29, s11, #0; " /* instruction under test: destination s29, source s11; q9 is not an operand */ \
   1775         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1776         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1777         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1778      ); \
   1779      showBlock("FCMLE_S_S_Z after", &block[0], 4); \
   1780      printf("\n"); \
   1781   }
   1782 
   1783 //======== FCMLT_D_D_Z ========//
   1784 // Expands to test_FCMLT_D_D_Z(): runs "fcmlt d29, d11, #0" (scalar double-precision compare less-than zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1785 #define GEN_test_FCMLT_D_D_Z \
   1786   __attribute__((noinline)) static void test_FCMLT_D_D_Z ( void ) \
   1787   { \
   1788      V128 block[4]; \
   1789      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1790      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1791      showBlock("FCMLT_D_D_Z before", &block[0], 4); \
   1792      __asm__ __volatile__( \
   1793         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1794         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1795         "fcmlt d29, d11, #0; " /* instruction under test: destination d29, source d11; q9 is not an operand */ \
   1796         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1797         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1798         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1799      ); \
   1800      showBlock("FCMLT_D_D_Z after", &block[0], 4); \
   1801      printf("\n"); \
   1802   }
   1803 
   1804 //======== FCMLT_S_S_Z ========//
   1805 // Expands to test_FCMLT_S_S_Z(): runs "fcmlt s29, s11, #0" (scalar single-precision compare less-than zero) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1806 #define GEN_test_FCMLT_S_S_Z \
   1807   __attribute__((noinline)) static void test_FCMLT_S_S_Z ( void ) \
   1808   { \
   1809      V128 block[4]; \
   1810      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1811      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1812      showBlock("FCMLT_S_S_Z before", &block[0], 4); \
   1813      __asm__ __volatile__( \
   1814         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1815         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1816         "fcmlt s29, s11, #0; " /* instruction under test: destination s29, source s11; q9 is not an operand */ \
   1817         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48 */ \
   1818         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1819         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1820      ); \
   1821      showBlock("FCMLT_S_S_Z after", &block[0], 4); \
   1822      printf("\n"); \
   1823   }
   1824 
   1825 //======== FCMP_D_D ========//
   1826 // Expands to test_FCMP_D_D(): runs "fcmp d29, d11" (double-precision compare that reports through NZCV) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1827 #define GEN_test_FCMP_D_D \
   1828   __attribute__((noinline)) static void test_FCMP_D_D ( void ) \
   1829   { \
   1830      V128 block[4]; \
   1831      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1832      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1833      showBlock("FCMP_D_D before", &block[0], 4); \
   1834      __asm__ __volatile__( \
   1835         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1836         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1837         "fcmp d29, d11; " /* instruction under test: compares d29 with d11; q9 is not an operand */ \
   1838         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1839         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1840         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1841      ); \
   1842      showBlock("FCMP_D_D after", &block[0], 4); \
   1843      printf("\n"); \
   1844   }
   1845 
   1846 //======== FCMP_S_S ========//
   1847 // Expands to test_FCMP_S_S(): runs "fcmp s29, s11" (single-precision compare that reports through NZCV) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1848 #define GEN_test_FCMP_S_S \
   1849   __attribute__((noinline)) static void test_FCMP_S_S ( void ) \
   1850   { \
   1851      V128 block[4]; \
   1852      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1853      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1854      showBlock("FCMP_S_S before", &block[0], 4); \
   1855      __asm__ __volatile__( \
   1856         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1857         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1858         "fcmp s29, s11; " /* instruction under test: compares s29 with s11; q9 is not an operand */ \
   1859         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1860         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1861         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1862      ); \
   1863      showBlock("FCMP_S_S after", &block[0], 4); \
   1864      printf("\n"); \
   1865   }
   1866 
   1867 //======== FCMPE_D_D ========//
   1868 // Expands to test_FCMPE_D_D(): runs "fcmpe d29, d11" (double-precision FCMPE, reporting through NZCV) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1869 #define GEN_test_FCMPE_D_D \
   1870   __attribute__((noinline)) static void test_FCMPE_D_D ( void ) \
   1871   { \
   1872      V128 block[4]; \
   1873      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1874      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1875      showBlock("FCMPE_D_D before", &block[0], 4); \
   1876      __asm__ __volatile__( \
   1877         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1878         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1879         "fcmpe d29, d11; " /* instruction under test: compares d29 with d11; q9 is not an operand */ \
   1880         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1881         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1882         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1883      ); \
   1884      showBlock("FCMPE_D_D after", &block[0], 4); \
   1885      printf("\n"); \
   1886   }
   1887 
   1888 //======== FCMPE_S_S ========//
   1889 // Expands to test_FCMPE_S_S(): runs "fcmpe s29, s11" (single-precision FCMPE, reporting through NZCV) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1890 #define GEN_test_FCMPE_S_S \
   1891   __attribute__((noinline)) static void test_FCMPE_S_S ( void ) \
   1892   { \
   1893      V128 block[4]; \
   1894      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1895      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1896      showBlock("FCMPE_S_S before", &block[0], 4); \
   1897      __asm__ __volatile__( \
   1898         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1899         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1900         "fcmpe s29, s11; " /* instruction under test: compares s29 with s11; q9 is not an operand */ \
   1901         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1902         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1903         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1904      ); \
   1905      showBlock("FCMPE_S_S after", &block[0], 4); \
   1906      printf("\n"); \
   1907   }
   1908 
   1909 //======== FCMP_D_Z ========//
   1910 // Expands to test_FCMP_D_Z(): runs "fcmp d29, #0" (double-precision compare against zero, reporting through NZCV) on a register loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1911 #define GEN_test_FCMP_D_Z \
   1912   __attribute__((noinline)) static void test_FCMP_D_Z ( void ) \
   1913   { \
   1914      V128 block[4]; \
   1915      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1916      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1917      showBlock("FCMP_D_Z before", &block[0], 4); \
   1918      __asm__ __volatile__( \
   1919         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1920         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1921         "fcmp d29, #0; " /* instruction under test: compares d29 with zero; q11 and q9 are not operands */ \
   1922         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1923         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1924         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1925      ); \
   1926      showBlock("FCMP_D_Z after", &block[0], 4); \
   1927      printf("\n"); \
   1928   }
   1929 
   1930 //======== FCMP_S_Z ========//
   1931 // Expands to test_FCMP_S_Z(): runs "fcmp s29, #0" (single-precision compare against zero, reporting through NZCV) on a register loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1932 #define GEN_test_FCMP_S_Z \
   1933   __attribute__((noinline)) static void test_FCMP_S_Z ( void ) \
   1934   { \
   1935      V128 block[4]; \
   1936      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1937      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1938      showBlock("FCMP_S_Z before", &block[0], 4); \
   1939      __asm__ __volatile__( \
   1940         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1941         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1942         "fcmp s29, #0; " /* instruction under test: compares s29 with zero; q11 and q9 are not operands */ \
   1943         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1944         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1945         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1946      ); \
   1947      showBlock("FCMP_S_Z after", &block[0], 4); \
   1948      printf("\n"); \
   1949   }
   1950 
   1951 //======== FCMPE_D_Z ========//
   1952 // Expands to test_FCMPE_D_Z(): runs "fcmpe d29, #0" (double-precision FCMPE against zero, reporting through NZCV) on a register loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1953 #define GEN_test_FCMPE_D_Z \
   1954   __attribute__((noinline)) static void test_FCMPE_D_Z ( void ) \
   1955   { \
   1956      V128 block[4]; \
   1957      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   1958      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1959      showBlock("FCMPE_D_Z before", &block[0], 4); \
   1960      __asm__ __volatile__( \
   1961         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1962         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1963         "fcmpe d29, #0; " /* instruction under test: compares d29 with zero; q11 and q9 are not operands */ \
   1964         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1965         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1966         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1967      ); \
   1968      showBlock("FCMPE_D_Z after", &block[0], 4); \
   1969      printf("\n"); \
   1970   }
   1971 
   1972 //======== FCMPE_S_Z ========//
   1973 // Expands to test_FCMPE_S_Z(): runs "fcmpe s29, #0" (single-precision FCMPE against zero, reporting through NZCV) on a register loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1974 #define GEN_test_FCMPE_S_Z \
   1975   __attribute__((noinline)) static void test_FCMPE_S_Z ( void ) \
   1976   { \
   1977      V128 block[4]; \
   1978      randBlock_Floats(&block[0], 3); /* presumably fills block[0..2] with random single-precision data (helper not shown here) */ \
   1979      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the initial NZCV image */ \
   1980      showBlock("FCMPE_S_Z before", &block[0], 4); \
   1981      __asm__ __volatile__( \
   1982         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   1983         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   1984         "fcmpe s29, #0; " /* instruction under test: compares s29 with zero; q11 and q9 are not operands */ \
   1985         "mrs x9, nzcv; str x9, [%0, 48]; " /* resulting NZCV -> offset 48; this is where the compare outcome shows up */ \
   1986         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   1987         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   1988      ); \
   1989      showBlock("FCMPE_S_Z after", &block[0], 4); \
   1990      printf("\n"); \
   1991   }
   1992 
   1993 //======== FCSEL_D_D_D_EQ ========//
   1994 // Expands to test_FCSEL_D_D_D_EQ(): runs "fcsel d29, d11, d9, eq" (double-precision conditional select on EQ) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   1995 #define GEN_test_FCSEL_D_D_D_EQ \
   1996   __attribute__((noinline)) static void test_FCSEL_D_D_D_EQ ( void ) \
   1997   { \
   1998      V128 block[4]; \
   1999      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   2000      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the NZCV image that decides the select */ \
   2001      showBlock("FCSEL_D_D_D_EQ before", &block[0], 4); \
   2002      __asm__ __volatile__( \
   2003         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   2004         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   2005         "fcsel d29, d11, d9, eq; " /* instruction under test: d29 <- d11 if EQ holds for the NZCV loaded above, else d9 */ \
   2006         "mrs x9, nzcv; str x9, [%0, 48]; " /* NZCV after the instruction -> offset 48 */ \
   2007         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   2008         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   2009      ); \
   2010      showBlock("FCSEL_D_D_D_EQ after", &block[0], 4); \
   2011      printf("\n"); \
   2012   }
   2013 
   2014 //======== FCSEL_D_D_D_NE ========//
   2015 // Expands to test_FCSEL_D_D_D_NE(): runs "fcsel d29, d11, d9, ne" (double-precision conditional select on NE) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   2016 #define GEN_test_FCSEL_D_D_D_NE \
   2017   __attribute__((noinline)) static void test_FCSEL_D_D_D_NE ( void ) \
   2018   { \
   2019      V128 block[4]; \
   2020      randBlock_Doubles(&block[0], 3); /* presumably fills block[0..2] with random double-precision data (helper not shown here) */ \
   2021      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the NZCV image that decides the select */ \
   2022      showBlock("FCSEL_D_D_D_NE before", &block[0], 4); \
   2023      __asm__ __volatile__( \
   2024         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   2025         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   2026         "fcsel d29, d11, d9, ne; " /* instruction under test: d29 <- d11 if NE holds for the NZCV loaded above, else d9 */ \
   2027         "mrs x9, nzcv; str x9, [%0, 48]; " /* NZCV after the instruction -> offset 48 */ \
   2028         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   2029         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   2030      ); \
   2031      showBlock("FCSEL_D_D_D_NE after", &block[0], 4); \
   2032      printf("\n"); \
   2033   }
   2034 
   2035 //======== FCSEL_S_S_S_EQ ========//
   2036 // Expands to test_FCSEL_S_S_S_EQ(): runs "fcsel s29, s11, s9, eq" (single-precision conditional select on EQ) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   2037 #define GEN_test_FCSEL_S_S_S_EQ \
   2038   __attribute__((noinline)) static void test_FCSEL_S_S_S_EQ ( void ) \
   2039   { \
   2040      V128 block[4]; \
   2041      randBlock_Doubles(&block[0], 3); /* NOTE(review): the Doubles helper feeds an S-register test, whereas the other S-form tests in this section call randBlock_Floats -- confirm this is intended before changing it, since swapping the helper would alter the generated data */ \
   2042      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the NZCV image that decides the select */ \
   2043      showBlock("FCSEL_S_S_S_EQ before", &block[0], 4); \
   2044      __asm__ __volatile__( \
   2045         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   2046         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   2047         "fcsel s29, s11, s9, eq; " /* instruction under test: s29 <- s11 if EQ holds for the NZCV loaded above, else s9 */ \
   2048         "mrs x9, nzcv; str x9, [%0, 48]; " /* NZCV after the instruction -> offset 48 */ \
   2049         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   2050         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   2051      ); \
   2052      showBlock("FCSEL_S_S_S_EQ after", &block[0], 4); \
   2053      printf("\n"); \
   2054   }
   2055 
   2056 //======== FCSEL_S_S_S_NE ========//
   2057 // Expands to test_FCSEL_S_S_S_NE(): runs "fcsel s29, s11, s9, ne" (single-precision conditional select on NE) on registers loaded from a 4-entry V128 block; showBlock() is called on the block before and after.
   2058 #define GEN_test_FCSEL_S_S_S_NE \
   2059   __attribute__((noinline)) static void test_FCSEL_S_S_S_NE ( void ) \
   2060   { \
   2061      V128 block[4]; \
   2062      randBlock_Doubles(&block[0], 3); /* NOTE(review): the Doubles helper feeds an S-register test, whereas the other S-form tests in this section call randBlock_Floats -- confirm this is intended before changing it, since swapping the helper would alter the generated data */ \
   2063      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); /* block[3].u64[0] (byte offset 48) is the NZCV image that decides the select */ \
   2064      showBlock("FCSEL_S_S_S_NE before", &block[0], 4); \
   2065      __asm__ __volatile__( \
   2066         "ldr x9,  [%0, 48];  msr nzcv, x9; " /* NZCV <- 64-bit word at offset 48 */ \
   2067         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " /* q29, q11, q9 <- block[0..2] */ \
   2068         "fcsel s29, s11, s9, ne; " /* instruction under test: s29 <- s11 if NE holds for the NZCV loaded above, else s9 */ \
   2069         "mrs x9, nzcv; str x9, [%0, 48]; " /* NZCV after the instruction -> offset 48 */ \
   2070         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " /* q29, q11, q9 -> block[0..2] */ \
   2071         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" /* %0 = &block[0] */ \
   2072      ); \
   2073      showBlock("FCSEL_S_S_S_NE after", &block[0], 4); \
   2074      printf("\n"); \
   2075   }
   2076 
   2077 
   2078 /* ---------------------------------------------------------------- */
   2079 /* -- Tests, in the same order that they appear in main()        -- */
   2080 /* ---------------------------------------------------------------- */
   2081 
   2082 // ======================== FP ========================
   2083 // fabs: scalar D/S forms, then 2d/4s/2s vector forms. GEN_TWOVEC_TEST is defined outside this section; its two trailing numbers repeat the register numbers that appear in the instruction string.
   2084 GEN_TWOVEC_TEST(fabs_d_d,   "fabs d22,    d23",    22, 23)
   2085 GEN_TWOVEC_TEST(fabs_s_s,   "fabs s22,    s23",    22, 23)
   2086 GEN_TWOVEC_TEST(fabs_2d_2d, "fabs v22.2d, v23.2d", 22, 23)
   2087 GEN_TWOVEC_TEST(fabs_4s_4s, "fabs v22.4s, v23.4s", 22, 23)
   2088 GEN_TWOVEC_TEST(fabs_2s_2s, "fabs v22.2s, v23.2s", 22, 23)
   2089 // fneg: same five operand shapes as fabs.
   2090 GEN_TWOVEC_TEST(fneg_d_d,   "fneg d22, d23",       22, 23)
   2091 GEN_TWOVEC_TEST(fneg_s_s,   "fneg s22, s23",       22, 23)
   2092 GEN_TWOVEC_TEST(fneg_2d_2d, "fneg v22.2d, v23.2d", 22, 23)
   2093 GEN_TWOVEC_TEST(fneg_4s_4s, "fneg v22.4s, v23.4s", 22, 23)
   2094 GEN_TWOVEC_TEST(fneg_2s_2s, "fneg v22.2s, v23.2s", 22, 23)
   2095 // fsqrt: same five operand shapes as fabs.
   2096 GEN_TWOVEC_TEST(fsqrt_d_d,   "fsqrt d22, d23",       22, 23)
   2097 GEN_TWOVEC_TEST(fsqrt_s_s,   "fsqrt s22, s23",       22, 23)
   2098 GEN_TWOVEC_TEST(fsqrt_2d_2d, "fsqrt v22.2d, v23.2d", 22, 23)
   2099 GEN_TWOVEC_TEST(fsqrt_4s_4s, "fsqrt v22.4s, v23.4s", 22, 23)
   2100 GEN_TWOVEC_TEST(fsqrt_2s_2s, "fsqrt v22.2s, v23.2s", 22, 23)
   2101 // Scalar fadd/fsub. GEN_THREEVEC_TEST takes three register numbers, again matching the instruction string.
   2102 GEN_THREEVEC_TEST(fadd_d_d_d,  "fadd d2, d11, d29", 2, 11, 29)
   2103 GEN_THREEVEC_TEST(fadd_s_s_s,  "fadd s2, s11, s29", 2, 11, 29)
   2104 GEN_THREEVEC_TEST(fsub_d_d_d,  "fsub d2, d11, d29", 2, 11, 29)
   2105 GEN_THREEVEC_TEST(fsub_s_s_s,  "fsub s2, s11, s29", 2, 11, 29)
   2106 // Vector fadd/fsub. GEN_BINARY_TEST takes a mnemonic plus three arrangement suffixes; presumably it chooses the registers itself (macro not shown here).
   2107 GEN_BINARY_TEST(fadd, 2d, 2d, 2d)
   2108 GEN_BINARY_TEST(fadd, 4s, 4s, 4s)
   2109 GEN_BINARY_TEST(fadd, 2s, 2s, 2s)
   2110 GEN_BINARY_TEST(fsub, 2d, 2d, 2d)
   2111 GEN_BINARY_TEST(fsub, 4s, 4s, 4s)
   2112 GEN_BINARY_TEST(fsub, 2s, 2s, 2s)
   2113 // fabd (absolute difference): scalar forms via GEN_THREEVEC_TEST, vector forms via GEN_BINARY_TEST.
   2114 GEN_THREEVEC_TEST(fabd_d_d_d,  "fabd d2, d11, d29", 2, 11, 29)
   2115 GEN_THREEVEC_TEST(fabd_s_s_s,  "fabd s2, s11, s29", 2, 11, 29)
   2116 GEN_BINARY_TEST(fabd, 2d, 2d, 2d)
   2117 GEN_BINARY_TEST(fabd, 4s, 4s, 4s)
   2118 GEN_BINARY_TEST(fabd, 2s, 2s, 2s)
   2119 // faddp (pairwise add): scalar-result forms take one vector source; vector forms take two.
   2120 GEN_TWOVEC_TEST(faddp_d_2d,     "faddp d2, v23.2d",    2, 23)
   2121 GEN_TWOVEC_TEST(faddp_s_2s,     "faddp s2, v23.2s",    2, 23)
   2122 GEN_THREEVEC_TEST(faddp_2d_2d_2d, "faddp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2123 GEN_THREEVEC_TEST(faddp_4s_4s_4s, "faddp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2124 GEN_THREEVEC_TEST(faddp_2s_2s_2s, "faddp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2125 // FCCMP/FCCMPE expansions. These GEN_test_* macros are defined before the visible part of this file; presumably each one expands to a static test_<NAME>() function like the generators shown above.
   2126 GEN_test_FCCMP_D_D_0xF_EQ
   2127 GEN_test_FCCMP_D_D_0xF_NE
   2128 GEN_test_FCCMP_D_D_0x0_EQ
   2129 GEN_test_FCCMP_D_D_0x0_NE
   2130 GEN_test_FCCMP_S_S_0xF_EQ
   2131 GEN_test_FCCMP_S_S_0xF_NE
   2132 GEN_test_FCCMP_S_S_0x0_EQ
   2133 GEN_test_FCCMP_S_S_0x0_NE
   2134 GEN_test_FCCMPE_D_D_0xF_EQ
   2135 GEN_test_FCCMPE_D_D_0xF_NE
   2136 GEN_test_FCCMPE_D_D_0x0_EQ
   2137 GEN_test_FCCMPE_D_D_0x0_NE
   2138 GEN_test_FCCMPE_S_S_0xF_EQ
   2139 GEN_test_FCCMPE_S_S_0xF_NE
   2140 GEN_test_FCCMPE_S_S_0x0_EQ
   2141 GEN_test_FCCMPE_S_S_0x0_NE
   2142 // Scalar register-register compares (FCMEQ/FCMGE/FCMGT/FACGT/FACGE), one D and one S test each.
   2143 GEN_test_FCMEQ_D_D_D
   2144 GEN_test_FCMEQ_S_S_S
   2145 GEN_test_FCMGE_D_D_D
   2146 GEN_test_FCMGE_S_S_S
   2147 GEN_test_FCMGT_D_D_D
   2148 GEN_test_FCMGT_S_S_S
   2149 GEN_test_FACGT_D_D_D
   2150 GEN_test_FACGT_S_S_S
   2151 GEN_test_FACGE_D_D_D
   2152 GEN_test_FACGE_S_S_S
   2153 // Vector register-register compares: the same five mnemonics in 2d, 4s and 2s arrangements.
   2154 GEN_THREEVEC_TEST(fcmeq_2d_2d_2d, "fcmeq v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2155 GEN_THREEVEC_TEST(fcmeq_4s_4s_4s, "fcmeq v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2156 GEN_THREEVEC_TEST(fcmeq_2s_2s_2s, "fcmeq v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2157 GEN_THREEVEC_TEST(fcmge_2d_2d_2d, "fcmge v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2158 GEN_THREEVEC_TEST(fcmge_4s_4s_4s, "fcmge v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2159 GEN_THREEVEC_TEST(fcmge_2s_2s_2s, "fcmge v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2160 GEN_THREEVEC_TEST(fcmgt_2d_2d_2d, "fcmgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2161 GEN_THREEVEC_TEST(fcmgt_4s_4s_4s, "fcmgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2162 GEN_THREEVEC_TEST(fcmgt_2s_2s_2s, "fcmgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2163 GEN_THREEVEC_TEST(facge_2d_2d_2d, "facge v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2164 GEN_THREEVEC_TEST(facge_4s_4s_4s, "facge v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2165 GEN_THREEVEC_TEST(facge_2s_2s_2s, "facge v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2166 GEN_THREEVEC_TEST(facgt_2d_2d_2d, "facgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2167 GEN_THREEVEC_TEST(facgt_4s_4s_4s, "facgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2168 GEN_THREEVEC_TEST(facgt_2s_2s_2s, "facgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2169 // Scalar compare-against-zero tests: expands the GEN_test_*_Z generator macros, each of which defines one static test_<NAME>() function.
   2170 GEN_test_FCMEQ_D_D_Z
   2171 GEN_test_FCMEQ_S_S_Z
   2172 GEN_test_FCMGE_D_D_Z
   2173 GEN_test_FCMGE_S_S_Z
   2174 GEN_test_FCMGT_D_D_Z
   2175 GEN_test_FCMGT_S_S_Z
   2176 GEN_test_FCMLE_D_D_Z
   2177 GEN_test_FCMLE_S_S_Z
   2178 GEN_test_FCMLT_D_D_Z
   2179 GEN_test_FCMLT_S_S_Z
   2180 // Vector compare-against-zero: five mnemonics in 2d, 4s and 2s arrangements, using the "#0" immediate form.
   2181 GEN_TWOVEC_TEST(fcmeq_z_2d_2d, "fcmeq v2.2d, v23.2d, #0", 2, 23)
   2182 GEN_TWOVEC_TEST(fcmeq_z_4s_4s, "fcmeq v2.4s, v23.4s, #0", 2, 23)
   2183 GEN_TWOVEC_TEST(fcmeq_z_2s_2s, "fcmeq v2.2s, v23.2s, #0", 2, 23)
   2184 GEN_TWOVEC_TEST(fcmge_z_2d_2d, "fcmge v2.2d, v23.2d, #0", 2, 23)
   2185 GEN_TWOVEC_TEST(fcmge_z_4s_4s, "fcmge v2.4s, v23.4s, #0", 2, 23)
   2186 GEN_TWOVEC_TEST(fcmge_z_2s_2s, "fcmge v2.2s, v23.2s, #0", 2, 23)
   2187 GEN_TWOVEC_TEST(fcmgt_z_2d_2d, "fcmgt v2.2d, v23.2d, #0", 2, 23)
   2188 GEN_TWOVEC_TEST(fcmgt_z_4s_4s, "fcmgt v2.4s, v23.4s, #0", 2, 23)
   2189 GEN_TWOVEC_TEST(fcmgt_z_2s_2s, "fcmgt v2.2s, v23.2s, #0", 2, 23)
   2190 GEN_TWOVEC_TEST(fcmle_z_2d_2d, "fcmle v2.2d, v23.2d, #0", 2, 23)
   2191 GEN_TWOVEC_TEST(fcmle_z_4s_4s, "fcmle v2.4s, v23.4s, #0", 2, 23)
   2192 GEN_TWOVEC_TEST(fcmle_z_2s_2s, "fcmle v2.2s, v23.2s, #0", 2, 23)
   2193 GEN_TWOVEC_TEST(fcmlt_z_2d_2d, "fcmlt v2.2d, v23.2d, #0", 2, 23)
   2194 GEN_TWOVEC_TEST(fcmlt_z_4s_4s, "fcmlt v2.4s, v23.4s, #0", 2, 23)
   2195 GEN_TWOVEC_TEST(fcmlt_z_2s_2s, "fcmlt v2.2s, v23.2s, #0", 2, 23)
   2196 // FCMP/FCMPE tests: against-zero forms first, then register-register forms (note this differs from the order in which the generator macros are defined).
   2197 GEN_test_FCMP_D_Z
   2198 GEN_test_FCMP_S_Z
   2199 GEN_test_FCMPE_D_Z
   2200 GEN_test_FCMPE_S_Z
   2201 GEN_test_FCMP_D_D
   2202 GEN_test_FCMP_S_S
   2203 GEN_test_FCMPE_D_D
   2204 GEN_test_FCMPE_S_S
   2205 // FCSEL tests: D and S forms, each with the EQ and NE conditions.
   2206 GEN_test_FCSEL_D_D_D_EQ
   2207 GEN_test_FCSEL_D_D_D_NE
   2208 GEN_test_FCSEL_S_S_S_EQ
   2209 GEN_test_FCSEL_S_S_S_NE
   2210 // fdiv: scalar forms via GEN_THREEVEC_TEST, vector forms via GEN_BINARY_TEST (both macros are defined outside this section).
   2211 GEN_THREEVEC_TEST(fdiv_d_d_d,  "fdiv d2, d11, d29", 2, 11, 29)
   2212 GEN_THREEVEC_TEST(fdiv_s_s_s,  "fdiv s2, s11, s29", 2, 11, 29)
   2213 GEN_BINARY_TEST(fdiv, 2d, 2d, 2d)
   2214 GEN_BINARY_TEST(fdiv, 4s, 4s, 4s)
   2215 GEN_BINARY_TEST(fdiv, 2s, 2s, 2s)
   2216 // Scalar fused multiply-add family (fmadd/fnmadd/fmsub/fnmsub). GEN_FOURVEC_TEST takes four register numbers matching the instruction string.
   2217 GEN_FOURVEC_TEST(fmadd_d_d_d_d,  "fmadd  d2, d11, d29, d3", 2, 11, 29, 3)
   2218 GEN_FOURVEC_TEST(fmadd_s_s_s_s,  "fmadd  s2, s11, s29, s3", 2, 11, 29, 3)
   2219 GEN_FOURVEC_TEST(fnmadd_d_d_d_d, "fnmadd d2, d11, d29, d3", 2, 11, 29, 3)
   2220 GEN_FOURVEC_TEST(fnmadd_s_s_s_s, "fnmadd s2, s11, s29, s3", 2, 11, 29, 3)
   2221 GEN_FOURVEC_TEST(fmsub_d_d_d_d,  "fmsub  d2, d11, d29, d3", 2, 11, 29, 3)
   2222 GEN_FOURVEC_TEST(fmsub_s_s_s_s,  "fmsub  s2, s11, s29, s3", 2, 11, 29, 3)
   2223 GEN_FOURVEC_TEST(fnmsub_d_d_d_d, "fnmsub d2, d11, d29, d3", 2, 11, 29, 3)
   2224 GEN_FOURVEC_TEST(fnmsub_s_s_s_s, "fnmsub s2, s11, s29, s3", 2, 11, 29, 3)
   2225 // Scalar fnmul.
   2226 GEN_THREEVEC_TEST(fnmul_d_d_d, "fnmul d2, d11, d29", 2, 11, 29)
   2227 GEN_THREEVEC_TEST(fnmul_s_s_s, "fnmul s2, s11, s29", 2, 11, 29)
   2228 // Scalar fmax/fmin and their "nm" (fmaxnm/fminnm) variants, D and S forms.
   2229 GEN_THREEVEC_TEST(fmax_d_d_d,  "fmax d2, d11, d29", 2, 11, 29)
   2230 GEN_THREEVEC_TEST(fmax_s_s_s,  "fmax s2, s11, s29", 2, 11, 29)
   2231 GEN_THREEVEC_TEST(fmin_d_d_d,  "fmin d2, d11, d29", 2, 11, 29)
   2232 GEN_THREEVEC_TEST(fmin_s_s_s,  "fmin s2, s11, s29", 2, 11, 29)
   2233 GEN_THREEVEC_TEST(fmaxnm_d_d_d,  "fmaxnm d2, d11, d29", 2, 11, 29)
   2234 GEN_THREEVEC_TEST(fmaxnm_s_s_s,  "fmaxnm s2, s11, s29", 2, 11, 29)
   2235 GEN_THREEVEC_TEST(fminnm_d_d_d,  "fminnm d2, d11, d29", 2, 11, 29)
   2236 GEN_THREEVEC_TEST(fminnm_s_s_s,  "fminnm s2, s11, s29", 2, 11, 29)
   2237 
   2238 GEN_THREEVEC_TEST(fmax_2d_2d_2d, "fmax v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2239 GEN_THREEVEC_TEST(fmax_4s_4s_4s, "fmax v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2240 GEN_THREEVEC_TEST(fmax_2s_2s_2s, "fmax v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2241 GEN_THREEVEC_TEST(fmin_2d_2d_2d, "fmin v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2242 GEN_THREEVEC_TEST(fmin_4s_4s_4s, "fmin v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2243 GEN_THREEVEC_TEST(fmin_2s_2s_2s, "fmin v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2244 GEN_THREEVEC_TEST(fmaxnm_2d_2d_2d, "fmaxnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2245 GEN_THREEVEC_TEST(fmaxnm_4s_4s_4s, "fmaxnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2246 GEN_THREEVEC_TEST(fmaxnm_2s_2s_2s, "fmaxnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2247 GEN_THREEVEC_TEST(fminnm_2d_2d_2d, "fminnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2248 GEN_THREEVEC_TEST(fminnm_4s_4s_4s, "fminnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2249 GEN_THREEVEC_TEST(fminnm_2s_2s_2s, "fminnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2250 
   2251 GEN_TWOVEC_TEST(fmaxnmp_d_2d, "fmaxnmp d2, v23.2d", 2, 23)
   2252 GEN_TWOVEC_TEST(fmaxnmp_s_2s, "fmaxnmp s2, v23.2s", 2, 23)
   2253 GEN_TWOVEC_TEST(fminnmp_d_2d, "fminnmp d2, v23.2d", 2, 23)
   2254 GEN_TWOVEC_TEST(fminnmp_s_2s, "fminnmp s2, v23.2s", 2, 23)
   2255 
   2256 GEN_THREEVEC_TEST(fmaxnmp_2d_2d_2d, "fmaxnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2257 GEN_THREEVEC_TEST(fmaxnmp_4s_4s_4s, "fmaxnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2258 GEN_THREEVEC_TEST(fmaxnmp_2s_2s_2s, "fmaxnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2259 GEN_THREEVEC_TEST(fminnmp_2d_2d_2d, "fminnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2260 GEN_THREEVEC_TEST(fminnmp_4s_4s_4s, "fminnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2261 GEN_THREEVEC_TEST(fminnmp_2s_2s_2s, "fminnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2262 /* fmaxnmv / fminnmv: scalar s2 destination, 4-lane source v23.4s (the only arrangement instantiated here). */
   2263 GEN_TWOVEC_TEST(fmaxnmv_s_4s, "fmaxnmv s2, v23.4s", 2, 23)
   2264 GEN_TWOVEC_TEST(fminnmv_s_4s, "fminnmv s2, v23.4s", 2, 23)
   2265 /* fmaxp / fminp with a scalar destination (d2 or s2) and a single 2-lane source vector (v23). */
   2266 GEN_TWOVEC_TEST(fmaxp_d_2d, "fmaxp d2, v23.2d", 2, 23)
   2267 GEN_TWOVEC_TEST(fmaxp_s_2s, "fmaxp s2, v23.2s", 2, 23)
   2268 GEN_TWOVEC_TEST(fminp_d_2d, "fminp d2, v23.2d", 2, 23)
   2269 GEN_TWOVEC_TEST(fminp_s_2s, "fminp s2, v23.2s", 2, 23)
   2270 /* fmaxp / fminp, three-vector form, in .2d, .4s and .2s arrangements. */
   2271 GEN_THREEVEC_TEST(fmaxp_2d_2d_2d, "fmaxp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2272 GEN_THREEVEC_TEST(fmaxp_4s_4s_4s, "fmaxp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2273 GEN_THREEVEC_TEST(fmaxp_2s_2s_2s, "fmaxp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2274 GEN_THREEVEC_TEST(fminp_2d_2d_2d, "fminp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2275 GEN_THREEVEC_TEST(fminp_4s_4s_4s, "fminp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2276 GEN_THREEVEC_TEST(fminp_2s_2s_2s, "fminp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2277 /* fmaxv / fminv: scalar s2 destination, 4-lane source v23.4s (the only arrangement instantiated here). */
   2278 GEN_TWOVEC_TEST(fmaxv_s_4s, "fmaxv s2, v23.4s", 2, 23)
   2279 GEN_TWOVEC_TEST(fminv_s_4s, "fminv s2, v23.4s", 2, 23)
   2280 /* fmla / fmls, three-vector form, in .2d, .4s and .2s arrangements. */
   2281 GEN_THREEVEC_TEST(fmla_2d_2d_2d, "fmla v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2282 GEN_THREEVEC_TEST(fmla_4s_4s_4s, "fmla v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2283 GEN_THREEVEC_TEST(fmla_2s_2s_2s, "fmla v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2284 GEN_THREEVEC_TEST(fmls_2d_2d_2d, "fmls v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2285 GEN_THREEVEC_TEST(fmls_4s_4s_4s, "fmls v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2286 GEN_THREEVEC_TEST(fmls_2s_2s_2s, "fmls v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2287 /* fmla / fmls, scalar by-element form: the last operand is one lane of v29 (d[0], d[1], s[0] or s[3]). */
   2288 GEN_THREEVEC_TEST(fmla_d_d_d0, "fmla d2, d11, v29.d[0]", 2, 11, 29)
   2289 GEN_THREEVEC_TEST(fmla_d_d_d1, "fmla d2, d11, v29.d[1]", 2, 11, 29)
   2290 GEN_THREEVEC_TEST(fmla_s_s_s0, "fmla s2, s11, v29.s[0]", 2, 11, 29)
   2291 GEN_THREEVEC_TEST(fmla_s_s_s3, "fmla s2, s11, v29.s[3]", 2, 11, 29)
   2292 GEN_THREEVEC_TEST(fmls_d_d_d0, "fmls d2, d11, v29.d[0]", 2, 11, 29)
   2293 GEN_THREEVEC_TEST(fmls_d_d_d1, "fmls d2, d11, v29.d[1]", 2, 11, 29)
   2294 GEN_THREEVEC_TEST(fmls_s_s_s0, "fmls s2, s11, v29.s[0]", 2, 11, 29)
   2295 GEN_THREEVEC_TEST(fmls_s_s_s3, "fmls s2, s11, v29.s[3]", 2, 11, 29)
   2296 /* fmla, vector by-element form: .2d with lanes d[0]/d[1]; .4s and .2s with lanes s[0]/s[3] of v29. */
   2297 GEN_THREEVEC_TEST(fmla_2d_2d_d0, "fmla v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2298 GEN_THREEVEC_TEST(fmla_2d_2d_d1, "fmla v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2299 GEN_THREEVEC_TEST(fmla_4s_4s_s0, "fmla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2300 GEN_THREEVEC_TEST(fmla_4s_4s_s3, "fmla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2301 GEN_THREEVEC_TEST(fmla_2s_2s_s0, "fmla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2302 GEN_THREEVEC_TEST(fmla_2s_2s_s3, "fmla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2303 /* fmls, vector by-element form: .2d with lanes d[0]/d[1]; .4s and .2s with lanes s[0]/s[3] of v29. */
   2304 GEN_THREEVEC_TEST(fmls_2d_2d_d0, "fmls v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2305 GEN_THREEVEC_TEST(fmls_2d_2d_d1, "fmls v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2306 GEN_THREEVEC_TEST(fmls_4s_4s_s0, "fmls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2307 GEN_THREEVEC_TEST(fmls_4s_4s_s3, "fmls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2308 GEN_THREEVEC_TEST(fmls_2s_2s_s0, "fmls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2309 GEN_THREEVEC_TEST(fmls_2s_2s_s3, "fmls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2310 /* fmov (vector, immediate) with #0.125, #-4.0 and #1.0 in .2d, .4s and .2s; only v22 is named by the instruction, so the second register number (23) is not referenced by it. */
   2311 GEN_TWOVEC_TEST(fmov_2d_imm_01, "fmov v22.2d, #0.125", 22, 23)
   2312 GEN_TWOVEC_TEST(fmov_2d_imm_02, "fmov v22.2d, #-4.0",  22, 23)
   2313 GEN_TWOVEC_TEST(fmov_2d_imm_03, "fmov v22.2d, #1.0",   22, 23)
   2314 GEN_TWOVEC_TEST(fmov_4s_imm_01, "fmov v22.4s, #0.125", 22, 23)
   2315 GEN_TWOVEC_TEST(fmov_4s_imm_02, "fmov v22.4s, #-4.0",  22, 23)
   2316 GEN_TWOVEC_TEST(fmov_4s_imm_03, "fmov v22.4s, #1.0",   22, 23)
   2317 GEN_TWOVEC_TEST(fmov_2s_imm_01, "fmov v22.2s, #0.125", 22, 23)
   2318 GEN_TWOVEC_TEST(fmov_2s_imm_02, "fmov v22.2s, #-4.0",  22, 23)
   2319 GEN_TWOVEC_TEST(fmov_2s_imm_03, "fmov v22.2s, #1.0",   22, 23)
   2320 /* fmov register-to-register between scalar registers: d22 <- d23 and s22 <- s23. */
   2321 GEN_TWOVEC_TEST(fmov_d_d,  "fmov d22, d23",   22, 23)
   2322 GEN_TWOVEC_TEST(fmov_s_s,  "fmov s22, s23",   22, 23)
   2323 /* fmov between an integer register (w15/x15) and register 7 (s7, d7, v7.d[1]), in both directions; arguments are given as integer register number, then vector register number. */
   2324 GEN_ONEINT_ONEVEC_TEST(fmov_s_w,  "fmov s7,      w15", 15, 7)
   2325 GEN_ONEINT_ONEVEC_TEST(fmov_d_x,  "fmov d7,      x15", 15, 7)
   2326 GEN_ONEINT_ONEVEC_TEST(fmov_d1_x, "fmov v7.d[1], x15", 15, 7)
   2327 GEN_ONEINT_ONEVEC_TEST(fmov_w_s,  "fmov w15,      s7", 15, 7)
   2328 GEN_ONEINT_ONEVEC_TEST(fmov_x_d,  "fmov x15,      d7", 15, 7)
   2329 GEN_ONEINT_ONEVEC_TEST(fmov_x_d1, "fmov x15, v7.d[1]", 15, 7)
   2330 
   2331 /* Overkill: GEN_TWOVEC_TEST is given two vector register numbers (22, 23), but these fmov (scalar, immediate) instructions only name d22/s22, so one would do. */
   2332 GEN_TWOVEC_TEST(fmov_d_imm_01, "fmov d22, #0.125", 22, 23)
   2333 GEN_TWOVEC_TEST(fmov_d_imm_02, "fmov d22, #-4.0",  22, 23)
   2334 GEN_TWOVEC_TEST(fmov_d_imm_03, "fmov d22, #1.0",   22, 23)
   2335 GEN_TWOVEC_TEST(fmov_s_imm_01, "fmov s22, #0.125", 22, 23)
   2336 GEN_TWOVEC_TEST(fmov_s_imm_02, "fmov s22, #-4.0",  22, 23)
   2337 GEN_TWOVEC_TEST(fmov_s_imm_03, "fmov s22, #-1.0",   22, 23)  /* NOTE(review): #-1.0 here, whereas every other *_imm_03 case uses #1.0 -- confirm this is intended */
   2338 /* fmul, scalar by-element form: the last operand is one lane of v29 (d[0], d[1], s[0] or s[3]). */
   2339 GEN_THREEVEC_TEST(fmul_d_d_d0, "fmul d2, d11, v29.d[0]", 2, 11, 29)
   2340 GEN_THREEVEC_TEST(fmul_d_d_d1, "fmul d2, d11, v29.d[1]", 2, 11, 29)
   2341 GEN_THREEVEC_TEST(fmul_s_s_s0, "fmul s2, s11, v29.s[0]", 2, 11, 29)
   2342 GEN_THREEVEC_TEST(fmul_s_s_s3, "fmul s2, s11, v29.s[3]", 2, 11, 29)
   2343 /* fmul, vector by-element form: .2d with lanes d[0]/d[1]; .4s and .2s with lanes s[0]/s[3] of v29. */
   2344 GEN_THREEVEC_TEST(fmul_2d_2d_d0, "fmul v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2345 GEN_THREEVEC_TEST(fmul_2d_2d_d1, "fmul v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2346 GEN_THREEVEC_TEST(fmul_4s_4s_s0, "fmul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2347 GEN_THREEVEC_TEST(fmul_4s_4s_s3, "fmul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2348 GEN_THREEVEC_TEST(fmul_2s_2s_s0, "fmul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2349 GEN_THREEVEC_TEST(fmul_2s_2s_s3, "fmul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2350 /* fmul, plain three-register forms: scalar d and s, then vector .2d, .4s and .2s. */
   2351 GEN_THREEVEC_TEST(fmul_d_d_d,    "fmul d2, d11, d29", 2, 11, 29)
   2352 GEN_THREEVEC_TEST(fmul_s_s_s,    "fmul s2, s11, s29", 2, 11, 29)
   2353 GEN_THREEVEC_TEST(fmul_2d_2d_2d, "fmul v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2354 GEN_THREEVEC_TEST(fmul_4s_4s_4s, "fmul v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2355 GEN_THREEVEC_TEST(fmul_2s_2s_2s, "fmul v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2356 /* fmulx, by-element forms: scalar d/s destinations first, then vector .2d, .4s and .2s; lanes d[0]/d[1] and s[0]/s[3] of v29. */
   2357 GEN_THREEVEC_TEST(fmulx_d_d_d0, "fmulx d2, d11, v29.d[0]", 2, 11, 29)
   2358 GEN_THREEVEC_TEST(fmulx_d_d_d1, "fmulx d2, d11, v29.d[1]", 2, 11, 29)
   2359 GEN_THREEVEC_TEST(fmulx_s_s_s0, "fmulx s2, s11, v29.s[0]", 2, 11, 29)
   2360 GEN_THREEVEC_TEST(fmulx_s_s_s3, "fmulx s2, s11, v29.s[3]", 2, 11, 29)
   2361 GEN_THREEVEC_TEST(fmulx_2d_2d_d0, "fmulx v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2362 GEN_THREEVEC_TEST(fmulx_2d_2d_d1, "fmulx v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2363 GEN_THREEVEC_TEST(fmulx_4s_4s_s0, "fmulx v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2364 GEN_THREEVEC_TEST(fmulx_4s_4s_s3, "fmulx v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2365 GEN_THREEVEC_TEST(fmulx_2s_2s_s0, "fmulx v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2366 GEN_THREEVEC_TEST(fmulx_2s_2s_s3, "fmulx v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2367 /* fmulx, plain three-register forms: scalar d and s, then vector .2d, .4s and .2s. */
   2368 GEN_THREEVEC_TEST(fmulx_d_d_d,    "fmulx d2, d11, d29", 2, 11, 29)
   2369 GEN_THREEVEC_TEST(fmulx_s_s_s,    "fmulx s2, s11, s29", 2, 11, 29)
   2370 GEN_THREEVEC_TEST(fmulx_2d_2d_2d, "fmulx v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2371 GEN_THREEVEC_TEST(fmulx_4s_4s_4s, "fmulx v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2372 GEN_THREEVEC_TEST(fmulx_2s_2s_2s, "fmulx v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2373 /* frecpe, two-register forms: scalar d and s, then vector .2d, .4s and .2s. */
   2374 GEN_TWOVEC_TEST(frecpe_d_d,   "frecpe d22, d23",       22, 23)
   2375 GEN_TWOVEC_TEST(frecpe_s_s,   "frecpe s22, s23",       22, 23)
   2376 GEN_TWOVEC_TEST(frecpe_2d_2d, "frecpe v22.2d, v23.2d", 22, 23)
   2377 GEN_TWOVEC_TEST(frecpe_4s_4s, "frecpe v22.4s, v23.4s", 22, 23)
   2378 GEN_TWOVEC_TEST(frecpe_2s_2s, "frecpe v22.2s, v23.2s", 22, 23)
   2379 /* frecps, three-register forms: scalar d and s, then vector .2d, .4s and .2s. */
   2380 GEN_THREEVEC_TEST(frecps_d_d_d,    "frecps d2, d11, d29", 2, 11, 29)
   2381 GEN_THREEVEC_TEST(frecps_s_s_s,    "frecps s2, s11, s29", 2, 11, 29)
   2382 GEN_THREEVEC_TEST(frecps_2d_2d_2d, "frecps v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2383 GEN_THREEVEC_TEST(frecps_4s_4s_4s, "frecps v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2384 GEN_THREEVEC_TEST(frecps_2s_2s_2s, "frecps v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2385 /* frecpx: only the scalar d and s forms are instantiated here. */
   2386 GEN_TWOVEC_TEST(frecpx_d_d,   "frecpx d22, d23",       22, 23)
   2387 GEN_TWOVEC_TEST(frecpx_s_s,   "frecpx s22, s23",       22, 23)
   2388 /* frint{a,i,m,n,p,x,z}, scalar forms: each variant on d22 <- d23 and s22 <- s23. */
   2389 GEN_TWOVEC_TEST(frinta_d_d,   "frinta d22, d23",       22, 23)
   2390 GEN_TWOVEC_TEST(frinta_s_s,   "frinta s22, s23",       22, 23)
   2391 GEN_TWOVEC_TEST(frinti_d_d,   "frinti d22, d23",       22, 23)
   2392 GEN_TWOVEC_TEST(frinti_s_s,   "frinti s22, s23",       22, 23)
   2393 GEN_TWOVEC_TEST(frintm_d_d,   "frintm d22, d23",       22, 23)
   2394 GEN_TWOVEC_TEST(frintm_s_s,   "frintm s22, s23",       22, 23)
   2395 GEN_TWOVEC_TEST(frintn_d_d,   "frintn d22, d23",       22, 23)
   2396 GEN_TWOVEC_TEST(frintn_s_s,   "frintn s22, s23",       22, 23)
   2397 GEN_TWOVEC_TEST(frintp_d_d,   "frintp d22, d23",       22, 23)
   2398 GEN_TWOVEC_TEST(frintp_s_s,   "frintp s22, s23",       22, 23)
   2399 GEN_TWOVEC_TEST(frintx_d_d,   "frintx d22, d23",       22, 23)
   2400 GEN_TWOVEC_TEST(frintx_s_s,   "frintx s22, s23",       22, 23)
   2401 GEN_TWOVEC_TEST(frintz_d_d,   "frintz d22, d23",       22, 23)
   2402 GEN_TWOVEC_TEST(frintz_s_s,   "frintz s22, s23",       22, 23)
   2403 /* frint{a,i,m,n,p,x,z}, vector forms: each variant in .2d, .4s and .2s on v2 <- v11. */
   2404 GEN_TWOVEC_TEST(frinta_2d_2d, "frinta v2.2d, v11.2d", 2, 11)
   2405 GEN_TWOVEC_TEST(frinta_4s_4s, "frinta v2.4s, v11.4s", 2, 11)
   2406 GEN_TWOVEC_TEST(frinta_2s_2s, "frinta v2.2s, v11.2s", 2, 11)
   2407 GEN_TWOVEC_TEST(frinti_2d_2d, "frinti v2.2d, v11.2d", 2, 11)
   2408 GEN_TWOVEC_TEST(frinti_4s_4s, "frinti v2.4s, v11.4s", 2, 11)
   2409 GEN_TWOVEC_TEST(frinti_2s_2s, "frinti v2.2s, v11.2s", 2, 11)
   2410 GEN_TWOVEC_TEST(frintm_2d_2d, "frintm v2.2d, v11.2d", 2, 11)
   2411 GEN_TWOVEC_TEST(frintm_4s_4s, "frintm v2.4s, v11.4s", 2, 11)
   2412 GEN_TWOVEC_TEST(frintm_2s_2s, "frintm v2.2s, v11.2s", 2, 11)
   2413 GEN_TWOVEC_TEST(frintn_2d_2d, "frintn v2.2d, v11.2d", 2, 11)
   2414 GEN_TWOVEC_TEST(frintn_4s_4s, "frintn v2.4s, v11.4s", 2, 11)
   2415 GEN_TWOVEC_TEST(frintn_2s_2s, "frintn v2.2s, v11.2s", 2, 11)
   2416 GEN_TWOVEC_TEST(frintp_2d_2d, "frintp v2.2d, v11.2d", 2, 11)
   2417 GEN_TWOVEC_TEST(frintp_4s_4s, "frintp v2.4s, v11.4s", 2, 11)
   2418 GEN_TWOVEC_TEST(frintp_2s_2s, "frintp v2.2s, v11.2s", 2, 11)
   2419 GEN_TWOVEC_TEST(frintx_2d_2d, "frintx v2.2d, v11.2d", 2, 11)
   2420 GEN_TWOVEC_TEST(frintx_4s_4s, "frintx v2.4s, v11.4s", 2, 11)
   2421 GEN_TWOVEC_TEST(frintx_2s_2s, "frintx v2.2s, v11.2s", 2, 11)
   2422 GEN_TWOVEC_TEST(frintz_2d_2d, "frintz v2.2d, v11.2d", 2, 11)
   2423 GEN_TWOVEC_TEST(frintz_4s_4s, "frintz v2.4s, v11.4s", 2, 11)
   2424 GEN_TWOVEC_TEST(frintz_2s_2s, "frintz v2.2s, v11.2s", 2, 11)
   2425 /* frsqrte, two-register forms: scalar d and s, then vector .2d, .4s and .2s. */
   2426 GEN_TWOVEC_TEST(frsqrte_d_d,   "frsqrte d22, d23",       22, 23)
   2427 GEN_TWOVEC_TEST(frsqrte_s_s,   "frsqrte s22, s23",       22, 23)
   2428 GEN_TWOVEC_TEST(frsqrte_2d_2d, "frsqrte v22.2d, v23.2d", 22, 23)
   2429 GEN_TWOVEC_TEST(frsqrte_4s_4s, "frsqrte v22.4s, v23.4s", 22, 23)
   2430 GEN_TWOVEC_TEST(frsqrte_2s_2s, "frsqrte v22.2s, v23.2s", 22, 23)
   2431 /* frsqrts, three-register forms: scalar d and s, then vector .2d, .4s and .2s. */
   2432 GEN_THREEVEC_TEST(frsqrts_d_d_d,    "frsqrts d2, d11, d29", 2, 11, 29)
   2433 GEN_THREEVEC_TEST(frsqrts_s_s_s,    "frsqrts s2, s11, s29", 2, 11, 29)
   2434 GEN_THREEVEC_TEST(frsqrts_2d_2d_2d, "frsqrts v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2435 GEN_THREEVEC_TEST(frsqrts_4s_4s_4s, "frsqrts v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2436 GEN_THREEVEC_TEST(frsqrts_2s_2s_2s, "frsqrts v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2437 
   2438 // ======================== CONV ========================
   2439 /* fcvt between h, s and d scalar registers: all six destination/source pairings, always register 7 <- register 16. */
   2440 GEN_TWOVEC_TEST(fcvt_s_h, "fcvt s7, h16", 7, 16)
   2441 GEN_TWOVEC_TEST(fcvt_d_h, "fcvt d7, h16", 7, 16)
   2442 GEN_TWOVEC_TEST(fcvt_h_s, "fcvt h7, s16", 7, 16)
   2443 GEN_TWOVEC_TEST(fcvt_d_s, "fcvt d7, s16", 7, 16)
   2444 GEN_TWOVEC_TEST(fcvt_h_d, "fcvt h7, d16", 7, 16)
   2445 GEN_TWOVEC_TEST(fcvt_s_d, "fcvt s7, d16", 7, 16)
   2446 /* fcvtl / fcvtl2: h lanes to .4s and s lanes to .2d. The fcvtl2 cases are the ones named fcvtl_4s_8h and fcvtl_2d_4s (the test name omits the "2"). */
   2447 GEN_TWOVEC_TEST(fcvtl_4s_4h, "fcvtl  v11.4s, v29.4h", 11, 29)
   2448 GEN_TWOVEC_TEST(fcvtl_4s_8h, "fcvtl2 v11.4s, v29.8h", 11, 29)
   2449 GEN_TWOVEC_TEST(fcvtl_2d_2s, "fcvtl  v11.2d, v29.2s", 11, 29)
   2450 GEN_TWOVEC_TEST(fcvtl_2d_4s, "fcvtl2 v11.2d, v29.4s", 11, 29)
   2451 /* fcvtn / fcvtn2: .4s to h lanes and .2d to s lanes. The fcvtn2 cases are the ones named fcvtn_8h_4s and fcvtn_4s_2d (the test name omits the "2"). */
   2452 GEN_TWOVEC_TEST(fcvtn_4h_4s, "fcvtn  v22.4h, v23.4s", 22, 23)
   2453 GEN_TWOVEC_TEST(fcvtn_8h_4s, "fcvtn2 v22.8h, v23.4s", 22, 23)
   2454 GEN_TWOVEC_TEST(fcvtn_2s_2d, "fcvtn  v22.2s, v23.2d", 22, 23)
   2455 GEN_TWOVEC_TEST(fcvtn_4s_2d, "fcvtn2 v22.4s, v23.2d", 22, 23)
   2456 /* fcvtas / fcvtau: vector-register destinations first (scalar d/s, then .2d/.4s/.2s), then w21/x21 destinations from s10/d10. The scalar fcvtau cases use the registers the other way round (21 <- 10). */
   2457 GEN_TWOVEC_TEST(fcvtas_d_d,   "fcvtas d10, d21",       10, 21)
   2458 GEN_TWOVEC_TEST(fcvtau_d_d,   "fcvtau d21, d10",       21, 10)
   2459 GEN_TWOVEC_TEST(fcvtas_s_s,   "fcvtas s10, s21",       10, 21)
   2460 GEN_TWOVEC_TEST(fcvtau_s_s,   "fcvtau s21, s10",       21, 10)
   2461 GEN_TWOVEC_TEST(fcvtas_2d_2d, "fcvtas v10.2d, v21.2d", 10, 21)
   2462 GEN_TWOVEC_TEST(fcvtau_2d_2d, "fcvtau v10.2d, v21.2d", 10, 21)
   2463 GEN_TWOVEC_TEST(fcvtas_4s_4s, "fcvtas v10.4s, v21.4s", 10, 21)
   2464 GEN_TWOVEC_TEST(fcvtau_4s_4s, "fcvtau v10.4s, v21.4s", 10, 21)
   2465 GEN_TWOVEC_TEST(fcvtas_2s_2s, "fcvtas v10.2s, v21.2s", 10, 21)
   2466 GEN_TWOVEC_TEST(fcvtau_2s_2s, "fcvtau v10.2s, v21.2s", 10, 21)
   2467 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_s, "fcvtas w21, s10", 21, 10)
   2468 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_s, "fcvtau w21, s10", 21, 10)
   2469 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_s, "fcvtas x21, s10", 21, 10)
   2470 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_s, "fcvtau x21, s10", 21, 10)
   2471 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_d, "fcvtas w21, d10", 21, 10)
   2472 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_d, "fcvtau w21, d10", 21, 10)
   2473 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_d, "fcvtas x21, d10", 21, 10)
   2474 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_d, "fcvtau x21, d10", 21, 10)
   2475 /* fcvtms / fcvtmu: vector-register destinations first (scalar d/s, then .2d/.4s/.2s), then w21/x21 destinations from s10/d10. The scalar fcvtmu cases use the registers the other way round (21 <- 10). */
   2476 GEN_TWOVEC_TEST(fcvtms_d_d,   "fcvtms d10, d21",       10, 21)
   2477 GEN_TWOVEC_TEST(fcvtmu_d_d,   "fcvtmu d21, d10",       21, 10)
   2478 GEN_TWOVEC_TEST(fcvtms_s_s,   "fcvtms s10, s21",       10, 21)
   2479 GEN_TWOVEC_TEST(fcvtmu_s_s,   "fcvtmu s21, s10",       21, 10)
   2480 GEN_TWOVEC_TEST(fcvtms_2d_2d, "fcvtms v10.2d, v21.2d", 10, 21)
   2481 GEN_TWOVEC_TEST(fcvtmu_2d_2d, "fcvtmu v10.2d, v21.2d", 10, 21)
   2482 GEN_TWOVEC_TEST(fcvtms_4s_4s, "fcvtms v10.4s, v21.4s", 10, 21)
   2483 GEN_TWOVEC_TEST(fcvtmu_4s_4s, "fcvtmu v10.4s, v21.4s", 10, 21)
   2484 GEN_TWOVEC_TEST(fcvtms_2s_2s, "fcvtms v10.2s, v21.2s", 10, 21)
   2485 GEN_TWOVEC_TEST(fcvtmu_2s_2s, "fcvtmu v10.2s, v21.2s", 10, 21)
   2486 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_s, "fcvtms w21, s10", 21, 10)
   2487 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_s, "fcvtmu w21, s10", 21, 10)
   2488 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_s, "fcvtms x21, s10", 21, 10)
   2489 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_s, "fcvtmu x21, s10", 21, 10)
   2490 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_d, "fcvtms w21, d10", 21, 10)
   2491 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_d, "fcvtmu w21, d10", 21, 10)
   2492 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_d, "fcvtms x21, d10", 21, 10)
   2493 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_d, "fcvtmu x21, d10", 21, 10)
   2494 /* fcvtns / fcvtnu: vector-register destinations first (scalar d/s, then .2d/.4s/.2s), then w21/x21 destinations from s10/d10. The scalar fcvtnu cases use the registers the other way round (21 <- 10). */
   2495 GEN_TWOVEC_TEST(fcvtns_d_d,   "fcvtns d10, d21",       10, 21)
   2496 GEN_TWOVEC_TEST(fcvtnu_d_d,   "fcvtnu d21, d10",       21, 10)
   2497 GEN_TWOVEC_TEST(fcvtns_s_s,   "fcvtns s10, s21",       10, 21)
   2498 GEN_TWOVEC_TEST(fcvtnu_s_s,   "fcvtnu s21, s10",       21, 10)
   2499 GEN_TWOVEC_TEST(fcvtns_2d_2d, "fcvtns v10.2d, v21.2d", 10, 21)
   2500 GEN_TWOVEC_TEST(fcvtnu_2d_2d, "fcvtnu v10.2d, v21.2d", 10, 21)
   2501 GEN_TWOVEC_TEST(fcvtns_4s_4s, "fcvtns v10.4s, v21.4s", 10, 21)
   2502 GEN_TWOVEC_TEST(fcvtnu_4s_4s, "fcvtnu v10.4s, v21.4s", 10, 21)
   2503 GEN_TWOVEC_TEST(fcvtns_2s_2s, "fcvtns v10.2s, v21.2s", 10, 21)
   2504 GEN_TWOVEC_TEST(fcvtnu_2s_2s, "fcvtnu v10.2s, v21.2s", 10, 21)
   2505 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_s, "fcvtns w21, s10", 21, 10)
   2506 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_s, "fcvtnu w21, s10", 21, 10)
   2507 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_s, "fcvtns x21, s10", 21, 10)
   2508 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_s, "fcvtnu x21, s10", 21, 10)
   2509 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_d, "fcvtns w21, d10", 21, 10)
   2510 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_d, "fcvtnu w21, d10", 21, 10)
   2511 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_d, "fcvtns x21, d10", 21, 10)
   2512 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_d, "fcvtnu x21, d10", 21, 10)
   2513 /* fcvtps / fcvtpu: vector-register destinations first (scalar d/s, then .2d/.4s/.2s), then w21/x21 destinations from s10/d10. The scalar fcvtpu cases use the registers the other way round (21 <- 10). */
   2514 GEN_TWOVEC_TEST(fcvtps_d_d,   "fcvtps d10, d21",       10, 21)
   2515 GEN_TWOVEC_TEST(fcvtpu_d_d,   "fcvtpu d21, d10",       21, 10)
   2516 GEN_TWOVEC_TEST(fcvtps_s_s,   "fcvtps s10, s21",       10, 21)
   2517 GEN_TWOVEC_TEST(fcvtpu_s_s,   "fcvtpu s21, s10",       21, 10)
   2518 GEN_TWOVEC_TEST(fcvtps_2d_2d, "fcvtps v10.2d, v21.2d", 10, 21)
   2519 GEN_TWOVEC_TEST(fcvtpu_2d_2d, "fcvtpu v10.2d, v21.2d", 10, 21)
   2520 GEN_TWOVEC_TEST(fcvtps_4s_4s, "fcvtps v10.4s, v21.4s", 10, 21)
   2521 GEN_TWOVEC_TEST(fcvtpu_4s_4s, "fcvtpu v10.4s, v21.4s", 10, 21)
   2522 GEN_TWOVEC_TEST(fcvtps_2s_2s, "fcvtps v10.2s, v21.2s", 10, 21)
   2523 GEN_TWOVEC_TEST(fcvtpu_2s_2s, "fcvtpu v10.2s, v21.2s", 10, 21)
   2524 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_s, "fcvtps w21, s10", 21, 10)
   2525 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_s, "fcvtpu w21, s10", 21, 10)
   2526 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_s, "fcvtps x21, s10", 21, 10)
   2527 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_s, "fcvtpu x21, s10", 21, 10)
   2528 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_d, "fcvtps w21, d10", 21, 10)
   2529 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_d, "fcvtpu w21, d10", 21, 10)
   2530 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_d, "fcvtps x21, d10", 21, 10)
   2531 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_d, "fcvtpu x21, d10", 21, 10)
   2532 /* fcvtzs / fcvtzu without an #fbits operand: vector-register destinations first (scalar d/s, then .2d/.4s/.2s), then w21/x21 destinations from s10/d10. The scalar fcvtzu cases use the registers the other way round (21 <- 10). */
   2533 GEN_TWOVEC_TEST(fcvtzs_d_d,   "fcvtzs d10, d21",       10, 21)
   2534 GEN_TWOVEC_TEST(fcvtzu_d_d,   "fcvtzu d21, d10",       21, 10)
   2535 GEN_TWOVEC_TEST(fcvtzs_s_s,   "fcvtzs s10, s21",       10, 21)
   2536 GEN_TWOVEC_TEST(fcvtzu_s_s,   "fcvtzu s21, s10",       21, 10)
   2537 GEN_TWOVEC_TEST(fcvtzs_2d_2d, "fcvtzs v10.2d, v21.2d", 10, 21)
   2538 GEN_TWOVEC_TEST(fcvtzu_2d_2d, "fcvtzu v10.2d, v21.2d", 10, 21)
   2539 GEN_TWOVEC_TEST(fcvtzs_4s_4s, "fcvtzs v10.4s, v21.4s", 10, 21)
   2540 GEN_TWOVEC_TEST(fcvtzu_4s_4s, "fcvtzu v10.4s, v21.4s", 10, 21)
   2541 GEN_TWOVEC_TEST(fcvtzs_2s_2s, "fcvtzs v10.2s, v21.2s", 10, 21)
   2542 GEN_TWOVEC_TEST(fcvtzu_2s_2s, "fcvtzu v10.2s, v21.2s", 10, 21)
   2543 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s, "fcvtzs w21, s10", 21, 10)
   2544 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s, "fcvtzu w21, s10", 21, 10)
   2545 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s, "fcvtzs x21, s10", 21, 10)
   2546 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s, "fcvtzu x21, s10", 21, 10)
   2547 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d, "fcvtzs w21, d10", 21, 10)
   2548 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d, "fcvtzu w21, d10", 21, 10)
   2549 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d, "fcvtzs x21, d10", 21, 10)
   2550 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d, "fcvtzu x21, d10", 21, 10)
   2551 /* fcvtzs / fcvtzu with an #fbits immediate. The values used are 1/32/64 where the destination is d, .2d or x, and 1/16/32 where it is s, .4s, .2s or w. */
   2552 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits1,    "fcvtzs d10, d21, #1",   10, 21)
   2553 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits32,   "fcvtzs d10, d21, #32",  10, 21)
   2554 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits64,   "fcvtzs d10, d21, #64",  10, 21)
   2555 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits1,    "fcvtzu d10, d21, #1",   10, 21)
   2556 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits32,   "fcvtzu d10, d21, #32",  10, 21)
   2557 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits64,   "fcvtzu d10, d21, #64",  10, 21)
   2558 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits1,    "fcvtzs s10, s21, #1",   10, 21)
   2559 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits16,   "fcvtzs s10, s21, #16",  10, 21)
   2560 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits32,   "fcvtzs s10, s21, #32",  10, 21)
   2561 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits1,    "fcvtzu s10, s21, #1",   10, 21)
   2562 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits16,   "fcvtzu s10, s21, #16",  10, 21)
   2563 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits32,   "fcvtzu s10, s21, #32",  10, 21)
   2564 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits1,  "fcvtzs v10.2d, v21.2d, #1",  10, 21)
   2565 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits32, "fcvtzs v10.2d, v21.2d, #32", 10, 21)
   2566 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits64, "fcvtzs v10.2d, v21.2d, #64", 10, 21)
   2567 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits1,  "fcvtzu v10.2d, v21.2d, #1",  10, 21)
   2568 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits32, "fcvtzu v10.2d, v21.2d, #32", 10, 21)
   2569 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits64, "fcvtzu v10.2d, v21.2d, #64", 10, 21)
   2570 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits1,  "fcvtzs v10.4s, v21.4s, #1",  10, 21)
   2571 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits16, "fcvtzs v10.4s, v21.4s, #16", 10, 21)
   2572 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits32, "fcvtzs v10.4s, v21.4s, #32", 10, 21)
   2573 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits1,  "fcvtzu v10.4s, v21.4s, #1",  10, 21)
   2574 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits16, "fcvtzu v10.4s, v21.4s, #16", 10, 21)
   2575 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits32, "fcvtzu v10.4s, v21.4s, #32", 10, 21)
   2576 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits1,  "fcvtzs v10.2s, v21.2s, #1",  10, 21)
   2577 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits16, "fcvtzs v10.2s, v21.2s, #16", 10, 21)
   2578 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits32, "fcvtzs v10.2s, v21.2s, #32", 10, 21)
   2579 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits1,  "fcvtzu v10.2s, v21.2s, #1",  10, 21)
   2580 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits16, "fcvtzu v10.2s, v21.2s, #16", 10, 21)
   2581 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits32, "fcvtzu v10.2s, v21.2s, #32", 10, 21)
   2582 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits1,  "fcvtzs w21, s10, #1",  21, 10)
   2583 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits16, "fcvtzs w21, s10, #16", 21, 10)
   2584 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits32, "fcvtzs w21, s10, #32", 21, 10)
   2585 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits1,  "fcvtzu w21, s10, #1",  21, 10)
   2586 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits16, "fcvtzu w21, s10, #16", 21, 10)
   2587 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits32, "fcvtzu w21, s10, #32", 21, 10)
   2588 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits1,  "fcvtzs x21, s10, #1",  21, 10)
   2589 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits32, "fcvtzs x21, s10, #32", 21, 10)
   2590 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits64, "fcvtzs x21, s10, #64", 21, 10)
   2591 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits1,  "fcvtzu x21, s10, #1",  21, 10)
   2592 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits32, "fcvtzu x21, s10, #32", 21, 10)
   2593 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits64, "fcvtzu x21, s10, #64", 21, 10)
   2594 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits1,  "fcvtzs w21, d10, #1",  21, 10)
   2595 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits16, "fcvtzs w21, d10, #16", 21, 10)
   2596 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits32, "fcvtzs w21, d10, #32", 21, 10)
   2597 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits1,  "fcvtzu w21, d10, #1",  21, 10)
   2598 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits16, "fcvtzu w21, d10, #16", 21, 10)
   2599 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits32, "fcvtzu w21, d10, #32", 21, 10)
   2600 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits1,  "fcvtzs x21, d10, #1",  21, 10)
   2601 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits32, "fcvtzs x21, d10, #32", 21, 10)
   2602 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits64, "fcvtzs x21, d10, #64", 21, 10)
   2603 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits1,  "fcvtzu x21, d10, #1",  21, 10)
   2604 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits32, "fcvtzu x21, d10, #32", 21, 10)
   2605 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits64, "fcvtzu x21, d10, #64", 21, 10)
   2606 /* fcvtxn / fcvtxn2 from d lanes to s lanes: scalar, .2s and .4s destinations. The fcvtxn2 case is the one named fcvtxn_4s_2d (the test name omits the "2"). */
   2607 GEN_TWOVEC_TEST(fcvtxn_s_d,   "fcvtxn s10, d21", 10, 21)
   2608 GEN_TWOVEC_TEST(fcvtxn_2s_2d, "fcvtxn  v10.2s, v21.2d", 10, 21)
   2609 GEN_TWOVEC_TEST(fcvtxn_4s_2d, "fcvtxn2 v10.4s, v21.2d", 10, 21)
   2610 /* scvtf / ucvtf between vector registers with an #fbits immediate: 1/32/64 for d and .2d, 1/16/32 for s, .4s and .2s. The scalar ucvtf cases use the registers the other way round (21 <- 10). */
   2611 GEN_TWOVEC_TEST(scvtf_d_d_fbits1,    "scvtf d10, d21      , #1",  10, 21)
   2612 GEN_TWOVEC_TEST(scvtf_d_d_fbits32,   "scvtf d10, d21      , #32", 10, 21)
   2613 GEN_TWOVEC_TEST(scvtf_d_d_fbits64,   "scvtf d10, d21      , #64", 10, 21)
   2614 GEN_TWOVEC_TEST(ucvtf_d_d_fbits1,    "ucvtf d21, d10      , #1",  21, 10)
   2615 GEN_TWOVEC_TEST(ucvtf_d_d_fbits32,   "ucvtf d21, d10      , #32", 21, 10)
   2616 GEN_TWOVEC_TEST(ucvtf_d_d_fbits64,   "ucvtf d21, d10      , #64", 21, 10)
   2617 GEN_TWOVEC_TEST(scvtf_s_s_fbits1,    "scvtf s10, s21      , #1",  10, 21)
   2618 GEN_TWOVEC_TEST(scvtf_s_s_fbits16,   "scvtf s10, s21      , #16", 10, 21)
   2619 GEN_TWOVEC_TEST(scvtf_s_s_fbits32,   "scvtf s10, s21      , #32", 10, 21)
   2620 GEN_TWOVEC_TEST(ucvtf_s_s_fbits1,    "ucvtf s21, s10      , #1",  21, 10)
   2621 GEN_TWOVEC_TEST(ucvtf_s_s_fbits16,   "ucvtf s21, s10      , #16", 21, 10)
   2622 GEN_TWOVEC_TEST(ucvtf_s_s_fbits32,   "ucvtf s21, s10      , #32", 21, 10)
   2623 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits1,  "scvtf v10.2d, v21.2d, #1",  10, 21)
   2624 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits32, "scvtf v10.2d, v21.2d, #32", 10, 21)
   2625 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits64, "scvtf v10.2d, v21.2d, #64", 10, 21)
   2626 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits1,  "ucvtf v10.2d, v21.2d, #1",  10, 21)
   2627 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits32, "ucvtf v10.2d, v21.2d, #32", 10, 21)
   2628 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits64, "ucvtf v10.2d, v21.2d, #64", 10, 21)
   2629 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits1,  "scvtf v10.4s, v21.4s, #1",  10, 21)
   2630 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits16, "scvtf v10.4s, v21.4s, #16", 10, 21)
   2631 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits32, "scvtf v10.4s, v21.4s, #32", 10, 21)
   2632 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits1,  "ucvtf v10.4s, v21.4s, #1",  10, 21)
   2633 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits16, "ucvtf v10.4s, v21.4s, #16", 10, 21)
   2634 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits32, "ucvtf v10.4s, v21.4s, #32", 10, 21)
   2635 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits1,  "scvtf v10.2s, v21.2s, #1",  10, 21)
   2636 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits16, "scvtf v10.2s, v21.2s, #16", 10, 21)
   2637 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits32, "scvtf v10.2s, v21.2s, #32", 10, 21)
   2638 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits1,  "ucvtf v10.2s, v21.2s, #1",  10, 21)
   2639 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits16, "ucvtf v10.2s, v21.2s, #16", 10, 21)
   2640 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits32, "ucvtf v10.2s, v21.2s, #32", 10, 21)
   2641 /* scvtf / ucvtf between vector registers without #fbits: scalar d/s, then .2d/.4s/.2s. The scalar ucvtf cases use the registers the other way round (21 <- 10). */
   2642 GEN_TWOVEC_TEST(scvtf_d_d,   "scvtf d10, d21",       10, 21)
   2643 GEN_TWOVEC_TEST(ucvtf_d_d,   "ucvtf d21, d10",       21, 10)
   2644 GEN_TWOVEC_TEST(scvtf_s_s,   "scvtf s10, s21",       10, 21)
   2645 GEN_TWOVEC_TEST(ucvtf_s_s,   "ucvtf s21, s10",       21, 10)
   2646 GEN_TWOVEC_TEST(scvtf_2d_2d, "scvtf v10.2d, v21.2d", 10, 21)
   2647 GEN_TWOVEC_TEST(ucvtf_2d_2d, "ucvtf v10.2d, v21.2d", 10, 21)
   2648 GEN_TWOVEC_TEST(scvtf_4s_4s, "scvtf v10.4s, v21.4s", 10, 21)
   2649 GEN_TWOVEC_TEST(ucvtf_4s_4s, "ucvtf v10.4s, v21.4s", 10, 21)
   2650 GEN_TWOVEC_TEST(scvtf_2s_2s, "scvtf v10.2s, v21.2s", 10, 21)
   2651 GEN_TWOVEC_TEST(ucvtf_2s_2s, "ucvtf v10.2s, v21.2s", 10, 21)
   2652 /* scvtf / ucvtf from an integer register (w15/x15) into s7/d7 with an #fbits immediate: 1/16/32 for a w source, 1/32/64 for an x source. */
   2653 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits1,  "scvtf s7, w15, #1",  15, 7)
   2654 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits16, "scvtf s7, w15, #16", 15, 7)
   2655 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits32, "scvtf s7, w15, #32", 15, 7)
   2656 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits1,  "scvtf d7, w15, #1",  15, 7)
   2657 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits16, "scvtf d7, w15, #16", 15, 7)
   2658 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits32, "scvtf d7, w15, #32", 15, 7)
   2659 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits1,  "scvtf s7, x15, #1",  15, 7)
   2660 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits32, "scvtf s7, x15, #32", 15, 7)
   2661 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits64, "scvtf s7, x15, #64", 15, 7)
   2662 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits1,  "scvtf d7, x15, #1",  15, 7)
   2663 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits32, "scvtf d7, x15, #32", 15, 7)
   2664 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits64, "scvtf d7, x15, #64", 15, 7)
   2665 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits1,  "ucvtf s7, w15, #1",  15, 7)
   2666 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits16, "ucvtf s7, w15, #16", 15, 7)
   2667 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits32, "ucvtf s7, w15, #32", 15, 7)
   2668 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits1,  "ucvtf d7, w15, #1",  15, 7)
   2669 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits16, "ucvtf d7, w15, #16", 15, 7)
   2670 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits32, "ucvtf d7, w15, #32", 15, 7)
   2671 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits1,  "ucvtf s7, x15, #1",  15, 7)
   2672 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits32, "ucvtf s7, x15, #32", 15, 7)
   2673 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits64, "ucvtf s7, x15, #64", 15, 7)
   2674 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits1,  "ucvtf d7, x15, #1",  15, 7)
   2675 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits32, "ucvtf d7, x15, #32", 15, 7)
   2676 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits64, "ucvtf d7, x15, #64", 15, 7)
   2677 /* scvtf / ucvtf from an integer register (w15/x15) into s7/d7 without #fbits: all four source/destination size pairings for each. */
   2678 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w, "scvtf s7, w15", 15, 7)
   2679 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w, "scvtf d7, w15", 15, 7)
   2680 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x, "scvtf s7, x15", 15, 7)
   2681 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x, "scvtf d7, x15", 15, 7)
   2682 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w, "ucvtf s7, w15", 15, 7)
   2683 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w, "ucvtf d7, w15", 15, 7)
   2684 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x, "ucvtf s7, x15", 15, 7)
   2685 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x, "ucvtf d7, x15", 15, 7)
   2686 
   2687 // ======================== INT ========================
   2688 /* abs / neg on scalar d registers (d22 <- d23), spelled out with an explicit instruction string. */
   2689 GEN_TWOVEC_TEST(abs_d_d,  "abs d22, d23",   22, 23)
   2690 GEN_TWOVEC_TEST(neg_d_d,  "neg d22, d23",   22, 23)
   2691 /* abs / neg over arrangements 2d, 4s, 2s, 8h, 4h, 16b, 8b. GEN_UNARY_TEST takes (mnemonic, arrangement, arrangement) -- presumably destination then source; confirm against the macro definition. */
   2692 GEN_UNARY_TEST(abs, 2d, 2d)
   2693 GEN_UNARY_TEST(abs, 4s, 4s)
   2694 GEN_UNARY_TEST(abs, 2s, 2s)
   2695 GEN_UNARY_TEST(abs, 8h, 8h)
   2696 GEN_UNARY_TEST(abs, 4h, 4h)
   2697 GEN_UNARY_TEST(abs, 16b, 16b)
   2698 GEN_UNARY_TEST(abs, 8b, 8b)
   2699 GEN_UNARY_TEST(neg, 2d, 2d)
   2700 GEN_UNARY_TEST(neg, 4s, 4s)
   2701 GEN_UNARY_TEST(neg, 2s, 2s)
   2702 GEN_UNARY_TEST(neg, 8h, 8h)
   2703 GEN_UNARY_TEST(neg, 4h, 4h)
   2704 GEN_UNARY_TEST(neg, 16b, 16b)
   2705 GEN_UNARY_TEST(neg, 8b,  8b)
   2706 /* add / sub on scalar d registers (d21 <- d22, d23), spelled out with an explicit instruction string. */
   2707 GEN_THREEVEC_TEST(add_d_d_d, "add d21, d22, d23", 21, 22, 23)
   2708 GEN_THREEVEC_TEST(sub_d_d_d, "sub d21, d22, d23", 21, 22, 23)
   2709 /* add / sub over arrangements 2d, 4s, 2s, 8h, 4h, 16b, 8b. GEN_BINARY_TEST takes (mnemonic, then three arrangements) -- presumably destination, source 1, source 2; confirm against the macro definition. */
   2710 GEN_BINARY_TEST(add, 2d, 2d, 2d)
   2711 GEN_BINARY_TEST(add, 4s, 4s, 4s)
   2712 GEN_BINARY_TEST(add, 2s, 2s, 2s)
   2713 GEN_BINARY_TEST(add, 8h, 8h, 8h)
   2714 GEN_BINARY_TEST(add, 4h, 4h, 4h)
   2715 GEN_BINARY_TEST(add, 16b, 16b, 16b)
   2716 GEN_BINARY_TEST(add, 8b, 8b, 8b)
   2717 GEN_BINARY_TEST(sub, 2d, 2d, 2d)
   2718 GEN_BINARY_TEST(sub, 4s, 4s, 4s)
   2719 GEN_BINARY_TEST(sub, 2s, 2s, 2s)
   2720 GEN_BINARY_TEST(sub, 8h, 8h, 8h)
   2721 GEN_BINARY_TEST(sub, 4h, 4h, 4h)
   2722 GEN_BINARY_TEST(sub, 16b, 16b, 16b)
   2723 GEN_BINARY_TEST(sub, 8b, 8b, 8b)
   2724 /* addhn / subhn / raddhn / rsubhn and their "2" variants. In every case the first arrangement uses the next-smaller lane type than the other two (s vs d, h vs s, b vs h). */
   2725 GEN_BINARY_TEST(addhn,   2s, 2d, 2d)
   2726 GEN_BINARY_TEST(addhn2,  4s, 2d, 2d)
   2727 GEN_BINARY_TEST(addhn,   4h, 4s, 4s)
   2728 GEN_BINARY_TEST(addhn2,  8h, 4s, 4s)
   2729 GEN_BINARY_TEST(addhn,   8b, 8h, 8h)
   2730 GEN_BINARY_TEST(addhn2,  16b, 8h, 8h)
   2731 GEN_BINARY_TEST(subhn,   2s, 2d, 2d)
   2732 GEN_BINARY_TEST(subhn2,  4s, 2d, 2d)
   2733 GEN_BINARY_TEST(subhn,   4h, 4s, 4s)
   2734 GEN_BINARY_TEST(subhn2,  8h, 4s, 4s)
   2735 GEN_BINARY_TEST(subhn,   8b, 8h, 8h)
   2736 GEN_BINARY_TEST(subhn2,  16b, 8h, 8h)
   2737 GEN_BINARY_TEST(raddhn,  2s, 2d, 2d)
   2738 GEN_BINARY_TEST(raddhn2, 4s, 2d, 2d)
   2739 GEN_BINARY_TEST(raddhn,  4h, 4s, 4s)
   2740 GEN_BINARY_TEST(raddhn2, 8h, 4s, 4s)
   2741 GEN_BINARY_TEST(raddhn,  8b, 8h, 8h)
   2742 GEN_BINARY_TEST(raddhn2, 16b, 8h, 8h)
   2743 GEN_BINARY_TEST(rsubhn,  2s, 2d, 2d)
   2744 GEN_BINARY_TEST(rsubhn2, 4s, 2d, 2d)
   2745 GEN_BINARY_TEST(rsubhn,  4h, 4s, 4s)
   2746 GEN_BINARY_TEST(rsubhn2, 8h, 4s, 4s)
   2747 GEN_BINARY_TEST(rsubhn,  8b, 8h, 8h)
   2748 GEN_BINARY_TEST(rsubhn2, 16b, 8h, 8h)
   2749 /* addp with a scalar destination (d22) and a single source vector v23.2d. */
   2750 GEN_TWOVEC_TEST(addp_d_2d,  "addp d22, v23.2d",   22, 23)
   2751 /* addp, three-operand form via GEN_BINARY_TEST, over arrangements 2d, 4s, 2s, 8h, 4h, 16b, 8b. */
   2752 GEN_BINARY_TEST(addp, 2d, 2d, 2d)
   2753 GEN_BINARY_TEST(addp, 4s, 4s, 4s)
   2754 GEN_BINARY_TEST(addp, 2s, 2s, 2s)
   2755 GEN_BINARY_TEST(addp, 8h, 8h, 8h)
   2756 GEN_BINARY_TEST(addp, 4h, 4h, 4h)
   2757 GEN_BINARY_TEST(addp, 16b, 16b, 16b)
   2758 GEN_BINARY_TEST(addp, 8b, 8b, 8b)
   2759 
        /* ADDV: source vector v23 in arrangements 4s, 8h, 4h, 16b, 8b; the
           destination is the scalar register s22 / h22 / b22 whose width
           matches the source element size.  The test name encodes
           <dest size>_<source arrangement>. */
   2760 GEN_TWOVEC_TEST(addv_s_4s,  "addv s22, v23.4s",  22, 23)
   2761 GEN_TWOVEC_TEST(addv_h_8h,  "addv h22, v23.8h",  22, 23)
   2762 GEN_TWOVEC_TEST(addv_h_4h,  "addv h22, v23.4h",  22, 23)
   2763 GEN_TWOVEC_TEST(addv_b_16b, "addv b22, v23.16b", 22, 23)
   2764 GEN_TWOVEC_TEST(addv_b_8b,  "addv b22, v23.8b",  22, 23)
   2765 
        /* Register-form bitwise operations and, bic, orr, orn; each is tested
           in the 16b (128-bit) and 8b (64-bit) arrangements only. */
   2766 GEN_BINARY_TEST(and, 16b, 16b, 16b)
   2767 GEN_BINARY_TEST(and, 8b, 8b, 8b)
   2768 GEN_BINARY_TEST(bic, 16b, 16b, 16b)
   2769 GEN_BINARY_TEST(bic, 8b, 8b, 8b)
   2770 GEN_BINARY_TEST(orr, 16b, 16b, 16b)
   2771 GEN_BINARY_TEST(orr, 8b, 8b, 8b)
   2772 GEN_BINARY_TEST(orn, 16b, 16b, 16b)
   2773 GEN_BINARY_TEST(orn, 8b, 8b, 8b)
   2774 
        /* Immediate forms of ORR and BIC.  Halfword arrangements (8h, 4h) are
           exercised with LSL #0 and #8; word arrangements (4s, 2s) with
           LSL #0, #8, #16 and #24.  Each test name encodes the arrangement,
           the 8-bit immediate and the shift.  Only v22 appears in the
           instruction text; register 23 is passed to the macro but not
           referenced by the instruction, which is what the note below means. */
   2775 /* overkill -- don't need two vecs, only one */
   2776 GEN_TWOVEC_TEST(orr_8h_0x5A_lsl0, "orr v22.8h, #0x5A, LSL #0", 22, 23)
   2777 GEN_TWOVEC_TEST(orr_8h_0xA5_lsl8, "orr v22.8h, #0xA5, LSL #8", 22, 23)
   2778 GEN_TWOVEC_TEST(orr_4h_0x5A_lsl0, "orr v22.4h, #0x5A, LSL #0", 22, 23)
   2779 GEN_TWOVEC_TEST(orr_4h_0xA5_lsl8, "orr v22.4h, #0xA5, LSL #8", 22, 23)
   2780 GEN_TWOVEC_TEST(orr_4s_0x5A_lsl0,  "orr v22.4s, #0x5A, LSL #0",  22, 23)
   2781 GEN_TWOVEC_TEST(orr_4s_0x6B_lsl8,  "orr v22.4s, #0x6B, LSL #8",  22, 23)
   2782 GEN_TWOVEC_TEST(orr_4s_0x49_lsl16, "orr v22.4s, #0x49, LSL #16", 22, 23)
   2783 GEN_TWOVEC_TEST(orr_4s_0x3D_lsl24, "orr v22.4s, #0x3D, LSL #24", 22, 23)
   2784 GEN_TWOVEC_TEST(orr_2s_0x5A_lsl0,  "orr v22.2s, #0x5A, LSL #0",  22, 23)
   2785 GEN_TWOVEC_TEST(orr_2s_0x6B_lsl8,  "orr v22.2s, #0x6B, LSL #8",  22, 23)
   2786 GEN_TWOVEC_TEST(orr_2s_0x49_lsl16, "orr v22.2s, #0x49, LSL #16", 22, 23)
   2787 GEN_TWOVEC_TEST(orr_2s_0x3D_lsl24, "orr v22.2s, #0x3D, LSL #24", 22, 23)
   2788 GEN_TWOVEC_TEST(bic_8h_0x5A_lsl0, "bic v22.8h, #0x5A, LSL #0", 22, 23)
   2789 GEN_TWOVEC_TEST(bic_8h_0xA5_lsl8, "bic v22.8h, #0xA5, LSL #8", 22, 23)
   2790 GEN_TWOVEC_TEST(bic_4h_0x5A_lsl0, "bic v22.4h, #0x5A, LSL #0", 22, 23)
   2791 GEN_TWOVEC_TEST(bic_4h_0xA5_lsl8, "bic v22.4h, #0xA5, LSL #8", 22, 23)
   2792 GEN_TWOVEC_TEST(bic_4s_0x5A_lsl0,  "bic v22.4s, #0x5A, LSL #0",  22, 23)
   2793 GEN_TWOVEC_TEST(bic_4s_0x6B_lsl8,  "bic v22.4s, #0x6B, LSL #8",  22, 23)
   2794 GEN_TWOVEC_TEST(bic_4s_0x49_lsl16, "bic v22.4s, #0x49, LSL #16", 22, 23)
   2795 GEN_TWOVEC_TEST(bic_4s_0x3D_lsl24, "bic v22.4s, #0x3D, LSL #24", 22, 23)
   2796 GEN_TWOVEC_TEST(bic_2s_0x5A_lsl0,  "bic v22.2s, #0x5A, LSL #0",  22, 23)
   2797 GEN_TWOVEC_TEST(bic_2s_0x6B_lsl8,  "bic v22.2s, #0x6B, LSL #8",  22, 23)
   2798 GEN_TWOVEC_TEST(bic_2s_0x49_lsl16, "bic v22.2s, #0x49, LSL #16", 22, 23)
   2799 GEN_TWOVEC_TEST(bic_2s_0x3D_lsl24, "bic v22.2s, #0x3D, LSL #24", 22, 23)
   2800 
        /* bif, bit, bsl and eor, each in the 16b and 8b arrangements. */
   2801 GEN_BINARY_TEST(bif, 16b, 16b, 16b)
   2802 GEN_BINARY_TEST(bif, 8b, 8b, 8b)
   2803 GEN_BINARY_TEST(bit, 16b, 16b, 16b)
   2804 GEN_BINARY_TEST(bit, 8b, 8b, 8b)
   2805 GEN_BINARY_TEST(bsl, 16b, 16b, 16b)
   2806 GEN_BINARY_TEST(bsl, 8b, 8b, 8b)
   2807 GEN_BINARY_TEST(eor, 16b, 16b, 16b)
   2808 GEN_BINARY_TEST(eor, 8b, 8b, 8b)
   2809 
        /* cls and clz for arrangements 4s, 2s, 8h, 4h, 16b, 8b (no 2d test is
           listed).  GEN_UNARY_TEST is defined outside this chunk; its
           arguments here read as mnemonic, destination arrangement, source
           arrangement (confirm against the macro definition). */
   2810 GEN_UNARY_TEST(cls, 4s, 4s)
   2811 GEN_UNARY_TEST(cls, 2s, 2s)
   2812 GEN_UNARY_TEST(cls, 8h, 8h)
   2813 GEN_UNARY_TEST(cls, 4h, 4h)
   2814 GEN_UNARY_TEST(cls, 16b, 16b)
   2815 GEN_UNARY_TEST(cls, 8b, 8b)
   2816 GEN_UNARY_TEST(clz, 4s, 4s)
   2817 GEN_UNARY_TEST(clz, 2s, 2s)
   2818 GEN_UNARY_TEST(clz, 8h, 8h)
   2819 GEN_UNARY_TEST(clz, 4h, 4h)
   2820 GEN_UNARY_TEST(clz, 16b, 16b)
   2821 GEN_UNARY_TEST(clz, 8b, 8b)
   2822 
        /* Scalar (D-register) register-register compares: cmeq, cmge, cmgt,
           cmhi, cmhs and cmtst, all using d2 as destination and d11, d29 as
           sources. */
   2823 GEN_THREEVEC_TEST(cmeq_d_d_d,  "cmeq  d2, d11, d29", 2, 11, 29)
   2824 GEN_THREEVEC_TEST(cmge_d_d_d,  "cmge  d2, d11, d29", 2, 11, 29)
   2825 GEN_THREEVEC_TEST(cmgt_d_d_d,  "cmgt  d2, d11, d29", 2, 11, 29)
   2826 GEN_THREEVEC_TEST(cmhi_d_d_d,  "cmhi  d2, d11, d29", 2, 11, 29)
   2827 GEN_THREEVEC_TEST(cmhs_d_d_d,  "cmhs  d2, d11, d29", 2, 11, 29)
   2828 GEN_THREEVEC_TEST(cmtst_d_d_d, "cmtst d2, d11, d29", 2, 11, 29)
   2829 
        /* Vector register-register compares.  Six mnemonics (cmeq, cmge,
           cmgt, cmhi, cmhs, cmtst), each across the seven arrangements 2d,
           4s, 2s, 8h, 4h, 16b, 8b. */
   2830 GEN_BINARY_TEST(cmeq, 2d, 2d, 2d)
   2831 GEN_BINARY_TEST(cmeq, 4s, 4s, 4s)
   2832 GEN_BINARY_TEST(cmeq, 2s, 2s, 2s)
   2833 GEN_BINARY_TEST(cmeq, 8h, 8h, 8h)
   2834 GEN_BINARY_TEST(cmeq, 4h, 4h, 4h)
   2835 GEN_BINARY_TEST(cmeq, 16b, 16b, 16b)
   2836 GEN_BINARY_TEST(cmeq, 8b, 8b, 8b)
   2837 GEN_BINARY_TEST(cmge, 2d, 2d, 2d)
   2838 GEN_BINARY_TEST(cmge, 4s, 4s, 4s)
   2839 GEN_BINARY_TEST(cmge, 2s, 2s, 2s)
   2840 GEN_BINARY_TEST(cmge, 8h, 8h, 8h)
   2841 GEN_BINARY_TEST(cmge, 4h, 4h, 4h)
   2842 GEN_BINARY_TEST(cmge, 16b, 16b, 16b)
   2843 GEN_BINARY_TEST(cmge, 8b, 8b, 8b)
   2844 GEN_BINARY_TEST(cmgt, 2d, 2d, 2d)
   2845 GEN_BINARY_TEST(cmgt, 4s, 4s, 4s)
   2846 GEN_BINARY_TEST(cmgt, 2s, 2s, 2s)
   2847 GEN_BINARY_TEST(cmgt, 8h, 8h, 8h)
   2848 GEN_BINARY_TEST(cmgt, 4h, 4h, 4h)
   2849 GEN_BINARY_TEST(cmgt, 16b, 16b, 16b)
   2850 GEN_BINARY_TEST(cmgt, 8b, 8b, 8b)
   2851 GEN_BINARY_TEST(cmhi, 2d, 2d, 2d)
   2852 GEN_BINARY_TEST(cmhi, 4s, 4s, 4s)
   2853 GEN_BINARY_TEST(cmhi, 2s, 2s, 2s)
   2854 GEN_BINARY_TEST(cmhi, 8h, 8h, 8h)
   2855 GEN_BINARY_TEST(cmhi, 4h, 4h, 4h)
   2856 GEN_BINARY_TEST(cmhi, 16b, 16b, 16b)
   2857 GEN_BINARY_TEST(cmhi, 8b, 8b, 8b)
   2858 GEN_BINARY_TEST(cmhs, 2d, 2d, 2d)
   2859 GEN_BINARY_TEST(cmhs, 4s, 4s, 4s)
   2860 GEN_BINARY_TEST(cmhs, 2s, 2s, 2s)
   2861 GEN_BINARY_TEST(cmhs, 8h, 8h, 8h)
   2862 GEN_BINARY_TEST(cmhs, 4h, 4h, 4h)
   2863 GEN_BINARY_TEST(cmhs, 16b, 16b, 16b)
   2864 GEN_BINARY_TEST(cmhs, 8b, 8b, 8b)
   2865 GEN_BINARY_TEST(cmtst, 2d, 2d, 2d)
   2866 GEN_BINARY_TEST(cmtst, 4s, 4s, 4s)
   2867 GEN_BINARY_TEST(cmtst, 2s, 2s, 2s)
   2868 GEN_BINARY_TEST(cmtst, 8h, 8h, 8h)
   2869 GEN_BINARY_TEST(cmtst, 4h, 4h, 4h)
   2870 GEN_BINARY_TEST(cmtst, 16b, 16b, 16b)
   2871 GEN_BINARY_TEST(cmtst, 8b, 8b, 8b)
   2872 
        /* Scalar (D-register) compares against the immediate #0: cmeq, cmge,
           cmgt, cmle, cmlt, with d2 as destination and d11 as source. */
   2873 GEN_TWOVEC_TEST(cmeq_zero_d_d,  "cmeq  d2, d11, #0", 2, 11)
   2874 GEN_TWOVEC_TEST(cmge_zero_d_d,  "cmge  d2, d11, #0", 2, 11)
   2875 GEN_TWOVEC_TEST(cmgt_zero_d_d,  "cmgt  d2, d11, #0", 2, 11)
   2876 GEN_TWOVEC_TEST(cmle_zero_d_d,  "cmle  d2, d11, #0", 2, 11)
   2877 GEN_TWOVEC_TEST(cmlt_zero_d_d,  "cmlt  d2, d11, #0", 2, 11)
   2878 
        /* Vector compares against the immediate #0.  Five mnemonics (cmeq,
           cmge, cmgt, cmle, cmlt), each across the seven arrangements 2d, 4s,
           2s, 8h, 4h, 16b, 8b; destination is v5 and source is v22
           throughout. */
   2879 GEN_TWOVEC_TEST(cmeq_zero_2d_2d,   "cmeq v5.2d,  v22.2d,  #0", 5, 22)
   2880 GEN_TWOVEC_TEST(cmeq_zero_4s_4s,   "cmeq v5.4s,  v22.4s,  #0", 5, 22)
   2881 GEN_TWOVEC_TEST(cmeq_zero_2s_2s,   "cmeq v5.2s,  v22.2s,  #0", 5, 22)
   2882 GEN_TWOVEC_TEST(cmeq_zero_8h_8h,   "cmeq v5.8h,  v22.8h,  #0", 5, 22)
   2883 GEN_TWOVEC_TEST(cmeq_zero_4h_4h,   "cmeq v5.4h,  v22.4h,  #0", 5, 22)
   2884 GEN_TWOVEC_TEST(cmeq_zero_16b_16b, "cmeq v5.16b, v22.16b, #0", 5, 22)
   2885 GEN_TWOVEC_TEST(cmeq_zero_8b_8b,   "cmeq v5.8b,  v22.8b,  #0", 5, 22)
   2886 GEN_TWOVEC_TEST(cmge_zero_2d_2d,   "cmge v5.2d,  v22.2d,  #0", 5, 22)
   2887 GEN_TWOVEC_TEST(cmge_zero_4s_4s,   "cmge v5.4s,  v22.4s,  #0", 5, 22)
   2888 GEN_TWOVEC_TEST(cmge_zero_2s_2s,   "cmge v5.2s,  v22.2s,  #0", 5, 22)
   2889 GEN_TWOVEC_TEST(cmge_zero_8h_8h,   "cmge v5.8h,  v22.8h,  #0", 5, 22)
   2890 GEN_TWOVEC_TEST(cmge_zero_4h_4h,   "cmge v5.4h,  v22.4h,  #0", 5, 22)
   2891 GEN_TWOVEC_TEST(cmge_zero_16b_16b, "cmge v5.16b, v22.16b, #0", 5, 22)
   2892 GEN_TWOVEC_TEST(cmge_zero_8b_8b,   "cmge v5.8b,  v22.8b,  #0", 5, 22)
   2893 GEN_TWOVEC_TEST(cmgt_zero_2d_2d,   "cmgt v5.2d,  v22.2d,  #0", 5, 22)
   2894 GEN_TWOVEC_TEST(cmgt_zero_4s_4s,   "cmgt v5.4s,  v22.4s,  #0", 5, 22)
   2895 GEN_TWOVEC_TEST(cmgt_zero_2s_2s,   "cmgt v5.2s,  v22.2s,  #0", 5, 22)
   2896 GEN_TWOVEC_TEST(cmgt_zero_8h_8h,   "cmgt v5.8h,  v22.8h,  #0", 5, 22)
   2897 GEN_TWOVEC_TEST(cmgt_zero_4h_4h,   "cmgt v5.4h,  v22.4h,  #0", 5, 22)
   2898 GEN_TWOVEC_TEST(cmgt_zero_16b_16b, "cmgt v5.16b, v22.16b, #0", 5, 22)
   2899 GEN_TWOVEC_TEST(cmgt_zero_8b_8b,   "cmgt v5.8b,  v22.8b,  #0", 5, 22)
   2900 GEN_TWOVEC_TEST(cmle_zero_2d_2d,   "cmle v5.2d,  v22.2d,  #0", 5, 22)
   2901 GEN_TWOVEC_TEST(cmle_zero_4s_4s,   "cmle v5.4s,  v22.4s,  #0", 5, 22)
   2902 GEN_TWOVEC_TEST(cmle_zero_2s_2s,   "cmle v5.2s,  v22.2s,  #0", 5, 22)
   2903 GEN_TWOVEC_TEST(cmle_zero_8h_8h,   "cmle v5.8h,  v22.8h,  #0", 5, 22)
   2904 GEN_TWOVEC_TEST(cmle_zero_4h_4h,   "cmle v5.4h,  v22.4h,  #0", 5, 22)
   2905 GEN_TWOVEC_TEST(cmle_zero_16b_16b, "cmle v5.16b, v22.16b, #0", 5, 22)
   2906 GEN_TWOVEC_TEST(cmle_zero_8b_8b,   "cmle v5.8b,  v22.8b,  #0", 5, 22)
   2907 GEN_TWOVEC_TEST(cmlt_zero_2d_2d,   "cmlt v5.2d,  v22.2d,  #0", 5, 22)
   2908 GEN_TWOVEC_TEST(cmlt_zero_4s_4s,   "cmlt v5.4s,  v22.4s,  #0", 5, 22)
   2909 GEN_TWOVEC_TEST(cmlt_zero_2s_2s,   "cmlt v5.2s,  v22.2s,  #0", 5, 22)
   2910 GEN_TWOVEC_TEST(cmlt_zero_8h_8h,   "cmlt v5.8h,  v22.8h,  #0", 5, 22)
   2911 GEN_TWOVEC_TEST(cmlt_zero_4h_4h,   "cmlt v5.4h,  v22.4h,  #0", 5, 22)
   2912 GEN_TWOVEC_TEST(cmlt_zero_16b_16b, "cmlt v5.16b, v22.16b, #0", 5, 22)
   2913 GEN_TWOVEC_TEST(cmlt_zero_8b_8b,   "cmlt v5.8b,  v22.8b,  #0", 5, 22)
   2914 
        /* cnt, tested in the 16b and 8b arrangements only. */
   2915 GEN_UNARY_TEST(cnt, 16b, 16b)
   2916 GEN_UNARY_TEST(cnt, 8b, 8b)
   2917 
        /* DUP (element), scalar destination: copies one lane of v23 into the
           scalar register d22 / s22 / h22 / b22.  The test name encodes the
           element size and the source lane index.  The destination must be
           register 22 in every test, because 22 is the first register number
           handed to GEN_TWOVEC_TEST; the lane index belongs only in the
           source operand.
           NOTE(review): the two byte-lane tests below now write b22; any
           stored reference output for them needs regenerating. */
   2918 GEN_TWOVEC_TEST(dup_d_d0,  "dup d22, v23.d[0]", 22, 23)
   2919 GEN_TWOVEC_TEST(dup_d_d1,  "dup d22, v23.d[1]", 22, 23)
   2920 GEN_TWOVEC_TEST(dup_s_s0,  "dup s22, v23.s[0]", 22, 23)
   2921 GEN_TWOVEC_TEST(dup_s_s3,  "dup s22, v23.s[3]", 22, 23)
   2922 GEN_TWOVEC_TEST(dup_h_h0,  "dup h22, v23.h[0]", 22, 23)
   2923 GEN_TWOVEC_TEST(dup_h_h6,  "dup h22, v23.h[6]", 22, 23)
   2924 GEN_TWOVEC_TEST(dup_b_b0,  "dup b22, v23.b[0]",  22, 23)
   2925 GEN_TWOVEC_TEST(dup_b_b13, "dup b22, v23.b[13]", 22, 23)
   2926 
        /* DUP (element), vector destination: v9 receives one lane of v17, in
           every arrangement from 2d down to 8b, with two source lane indices
           per arrangement.  For the 64-bit destination arrangements
           (2s, 4h, 8b) one of the two chosen lanes (s[2], h[5], b[13]) lies
           in the upper 64 bits of the source register. */
   2927 GEN_TWOVEC_TEST(dup_2d_d0,  "dup v9.2d, v17.d[0]", 9, 17)
   2928 GEN_TWOVEC_TEST(dup_2d_d1,  "dup v9.2d, v17.d[1]", 9, 17)
   2929 GEN_TWOVEC_TEST(dup_4s_s0,  "dup v9.4s, v17.s[0]", 9, 17)
   2930 GEN_TWOVEC_TEST(dup_4s_s3,  "dup v9.4s, v17.s[3]", 9, 17)
   2931 GEN_TWOVEC_TEST(dup_2s_s0,  "dup v9.2s, v17.s[0]", 9, 17)
   2932 GEN_TWOVEC_TEST(dup_2s_s2,  "dup v9.2s, v17.s[2]", 9, 17)
   2933 GEN_TWOVEC_TEST(dup_8h_h0,  "dup v9.8h, v17.h[0]", 9, 17)
   2934 GEN_TWOVEC_TEST(dup_8h_h6,  "dup v9.8h, v17.h[6]", 9, 17)
   2935 GEN_TWOVEC_TEST(dup_4h_h1,  "dup v9.4h, v17.h[1]", 9, 17)
   2936 GEN_TWOVEC_TEST(dup_4h_h5,  "dup v9.4h, v17.h[5]", 9, 17)
   2937 GEN_TWOVEC_TEST(dup_16b_b2,  "dup v9.16b, v17.b[2]", 9, 17)
   2938 GEN_TWOVEC_TEST(dup_16b_b12, "dup v9.16b, v17.b[12]", 9, 17)
   2939 GEN_TWOVEC_TEST(dup_8b_b3,  "dup v9.8b, v17.b[3]", 9, 17)
   2940 GEN_TWOVEC_TEST(dup_8b_b13, "dup v9.8b, v17.b[13]", 9, 17)
   2941 
        /* DUP (general register): each test is a two-instruction sequence.
           It first moves v17.d[0] into x10 and then duplicates x10 (2d) or
           w10 (all other arrangements) across v9.
           NOTE(review): the sequence writes x10; whether GEN_TWOVEC_TEST
           declares x10 as clobbered is not visible here -- confirm against
           the macro definition. */
   2942 GEN_TWOVEC_TEST(dup_2d_x,  "mov x10, v17.d[0];  dup v9.2d,  x10", 9, 17)
   2943 GEN_TWOVEC_TEST(dup_4s_w,  "mov x10, v17.d[0];  dup v9.4s,  w10", 9, 17)
   2944 GEN_TWOVEC_TEST(dup_2s_w,  "mov x10, v17.d[0];  dup v9.2s,  w10", 9, 17)
   2945 GEN_TWOVEC_TEST(dup_8h_w,  "mov x10, v17.d[0];  dup v9.8h,  w10",  9, 17)
   2946 GEN_TWOVEC_TEST(dup_4h_w,  "mov x10, v17.d[0];  dup v9.4h,  w10",  9, 17)
   2947 GEN_TWOVEC_TEST(dup_16b_w, "mov x10, v17.d[0];  dup v9.16b, w10", 9, 17)
   2948 GEN_TWOVEC_TEST(dup_8b_w,  "mov x10, v17.d[0];  dup v9.8b,  w10",  9, 17)
   2949 
        /* EXT with every legal byte index: #0..#15 for the 16b arrangement
           and #0..#7 for the 8b arrangement.  The test name carries the index
           in hex (0xA..0xF correspond to #10..#15 in the instruction text).
           All tests use v2 as destination and v11, v29 as sources. */
   2950 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x0,
   2951                   "ext  v2.16b, v11.16b, v29.16b, #0", 2, 11, 29)
   2952 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x1,
   2953                   "ext  v2.16b, v11.16b, v29.16b, #1", 2, 11, 29)
   2954 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x2,
   2955                   "ext  v2.16b, v11.16b, v29.16b, #2", 2, 11, 29)
   2956 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x3,
   2957                   "ext  v2.16b, v11.16b, v29.16b, #3", 2, 11, 29)
   2958 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x4,
   2959                   "ext  v2.16b, v11.16b, v29.16b, #4", 2, 11, 29)
   2960 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x5,
   2961                   "ext  v2.16b, v11.16b, v29.16b, #5", 2, 11, 29)
   2962 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x6,
   2963                   "ext  v2.16b, v11.16b, v29.16b, #6", 2, 11, 29)
   2964 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x7,
   2965                   "ext  v2.16b, v11.16b, v29.16b, #7", 2, 11, 29)
   2966 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x8,
   2967                   "ext  v2.16b, v11.16b, v29.16b, #8", 2, 11, 29)
   2968 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x9,
   2969                   "ext  v2.16b, v11.16b, v29.16b, #9", 2, 11, 29)
   2970 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xA,
   2971                   "ext  v2.16b, v11.16b, v29.16b, #10", 2, 11, 29)
   2972 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xB,
   2973                   "ext  v2.16b, v11.16b, v29.16b, #11", 2, 11, 29)
   2974 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xC,
   2975                   "ext  v2.16b, v11.16b, v29.16b, #12", 2, 11, 29)
   2976 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xD,
   2977                   "ext  v2.16b, v11.16b, v29.16b, #13", 2, 11, 29)
   2978 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xE,
   2979                   "ext  v2.16b, v11.16b, v29.16b, #14", 2, 11, 29)
   2980 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xF,
   2981                   "ext  v2.16b, v11.16b, v29.16b, #15", 2, 11, 29)
   2982 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x0,
   2983                   "ext  v2.8b, v11.8b, v29.8b, #0", 2, 11, 29)
   2984 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x1,
   2985                   "ext  v2.8b, v11.8b, v29.8b, #1", 2, 11, 29)
   2986 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x2,
   2987                   "ext  v2.8b, v11.8b, v29.8b, #2", 2, 11, 29)
   2988 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x3,
   2989                   "ext  v2.8b, v11.8b, v29.8b, #3", 2, 11, 29)
   2990 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x4,
   2991                   "ext  v2.8b, v11.8b, v29.8b, #4", 2, 11, 29)
   2992 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x5,
   2993                   "ext  v2.8b, v11.8b, v29.8b, #5", 2, 11, 29)
   2994 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x6,
   2995                   "ext  v2.8b, v11.8b, v29.8b, #6", 2, 11, 29)
   2996 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x7,
   2997                   "ext  v2.8b, v11.8b, v29.8b, #7", 2, 11, 29)
   2998 
        /* INS (element to element): copies one lane of v24 into one lane of
           v3.  The test name is ins_<dest lane>_<source lane>; d, s, h and b
           lanes are each covered with several index combinations, including
           the lowest and highest lane of each size. */
   2999 GEN_TWOVEC_TEST(ins_d0_d0, "ins v3.d[0], v24.d[0]", 3, 24)
   3000 GEN_TWOVEC_TEST(ins_d0_d1, "ins v3.d[0], v24.d[1]", 3, 24)
   3001 GEN_TWOVEC_TEST(ins_d1_d0, "ins v3.d[1], v24.d[0]", 3, 24)
   3002 GEN_TWOVEC_TEST(ins_d1_d1, "ins v3.d[1], v24.d[1]", 3, 24)
   3003 GEN_TWOVEC_TEST(ins_s0_s2, "ins v3.s[0], v24.s[2]", 3, 24)
   3004 GEN_TWOVEC_TEST(ins_s3_s0, "ins v3.s[3], v24.s[0]", 3, 24)
   3005 GEN_TWOVEC_TEST(ins_s2_s1, "ins v3.s[2], v24.s[1]", 3, 24)
   3006 GEN_TWOVEC_TEST(ins_s1_s3, "ins v3.s[1], v24.s[3]", 3, 24)
   3007 GEN_TWOVEC_TEST(ins_h0_h6, "ins v3.h[0], v24.h[6]", 3, 24)
   3008 GEN_TWOVEC_TEST(ins_h7_h0, "ins v3.h[7], v24.h[0]", 3, 24)
   3009 GEN_TWOVEC_TEST(ins_h6_h1, "ins v3.h[6], v24.h[1]", 3, 24)
   3010 GEN_TWOVEC_TEST(ins_h1_h7, "ins v3.h[1], v24.h[7]", 3, 24)
   3011 GEN_TWOVEC_TEST(ins_b0_b14, "ins v3.b[0],  v24.b[14]", 3, 24)
   3012 GEN_TWOVEC_TEST(ins_b15_b8, "ins v3.b[15], v24.b[8]",  3, 24)
   3013 GEN_TWOVEC_TEST(ins_b13_b9, "ins v3.b[13], v24.b[9]",  3, 24)
   3014 GEN_TWOVEC_TEST(ins_b5_b12, "ins v3.b[5],  v24.b[12]", 3, 24)
   3015 
   3016 // test_INS_general is a handwritten function
        // (i.e. it is not produced by one of the GEN_* macros in this list).
   3017 
        /* MLA, MLS and MUL (by element): v2 = v2 +/- v11 * lane, or
           v2 = v11 * lane, where the lane comes from the third register
           passed to GEN_THREEVEC_TEST.  Word-lane tests take the lane from
           v29.  Halfword-lane tests must use a register numbered 15 or below
           (see the notes below), so they pass register 9 and the instruction
           text names v9 to match; v2 is the destination and must not double
           as the lane source, otherwise register 9 would be unused.
           NOTE(review): the halfword-lane tests now read v9 instead of v2;
           any stored reference output for them needs regenerating. */
   3018 GEN_THREEVEC_TEST(mla_4s_4s_s0, "mla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3019 GEN_THREEVEC_TEST(mla_4s_4s_s3, "mla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3020 GEN_THREEVEC_TEST(mla_2s_2s_s0, "mla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3021 GEN_THREEVEC_TEST(mla_2s_2s_s3, "mla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3022 // For the 'h' version of these, Rm can only be <= 15 (!)
   3023 GEN_THREEVEC_TEST(mla_8h_8h_h1, "mla v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3024 GEN_THREEVEC_TEST(mla_8h_8h_h5, "mla v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3025 GEN_THREEVEC_TEST(mla_4h_4h_h2, "mla v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3026 GEN_THREEVEC_TEST(mla_4h_4h_h7, "mla v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3027 GEN_THREEVEC_TEST(mls_4s_4s_s0, "mls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3028 GEN_THREEVEC_TEST(mls_4s_4s_s3, "mls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3029 GEN_THREEVEC_TEST(mls_2s_2s_s0, "mls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3030 GEN_THREEVEC_TEST(mls_2s_2s_s3, "mls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3031 // For the 'h' version of these, Rm can only be <= 15 (!)
   3032 GEN_THREEVEC_TEST(mls_8h_8h_h1, "mls v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3033 GEN_THREEVEC_TEST(mls_8h_8h_h5, "mls v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3034 GEN_THREEVEC_TEST(mls_4h_4h_h2, "mls v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3035 GEN_THREEVEC_TEST(mls_4h_4h_h7, "mls v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3036 GEN_THREEVEC_TEST(mul_4s_4s_s0, "mul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3037 GEN_THREEVEC_TEST(mul_4s_4s_s3, "mul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3038 GEN_THREEVEC_TEST(mul_2s_2s_s0, "mul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3039 GEN_THREEVEC_TEST(mul_2s_2s_s3, "mul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3040 // For the 'h' version of these, Rm can only be <= 15 (!)
   3041 GEN_THREEVEC_TEST(mul_8h_8h_h1, "mul v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3042 GEN_THREEVEC_TEST(mul_8h_8h_h5, "mul v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3043 GEN_THREEVEC_TEST(mul_4h_4h_h2, "mul v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3044 GEN_THREEVEC_TEST(mul_4h_4h_h7, "mul v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3045 
        /* Vector-by-vector MLA, MLS and MUL for arrangements 4s, 2s, 8h, 4h,
           16b, 8b (no 2d test is listed). */
   3046 GEN_BINARY_TEST(mla, 4s, 4s, 4s)
   3047 GEN_BINARY_TEST(mla, 2s, 2s, 2s)
   3048 GEN_BINARY_TEST(mla, 8h, 8h, 8h)
   3049 GEN_BINARY_TEST(mla, 4h, 4h, 4h)
   3050 GEN_BINARY_TEST(mla, 16b, 16b, 16b)
   3051 GEN_BINARY_TEST(mla, 8b, 8b, 8b)
   3052 GEN_BINARY_TEST(mls, 4s, 4s, 4s)
   3053 GEN_BINARY_TEST(mls, 2s, 2s, 2s)
   3054 GEN_BINARY_TEST(mls, 8h, 8h, 8h)
   3055 GEN_BINARY_TEST(mls, 4h, 4h, 4h)
   3056 GEN_BINARY_TEST(mls, 16b, 16b, 16b)
   3057 GEN_BINARY_TEST(mls, 8b, 8b, 8b)
   3058 GEN_BINARY_TEST(mul, 4s, 4s, 4s)
   3059 GEN_BINARY_TEST(mul, 2s, 2s, 2s)
   3060 GEN_BINARY_TEST(mul, 8h, 8h, 8h)
   3061 GEN_BINARY_TEST(mul, 4h, 4h, 4h)
   3062 GEN_BINARY_TEST(mul, 16b, 16b, 16b)
   3063 GEN_BINARY_TEST(mul, 8b, 8b, 8b)
   3064 
        /* MOVI / MVNI with an LSL-shifted 8-bit immediate.  Byte arrangements
           (16b, 8b) are tested for MOVI with LSL #0 only; halfword
           arrangements with LSL #0 and #8; word arrangements with LSL #0, #8,
           #16 and #24.  Each test name encodes arrangement, immediate and
           shift.  Only v22 appears in the instruction text; register 23 is
           passed to the macro but not referenced by the instruction. */
   3065 /* overkill -- don't need two vecs, only one */
   3066 GEN_TWOVEC_TEST(movi_16b_0x9C_lsl0, "movi v22.16b, #0x9C, LSL #0", 22, 23)
   3067 GEN_TWOVEC_TEST(movi_8b_0x8B_lsl0,  "movi v22.8b,  #0x8B, LSL #0", 22, 23)
   3068 
   3069 GEN_TWOVEC_TEST(movi_8h_0x5A_lsl0,  "movi v22.8h,  #0x5A, LSL #0", 22, 23)
   3070 GEN_TWOVEC_TEST(movi_8h_0xA5_lsl8,  "movi v22.8h,  #0xA5, LSL #8", 22, 23)
   3071 GEN_TWOVEC_TEST(movi_4h_0x5A_lsl0,  "movi v22.4h,  #0x5A, LSL #0", 22, 23)
   3072 GEN_TWOVEC_TEST(movi_4h_0xA5_lsl8,  "movi v22.4h,  #0xA5, LSL #8", 22, 23)
   3073 GEN_TWOVEC_TEST(mvni_8h_0x5A_lsl0,  "mvni v22.8h,  #0x5A, LSL #0", 22, 23)
   3074 GEN_TWOVEC_TEST(mvni_8h_0xA5_lsl8,  "mvni v22.8h,  #0xA5, LSL #8", 22, 23)
   3075 GEN_TWOVEC_TEST(mvni_4h_0x5A_lsl0,  "mvni v22.4h,  #0x5A, LSL #0", 22, 23)
   3076 GEN_TWOVEC_TEST(mvni_4h_0xA5_lsl8,  "mvni v22.4h,  #0xA5, LSL #8", 22, 23)
   3077 
   3078 GEN_TWOVEC_TEST(movi_4s_0x5A_lsl0,  "movi v22.4s,  #0x5A, LSL #0",  22, 23)
   3079 GEN_TWOVEC_TEST(movi_4s_0x6B_lsl8,  "movi v22.4s,  #0x6B, LSL #8",  22, 23)
   3080 GEN_TWOVEC_TEST(movi_4s_0x49_lsl16, "movi v22.4s,  #0x49, LSL #16", 22, 23)
   3081 GEN_TWOVEC_TEST(movi_4s_0x3D_lsl24, "movi v22.4s,  #0x3D, LSL #24", 22, 23)
   3082 GEN_TWOVEC_TEST(movi_2s_0x5A_lsl0,  "movi v22.2s,  #0x5A, LSL #0",  22, 23)
   3083 GEN_TWOVEC_TEST(movi_2s_0x6B_lsl8,  "movi v22.2s,  #0x6B, LSL #8",  22, 23)
   3084 GEN_TWOVEC_TEST(movi_2s_0x49_lsl16, "movi v22.2s,  #0x49, LSL #16", 22, 23)
   3085 GEN_TWOVEC_TEST(movi_2s_0x3D_lsl24, "movi v22.2s,  #0x3D, LSL #24", 22, 23)
   3086 GEN_TWOVEC_TEST(mvni_4s_0x5A_lsl0,  "mvni v22.4s,  #0x5A, LSL #0",  22, 23)
   3087 GEN_TWOVEC_TEST(mvni_4s_0x6B_lsl8,  "mvni v22.4s,  #0x6B, LSL #8",  22, 23)
   3088 GEN_TWOVEC_TEST(mvni_4s_0x49_lsl16, "mvni v22.4s,  #0x49, LSL #16", 22, 23)
   3089 GEN_TWOVEC_TEST(mvni_4s_0x3D_lsl24, "mvni v22.4s,  #0x3D, LSL #24", 22, 23)
   3090 GEN_TWOVEC_TEST(mvni_2s_0x5A_lsl0,  "mvni v22.2s,  #0x5A, LSL #0",  22, 23)
   3091 GEN_TWOVEC_TEST(mvni_2s_0x6B_lsl8,  "mvni v22.2s,  #0x6B, LSL #8",  22, 23)
   3092 GEN_TWOVEC_TEST(mvni_2s_0x49_lsl16, "mvni v22.2s,  #0x49, LSL #16", 22, 23)
   3093 GEN_TWOVEC_TEST(mvni_2s_0x3D_lsl24, "mvni v22.2s,  #0x3D, LSL #24", 22, 23)
   3094 
        /* MOVI / MVNI with the MSL shift form, word arrangements only
           (4s, 2s), with MSL #8 and MSL #16.  As with the LSL tests, only v22
           is named in the instruction text. */
   3095 /* overkill -- don't need two vecs, only one */
   3096 GEN_TWOVEC_TEST(movi_4s_0x6B_msl8,  "movi v22.4s,  #0x6B, MSL #8", 22, 23)
   3097 GEN_TWOVEC_TEST(movi_4s_0x94_msl16, "movi v22.4s,  #0x94, MSL #16", 22, 23)
   3098 GEN_TWOVEC_TEST(movi_2s_0x7A_msl8,  "movi v22.2s,  #0x7A, MSL #8", 22, 23)
   3099 GEN_TWOVEC_TEST(movi_2s_0xA5_msl16, "movi v22.2s,  #0xA5, MSL #16", 22, 23)
   3100 GEN_TWOVEC_TEST(mvni_4s_0x6B_msl8,  "mvni v22.4s,  #0x6B, MSL #8", 22, 23)
   3101 GEN_TWOVEC_TEST(mvni_4s_0x94_msl16, "mvni v22.4s,  #0x94, MSL #16", 22, 23)
   3102 GEN_TWOVEC_TEST(mvni_2s_0x7A_msl8,  "mvni v22.2s,  #0x7A, MSL #8", 22, 23)
   3103 GEN_TWOVEC_TEST(mvni_2s_0xA5_msl16, "mvni v22.2s,  #0xA5, MSL #16", 22, 23)
   3104 
        /* 64-bit MOVI forms (scalar d22 and vector v22.2d).  The immediate is
           written as a 64-bit byte mask; the hex value in the test name is
           that mask with each 0xFF byte read as a 1 bit and each 0x00 byte as
           a 0 bit, most significant byte first
           (FF00FF0000FF00FF -> 0xA5, FF00FFFF00FF0000 -> 0xB4). */
   3105 GEN_TWOVEC_TEST(movi_d_0xA5,  "movi d22,    #0xFF00FF0000FF00FF", 22, 23)
   3106 GEN_TWOVEC_TEST(movi_2d_0xB4, "movi v22.2d, #0xFF00FFFF00FF0000", 22, 23)
   3107 
        /* not, in the 16b and 8b arrangements. */
   3108 GEN_UNARY_TEST(not, 16b, 16b)
   3109 GEN_UNARY_TEST(not, 8b,  8b)
   3110 
        /* pmul, in the 16b and 8b arrangements. */
   3111 GEN_BINARY_TEST(pmul, 16b, 16b, 16b)
   3112 GEN_BINARY_TEST(pmul, 8b, 8b, 8b)
   3113 
        /* pmull / pmull2 producing 8h from byte sources: pmull takes the 8b
           arrangement, pmull2 the 16b arrangement.  The two 1q-destination
           tests are disabled; the reason is not recorded here.
           NOTE(review): the second disabled line pairs "pmull" with 2d
           sources, whereas the enabled lines pair the 128-bit source
           arrangement with "pmull2" -- confirm the mnemonic before
           re-enabling. */
   3114 GEN_BINARY_TEST(pmull,  8h, 8b,  8b)
   3115 GEN_BINARY_TEST(pmull2, 8h, 16b, 16b)
   3116 //GEN_BINARY_TEST(pmull,  1q, 1d,  1d)
   3117 //GEN_BINARY_TEST(pmull,  1q, 2d,  2d)
   3118 
        /* rbit and the rev16 / rev32 / rev64 family.  Arrangements covered:
             rbit, rev16 : 16b, 8b
             rev32       : 16b, 8b, 8h, 4h
             rev64       : 16b, 8b, 8h, 4h, 4s, 2s
           i.e. each revN is tested for every element size smaller than N
           bits. */
   3119 GEN_UNARY_TEST(rbit, 16b, 16b)
   3120 GEN_UNARY_TEST(rbit, 8b, 8b)
   3121 GEN_UNARY_TEST(rev16, 16b, 16b)
   3122 GEN_UNARY_TEST(rev16, 8b, 8b)
   3123 GEN_UNARY_TEST(rev32, 16b, 16b)
   3124 GEN_UNARY_TEST(rev32, 8b, 8b)
   3125 GEN_UNARY_TEST(rev32, 8h, 8h)
   3126 GEN_UNARY_TEST(rev32, 4h, 4h)
   3127 GEN_UNARY_TEST(rev64, 16b, 16b)
   3128 GEN_UNARY_TEST(rev64, 8b, 8b)
   3129 GEN_UNARY_TEST(rev64, 8h, 8h)
   3130 GEN_UNARY_TEST(rev64, 4h, 4h)
   3131 GEN_UNARY_TEST(rev64, 4s, 4s)
   3132 GEN_UNARY_TEST(rev64, 2s, 2s)
   3133 
        /* saba and uaba (the s-/u-prefixed pair of the same operation), for
           arrangements 4s, 2s, 8h, 4h, 16b, 8b. */
   3134 GEN_BINARY_TEST(saba, 4s, 4s, 4s)
   3135 GEN_BINARY_TEST(saba, 2s, 2s, 2s)
   3136 GEN_BINARY_TEST(saba, 8h, 8h, 8h)
   3137 GEN_BINARY_TEST(saba, 4h, 4h, 4h)
   3138 GEN_BINARY_TEST(saba, 16b, 16b, 16b)
   3139 GEN_BINARY_TEST(saba, 8b, 8b, 8b)
   3140 GEN_BINARY_TEST(uaba, 4s, 4s, 4s)
   3141 GEN_BINARY_TEST(uaba, 2s, 2s, 2s)
   3142 GEN_BINARY_TEST(uaba, 8h, 8h, 8h)
   3143 GEN_BINARY_TEST(uaba, 4h, 4h, 4h)
   3144 GEN_BINARY_TEST(uaba, 16b, 16b, 16b)
   3145 GEN_BINARY_TEST(uaba, 8b, 8b, 8b)
   3146 
        /* sabal / uabal and their "2" variants.  The destination elements are
           twice as wide as the source elements (2d<-s, 4s<-h, 8h<-b).  The
           unsuffixed mnemonic is paired with 64-bit source arrangements
           (2s/4h/8b), the "2" mnemonic with 128-bit ones (4s/8h/16b).
           Registers: v2 destination, v11 and v29 sources. */
   3147 GEN_THREEVEC_TEST(sabal_2d_2s_2s,  "sabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3148 GEN_THREEVEC_TEST(sabal2_2d_4s_4s, "sabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3149 GEN_THREEVEC_TEST(sabal_4s_4h_4h,  "sabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3150 GEN_THREEVEC_TEST(sabal2_4s_8h_8h, "sabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3151 GEN_THREEVEC_TEST(sabal_8h_8b_8b,  "sabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3152 GEN_THREEVEC_TEST(sabal2_8h_16b_16b,
   3153                                    "sabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3154 GEN_THREEVEC_TEST(uabal_2d_2s_2s,  "uabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3155 GEN_THREEVEC_TEST(uabal2_2d_4s_4s, "uabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3156 GEN_THREEVEC_TEST(uabal_4s_4h_4h,  "uabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3157 GEN_THREEVEC_TEST(uabal2_4s_8h_8h, "uabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3158 GEN_THREEVEC_TEST(uabal_8h_8b_8b,  "uabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3159 GEN_THREEVEC_TEST(uabal2_8h_16b_16b,
   3160                                    "uabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3161 
        /* sabd and uabd with equal source and destination arrangements: 4s,
           2s, 8h, 4h, 16b, 8b.  Registers: v2 destination, v11 and v29
           sources. */
   3162 GEN_THREEVEC_TEST(sabd_4s_4s_4s,    "sabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
   3163 GEN_THREEVEC_TEST(sabd_2s_2s_2s,    "sabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
   3164 GEN_THREEVEC_TEST(sabd_8h_8h_8h,    "sabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
   3165 GEN_THREEVEC_TEST(sabd_4h_4h_4h,    "sabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
   3166 GEN_THREEVEC_TEST(sabd_16b_16b_16b, "sabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3167 GEN_THREEVEC_TEST(sabd_8b_8b_8b,    "sabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
   3168 GEN_THREEVEC_TEST(uabd_4s_4s_4s,    "uabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
   3169 GEN_THREEVEC_TEST(uabd_2s_2s_2s,    "uabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
   3170 GEN_THREEVEC_TEST(uabd_8h_8h_8h,    "uabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
   3171 GEN_THREEVEC_TEST(uabd_4h_4h_4h,    "uabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
   3172 GEN_THREEVEC_TEST(uabd_16b_16b_16b, "uabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3173 GEN_THREEVEC_TEST(uabd_8b_8b_8b,    "uabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
   3174 
        /* sabdl / uabdl and their "2" variants.  Destination elements are
           twice as wide as the source elements; the unsuffixed mnemonic is
           paired with 64-bit source arrangements and the "2" mnemonic with
           128-bit ones.  Registers: v2 destination, v11 and v29 sources. */
   3175 GEN_THREEVEC_TEST(sabdl_2d_2s_2s,  "sabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3176 GEN_THREEVEC_TEST(sabdl2_2d_4s_4s, "sabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3177 GEN_THREEVEC_TEST(sabdl_4s_4h_4h,  "sabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3178 GEN_THREEVEC_TEST(sabdl2_4s_8h_8h, "sabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3179 GEN_THREEVEC_TEST(sabdl_8h_8b_8b,  "sabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3180 GEN_THREEVEC_TEST(sabdl2_8h_16b_16b,
   3181                                    "sabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3182 GEN_THREEVEC_TEST(uabdl_2d_2s_2s,  "uabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3183 GEN_THREEVEC_TEST(uabdl2_2d_4s_4s, "uabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3184 GEN_THREEVEC_TEST(uabdl_4s_4h_4h,  "uabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3185 GEN_THREEVEC_TEST(uabdl2_4s_8h_8h, "uabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3186 GEN_THREEVEC_TEST(uabdl_8h_8b_8b,  "uabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3187 GEN_THREEVEC_TEST(uabdl2_8h_16b_16b,
   3188                                    "uabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3189 
        /* sadalp / uadalp.  In each test the destination arrangement has half
           as many lanes as the source, each twice as wide (8b->4h, 16b->8h,
           4h->2s, 8h->4s, 2s->1d, 4s->2d).  Test names are
           <mnemonic>_<dest arrangement>_<source arrangement>; registers are
           v3 (destination) and v19 (source). */
   3190 GEN_TWOVEC_TEST(sadalp_4h_8b,  "sadalp v3.4h, v19.8b",  3, 19)
   3191 GEN_TWOVEC_TEST(sadalp_8h_16b, "sadalp v3.8h, v19.16b", 3, 19)
   3192 GEN_TWOVEC_TEST(sadalp_2s_4h,  "sadalp v3.2s, v19.4h",  3, 19)
   3193 GEN_TWOVEC_TEST(sadalp_4s_8h,  "sadalp v3.4s, v19.8h",  3, 19)
   3194 GEN_TWOVEC_TEST(sadalp_1d_2s,  "sadalp v3.1d, v19.2s",  3, 19)
   3195 GEN_TWOVEC_TEST(sadalp_2d_4s,  "sadalp v3.2d, v19.4s",  3, 19)
   3196 GEN_TWOVEC_TEST(uadalp_4h_8b,  "uadalp v3.4h, v19.8b",  3, 19)
   3197 GEN_TWOVEC_TEST(uadalp_8h_16b, "uadalp v3.8h, v19.16b", 3, 19)
   3198 GEN_TWOVEC_TEST(uadalp_2s_4h,  "uadalp v3.2s, v19.4h",  3, 19)
   3199 GEN_TWOVEC_TEST(uadalp_4s_8h,  "uadalp v3.4s, v19.8h",  3, 19)
   3200 GEN_TWOVEC_TEST(uadalp_1d_2s,  "uadalp v3.1d, v19.2s",  3, 19)
   3201 GEN_TWOVEC_TEST(uadalp_2d_4s,  "uadalp v3.2d, v19.4s",  3, 19)
   3202 
        /* saddl, uaddl, ssubl, usubl and their "2" variants.  Both sources
           share one arrangement and the destination elements are twice as
           wide (2d<-s, 4s<-h, 8h<-b).  The unsuffixed mnemonic is paired with
           64-bit source arrangements, the "2" mnemonic with 128-bit ones.
           Registers: v2 destination, v11 and v29 sources. */
   3203 GEN_THREEVEC_TEST(saddl_2d_2s_2s,  "saddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3204 GEN_THREEVEC_TEST(saddl2_2d_4s_4s, "saddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3205 GEN_THREEVEC_TEST(saddl_4s_4h_4h,  "saddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3206 GEN_THREEVEC_TEST(saddl2_4s_8h_8h, "saddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3207 GEN_THREEVEC_TEST(saddl_8h_8b_8b,  "saddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3208 GEN_THREEVEC_TEST(saddl2_8h_16b_16b,
   3209                                    "saddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3210 GEN_THREEVEC_TEST(uaddl_2d_2s_2s,  "uaddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3211 GEN_THREEVEC_TEST(uaddl2_2d_4s_4s, "uaddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3212 GEN_THREEVEC_TEST(uaddl_4s_4h_4h,  "uaddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3213 GEN_THREEVEC_TEST(uaddl2_4s_8h_8h, "uaddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3214 GEN_THREEVEC_TEST(uaddl_8h_8b_8b,  "uaddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3215 GEN_THREEVEC_TEST(uaddl2_8h_16b_16b,
   3216                                    "uaddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3217 GEN_THREEVEC_TEST(ssubl_2d_2s_2s,  "ssubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3218 GEN_THREEVEC_TEST(ssubl2_2d_4s_4s, "ssubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3219 GEN_THREEVEC_TEST(ssubl_4s_4h_4h,  "ssubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3220 GEN_THREEVEC_TEST(ssubl2_4s_8h_8h, "ssubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3221 GEN_THREEVEC_TEST(ssubl_8h_8b_8b,  "ssubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3222 GEN_THREEVEC_TEST(ssubl2_8h_16b_16b,
   3223                                    "ssubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3224 GEN_THREEVEC_TEST(usubl_2d_2s_2s,  "usubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3225 GEN_THREEVEC_TEST(usubl2_2d_4s_4s, "usubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3226 GEN_THREEVEC_TEST(usubl_4s_4h_4h,  "usubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3227 GEN_THREEVEC_TEST(usubl2_4s_8h_8h, "usubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3228 GEN_THREEVEC_TEST(usubl_8h_8b_8b,  "usubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3229 GEN_THREEVEC_TEST(usubl2_8h_16b_16b,
   3230                                    "usubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3231 
        /* saddlp / uaddlp.  Same arrangement pairs as the sadalp/uadalp
           tests: the destination has half as many lanes as the source, each
           twice as wide.  Registers are v3 (destination) and v19 (source). */
   3232 GEN_TWOVEC_TEST(saddlp_4h_8b,  "saddlp v3.4h, v19.8b",  3, 19)
   3233 GEN_TWOVEC_TEST(saddlp_8h_16b, "saddlp v3.8h, v19.16b", 3, 19)
   3234 GEN_TWOVEC_TEST(saddlp_2s_4h,  "saddlp v3.2s, v19.4h",  3, 19)
   3235 GEN_TWOVEC_TEST(saddlp_4s_8h,  "saddlp v3.4s, v19.8h",  3, 19)
   3236 GEN_TWOVEC_TEST(saddlp_1d_2s,  "saddlp v3.1d, v19.2s",  3, 19)
   3237 GEN_TWOVEC_TEST(saddlp_2d_4s,  "saddlp v3.2d, v19.4s",  3, 19)
   3238 GEN_TWOVEC_TEST(uaddlp_4h_8b,  "uaddlp v3.4h, v19.8b",  3, 19)
   3239 GEN_TWOVEC_TEST(uaddlp_8h_16b, "uaddlp v3.8h, v19.16b", 3, 19)
   3240 GEN_TWOVEC_TEST(uaddlp_2s_4h,  "uaddlp v3.2s, v19.4h",  3, 19)
   3241 GEN_TWOVEC_TEST(uaddlp_4s_8h,  "uaddlp v3.4s, v19.8h",  3, 19)
   3242 GEN_TWOVEC_TEST(uaddlp_1d_2s,  "uaddlp v3.1d, v19.2s",  3, 19)
   3243 GEN_TWOVEC_TEST(uaddlp_2d_4s,  "uaddlp v3.2d, v19.4s",  3, 19)
   3244 
        /* saddlv / uaddlv: vector source v19, scalar destination one size up
           from the source element (h3 from 16b/8b, s3 from 8h/4h, d3 from
           4s).  No 2s-source test is listed. */
   3245 GEN_TWOVEC_TEST(saddlv_h_16b, "saddlv h3, v19.16b",  3, 19)
   3246 GEN_TWOVEC_TEST(saddlv_h_8b,  "saddlv h3, v19.8b",   3, 19)
   3247 GEN_TWOVEC_TEST(saddlv_s_8h,  "saddlv s3, v19.8h",   3, 19)
   3248 GEN_TWOVEC_TEST(saddlv_s_4h,  "saddlv s3, v19.4h",   3, 19)
   3249 GEN_TWOVEC_TEST(saddlv_d_4s,  "saddlv d3, v19.4s",   3, 19)
   3250 GEN_TWOVEC_TEST(uaddlv_h_16b, "uaddlv h3, v19.16b",  3, 19)
   3251 GEN_TWOVEC_TEST(uaddlv_h_8b,  "uaddlv h3, v19.8b",   3, 19)
   3252 GEN_TWOVEC_TEST(uaddlv_s_8h,  "uaddlv s3, v19.8h",   3, 19)
   3253 GEN_TWOVEC_TEST(uaddlv_s_4h,  "uaddlv s3, v19.4h",   3, 19)
   3254 GEN_TWOVEC_TEST(uaddlv_d_4s,  "uaddlv d3, v19.4s",   3, 19)
   3255 
        /* saddw, uaddw, ssubw, usubw and their "2" variants.  The destination
           and first source share the wide arrangement (8h/4s/2d); the second
           source has elements half that width.  The unsuffixed mnemonic takes
           a 64-bit second source (8b/4h/2s), the "2" mnemonic a 128-bit one
           (16b/8h/4s).  Registers: v5 destination, v13 and v31 sources. */
   3256 GEN_THREEVEC_TEST(saddw2_8h_8h_16b, "saddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3257 GEN_THREEVEC_TEST(saddw_8h_8h_8b,   "saddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3258 GEN_THREEVEC_TEST(saddw2_4s_4s_8h,  "saddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3259 GEN_THREEVEC_TEST(saddw_4s_4s_4h,   "saddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3260 GEN_THREEVEC_TEST(saddw2_2d_2d_4s,  "saddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3261 GEN_THREEVEC_TEST(saddw_2d_2d_2s,   "saddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3262 GEN_THREEVEC_TEST(uaddw2_8h_8h_16b, "uaddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3263 GEN_THREEVEC_TEST(uaddw_8h_8h_8b,   "uaddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3264 GEN_THREEVEC_TEST(uaddw2_4s_4s_8h,  "uaddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3265 GEN_THREEVEC_TEST(uaddw_4s_4s_4h,   "uaddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3266 GEN_THREEVEC_TEST(uaddw2_2d_2d_4s,  "uaddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3267 GEN_THREEVEC_TEST(uaddw_2d_2d_2s,   "uaddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3268 GEN_THREEVEC_TEST(ssubw2_8h_8h_16b, "ssubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3269 GEN_THREEVEC_TEST(ssubw_8h_8h_8b,   "ssubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3270 GEN_THREEVEC_TEST(ssubw2_4s_4s_8h,  "ssubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3271 GEN_THREEVEC_TEST(ssubw_4s_4s_4h,   "ssubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3272 GEN_THREEVEC_TEST(ssubw2_2d_2d_4s,  "ssubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3273 GEN_THREEVEC_TEST(ssubw_2d_2d_2s,   "ssubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3274 GEN_THREEVEC_TEST(usubw2_8h_8h_16b, "usubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3275 GEN_THREEVEC_TEST(usubw_8h_8h_8b,   "usubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3276 GEN_THREEVEC_TEST(usubw2_4s_4s_8h,  "usubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3277 GEN_THREEVEC_TEST(usubw_4s_4s_4h,   "usubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3278 GEN_THREEVEC_TEST(usubw2_2d_2d_4s,  "usubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3279 GEN_THREEVEC_TEST(usubw_2d_2d_2s,   "usubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3280 
   // shadd / uhadd / shsub / uhsub on v2, v11, v29.  Every mnemonic is
   // covered for the arrangements 4s, 2s, 8h, 4h, 16b and 8b, with all three
   // operands sharing the same arrangement.
   3281 GEN_THREEVEC_TEST(shadd_4s_4s_4s,   "shadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3282 GEN_THREEVEC_TEST(shadd_2s_2s_2s,   "shadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3283 GEN_THREEVEC_TEST(shadd_8h_8h_8h,   "shadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3284 GEN_THREEVEC_TEST(shadd_4h_4h_4h,   "shadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3285 GEN_THREEVEC_TEST(shadd_16b_16b_16b,"shadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3286 GEN_THREEVEC_TEST(shadd_8b_8b_8b,   "shadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3287 GEN_THREEVEC_TEST(uhadd_4s_4s_4s,   "uhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3288 GEN_THREEVEC_TEST(uhadd_2s_2s_2s,   "uhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3289 GEN_THREEVEC_TEST(uhadd_8h_8h_8h,   "uhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3290 GEN_THREEVEC_TEST(uhadd_4h_4h_4h,   "uhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3291 GEN_THREEVEC_TEST(uhadd_16b_16b_16b,"uhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3292 GEN_THREEVEC_TEST(uhadd_8b_8b_8b,   "uhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3293 GEN_THREEVEC_TEST(shsub_4s_4s_4s,   "shsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3294 GEN_THREEVEC_TEST(shsub_2s_2s_2s,   "shsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3295 GEN_THREEVEC_TEST(shsub_8h_8h_8h,   "shsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3296 GEN_THREEVEC_TEST(shsub_4h_4h_4h,   "shsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3297 GEN_THREEVEC_TEST(shsub_16b_16b_16b,"shsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3298 GEN_THREEVEC_TEST(shsub_8b_8b_8b,   "shsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3299 GEN_THREEVEC_TEST(uhsub_4s_4s_4s,   "uhsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3300 GEN_THREEVEC_TEST(uhsub_2s_2s_2s,   "uhsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3301 GEN_THREEVEC_TEST(uhsub_8h_8h_8h,   "uhsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3302 GEN_THREEVEC_TEST(uhsub_4h_4h_4h,   "uhsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3303 GEN_THREEVEC_TEST(uhsub_16b_16b_16b,"uhsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3304 GEN_THREEVEC_TEST(uhsub_8b_8b_8b,   "uhsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3305 
   // shll / shll2 from v24 into v3.  In every case the shift immediate
   // equals the source element width in bits: #8 for b, #16 for h, #32 for s.
   3306 GEN_TWOVEC_TEST(shll_8h_8b_8,   "shll  v3.8h, v24.8b,  #8", 3, 24)
   3307 GEN_TWOVEC_TEST(shll2_8h_16b_8, "shll2 v3.8h, v24.16b, #8", 3, 24)
   3308 GEN_TWOVEC_TEST(shll_4s_4h_16,  "shll  v3.4s, v24.4h, #16", 3, 24)
   3309 GEN_TWOVEC_TEST(shll2_4s_8h_16, "shll2 v3.4s, v24.8h, #16", 3, 24)
   3310 GEN_TWOVEC_TEST(shll_2d_2s_32,  "shll  v3.2d, v24.2s, #32", 3, 24)
   3311 GEN_TWOVEC_TEST(shll2_2d_4s_32, "shll2 v3.2d, v24.4s, #32", 3, 24)
   3312 
   // shrn / shrn2 and rshrn / rshrn2 from v29 into v4.  Each source and
   // destination pairing is tested with two shift amounts: #1, and a shift
   // equal to the destination element width (#32 for s, #16 for h, #8 for b).
   3313 GEN_TWOVEC_TEST(shrn_2s_2d_1,   "shrn  v4.2s,  v29.2d, #1",  4, 29)
   3314 GEN_TWOVEC_TEST(shrn_2s_2d_32,  "shrn  v4.2s,  v29.2d, #32", 4, 29)
   3315 GEN_TWOVEC_TEST(shrn2_4s_2d_1,  "shrn2 v4.4s,  v29.2d, #1",  4, 29)
   3316 GEN_TWOVEC_TEST(shrn2_4s_2d_32, "shrn2 v4.4s,  v29.2d, #32", 4, 29)
   3317 GEN_TWOVEC_TEST(shrn_4h_4s_1,   "shrn  v4.4h,  v29.4s, #1",  4, 29)
   3318 GEN_TWOVEC_TEST(shrn_4h_4s_16,  "shrn  v4.4h,  v29.4s, #16", 4, 29)
   3319 GEN_TWOVEC_TEST(shrn2_8h_4s_1,  "shrn2 v4.8h,  v29.4s, #1",  4, 29)
   3320 GEN_TWOVEC_TEST(shrn2_8h_4s_16, "shrn2 v4.8h,  v29.4s, #16", 4, 29)
   3321 GEN_TWOVEC_TEST(shrn_8b_8h_1,   "shrn  v4.8b,  v29.8h, #1",  4, 29)
   3322 GEN_TWOVEC_TEST(shrn_8b_8h_8,   "shrn  v4.8b,  v29.8h, #8",  4, 29)
   3323 GEN_TWOVEC_TEST(shrn2_16b_8h_1, "shrn2 v4.16b, v29.8h, #1",  4, 29)
   3324 GEN_TWOVEC_TEST(shrn2_16b_8h_8, "shrn2 v4.16b, v29.8h, #8",  4, 29)
   3325 GEN_TWOVEC_TEST(rshrn_2s_2d_1,   "rshrn  v4.2s,  v29.2d, #1",  4, 29)
   3326 GEN_TWOVEC_TEST(rshrn_2s_2d_32,  "rshrn  v4.2s,  v29.2d, #32", 4, 29)
   3327 GEN_TWOVEC_TEST(rshrn2_4s_2d_1,  "rshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3328 GEN_TWOVEC_TEST(rshrn2_4s_2d_32, "rshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3329 GEN_TWOVEC_TEST(rshrn_4h_4s_1,   "rshrn  v4.4h,  v29.4s, #1",  4, 29)
   3330 GEN_TWOVEC_TEST(rshrn_4h_4s_16,  "rshrn  v4.4h,  v29.4s, #16", 4, 29)
   3331 GEN_TWOVEC_TEST(rshrn2_8h_4s_1,  "rshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3332 GEN_TWOVEC_TEST(rshrn2_8h_4s_16, "rshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3333 GEN_TWOVEC_TEST(rshrn_8b_8h_1,   "rshrn  v4.8b,  v29.8h, #1",  4, 29)
   3334 GEN_TWOVEC_TEST(rshrn_8b_8h_8,   "rshrn  v4.8b,  v29.8h, #8",  4, 29)
   3335 GEN_TWOVEC_TEST(rshrn2_16b_8h_1, "rshrn2 v4.16b, v29.8h, #1",  4, 29)
   3336 GEN_TWOVEC_TEST(rshrn2_16b_8h_8, "rshrn2 v4.16b, v29.8h, #8",  4, 29)
   3337 
   // Scalar (d-register) sli / sri from d28 into d5.  sli is tested with
   // shifts #0, #32 and #63; sri with #1, #33 and #64.
   3338 GEN_TWOVEC_TEST(sli_d_d_0,  "sli d5, d28, #0",  5, 28)
   3339 GEN_TWOVEC_TEST(sli_d_d_32, "sli d5, d28, #32", 5, 28)
   3340 GEN_TWOVEC_TEST(sli_d_d_63, "sli d5, d28, #63", 5, 28)
   3341 GEN_TWOVEC_TEST(sri_d_d_1,  "sri d5, d28, #1",  5, 28)
   3342 GEN_TWOVEC_TEST(sri_d_d_33, "sri d5, d28, #33", 5, 28)
   3343 GEN_TWOVEC_TEST(sri_d_d_64, "sri d5, d28, #64", 5, 28)
   3344 
   // Vector sli / sri from v27 into v6, for arrangements 2d, 4s, 2s, 8h, 4h,
   // 16b and 8b.  Each arrangement gets three shift amounts:
   //   sli: #0, a mid-range value, and (element width - 1)
   //   sri: #1, a mid-range value, and the element width
   3345 GEN_TWOVEC_TEST(sli_2d_2d_0,   "sli v6.2d,  v27.2d, #0",  6, 27)
   3346 GEN_TWOVEC_TEST(sli_2d_2d_32,  "sli v6.2d,  v27.2d, #32", 6, 27)
   3347 GEN_TWOVEC_TEST(sli_2d_2d_63,  "sli v6.2d,  v27.2d, #63", 6, 27)
   3348 GEN_TWOVEC_TEST(sli_4s_4s_0,   "sli v6.4s,  v27.4s, #0",  6, 27)
   3349 GEN_TWOVEC_TEST(sli_4s_4s_16,  "sli v6.4s,  v27.4s, #16", 6, 27)
   3350 GEN_TWOVEC_TEST(sli_4s_4s_31,  "sli v6.4s,  v27.4s, #31", 6, 27)
   3351 GEN_TWOVEC_TEST(sli_2s_2s_0,   "sli v6.2s,  v27.2s, #0",  6, 27)
   3352 GEN_TWOVEC_TEST(sli_2s_2s_16,  "sli v6.2s,  v27.2s, #16", 6, 27)
   3353 GEN_TWOVEC_TEST(sli_2s_2s_31,  "sli v6.2s,  v27.2s, #31", 6, 27)
   3354 GEN_TWOVEC_TEST(sli_8h_8h_0,   "sli v6.8h,  v27.8h, #0",  6, 27)
   3355 GEN_TWOVEC_TEST(sli_8h_8h_8,   "sli v6.8h,  v27.8h, #8",  6, 27)
   3356 GEN_TWOVEC_TEST(sli_8h_8h_15,  "sli v6.8h,  v27.8h, #15", 6, 27)
   3357 GEN_TWOVEC_TEST(sli_4h_4h_0,   "sli v6.4h,  v27.4h, #0",  6, 27)
   3358 GEN_TWOVEC_TEST(sli_4h_4h_8,   "sli v6.4h,  v27.4h, #8",  6, 27)
   3359 GEN_TWOVEC_TEST(sli_4h_4h_15,  "sli v6.4h,  v27.4h, #15", 6, 27)
   3360 GEN_TWOVEC_TEST(sli_16b_16b_0, "sli v6.16b, v27.16b, #0", 6, 27)
   3361 GEN_TWOVEC_TEST(sli_16b_16b_3, "sli v6.16b, v27.16b, #3", 6, 27)
   3362 GEN_TWOVEC_TEST(sli_16b_16b_7, "sli v6.16b, v27.16b, #7", 6, 27)
   3363 GEN_TWOVEC_TEST(sli_8b_8b_0,   "sli v6.8b,  v27.8b, #0",  6, 27)
   3364 GEN_TWOVEC_TEST(sli_8b_8b_3,   "sli v6.8b,  v27.8b, #3",  6, 27)
   3365 GEN_TWOVEC_TEST(sli_8b_8b_7,   "sli v6.8b,  v27.8b, #7",  6, 27)
   3366 GEN_TWOVEC_TEST(sri_2d_2d_1,   "sri v6.2d,  v27.2d,  #1",  6, 27)
   3367 GEN_TWOVEC_TEST(sri_2d_2d_33,  "sri v6.2d,  v27.2d,  #33", 6, 27)
   3368 GEN_TWOVEC_TEST(sri_2d_2d_64,  "sri v6.2d,  v27.2d,  #64", 6, 27)
   3369 GEN_TWOVEC_TEST(sri_4s_4s_1,   "sri v6.4s,  v27.4s,  #1",  6, 27)
   3370 GEN_TWOVEC_TEST(sri_4s_4s_17,  "sri v6.4s,  v27.4s,  #17", 6, 27)
   3371 GEN_TWOVEC_TEST(sri_4s_4s_32,  "sri v6.4s,  v27.4s,  #32", 6, 27)
   3372 GEN_TWOVEC_TEST(sri_2s_2s_1,   "sri v6.2s,  v27.2s,  #1",  6, 27)
   3373 GEN_TWOVEC_TEST(sri_2s_2s_17,  "sri v6.2s,  v27.2s,  #17", 6, 27)
   3374 GEN_TWOVEC_TEST(sri_2s_2s_32,  "sri v6.2s,  v27.2s,  #32", 6, 27)
   3375 GEN_TWOVEC_TEST(sri_8h_8h_1,   "sri v6.8h,  v27.8h,  #1",  6, 27)
   3376 GEN_TWOVEC_TEST(sri_8h_8h_8,   "sri v6.8h,  v27.8h,  #8",  6, 27)
   3377 GEN_TWOVEC_TEST(sri_8h_8h_16,  "sri v6.8h,  v27.8h,  #16", 6, 27)
   3378 GEN_TWOVEC_TEST(sri_4h_4h_1,   "sri v6.4h,  v27.4h,  #1",  6, 27)
   3379 GEN_TWOVEC_TEST(sri_4h_4h_8,   "sri v6.4h,  v27.4h,  #8",  6, 27)
   3380 GEN_TWOVEC_TEST(sri_4h_4h_16,  "sri v6.4h,  v27.4h,  #16", 6, 27)
   3381 GEN_TWOVEC_TEST(sri_16b_16b_1, "sri v6.16b, v27.16b, #1", 6, 27)
   3382 GEN_TWOVEC_TEST(sri_16b_16b_4, "sri v6.16b, v27.16b, #4", 6, 27)
   3383 GEN_TWOVEC_TEST(sri_16b_16b_8, "sri v6.16b, v27.16b, #8", 6, 27)
   3384 GEN_TWOVEC_TEST(sri_8b_8b_1,   "sri v6.8b,  v27.8b,  #1",  6, 27)
   3385 GEN_TWOVEC_TEST(sri_8b_8b_4,   "sri v6.8b,  v27.8b,  #4",  6, 27)
   3386 GEN_TWOVEC_TEST(sri_8b_8b_8,   "sri v6.8b,  v27.8b,  #8",  6, 27)
   3387 
   // smax / umax / smin / umin, each for arrangements 4s, 2s, 8h, 4h, 16b
   // and 8b.  GEN_BINARY_TEST takes the mnemonic followed by three
   // arrangement suffixes; no register numbers are given here, so they are
   // presumably chosen inside the macro (defined outside this excerpt).
   3388 GEN_BINARY_TEST(smax, 4s, 4s, 4s)
   3389 GEN_BINARY_TEST(smax, 2s, 2s, 2s)
   3390 GEN_BINARY_TEST(smax, 8h, 8h, 8h)
   3391 GEN_BINARY_TEST(smax, 4h, 4h, 4h)
   3392 GEN_BINARY_TEST(smax, 16b, 16b, 16b)
   3393 GEN_BINARY_TEST(smax, 8b, 8b, 8b)
   3394 GEN_BINARY_TEST(umax, 4s, 4s, 4s)
   3395 GEN_BINARY_TEST(umax, 2s, 2s, 2s)
   3396 GEN_BINARY_TEST(umax, 8h, 8h, 8h)
   3397 GEN_BINARY_TEST(umax, 4h, 4h, 4h)
   3398 GEN_BINARY_TEST(umax, 16b, 16b, 16b)
   3399 GEN_BINARY_TEST(umax, 8b, 8b, 8b)
   3400 GEN_BINARY_TEST(smin, 4s, 4s, 4s)
   3401 GEN_BINARY_TEST(smin, 2s, 2s, 2s)
   3402 GEN_BINARY_TEST(smin, 8h, 8h, 8h)
   3403 GEN_BINARY_TEST(smin, 4h, 4h, 4h)
   3404 GEN_BINARY_TEST(smin, 16b, 16b, 16b)
   3405 GEN_BINARY_TEST(smin, 8b, 8b, 8b)
   3406 GEN_BINARY_TEST(umin, 4s, 4s, 4s)
   3407 GEN_BINARY_TEST(umin, 2s, 2s, 2s)
   3408 GEN_BINARY_TEST(umin, 8h, 8h, 8h)
   3409 GEN_BINARY_TEST(umin, 4h, 4h, 4h)
   3410 GEN_BINARY_TEST(umin, 16b, 16b, 16b)
   3411 GEN_BINARY_TEST(umin, 8b, 8b, 8b)
   3412 
   // smaxp / umaxp / sminp / uminp (the "p"-suffixed counterparts of the
   // max/min mnemonics), each for arrangements 4s, 2s, 8h, 4h, 16b and 8b.
   // As with the other GEN_BINARY_TEST uses, only the mnemonic and the three
   // arrangement suffixes are supplied at the call site.
   3413 GEN_BINARY_TEST(smaxp, 4s, 4s, 4s)
   3414 GEN_BINARY_TEST(smaxp, 2s, 2s, 2s)
   3415 GEN_BINARY_TEST(smaxp, 8h, 8h, 8h)
   3416 GEN_BINARY_TEST(smaxp, 4h, 4h, 4h)
   3417 GEN_BINARY_TEST(smaxp, 16b, 16b, 16b)
   3418 GEN_BINARY_TEST(smaxp, 8b, 8b, 8b)
   3419 GEN_BINARY_TEST(umaxp, 4s, 4s, 4s)
   3420 GEN_BINARY_TEST(umaxp, 2s, 2s, 2s)
   3421 GEN_BINARY_TEST(umaxp, 8h, 8h, 8h)
   3422 GEN_BINARY_TEST(umaxp, 4h, 4h, 4h)
   3423 GEN_BINARY_TEST(umaxp, 16b, 16b, 16b)
   3424 GEN_BINARY_TEST(umaxp, 8b, 8b, 8b)
   3425 GEN_BINARY_TEST(sminp, 4s, 4s, 4s)
   3426 GEN_BINARY_TEST(sminp, 2s, 2s, 2s)
   3427 GEN_BINARY_TEST(sminp, 8h, 8h, 8h)
   3428 GEN_BINARY_TEST(sminp, 4h, 4h, 4h)
   3429 GEN_BINARY_TEST(sminp, 16b, 16b, 16b)
   3430 GEN_BINARY_TEST(sminp, 8b, 8b, 8b)
   3431 GEN_BINARY_TEST(uminp, 4s, 4s, 4s)
   3432 GEN_BINARY_TEST(uminp, 2s, 2s, 2s)
   3433 GEN_BINARY_TEST(uminp, 8h, 8h, 8h)
   3434 GEN_BINARY_TEST(uminp, 4h, 4h, 4h)
   3435 GEN_BINARY_TEST(uminp, 16b, 16b, 16b)
   3436 GEN_BINARY_TEST(uminp, 8b, 8b, 8b)
   3437 
   3438 // test_SMAXV is a handwritten function
   3439 // test_UMAXV is a handwritten function
   3440 // test_SMINV is a handwritten function
   3441 // test_UMINV is a handwritten function
   3442 
   // Long multiply (smlal/umlal/smlsl/umlsl/smull/umull and their "2" forms)
   // with a by-element second source: destination v29, first source v20, and
   // a single lane of v3.  The s-lane cases select lanes 0, 3, 1 and 2; the
   // h-lane cases select lanes 0, 7, 1 and 4.
   //
   // The lane number at the end of each identifier (s0, h7, ...) must match
   // the lane selected in the instruction text: an *_h4 case that selected
   // h[1] would merely duplicate the *_h1 case and leave lane 4 untested.
   // NOTE(review): if reference output is stored for these tests, it has to
   // be regenerated for the six *_h4 cases.
   3443 GEN_THREEVEC_TEST(smlal_2d_2s_s0,  "smlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3444 GEN_THREEVEC_TEST(smlal_2d_2s_s3,  "smlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3445 GEN_THREEVEC_TEST(smlal2_2d_4s_s1, "smlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3446 GEN_THREEVEC_TEST(smlal2_2d_4s_s2, "smlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3447 GEN_THREEVEC_TEST(smlal_4s_4h_h0,  "smlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3448 GEN_THREEVEC_TEST(smlal_4s_4h_h7,  "smlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3449 GEN_THREEVEC_TEST(smlal2_4s_8h_h1, "smlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3450 GEN_THREEVEC_TEST(smlal2_4s_8h_h4, "smlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3451 GEN_THREEVEC_TEST(umlal_2d_2s_s0,  "umlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3452 GEN_THREEVEC_TEST(umlal_2d_2s_s3,  "umlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3453 GEN_THREEVEC_TEST(umlal2_2d_4s_s1, "umlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3454 GEN_THREEVEC_TEST(umlal2_2d_4s_s2, "umlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3455 GEN_THREEVEC_TEST(umlal_4s_4h_h0,  "umlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3456 GEN_THREEVEC_TEST(umlal_4s_4h_h7,  "umlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3457 GEN_THREEVEC_TEST(umlal2_4s_8h_h1, "umlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3458 GEN_THREEVEC_TEST(umlal2_4s_8h_h4, "umlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3459 GEN_THREEVEC_TEST(smlsl_2d_2s_s0,  "smlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3460 GEN_THREEVEC_TEST(smlsl_2d_2s_s3,  "smlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3461 GEN_THREEVEC_TEST(smlsl2_2d_4s_s1, "smlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3462 GEN_THREEVEC_TEST(smlsl2_2d_4s_s2, "smlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3463 GEN_THREEVEC_TEST(smlsl_4s_4h_h0,  "smlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3464 GEN_THREEVEC_TEST(smlsl_4s_4h_h7,  "smlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3465 GEN_THREEVEC_TEST(smlsl2_4s_8h_h1, "smlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3466 GEN_THREEVEC_TEST(smlsl2_4s_8h_h4, "smlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3467 GEN_THREEVEC_TEST(umlsl_2d_2s_s0,  "umlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3468 GEN_THREEVEC_TEST(umlsl_2d_2s_s3,  "umlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3469 GEN_THREEVEC_TEST(umlsl2_2d_4s_s1, "umlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3470 GEN_THREEVEC_TEST(umlsl2_2d_4s_s2, "umlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3471 GEN_THREEVEC_TEST(umlsl_4s_4h_h0,  "umlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3472 GEN_THREEVEC_TEST(umlsl_4s_4h_h7,  "umlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3473 GEN_THREEVEC_TEST(umlsl2_4s_8h_h1, "umlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3474 GEN_THREEVEC_TEST(umlsl2_4s_8h_h4, "umlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3475 GEN_THREEVEC_TEST(smull_2d_2s_s0,  "smull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3476 GEN_THREEVEC_TEST(smull_2d_2s_s3,  "smull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3477 GEN_THREEVEC_TEST(smull2_2d_4s_s1, "smull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3478 GEN_THREEVEC_TEST(smull2_2d_4s_s2, "smull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3479 GEN_THREEVEC_TEST(smull_4s_4h_h0,  "smull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3480 GEN_THREEVEC_TEST(smull_4s_4h_h7,  "smull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3481 GEN_THREEVEC_TEST(smull2_4s_8h_h1, "smull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3482 GEN_THREEVEC_TEST(smull2_4s_8h_h4, "smull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3483 GEN_THREEVEC_TEST(umull_2d_2s_s0,  "umull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3484 GEN_THREEVEC_TEST(umull_2d_2s_s3,  "umull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3485 GEN_THREEVEC_TEST(umull2_2d_4s_s1, "umull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3486 GEN_THREEVEC_TEST(umull2_2d_4s_s2, "umull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3487 GEN_THREEVEC_TEST(umull_4s_4h_h0,  "umull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3488 GEN_THREEVEC_TEST(umull_4s_4h_h7,  "umull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3489 GEN_THREEVEC_TEST(umull2_4s_8h_h1, "umull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3490 GEN_THREEVEC_TEST(umull2_4s_8h_h4, "umull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3491 
   // Long multiply (smlal/umlal/smlsl/umlsl/smull/umull and their "2" forms)
   // with full vector operands: destination v2, sources v11 and v29.  Each
   // mnemonic is covered for 2d<-2s/4s, 4s<-4h/8h and 8h<-8b/16b.  The
   // 16b cases are split over two lines only because of their length.
   3492 GEN_THREEVEC_TEST(smlal_2d_2s_2s,  "smlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3493 GEN_THREEVEC_TEST(smlal2_2d_4s_4s, "smlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3494 GEN_THREEVEC_TEST(smlal_4s_4h_4h,  "smlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3495 GEN_THREEVEC_TEST(smlal2_4s_8h_8h, "smlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3496 GEN_THREEVEC_TEST(smlal_8h_8b_8b,  "smlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3497 GEN_THREEVEC_TEST(smlal2_8h_16b_16b,
   3498                                    "smlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3499 GEN_THREEVEC_TEST(umlal_2d_2s_2s,  "umlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3500 GEN_THREEVEC_TEST(umlal2_2d_4s_4s, "umlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3501 GEN_THREEVEC_TEST(umlal_4s_4h_4h,  "umlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3502 GEN_THREEVEC_TEST(umlal2_4s_8h_8h, "umlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3503 GEN_THREEVEC_TEST(umlal_8h_8b_8b,  "umlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3504 GEN_THREEVEC_TEST(umlal2_8h_16b_16b,
   3505                                    "umlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3506 GEN_THREEVEC_TEST(smlsl_2d_2s_2s,  "smlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3507 GEN_THREEVEC_TEST(smlsl2_2d_4s_4s, "smlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3508 GEN_THREEVEC_TEST(smlsl_4s_4h_4h,  "smlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3509 GEN_THREEVEC_TEST(smlsl2_4s_8h_8h, "smlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3510 GEN_THREEVEC_TEST(smlsl_8h_8b_8b,  "smlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3511 GEN_THREEVEC_TEST(smlsl2_8h_16b_16b,
   3512                                    "smlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3513 GEN_THREEVEC_TEST(umlsl_2d_2s_2s,  "umlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3514 GEN_THREEVEC_TEST(umlsl2_2d_4s_4s, "umlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3515 GEN_THREEVEC_TEST(umlsl_4s_4h_4h,  "umlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3516 GEN_THREEVEC_TEST(umlsl2_4s_8h_8h, "umlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3517 GEN_THREEVEC_TEST(umlsl_8h_8b_8b,  "umlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3518 GEN_THREEVEC_TEST(umlsl2_8h_16b_16b,
   3519                                    "umlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3520 GEN_THREEVEC_TEST(smull_2d_2s_2s,  "smull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3521 GEN_THREEVEC_TEST(smull2_2d_4s_4s, "smull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3522 GEN_THREEVEC_TEST(smull_4s_4h_4h,  "smull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3523 GEN_THREEVEC_TEST(smull2_4s_8h_8h, "smull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3524 GEN_THREEVEC_TEST(smull_8h_8b_8b,  "smull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3525 GEN_THREEVEC_TEST(smull2_8h_16b_16b,
   3526                                    "smull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3527 GEN_THREEVEC_TEST(umull_2d_2s_2s,  "umull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3528 GEN_THREEVEC_TEST(umull2_2d_4s_4s, "umull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3529 GEN_THREEVEC_TEST(umull_4s_4h_4h,  "umull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3530 GEN_THREEVEC_TEST(umull2_4s_8h_8h, "umull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3531 GEN_THREEVEC_TEST(umull_8h_8b_8b,  "umull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3532 GEN_THREEVEC_TEST(umull2_8h_16b_16b,
   3533                                    "umull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3534 
   // umov / smov from a lane of v10 into integer register 9 (as x9 or w9).
   // For each lane size the first and the last lane index are used
   // (d[0]/d[1], s[0]/s[3], h[0]/h[7], b[0]/b[15]).  The trailing numbers
   // repeat the integer and vector register numbers from the string.
   3535 GEN_ONEINT_ONEVEC_TEST(umov_x_d0,  "umov x9, v10.d[0]", 9, 10)
   3536 GEN_ONEINT_ONEVEC_TEST(umov_x_d1,  "umov x9, v10.d[1]", 9, 10)
   3537 GEN_ONEINT_ONEVEC_TEST(umov_w_s0,  "umov w9, v10.s[0]", 9, 10)
   3538 GEN_ONEINT_ONEVEC_TEST(umov_w_s3,  "umov w9, v10.s[3]", 9, 10)
   3539 GEN_ONEINT_ONEVEC_TEST(umov_w_h0,  "umov w9, v10.h[0]", 9, 10)
   3540 GEN_ONEINT_ONEVEC_TEST(umov_w_h7,  "umov w9, v10.h[7]", 9, 10)
   3541 GEN_ONEINT_ONEVEC_TEST(umov_w_b0,  "umov w9, v10.b[0]", 9, 10)
   3542 GEN_ONEINT_ONEVEC_TEST(umov_w_b15, "umov w9, v10.b[15]", 9, 10)
   3543 GEN_ONEINT_ONEVEC_TEST(smov_x_s0,  "smov x9, v10.s[0]", 9, 10)
   3544 GEN_ONEINT_ONEVEC_TEST(smov_x_s3,  "smov x9, v10.s[3]", 9, 10)
   3545 GEN_ONEINT_ONEVEC_TEST(smov_x_h0,  "smov x9, v10.h[0]", 9, 10)
   3546 GEN_ONEINT_ONEVEC_TEST(smov_x_h7,  "smov x9, v10.h[7]", 9, 10)
   3547 GEN_ONEINT_ONEVEC_TEST(smov_w_h0,  "smov w9, v10.h[0]", 9, 10)
   3548 GEN_ONEINT_ONEVEC_TEST(smov_w_h7,  "smov w9, v10.h[7]", 9, 10)
   3549 GEN_ONEINT_ONEVEC_TEST(smov_x_b0,  "smov x9, v10.b[0]", 9, 10)
   3550 GEN_ONEINT_ONEVEC_TEST(smov_x_b15, "smov x9, v10.b[15]", 9, 10)
   3551 GEN_ONEINT_ONEVEC_TEST(smov_w_b0,  "smov w9, v10.b[0]", 9, 10)
   3552 GEN_ONEINT_ONEVEC_TEST(smov_w_b15, "smov w9, v10.b[15]", 9, 10)
   3553 
   // Scalar sqabs / sqneg for the d, s, h and b register views, with
   // destination register 7 and source register 30.
   3554 GEN_TWOVEC_TEST(sqabs_d_d, "sqabs d7, d30", 7, 30)
   3555 GEN_TWOVEC_TEST(sqabs_s_s, "sqabs s7, s30", 7, 30)
   3556 GEN_TWOVEC_TEST(sqabs_h_h, "sqabs h7, h30", 7, 30)
   3557 GEN_TWOVEC_TEST(sqabs_b_b, "sqabs b7, b30", 7, 30)
   3558 GEN_TWOVEC_TEST(sqneg_d_d, "sqneg d7, d30", 7, 30)
   3559 GEN_TWOVEC_TEST(sqneg_s_s, "sqneg s7, s30", 7, 30)
   3560 GEN_TWOVEC_TEST(sqneg_h_h, "sqneg h7, h30", 7, 30)
   3561 GEN_TWOVEC_TEST(sqneg_b_b, "sqneg b7, b30", 7, 30)
   3562 
   // Vector sqabs / sqneg for arrangements 2d, 4s, 2s, 8h, 4h, 16b and 8b.
   // GEN_UNARY_TEST takes the mnemonic and two arrangement suffixes; no
   // register numbers are given here, so they are presumably chosen inside
   // the macro (defined outside this excerpt).
   3563 GEN_UNARY_TEST(sqabs, 2d, 2d)
   3564 GEN_UNARY_TEST(sqabs, 4s, 4s)
   3565 GEN_UNARY_TEST(sqabs, 2s, 2s)
   3566 GEN_UNARY_TEST(sqabs, 8h, 8h)
   3567 GEN_UNARY_TEST(sqabs, 4h, 4h)
   3568 GEN_UNARY_TEST(sqabs, 16b, 16b)
   3569 GEN_UNARY_TEST(sqabs, 8b, 8b)
   3570 GEN_UNARY_TEST(sqneg, 2d, 2d)
   3571 GEN_UNARY_TEST(sqneg, 4s, 4s)
   3572 GEN_UNARY_TEST(sqneg, 2s, 2s)
   3573 GEN_UNARY_TEST(sqneg, 8h, 8h)
   3574 GEN_UNARY_TEST(sqneg, 4h, 4h)
   3575 GEN_UNARY_TEST(sqneg, 16b, 16b)
   3576 GEN_UNARY_TEST(sqneg, 8b, 8b)
   3577 
   // Scalar sqadd / uqadd / sqsub / uqsub for the d, s, h and b register
   // views, using registers 1 (destination), 2 and 4 (sources).
   3578 GEN_THREEVEC_TEST(sqadd_d_d_d, "sqadd d1, d2, d4", 1, 2, 4)
   3579 GEN_THREEVEC_TEST(sqadd_s_s_s, "sqadd s1, s2, s4", 1, 2, 4)
   3580 GEN_THREEVEC_TEST(sqadd_h_h_h, "sqadd h1, h2, h4", 1, 2, 4)
   3581 GEN_THREEVEC_TEST(sqadd_b_b_b, "sqadd b1, b2, b4", 1, 2, 4)
   3582 GEN_THREEVEC_TEST(uqadd_d_d_d, "uqadd d1, d2, d4", 1, 2, 4)
   3583 GEN_THREEVEC_TEST(uqadd_s_s_s, "uqadd s1, s2, s4", 1, 2, 4)
   3584 GEN_THREEVEC_TEST(uqadd_h_h_h, "uqadd h1, h2, h4", 1, 2, 4)
   3585 GEN_THREEVEC_TEST(uqadd_b_b_b, "uqadd b1, b2, b4", 1, 2, 4)
   3586 GEN_THREEVEC_TEST(sqsub_d_d_d, "sqsub d1, d2, d4", 1, 2, 4)
   3587 GEN_THREEVEC_TEST(sqsub_s_s_s, "sqsub s1, s2, s4", 1, 2, 4)
   3588 GEN_THREEVEC_TEST(sqsub_h_h_h, "sqsub h1, h2, h4", 1, 2, 4)
   3589 GEN_THREEVEC_TEST(sqsub_b_b_b, "sqsub b1, b2, b4", 1, 2, 4)
   3590 GEN_THREEVEC_TEST(uqsub_d_d_d, "uqsub d1, d2, d4", 1, 2, 4)
   3591 GEN_THREEVEC_TEST(uqsub_s_s_s, "uqsub s1, s2, s4", 1, 2, 4)
   3592 GEN_THREEVEC_TEST(uqsub_h_h_h, "uqsub h1, h2, h4", 1, 2, 4)
   3593 GEN_THREEVEC_TEST(uqsub_b_b_b, "uqsub b1, b2, b4", 1, 2, 4)
   3594 
   // Vector sqadd / uqadd / sqsub / uqsub on v1, v2, v4, for arrangements
   // 2d, 4s, 2s, 8h, 4h, 16b and 8b (same arrangement on all three operands).
   3595 GEN_THREEVEC_TEST(sqadd_2d_2d_2d,    "sqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3596 GEN_THREEVEC_TEST(sqadd_4s_4s_4s,    "sqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3597 GEN_THREEVEC_TEST(sqadd_2s_2s_2s,    "sqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3598 GEN_THREEVEC_TEST(sqadd_8h_8h_8h,    "sqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3599 GEN_THREEVEC_TEST(sqadd_4h_4h_4h,    "sqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3600 GEN_THREEVEC_TEST(sqadd_16b_16b_16b, "sqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3601 GEN_THREEVEC_TEST(sqadd_8b_8b_8b,    "sqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3602 GEN_THREEVEC_TEST(uqadd_2d_2d_2d,    "uqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3603 GEN_THREEVEC_TEST(uqadd_4s_4s_4s,    "uqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3604 GEN_THREEVEC_TEST(uqadd_2s_2s_2s,    "uqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3605 GEN_THREEVEC_TEST(uqadd_8h_8h_8h,    "uqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3606 GEN_THREEVEC_TEST(uqadd_4h_4h_4h,    "uqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3607 GEN_THREEVEC_TEST(uqadd_16b_16b_16b, "uqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3608 GEN_THREEVEC_TEST(uqadd_8b_8b_8b,    "uqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3609 GEN_THREEVEC_TEST(sqsub_2d_2d_2d,    "sqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3610 GEN_THREEVEC_TEST(sqsub_4s_4s_4s,    "sqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3611 GEN_THREEVEC_TEST(sqsub_2s_2s_2s,    "sqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3612 GEN_THREEVEC_TEST(sqsub_8h_8h_8h,    "sqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3613 GEN_THREEVEC_TEST(sqsub_4h_4h_4h,    "sqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3614 GEN_THREEVEC_TEST(sqsub_16b_16b_16b, "sqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3615 GEN_THREEVEC_TEST(sqsub_8b_8b_8b,    "sqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3616 GEN_THREEVEC_TEST(uqsub_2d_2d_2d,    "uqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3617 GEN_THREEVEC_TEST(uqsub_4s_4s_4s,    "uqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3618 GEN_THREEVEC_TEST(uqsub_2s_2s_2s,    "uqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3619 GEN_THREEVEC_TEST(uqsub_8h_8h_8h,    "uqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3620 GEN_THREEVEC_TEST(uqsub_4h_4h_4h,    "uqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3621 GEN_THREEVEC_TEST(uqsub_16b_16b_16b, "uqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3622 GEN_THREEVEC_TEST(uqsub_8b_8b_8b,    "uqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3623 
   // Scalar sqdmlal / sqdmlsl / sqdmull with a by-element second source:
   // destination register 31, first source register 30.  The s-lane cases
   // take lanes 0 and 3 of v29; the h-lane cases take lanes 1 and 5 of v13.
   // NOTE(review): the h-lane cases use v13 rather than v29, presumably
   // because the h-lane by-element form can only name v0..v15 -- confirm
   // against the ISA reference.
   3624 GEN_THREEVEC_TEST(sqdmlal_d_s_s0, "sqdmlal d31, s30, v29.s[0]", 31,30,29)
   3625 GEN_THREEVEC_TEST(sqdmlal_d_s_s3, "sqdmlal d31, s30, v29.s[3]", 31,30,29)
   3626 GEN_THREEVEC_TEST(sqdmlal_s_h_h1, "sqdmlal s31, h30, v13.h[1]", 31,30,13)
   3627 GEN_THREEVEC_TEST(sqdmlal_s_h_h5, "sqdmlal s31, h30, v13.h[5]", 31,30,13)
   3628 GEN_THREEVEC_TEST(sqdmlsl_d_s_s0, "sqdmlsl d31, s30, v29.s[0]", 31,30,29)
   3629 GEN_THREEVEC_TEST(sqdmlsl_d_s_s3, "sqdmlsl d31, s30, v29.s[3]", 31,30,29)
   3630 GEN_THREEVEC_TEST(sqdmlsl_s_h_h1, "sqdmlsl s31, h30, v13.h[1]", 31,30,13)
   3631 GEN_THREEVEC_TEST(sqdmlsl_s_h_h5, "sqdmlsl s31, h30, v13.h[5]", 31,30,13)
   3632 GEN_THREEVEC_TEST(sqdmull_d_s_s0, "sqdmull d31, s30, v29.s[0]", 31,30,29)
   3633 GEN_THREEVEC_TEST(sqdmull_d_s_s3, "sqdmull d31, s30, v29.s[3]", 31,30,29)
   3634 GEN_THREEVEC_TEST(sqdmull_s_h_h1, "sqdmull s31, h30, v13.h[1]", 31,30,13)
   3635 GEN_THREEVEC_TEST(sqdmull_s_h_h5, "sqdmull s31, h30, v13.h[5]", 31,30,13)
   3636 
   // Vector sqdmlal / sqdmlsl / sqdmull (and their "2" forms) with a
   // by-element second source: destination v29, first source v20, and a
   // single lane of v3.  The s-lane cases select lanes 0, 3, 1 and 2; the
   // h-lane cases select lanes 0, 7, 1 and 4.
   //
   // The lane number at the end of each identifier must match the lane
   // selected in the instruction text: an *_h4 case that selected h[1] would
   // merely duplicate the *_h1 case and leave lane 4 untested.
   // NOTE(review): if reference output is stored for these tests, it has to
   // be regenerated for the three *_h4 cases.
   3637 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s0, "sqdmlal  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3638 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s3, "sqdmlal  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3639 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s1,"sqdmlal2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3640 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s2,"sqdmlal2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3641 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h0, "sqdmlal  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3642 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h7, "sqdmlal  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3643 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h1,"sqdmlal2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3644 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h4,"sqdmlal2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3645 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s0, "sqdmlsl  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3646 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s3, "sqdmlsl  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3647 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s1,"sqdmlsl2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3648 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s2,"sqdmlsl2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3649 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h0, "sqdmlsl  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3650 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h7, "sqdmlsl  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3651 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h1,"sqdmlsl2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3652 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h4,"sqdmlsl2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3653 GEN_THREEVEC_TEST(sqdmull_2d_2s_s0, "sqdmull  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3654 GEN_THREEVEC_TEST(sqdmull_2d_2s_s3, "sqdmull  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3655 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s1,"sqdmull2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3656 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s2,"sqdmull2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3657 GEN_THREEVEC_TEST(sqdmull_4s_4h_h0, "sqdmull  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3658 GEN_THREEVEC_TEST(sqdmull_4s_4h_h7, "sqdmull  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3659 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h1,"sqdmull2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3660 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h4,"sqdmull2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3661 
   // Scalar sqdmlal / sqdmlsl / sqdmull with plain scalar sources, in the
   // d<-s,s and s<-h,h forms, on registers 0 (destination), 8 and 16.
   3662 GEN_THREEVEC_TEST(sqdmlal_d_s_s, "sqdmlal d0, s8, s16", 0, 8, 16)
   3663 GEN_THREEVEC_TEST(sqdmlal_s_h_h, "sqdmlal s0, h8, h16", 0, 8, 16)
   3664 GEN_THREEVEC_TEST(sqdmlsl_d_s_s, "sqdmlsl d0, s8, s16", 0, 8, 16)
   3665 GEN_THREEVEC_TEST(sqdmlsl_s_h_h, "sqdmlsl s0, h8, h16", 0, 8, 16)
   3666 GEN_THREEVEC_TEST(sqdmull_d_s_s, "sqdmull d0, s8, s16", 0, 8, 16)
   3667 GEN_THREEVEC_TEST(sqdmull_s_h_h, "sqdmull s0, h8, h16", 0, 8, 16)
   3668 
   // Vector sqdmlal / sqdmlsl / sqdmull (and their "2" forms) with full
   // vector operands on v2, v11, v29, covering 2d<-2s/4s and 4s<-4h/8h.
   3669 GEN_THREEVEC_TEST(sqdmlal_2d_2s_2s,  "sqdmlal  v2.2d, v11.2s, v29.2s", 2,11,29)
   3670 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_4s, "sqdmlal2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3671 GEN_THREEVEC_TEST(sqdmlal_4s_4h_4h,  "sqdmlal  v2.4s, v11.4h, v29.4h", 2,11,29)
   3672 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_8h, "sqdmlal2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3673 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_2s,  "sqdmlsl  v2.2d, v11.2s, v29.2s", 2,11,29)
   3674 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_4s, "sqdmlsl2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3675 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_4h,  "sqdmlsl  v2.4s, v11.4h, v29.4h", 2,11,29)
   3676 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_8h, "sqdmlsl2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3677 GEN_THREEVEC_TEST(sqdmull_2d_2s_2s,  "sqdmull  v2.2d, v11.2s, v29.2s", 2,11,29)
   3678 GEN_THREEVEC_TEST(sqdmull2_2d_4s_4s, "sqdmull2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3679 GEN_THREEVEC_TEST(sqdmull_4s_4h_4h,  "sqdmull  v2.4s, v11.4h, v29.4h", 2,11,29)
   3680 GEN_THREEVEC_TEST(sqdmull2_4s_8h_8h, "sqdmull2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3681 
   // Scalar sqdmulh / sqrdmulh with a by-element second source, on registers
   // 0 (destination), 1 and 2.  s-lane cases use lanes 1 and 3 of v2; h-lane
   // cases use lanes 2 and 7.
   3682 GEN_THREEVEC_TEST(sqdmulh_s_s_s1, "sqdmulh s0, s1, v2.s[1]", 0,1,2)
   3683 GEN_THREEVEC_TEST(sqdmulh_s_s_s3, "sqdmulh s0, s1, v2.s[3]", 0,1,2)
   3684 GEN_THREEVEC_TEST(sqdmulh_h_h_h2, "sqdmulh h0, h1, v2.h[2]", 0,1,2)
   3685 GEN_THREEVEC_TEST(sqdmulh_h_h_h7, "sqdmulh h0, h1, v2.h[7]", 0,1,2)
   3686 GEN_THREEVEC_TEST(sqrdmulh_s_s_s1, "sqrdmulh s0, s1, v2.s[1]", 0,1,2)
   3687 GEN_THREEVEC_TEST(sqrdmulh_s_s_s3, "sqrdmulh s0, s1, v2.s[3]", 0,1,2)
   3688 GEN_THREEVEC_TEST(sqrdmulh_h_h_h2, "sqrdmulh h0, h1, v2.h[2]", 0,1,2)
   3689 GEN_THREEVEC_TEST(sqrdmulh_h_h_h7, "sqrdmulh h0, h1, v2.h[7]", 0,1,2)
   3690 
   // Vector sqdmulh / sqrdmulh with a by-element second source, on v0, v1
   // and a lane of v2.  Arrangements 4s and 2s use s lanes 1 and 3;
   // arrangements 8h and 4h use h lanes 2 and 7.
   3691 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s1, "sqdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
   3692 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s3, "sqdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
   3693 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s1, "sqdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
   3694 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s3, "sqdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
   3695 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h2, "sqdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
   3696 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h7, "sqdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
   3697 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h2, "sqdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
   3698 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h7, "sqdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
   3699 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s1, "sqrdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
   3700 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s3, "sqrdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
   3701 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s1, "sqrdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
   3702 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s3, "sqrdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
   3703 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h2, "sqrdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
   3704 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h7, "sqrdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
   3705 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h2, "sqrdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
   3706 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h7, "sqrdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
   3707 
   // Scalar sqdmulh / sqrdmulh with plain scalar sources, for the s and h
   // register views, on registers 1 (destination), 2 and 4.
   3708 GEN_THREEVEC_TEST(sqdmulh_s_s_s,  "sqdmulh  s1, s2, s4", 1, 2, 4)
   3709 GEN_THREEVEC_TEST(sqdmulh_h_h_h,  "sqdmulh  h1, h2, h4", 1, 2, 4)
   3710 GEN_THREEVEC_TEST(sqrdmulh_s_s_s, "sqrdmulh s1, s2, s4", 1, 2, 4)
   3711 GEN_THREEVEC_TEST(sqrdmulh_h_h_h, "sqrdmulh h1, h2, h4", 1, 2, 4)
   3712 
   // Vector sqdmulh / sqrdmulh with full vector operands on v1, v2, v4, for
   // arrangements 4s, 2s, 8h and 4h.
   3713 GEN_THREEVEC_TEST(sqdmulh_4s_4s_4s, "sqdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3714 GEN_THREEVEC_TEST(sqdmulh_2s_2s_2s, "sqdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3715 GEN_THREEVEC_TEST(sqdmulh_8h_8h_8h, "sqdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3716 GEN_THREEVEC_TEST(sqdmulh_4h_4h_4h, "sqdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3717 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_4s, "sqrdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3718 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_2s, "sqrdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3719 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_8h, "sqrdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3720 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_4h, "sqrdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3721 
   // Scalar sqshl / uqshl / sqrshl / uqrshl in their three-register form
   // (the shift operand is a register, not an immediate), for the d, s, h
   // and b register views, on registers 1 (destination), 2 and 4.
   3722 GEN_THREEVEC_TEST(sqshl_d_d_d, "sqshl d1, d2, d4", 1, 2, 4)
   3723 GEN_THREEVEC_TEST(sqshl_s_s_s, "sqshl s1, s2, s4", 1, 2, 4)
   3724 GEN_THREEVEC_TEST(sqshl_h_h_h, "sqshl h1, h2, h4", 1, 2, 4)
   3725 GEN_THREEVEC_TEST(sqshl_b_b_b, "sqshl b1, b2, b4", 1, 2, 4)
   3726 GEN_THREEVEC_TEST(uqshl_d_d_d, "uqshl d1, d2, d4", 1, 2, 4)
   3727 GEN_THREEVEC_TEST(uqshl_s_s_s, "uqshl s1, s2, s4", 1, 2, 4)
   3728 GEN_THREEVEC_TEST(uqshl_h_h_h, "uqshl h1, h2, h4", 1, 2, 4)
   3729 GEN_THREEVEC_TEST(uqshl_b_b_b, "uqshl b1, b2, b4", 1, 2, 4)
   3730 GEN_THREEVEC_TEST(sqrshl_d_d_d, "sqrshl d1, d2, d4", 1, 2, 4)
   3731 GEN_THREEVEC_TEST(sqrshl_s_s_s, "sqrshl s1, s2, s4", 1, 2, 4)
   3732 GEN_THREEVEC_TEST(sqrshl_h_h_h, "sqrshl h1, h2, h4", 1, 2, 4)
   3733 GEN_THREEVEC_TEST(sqrshl_b_b_b, "sqrshl b1, b2, b4", 1, 2, 4)
   3734 GEN_THREEVEC_TEST(uqrshl_d_d_d, "uqrshl d1, d2, d4", 1, 2, 4)
   3735 GEN_THREEVEC_TEST(uqrshl_s_s_s, "uqrshl s1, s2, s4", 1, 2, 4)
   3736 GEN_THREEVEC_TEST(uqrshl_h_h_h, "uqrshl h1, h2, h4", 1, 2, 4)
   3737 GEN_THREEVEC_TEST(uqrshl_b_b_b, "uqrshl b1, b2, b4", 1, 2, 4)
   3738 
   /* Vector sqshl / uqshl / sqrshl / uqrshl, register-shift form (v1, v2, v4).
      Each mnemonic is instantiated for all seven arrangements:
      2d, 4s, 2s, 8h, 4h, 16b, 8b. */
   3739 GEN_THREEVEC_TEST(sqshl_2d_2d_2d,    "sqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3740 GEN_THREEVEC_TEST(sqshl_4s_4s_4s,    "sqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3741 GEN_THREEVEC_TEST(sqshl_2s_2s_2s,    "sqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3742 GEN_THREEVEC_TEST(sqshl_8h_8h_8h,    "sqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3743 GEN_THREEVEC_TEST(sqshl_4h_4h_4h,    "sqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3744 GEN_THREEVEC_TEST(sqshl_16b_16b_16b, "sqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3745 GEN_THREEVEC_TEST(sqshl_8b_8b_8b,    "sqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3746 GEN_THREEVEC_TEST(uqshl_2d_2d_2d,    "uqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3747 GEN_THREEVEC_TEST(uqshl_4s_4s_4s,    "uqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3748 GEN_THREEVEC_TEST(uqshl_2s_2s_2s,    "uqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3749 GEN_THREEVEC_TEST(uqshl_8h_8h_8h,    "uqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3750 GEN_THREEVEC_TEST(uqshl_4h_4h_4h,    "uqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3751 GEN_THREEVEC_TEST(uqshl_16b_16b_16b, "uqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3752 GEN_THREEVEC_TEST(uqshl_8b_8b_8b,    "uqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3753 GEN_THREEVEC_TEST(sqrshl_2d_2d_2d,    "sqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3754 GEN_THREEVEC_TEST(sqrshl_4s_4s_4s,    "sqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3755 GEN_THREEVEC_TEST(sqrshl_2s_2s_2s,    "sqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3756 GEN_THREEVEC_TEST(sqrshl_8h_8h_8h,    "sqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3757 GEN_THREEVEC_TEST(sqrshl_4h_4h_4h,    "sqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3758 GEN_THREEVEC_TEST(sqrshl_16b_16b_16b, "sqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3759 GEN_THREEVEC_TEST(sqrshl_8b_8b_8b,    "sqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3760 GEN_THREEVEC_TEST(uqrshl_2d_2d_2d,    "uqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3761 GEN_THREEVEC_TEST(uqrshl_4s_4s_4s,    "uqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3762 GEN_THREEVEC_TEST(uqrshl_2s_2s_2s,    "uqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3763 GEN_THREEVEC_TEST(uqrshl_8h_8h_8h,    "uqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3764 GEN_THREEVEC_TEST(uqrshl_4h_4h_4h,    "uqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3765 GEN_THREEVEC_TEST(uqrshl_16b_16b_16b, "uqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3766 GEN_THREEVEC_TEST(uqrshl_8b_8b_8b,    "uqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3767 
   /* Scalar narrowing right shifts by immediate, in this order:
      sqrshrn, uqrshrn, sqshrn, uqshrn, sqrshrun, sqshrun.
      Each mnemonic is instantiated for the operand pairs (s,d), (h,s) and
      (b,h), with three immediates per pair: #1/#17/#32, #1/#9/#16 and
      #1/#4/#8 respectively.  GEN_TWOVEC_TEST (defined outside this excerpt)
      receives the test name, the instruction text, and the two register
      numbers (2 and 5) used in that text. */
   3768 GEN_TWOVEC_TEST(sqrshrn_s_d_1,  "sqrshrn s2, d5, #1",  2, 5)
   3769 GEN_TWOVEC_TEST(sqrshrn_s_d_17, "sqrshrn s2, d5, #17", 2, 5)
   3770 GEN_TWOVEC_TEST(sqrshrn_s_d_32, "sqrshrn s2, d5, #32", 2, 5)
   3771 GEN_TWOVEC_TEST(sqrshrn_h_s_1,  "sqrshrn h2, s5, #1",  2, 5)
   3772 GEN_TWOVEC_TEST(sqrshrn_h_s_9,  "sqrshrn h2, s5, #9",  2, 5)
   3773 GEN_TWOVEC_TEST(sqrshrn_h_s_16, "sqrshrn h2, s5, #16", 2, 5)
   3774 GEN_TWOVEC_TEST(sqrshrn_b_h_1,  "sqrshrn b2, h5, #1",  2, 5)
   3775 GEN_TWOVEC_TEST(sqrshrn_b_h_4,  "sqrshrn b2, h5, #4",  2, 5)
   3776 GEN_TWOVEC_TEST(sqrshrn_b_h_8,  "sqrshrn b2, h5, #8",  2, 5)
   3777 GEN_TWOVEC_TEST(uqrshrn_s_d_1,  "uqrshrn s2, d5, #1",  2, 5)
   3778 GEN_TWOVEC_TEST(uqrshrn_s_d_17, "uqrshrn s2, d5, #17", 2, 5)
   3779 GEN_TWOVEC_TEST(uqrshrn_s_d_32, "uqrshrn s2, d5, #32", 2, 5)
   3780 GEN_TWOVEC_TEST(uqrshrn_h_s_1,  "uqrshrn h2, s5, #1",  2, 5)
   3781 GEN_TWOVEC_TEST(uqrshrn_h_s_9,  "uqrshrn h2, s5, #9",  2, 5)
   3782 GEN_TWOVEC_TEST(uqrshrn_h_s_16, "uqrshrn h2, s5, #16", 2, 5)
   3783 GEN_TWOVEC_TEST(uqrshrn_b_h_1,  "uqrshrn b2, h5, #1",  2, 5)
   3784 GEN_TWOVEC_TEST(uqrshrn_b_h_4,  "uqrshrn b2, h5, #4",  2, 5)
   3785 GEN_TWOVEC_TEST(uqrshrn_b_h_8,  "uqrshrn b2, h5, #8",  2, 5)
   3786 GEN_TWOVEC_TEST(sqshrn_s_d_1,  "sqshrn s2, d5, #1",  2, 5)
   3787 GEN_TWOVEC_TEST(sqshrn_s_d_17, "sqshrn s2, d5, #17", 2, 5)
   3788 GEN_TWOVEC_TEST(sqshrn_s_d_32, "sqshrn s2, d5, #32", 2, 5)
   3789 GEN_TWOVEC_TEST(sqshrn_h_s_1,  "sqshrn h2, s5, #1",  2, 5)
   3790 GEN_TWOVEC_TEST(sqshrn_h_s_9,  "sqshrn h2, s5, #9",  2, 5)
   3791 GEN_TWOVEC_TEST(sqshrn_h_s_16, "sqshrn h2, s5, #16", 2, 5)
   3792 GEN_TWOVEC_TEST(sqshrn_b_h_1,  "sqshrn b2, h5, #1",  2, 5)
   3793 GEN_TWOVEC_TEST(sqshrn_b_h_4,  "sqshrn b2, h5, #4",  2, 5)
   3794 GEN_TWOVEC_TEST(sqshrn_b_h_8,  "sqshrn b2, h5, #8",  2, 5)
   3795 GEN_TWOVEC_TEST(uqshrn_s_d_1,  "uqshrn s2, d5, #1",  2, 5)
   3796 GEN_TWOVEC_TEST(uqshrn_s_d_17, "uqshrn s2, d5, #17", 2, 5)
   3797 GEN_TWOVEC_TEST(uqshrn_s_d_32, "uqshrn s2, d5, #32", 2, 5)
   3798 GEN_TWOVEC_TEST(uqshrn_h_s_1,  "uqshrn h2, s5, #1",  2, 5)
   3799 GEN_TWOVEC_TEST(uqshrn_h_s_9,  "uqshrn h2, s5, #9",  2, 5)
   3800 GEN_TWOVEC_TEST(uqshrn_h_s_16, "uqshrn h2, s5, #16", 2, 5)
   3801 GEN_TWOVEC_TEST(uqshrn_b_h_1,  "uqshrn b2, h5, #1",  2, 5)
   3802 GEN_TWOVEC_TEST(uqshrn_b_h_4,  "uqshrn b2, h5, #4",  2, 5)
   3803 GEN_TWOVEC_TEST(uqshrn_b_h_8,  "uqshrn b2, h5, #8",  2, 5)
   3804 GEN_TWOVEC_TEST(sqrshrun_s_d_1,  "sqrshrun s2, d5, #1",  2, 5)
   3805 GEN_TWOVEC_TEST(sqrshrun_s_d_17, "sqrshrun s2, d5, #17", 2, 5)
   3806 GEN_TWOVEC_TEST(sqrshrun_s_d_32, "sqrshrun s2, d5, #32", 2, 5)
   3807 GEN_TWOVEC_TEST(sqrshrun_h_s_1,  "sqrshrun h2, s5, #1",  2, 5)
   3808 GEN_TWOVEC_TEST(sqrshrun_h_s_9,  "sqrshrun h2, s5, #9",  2, 5)
   3809 GEN_TWOVEC_TEST(sqrshrun_h_s_16, "sqrshrun h2, s5, #16", 2, 5)
   3810 GEN_TWOVEC_TEST(sqrshrun_b_h_1,  "sqrshrun b2, h5, #1",  2, 5)
   3811 GEN_TWOVEC_TEST(sqrshrun_b_h_4,  "sqrshrun b2, h5, #4",  2, 5)
   3812 GEN_TWOVEC_TEST(sqrshrun_b_h_8,  "sqrshrun b2, h5, #8",  2, 5)
   3813 GEN_TWOVEC_TEST(sqshrun_s_d_1,  "sqshrun s2, d5, #1",  2, 5)
   3814 GEN_TWOVEC_TEST(sqshrun_s_d_17, "sqshrun s2, d5, #17", 2, 5)
   3815 GEN_TWOVEC_TEST(sqshrun_s_d_32, "sqshrun s2, d5, #32", 2, 5)
   3816 GEN_TWOVEC_TEST(sqshrun_h_s_1,  "sqshrun h2, s5, #1",  2, 5)
   3817 GEN_TWOVEC_TEST(sqshrun_h_s_9,  "sqshrun h2, s5, #9",  2, 5)
   3818 GEN_TWOVEC_TEST(sqshrun_h_s_16, "sqshrun h2, s5, #16", 2, 5)
   3819 GEN_TWOVEC_TEST(sqshrun_b_h_1,  "sqshrun b2, h5, #1",  2, 5)
   3820 GEN_TWOVEC_TEST(sqshrun_b_h_4,  "sqshrun b2, h5, #4",  2, 5)
   3821 GEN_TWOVEC_TEST(sqshrun_b_h_8,  "sqshrun b2, h5, #8",  2, 5)
   3822 
   /* Vector narrowing right shifts by immediate, in this order:
      sqrshrn, uqrshrn, sqshrn, uqshrn, sqrshrun, sqshrun -- each in both its
      plain and its "2"-suffixed form.  Per mnemonic the arrangement pairs are
      2s/4s from 2d, 4h/8h from 4s and 8b/16b from 8h, each with three
      immediates (#1/#17/#32, #1/#9/#16, #1/#4/#8).  All tests use registers
      v4 and v29. */
   3823 GEN_TWOVEC_TEST(sqrshrn_2s_2d_1,   "sqrshrn  v4.2s,  v29.2d, #1",  4, 29)
   3824 GEN_TWOVEC_TEST(sqrshrn_2s_2d_17,  "sqrshrn  v4.2s,  v29.2d, #17", 4, 29)
   3825 GEN_TWOVEC_TEST(sqrshrn_2s_2d_32,  "sqrshrn  v4.2s,  v29.2d, #32", 4, 29)
   3826 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_1,  "sqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3827 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_17, "sqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3828 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_32, "sqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3829 GEN_TWOVEC_TEST(sqrshrn_4h_4s_1,   "sqrshrn  v4.4h,  v29.4s, #1",  4, 29)
   3830 GEN_TWOVEC_TEST(sqrshrn_4h_4s_9,   "sqrshrn  v4.4h,  v29.4s, #9",  4, 29)
   3831 GEN_TWOVEC_TEST(sqrshrn_4h_4s_16,  "sqrshrn  v4.4h,  v29.4s, #16", 4, 29)
   3832 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_1,  "sqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3833 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_9,  "sqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3834 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_16, "sqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3835 GEN_TWOVEC_TEST(sqrshrn_8b_8h_1,   "sqrshrn  v4.8b,  v29.8h, #1",  4, 29)
   3836 GEN_TWOVEC_TEST(sqrshrn_8b_8h_4,   "sqrshrn  v4.8b,  v29.8h, #4",  4, 29)
   3837 GEN_TWOVEC_TEST(sqrshrn_8b_8h_8,   "sqrshrn  v4.8b,  v29.8h, #8",  4, 29)
   3838 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_1, "sqrshrn2 v4.16b, v29.8h, #1",  4, 29)
   3839 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_4, "sqrshrn2 v4.16b, v29.8h, #4",  4, 29)
   3840 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_8, "sqrshrn2 v4.16b, v29.8h, #8",  4, 29)
   3841 GEN_TWOVEC_TEST(uqrshrn_2s_2d_1,   "uqrshrn  v4.2s,  v29.2d, #1",  4, 29)
   3842 GEN_TWOVEC_TEST(uqrshrn_2s_2d_17,  "uqrshrn  v4.2s,  v29.2d, #17", 4, 29)
   3843 GEN_TWOVEC_TEST(uqrshrn_2s_2d_32,  "uqrshrn  v4.2s,  v29.2d, #32", 4, 29)
   3844 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_1,  "uqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3845 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_17, "uqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3846 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_32, "uqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3847 GEN_TWOVEC_TEST(uqrshrn_4h_4s_1,   "uqrshrn  v4.4h,  v29.4s, #1",  4, 29)
   3848 GEN_TWOVEC_TEST(uqrshrn_4h_4s_9,   "uqrshrn  v4.4h,  v29.4s, #9",  4, 29)
   3849 GEN_TWOVEC_TEST(uqrshrn_4h_4s_16,  "uqrshrn  v4.4h,  v29.4s, #16", 4, 29)
   3850 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_1,  "uqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3851 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_9,  "uqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3852 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_16, "uqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3853 GEN_TWOVEC_TEST(uqrshrn_8b_8h_1,   "uqrshrn  v4.8b,  v29.8h, #1",  4, 29)
   3854 GEN_TWOVEC_TEST(uqrshrn_8b_8h_4,   "uqrshrn  v4.8b,  v29.8h, #4",  4, 29)
   3855 GEN_TWOVEC_TEST(uqrshrn_8b_8h_8,   "uqrshrn  v4.8b,  v29.8h, #8",  4, 29)
   3856 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_1, "uqrshrn2 v4.16b, v29.8h, #1",  4, 29)
   3857 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_4, "uqrshrn2 v4.16b, v29.8h, #4",  4, 29)
   3858 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_8, "uqrshrn2 v4.16b, v29.8h, #8",  4, 29)
   3859 GEN_TWOVEC_TEST(sqshrn_2s_2d_1,   "sqshrn  v4.2s,  v29.2d, #1",  4, 29)
   3860 GEN_TWOVEC_TEST(sqshrn_2s_2d_17,  "sqshrn  v4.2s,  v29.2d, #17", 4, 29)
   3861 GEN_TWOVEC_TEST(sqshrn_2s_2d_32,  "sqshrn  v4.2s,  v29.2d, #32", 4, 29)
   3862 GEN_TWOVEC_TEST(sqshrn2_4s_2d_1,  "sqshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3863 GEN_TWOVEC_TEST(sqshrn2_4s_2d_17, "sqshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3864 GEN_TWOVEC_TEST(sqshrn2_4s_2d_32, "sqshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3865 GEN_TWOVEC_TEST(sqshrn_4h_4s_1,   "sqshrn  v4.4h,  v29.4s, #1",  4, 29)
   3866 GEN_TWOVEC_TEST(sqshrn_4h_4s_9,   "sqshrn  v4.4h,  v29.4s, #9",  4, 29)
   3867 GEN_TWOVEC_TEST(sqshrn_4h_4s_16,  "sqshrn  v4.4h,  v29.4s, #16", 4, 29)
   3868 GEN_TWOVEC_TEST(sqshrn2_8h_4s_1,  "sqshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3869 GEN_TWOVEC_TEST(sqshrn2_8h_4s_9,  "sqshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3870 GEN_TWOVEC_TEST(sqshrn2_8h_4s_16, "sqshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3871 GEN_TWOVEC_TEST(sqshrn_8b_8h_1,   "sqshrn  v4.8b,  v29.8h, #1",  4, 29)
   3872 GEN_TWOVEC_TEST(sqshrn_8b_8h_4,   "sqshrn  v4.8b,  v29.8h, #4",  4, 29)
   3873 GEN_TWOVEC_TEST(sqshrn_8b_8h_8,   "sqshrn  v4.8b,  v29.8h, #8",  4, 29)
   3874 GEN_TWOVEC_TEST(sqshrn2_16b_8h_1, "sqshrn2 v4.16b, v29.8h, #1",  4, 29)
   3875 GEN_TWOVEC_TEST(sqshrn2_16b_8h_4, "sqshrn2 v4.16b, v29.8h, #4",  4, 29)
   3876 GEN_TWOVEC_TEST(sqshrn2_16b_8h_8, "sqshrn2 v4.16b, v29.8h, #8",  4, 29)
   3877 GEN_TWOVEC_TEST(uqshrn_2s_2d_1,   "uqshrn  v4.2s,  v29.2d, #1",  4, 29)
   3878 GEN_TWOVEC_TEST(uqshrn_2s_2d_17,  "uqshrn  v4.2s,  v29.2d, #17", 4, 29)
   3879 GEN_TWOVEC_TEST(uqshrn_2s_2d_32,  "uqshrn  v4.2s,  v29.2d, #32", 4, 29)
   3880 GEN_TWOVEC_TEST(uqshrn2_4s_2d_1,  "uqshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3881 GEN_TWOVEC_TEST(uqshrn2_4s_2d_17, "uqshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3882 GEN_TWOVEC_TEST(uqshrn2_4s_2d_32, "uqshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3883 GEN_TWOVEC_TEST(uqshrn_4h_4s_1,   "uqshrn  v4.4h,  v29.4s, #1",  4, 29)
   3884 GEN_TWOVEC_TEST(uqshrn_4h_4s_9,   "uqshrn  v4.4h,  v29.4s, #9",  4, 29)
   3885 GEN_TWOVEC_TEST(uqshrn_4h_4s_16,  "uqshrn  v4.4h,  v29.4s, #16", 4, 29)
   3886 GEN_TWOVEC_TEST(uqshrn2_8h_4s_1,  "uqshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3887 GEN_TWOVEC_TEST(uqshrn2_8h_4s_9,  "uqshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3888 GEN_TWOVEC_TEST(uqshrn2_8h_4s_16, "uqshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3889 GEN_TWOVEC_TEST(uqshrn_8b_8h_1,   "uqshrn  v4.8b,  v29.8h, #1",  4, 29)
   3890 GEN_TWOVEC_TEST(uqshrn_8b_8h_4,   "uqshrn  v4.8b,  v29.8h, #4",  4, 29)
   3891 GEN_TWOVEC_TEST(uqshrn_8b_8h_8,   "uqshrn  v4.8b,  v29.8h, #8",  4, 29)
   3892 GEN_TWOVEC_TEST(uqshrn2_16b_8h_1, "uqshrn2 v4.16b, v29.8h, #1",  4, 29)
   3893 GEN_TWOVEC_TEST(uqshrn2_16b_8h_4, "uqshrn2 v4.16b, v29.8h, #4",  4, 29)
   3894 GEN_TWOVEC_TEST(uqshrn2_16b_8h_8, "uqshrn2 v4.16b, v29.8h, #8",  4, 29)
   3895 GEN_TWOVEC_TEST(sqrshrun_2s_2d_1,   "sqrshrun  v4.2s,  v29.2d, #1",  4, 29)
   3896 GEN_TWOVEC_TEST(sqrshrun_2s_2d_17,  "sqrshrun  v4.2s,  v29.2d, #17", 4, 29)
   3897 GEN_TWOVEC_TEST(sqrshrun_2s_2d_32,  "sqrshrun  v4.2s,  v29.2d, #32", 4, 29)
   3898 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_1,  "sqrshrun2 v4.4s,  v29.2d, #1",  4, 29)
   3899 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_17, "sqrshrun2 v4.4s,  v29.2d, #17", 4, 29)
   3900 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_32, "sqrshrun2 v4.4s,  v29.2d, #32", 4, 29)
   3901 GEN_TWOVEC_TEST(sqrshrun_4h_4s_1,   "sqrshrun  v4.4h,  v29.4s, #1",  4, 29)
   3902 GEN_TWOVEC_TEST(sqrshrun_4h_4s_9,   "sqrshrun  v4.4h,  v29.4s, #9",  4, 29)
   3903 GEN_TWOVEC_TEST(sqrshrun_4h_4s_16,  "sqrshrun  v4.4h,  v29.4s, #16", 4, 29)
   3904 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_1,  "sqrshrun2 v4.8h,  v29.4s, #1",  4, 29)
   3905 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_9,  "sqrshrun2 v4.8h,  v29.4s, #9",  4, 29)
   3906 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_16, "sqrshrun2 v4.8h,  v29.4s, #16", 4, 29)
   3907 GEN_TWOVEC_TEST(sqrshrun_8b_8h_1,   "sqrshrun  v4.8b,  v29.8h, #1",  4, 29)
   3908 GEN_TWOVEC_TEST(sqrshrun_8b_8h_4,   "sqrshrun  v4.8b,  v29.8h, #4",  4, 29)
   3909 GEN_TWOVEC_TEST(sqrshrun_8b_8h_8,   "sqrshrun  v4.8b,  v29.8h, #8",  4, 29)
   3910 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_1, "sqrshrun2 v4.16b, v29.8h, #1",  4, 29)
   3911 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_4, "sqrshrun2 v4.16b, v29.8h, #4",  4, 29)
   3912 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_8, "sqrshrun2 v4.16b, v29.8h, #8",  4, 29)
   3913 GEN_TWOVEC_TEST(sqshrun_2s_2d_1,   "sqshrun  v4.2s,  v29.2d, #1",  4, 29)
   3914 GEN_TWOVEC_TEST(sqshrun_2s_2d_17,  "sqshrun  v4.2s,  v29.2d, #17", 4, 29)
   3915 GEN_TWOVEC_TEST(sqshrun_2s_2d_32,  "sqshrun  v4.2s,  v29.2d, #32", 4, 29)
   3916 GEN_TWOVEC_TEST(sqshrun2_4s_2d_1,  "sqshrun2 v4.4s,  v29.2d, #1",  4, 29)
   3917 GEN_TWOVEC_TEST(sqshrun2_4s_2d_17, "sqshrun2 v4.4s,  v29.2d, #17", 4, 29)
   3918 GEN_TWOVEC_TEST(sqshrun2_4s_2d_32, "sqshrun2 v4.4s,  v29.2d, #32", 4, 29)
   3919 GEN_TWOVEC_TEST(sqshrun_4h_4s_1,   "sqshrun  v4.4h,  v29.4s, #1",  4, 29)
   3920 GEN_TWOVEC_TEST(sqshrun_4h_4s_9,   "sqshrun  v4.4h,  v29.4s, #9",  4, 29)
   3921 GEN_TWOVEC_TEST(sqshrun_4h_4s_16,  "sqshrun  v4.4h,  v29.4s, #16", 4, 29)
   3922 GEN_TWOVEC_TEST(sqshrun2_8h_4s_1,  "sqshrun2 v4.8h,  v29.4s, #1",  4, 29)
   3923 GEN_TWOVEC_TEST(sqshrun2_8h_4s_9,  "sqshrun2 v4.8h,  v29.4s, #9",  4, 29)
   3924 GEN_TWOVEC_TEST(sqshrun2_8h_4s_16, "sqshrun2 v4.8h,  v29.4s, #16", 4, 29)
   3925 GEN_TWOVEC_TEST(sqshrun_8b_8h_1,   "sqshrun  v4.8b,  v29.8h, #1",  4, 29)
   3926 GEN_TWOVEC_TEST(sqshrun_8b_8h_4,   "sqshrun  v4.8b,  v29.8h, #4",  4, 29)
   3927 GEN_TWOVEC_TEST(sqshrun_8b_8h_8,   "sqshrun  v4.8b,  v29.8h, #8",  4, 29)
   3928 GEN_TWOVEC_TEST(sqshrun2_16b_8h_1, "sqshrun2 v4.16b, v29.8h, #1",  4, 29)
   3929 GEN_TWOVEC_TEST(sqshrun2_16b_8h_4, "sqshrun2 v4.16b, v29.8h, #4",  4, 29)
   3930 GEN_TWOVEC_TEST(sqshrun2_16b_8h_8, "sqshrun2 v4.16b, v29.8h, #8",  4, 29)
   3931 
   /* Scalar sqshl / uqshl / sqshlu with an immediate shift amount, using
      registers 5 and 28.  Immediates per register width:
        D: #0, #32, #63     S: #0, #16, #31     H: #0, #8, #15
        B: #0, #1, #4, #6, #7 for sqshl and uqshl; every value #0..#7 for
           sqshlu. */
   3932 GEN_TWOVEC_TEST(sqshl_d_d_0,  "sqshl d5, d28, #0",  5, 28)
   3933 GEN_TWOVEC_TEST(sqshl_d_d_32, "sqshl d5, d28, #32", 5, 28)
   3934 GEN_TWOVEC_TEST(sqshl_d_d_63, "sqshl d5, d28, #63", 5, 28)
   3935 GEN_TWOVEC_TEST(sqshl_s_s_0,  "sqshl s5, s28, #0",  5, 28)
   3936 GEN_TWOVEC_TEST(sqshl_s_s_16, "sqshl s5, s28, #16", 5, 28)
   3937 GEN_TWOVEC_TEST(sqshl_s_s_31, "sqshl s5, s28, #31", 5, 28)
   3938 GEN_TWOVEC_TEST(sqshl_h_h_0,  "sqshl h5, h28, #0",  5, 28)
   3939 GEN_TWOVEC_TEST(sqshl_h_h_8,  "sqshl h5, h28, #8",  5, 28)
   3940 GEN_TWOVEC_TEST(sqshl_h_h_15, "sqshl h5, h28, #15", 5, 28)
   3941 GEN_TWOVEC_TEST(sqshl_b_b_0,  "sqshl b5, b28, #0",  5, 28)
   3942 GEN_TWOVEC_TEST(sqshl_b_b_1,  "sqshl b5, b28, #1",  5, 28)
   3943 GEN_TWOVEC_TEST(sqshl_b_b_4,  "sqshl b5, b28, #4",  5, 28)
   3944 GEN_TWOVEC_TEST(sqshl_b_b_6,  "sqshl b5, b28, #6",  5, 28)
   3945 GEN_TWOVEC_TEST(sqshl_b_b_7,  "sqshl b5, b28, #7",  5, 28)
   3946 GEN_TWOVEC_TEST(uqshl_d_d_0,  "uqshl d5, d28, #0",  5, 28)
   3947 GEN_TWOVEC_TEST(uqshl_d_d_32, "uqshl d5, d28, #32", 5, 28)
   3948 GEN_TWOVEC_TEST(uqshl_d_d_63, "uqshl d5, d28, #63", 5, 28)
   3949 GEN_TWOVEC_TEST(uqshl_s_s_0,  "uqshl s5, s28, #0",  5, 28)
   3950 GEN_TWOVEC_TEST(uqshl_s_s_16, "uqshl s5, s28, #16", 5, 28)
   3951 GEN_TWOVEC_TEST(uqshl_s_s_31, "uqshl s5, s28, #31", 5, 28)
   3952 GEN_TWOVEC_TEST(uqshl_h_h_0,  "uqshl h5, h28, #0",  5, 28)
   3953 GEN_TWOVEC_TEST(uqshl_h_h_8,  "uqshl h5, h28, #8",  5, 28)
   3954 GEN_TWOVEC_TEST(uqshl_h_h_15, "uqshl h5, h28, #15", 5, 28)
   3955 GEN_TWOVEC_TEST(uqshl_b_b_0,  "uqshl b5, b28, #0",  5, 28)
   3956 GEN_TWOVEC_TEST(uqshl_b_b_1,  "uqshl b5, b28, #1",  5, 28)
   3957 GEN_TWOVEC_TEST(uqshl_b_b_4,  "uqshl b5, b28, #4",  5, 28)
   3958 GEN_TWOVEC_TEST(uqshl_b_b_6,  "uqshl b5, b28, #6",  5, 28)
   3959 GEN_TWOVEC_TEST(uqshl_b_b_7,  "uqshl b5, b28, #7",  5, 28)
   3960 GEN_TWOVEC_TEST(sqshlu_d_d_0,  "sqshlu d5, d28, #0",  5, 28)
   3961 GEN_TWOVEC_TEST(sqshlu_d_d_32, "sqshlu d5, d28, #32", 5, 28)
   3962 GEN_TWOVEC_TEST(sqshlu_d_d_63, "sqshlu d5, d28, #63", 5, 28)
   3963 GEN_TWOVEC_TEST(sqshlu_s_s_0,  "sqshlu s5, s28, #0",  5, 28)
   3964 GEN_TWOVEC_TEST(sqshlu_s_s_16, "sqshlu s5, s28, #16", 5, 28)
   3965 GEN_TWOVEC_TEST(sqshlu_s_s_31, "sqshlu s5, s28, #31", 5, 28)
   3966 GEN_TWOVEC_TEST(sqshlu_h_h_0,  "sqshlu h5, h28, #0",  5, 28)
   3967 GEN_TWOVEC_TEST(sqshlu_h_h_8,  "sqshlu h5, h28, #8",  5, 28)
   3968 GEN_TWOVEC_TEST(sqshlu_h_h_15, "sqshlu h5, h28, #15", 5, 28)
   3969 GEN_TWOVEC_TEST(sqshlu_b_b_0,  "sqshlu b5, b28, #0",  5, 28)
   3970 GEN_TWOVEC_TEST(sqshlu_b_b_1,  "sqshlu b5, b28, #1",  5, 28)
   3971 GEN_TWOVEC_TEST(sqshlu_b_b_2,  "sqshlu b5, b28, #2",  5, 28)
   3972 GEN_TWOVEC_TEST(sqshlu_b_b_3,  "sqshlu b5, b28, #3",  5, 28)
   3973 GEN_TWOVEC_TEST(sqshlu_b_b_4,  "sqshlu b5, b28, #4",  5, 28)
   3974 GEN_TWOVEC_TEST(sqshlu_b_b_5,  "sqshlu b5, b28, #5",  5, 28)
   3975 GEN_TWOVEC_TEST(sqshlu_b_b_6,  "sqshlu b5, b28, #6",  5, 28)
   3976 GEN_TWOVEC_TEST(sqshlu_b_b_7,  "sqshlu b5, b28, #7",  5, 28)
   3977 
   /* Vector sqshl / uqshl / sqshlu with an immediate shift amount, using
      registers v6 and v27.  Each mnemonic covers all seven arrangements with
      three immediates apiece:
        2d: #0, #32, #63      4s, 2s: #0, #16, #31
        8h, 4h: #0, #8, #15   16b, 8b: #0, #3, #7 */
   3978 GEN_TWOVEC_TEST(sqshl_2d_2d_0,   "sqshl v6.2d,  v27.2d, #0",  6, 27)
   3979 GEN_TWOVEC_TEST(sqshl_2d_2d_32,  "sqshl v6.2d,  v27.2d, #32", 6, 27)
   3980 GEN_TWOVEC_TEST(sqshl_2d_2d_63,  "sqshl v6.2d,  v27.2d, #63", 6, 27)
   3981 GEN_TWOVEC_TEST(sqshl_4s_4s_0,   "sqshl v6.4s,  v27.4s, #0",  6, 27)
   3982 GEN_TWOVEC_TEST(sqshl_4s_4s_16,  "sqshl v6.4s,  v27.4s, #16", 6, 27)
   3983 GEN_TWOVEC_TEST(sqshl_4s_4s_31,  "sqshl v6.4s,  v27.4s, #31", 6, 27)
   3984 GEN_TWOVEC_TEST(sqshl_2s_2s_0,   "sqshl v6.2s,  v27.2s, #0",  6, 27)
   3985 GEN_TWOVEC_TEST(sqshl_2s_2s_16,  "sqshl v6.2s,  v27.2s, #16", 6, 27)
   3986 GEN_TWOVEC_TEST(sqshl_2s_2s_31,  "sqshl v6.2s,  v27.2s, #31", 6, 27)
   3987 GEN_TWOVEC_TEST(sqshl_8h_8h_0,   "sqshl v6.8h,  v27.8h, #0",  6, 27)
   3988 GEN_TWOVEC_TEST(sqshl_8h_8h_8,   "sqshl v6.8h,  v27.8h, #8",  6, 27)
   3989 GEN_TWOVEC_TEST(sqshl_8h_8h_15,  "sqshl v6.8h,  v27.8h, #15", 6, 27)
   3990 GEN_TWOVEC_TEST(sqshl_4h_4h_0,   "sqshl v6.4h,  v27.4h, #0",  6, 27)
   3991 GEN_TWOVEC_TEST(sqshl_4h_4h_8,   "sqshl v6.4h,  v27.4h, #8",  6, 27)
   3992 GEN_TWOVEC_TEST(sqshl_4h_4h_15,  "sqshl v6.4h,  v27.4h, #15", 6, 27)
   3993 GEN_TWOVEC_TEST(sqshl_16b_16b_0, "sqshl v6.16b, v27.16b, #0", 6, 27)
   3994 GEN_TWOVEC_TEST(sqshl_16b_16b_3, "sqshl v6.16b, v27.16b, #3", 6, 27)
   3995 GEN_TWOVEC_TEST(sqshl_16b_16b_7, "sqshl v6.16b, v27.16b, #7", 6, 27)
   3996 GEN_TWOVEC_TEST(sqshl_8b_8b_0,   "sqshl v6.8b,  v27.8b, #0",  6, 27)
   3997 GEN_TWOVEC_TEST(sqshl_8b_8b_3,   "sqshl v6.8b,  v27.8b, #3",  6, 27)
   3998 GEN_TWOVEC_TEST(sqshl_8b_8b_7,   "sqshl v6.8b,  v27.8b, #7",  6, 27)
   3999 GEN_TWOVEC_TEST(uqshl_2d_2d_0,   "uqshl v6.2d,  v27.2d, #0",  6, 27)
   4000 GEN_TWOVEC_TEST(uqshl_2d_2d_32,  "uqshl v6.2d,  v27.2d, #32", 6, 27)
   4001 GEN_TWOVEC_TEST(uqshl_2d_2d_63,  "uqshl v6.2d,  v27.2d, #63", 6, 27)
   4002 GEN_TWOVEC_TEST(uqshl_4s_4s_0,   "uqshl v6.4s,  v27.4s, #0",  6, 27)
   4003 GEN_TWOVEC_TEST(uqshl_4s_4s_16,  "uqshl v6.4s,  v27.4s, #16", 6, 27)
   4004 GEN_TWOVEC_TEST(uqshl_4s_4s_31,  "uqshl v6.4s,  v27.4s, #31", 6, 27)
   4005 GEN_TWOVEC_TEST(uqshl_2s_2s_0,   "uqshl v6.2s,  v27.2s, #0",  6, 27)
   4006 GEN_TWOVEC_TEST(uqshl_2s_2s_16,  "uqshl v6.2s,  v27.2s, #16", 6, 27)
   4007 GEN_TWOVEC_TEST(uqshl_2s_2s_31,  "uqshl v6.2s,  v27.2s, #31", 6, 27)
   4008 GEN_TWOVEC_TEST(uqshl_8h_8h_0,   "uqshl v6.8h,  v27.8h, #0",  6, 27)
   4009 GEN_TWOVEC_TEST(uqshl_8h_8h_8,   "uqshl v6.8h,  v27.8h, #8",  6, 27)
   4010 GEN_TWOVEC_TEST(uqshl_8h_8h_15,  "uqshl v6.8h,  v27.8h, #15", 6, 27)
   4011 GEN_TWOVEC_TEST(uqshl_4h_4h_0,   "uqshl v6.4h,  v27.4h, #0",  6, 27)
   4012 GEN_TWOVEC_TEST(uqshl_4h_4h_8,   "uqshl v6.4h,  v27.4h, #8",  6, 27)
   4013 GEN_TWOVEC_TEST(uqshl_4h_4h_15,  "uqshl v6.4h,  v27.4h, #15", 6, 27)
   4014 GEN_TWOVEC_TEST(uqshl_16b_16b_0, "uqshl v6.16b, v27.16b, #0", 6, 27)
   4015 GEN_TWOVEC_TEST(uqshl_16b_16b_3, "uqshl v6.16b, v27.16b, #3", 6, 27)
   4016 GEN_TWOVEC_TEST(uqshl_16b_16b_7, "uqshl v6.16b, v27.16b, #7", 6, 27)
   4017 GEN_TWOVEC_TEST(uqshl_8b_8b_0,   "uqshl v6.8b,  v27.8b, #0",  6, 27)
   4018 GEN_TWOVEC_TEST(uqshl_8b_8b_3,   "uqshl v6.8b,  v27.8b, #3",  6, 27)
   4019 GEN_TWOVEC_TEST(uqshl_8b_8b_7,   "uqshl v6.8b,  v27.8b, #7",  6, 27)
   4020 GEN_TWOVEC_TEST(sqshlu_2d_2d_0,   "sqshlu v6.2d,  v27.2d, #0",  6, 27)
   4021 GEN_TWOVEC_TEST(sqshlu_2d_2d_32,  "sqshlu v6.2d,  v27.2d, #32", 6, 27)
   4022 GEN_TWOVEC_TEST(sqshlu_2d_2d_63,  "sqshlu v6.2d,  v27.2d, #63", 6, 27)
   4023 GEN_TWOVEC_TEST(sqshlu_4s_4s_0,   "sqshlu v6.4s,  v27.4s, #0",  6, 27)
   4024 GEN_TWOVEC_TEST(sqshlu_4s_4s_16,  "sqshlu v6.4s,  v27.4s, #16", 6, 27)
   4025 GEN_TWOVEC_TEST(sqshlu_4s_4s_31,  "sqshlu v6.4s,  v27.4s, #31", 6, 27)
   4026 GEN_TWOVEC_TEST(sqshlu_2s_2s_0,   "sqshlu v6.2s,  v27.2s, #0",  6, 27)
   4027 GEN_TWOVEC_TEST(sqshlu_2s_2s_16,  "sqshlu v6.2s,  v27.2s, #16", 6, 27)
   4028 GEN_TWOVEC_TEST(sqshlu_2s_2s_31,  "sqshlu v6.2s,  v27.2s, #31", 6, 27)
   4029 GEN_TWOVEC_TEST(sqshlu_8h_8h_0,   "sqshlu v6.8h,  v27.8h, #0",  6, 27)
   4030 GEN_TWOVEC_TEST(sqshlu_8h_8h_8,   "sqshlu v6.8h,  v27.8h, #8",  6, 27)
   4031 GEN_TWOVEC_TEST(sqshlu_8h_8h_15,  "sqshlu v6.8h,  v27.8h, #15", 6, 27)
   4032 GEN_TWOVEC_TEST(sqshlu_4h_4h_0,   "sqshlu v6.4h,  v27.4h, #0",  6, 27)
   4033 GEN_TWOVEC_TEST(sqshlu_4h_4h_8,   "sqshlu v6.4h,  v27.4h, #8",  6, 27)
   4034 GEN_TWOVEC_TEST(sqshlu_4h_4h_15,  "sqshlu v6.4h,  v27.4h, #15", 6, 27)
   4035 GEN_TWOVEC_TEST(sqshlu_16b_16b_0, "sqshlu v6.16b, v27.16b, #0", 6, 27)
   4036 GEN_TWOVEC_TEST(sqshlu_16b_16b_3, "sqshlu v6.16b, v27.16b, #3", 6, 27)
   4037 GEN_TWOVEC_TEST(sqshlu_16b_16b_7, "sqshlu v6.16b, v27.16b, #7", 6, 27)
   4038 GEN_TWOVEC_TEST(sqshlu_8b_8b_0,   "sqshlu v6.8b,  v27.8b, #0",  6, 27)
   4039 GEN_TWOVEC_TEST(sqshlu_8b_8b_3,   "sqshlu v6.8b,  v27.8b, #3",  6, 27)
   4040 GEN_TWOVEC_TEST(sqshlu_8b_8b_7,   "sqshlu v6.8b,  v27.8b, #7",  6, 27)
   4041 
   /* Scalar sqxtn / uqxtn / sqxtun, each for the operand pairs (s,d), (h,s)
      and (b,h).  These use the two ends of the register file: register 31
      as the first operand and register 0 as the second. */
   4042 GEN_TWOVEC_TEST(sqxtn_s_d,  "sqxtn s31,  d0", 31, 0)
   4043 GEN_TWOVEC_TEST(sqxtn_h_s,  "sqxtn h31,  s0", 31, 0)
   4044 GEN_TWOVEC_TEST(sqxtn_b_h,  "sqxtn b31,  h0", 31, 0)
   4045 GEN_TWOVEC_TEST(uqxtn_s_d,  "uqxtn s31,  d0", 31, 0)
   4046 GEN_TWOVEC_TEST(uqxtn_h_s,  "uqxtn h31,  s0", 31, 0)
   4047 GEN_TWOVEC_TEST(uqxtn_b_h,  "uqxtn b31,  h0", 31, 0)
   4048 GEN_TWOVEC_TEST(sqxtun_s_d, "sqxtun s31, d0", 31, 0)
   4049 GEN_TWOVEC_TEST(sqxtun_h_s, "sqxtun h31, s0", 31, 0)
   4050 GEN_TWOVEC_TEST(sqxtun_b_h, "sqxtun b31, h0", 31, 0)
   4051 
   /* Vector sqxtn / uqxtn / sqxtun and their "2"-suffixed forms, via
      GEN_UNARY_TEST.  The macro is defined outside this excerpt; its
      arguments here look like (mnemonic, first-operand arrangement,
      second-operand arrangement) -- TODO confirm against the macro.
      Pairs covered: 2s/4s with 2d, 4h/8h with 4s, 8b/16b with 8h. */
   4052 GEN_UNARY_TEST(sqxtn,   2s, 2d)
   4053 GEN_UNARY_TEST(sqxtn2,  4s, 2d)
   4054 GEN_UNARY_TEST(sqxtn,   4h, 4s)
   4055 GEN_UNARY_TEST(sqxtn2,  8h, 4s)
   4056 GEN_UNARY_TEST(sqxtn,   8b, 8h)
   4057 GEN_UNARY_TEST(sqxtn2, 16b, 8h)
   4058 GEN_UNARY_TEST(uqxtn,   2s, 2d)
   4059 GEN_UNARY_TEST(uqxtn2,  4s, 2d)
   4060 GEN_UNARY_TEST(uqxtn,   4h, 4s)
   4061 GEN_UNARY_TEST(uqxtn2,  8h, 4s)
   4062 GEN_UNARY_TEST(uqxtn,   8b, 8h)
   4063 GEN_UNARY_TEST(uqxtn2, 16b, 8h)
   4064 GEN_UNARY_TEST(sqxtun,   2s, 2d)
   4065 GEN_UNARY_TEST(sqxtun2,  4s, 2d)
   4066 GEN_UNARY_TEST(sqxtun,   4h, 4s)
   4067 GEN_UNARY_TEST(sqxtun2,  8h, 4s)
   4068 GEN_UNARY_TEST(sqxtun,   8b, 8h)
   4069 GEN_UNARY_TEST(sqxtun2, 16b, 8h)
   4070 
   /* Vector srhadd / urhadd on registers v2, v11, v29, one test per
      arrangement: 4s, 2s, 8h, 4h, 16b, 8b.  (No 2d variant is instantiated
      here.)  The 16b invocations are wrapped across two lines. */
   4071 GEN_THREEVEC_TEST(srhadd_4s_4s_4s,"srhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   4072 GEN_THREEVEC_TEST(srhadd_2s_2s_2s,"srhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   4073 GEN_THREEVEC_TEST(srhadd_8h_8h_8h,"srhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   4074 GEN_THREEVEC_TEST(srhadd_4h_4h_4h,"srhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   4075 GEN_THREEVEC_TEST(srhadd_16b_16b_16b,
   4076                                   "srhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   4077 GEN_THREEVEC_TEST(srhadd_8b_8b_8b,"srhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   4078 GEN_THREEVEC_TEST(urhadd_4s_4s_4s,"urhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   4079 GEN_THREEVEC_TEST(urhadd_2s_2s_2s,"urhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   4080 GEN_THREEVEC_TEST(urhadd_8h_8h_8h,"urhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   4081 GEN_THREEVEC_TEST(urhadd_4h_4h_4h,"urhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   4082 GEN_THREEVEC_TEST(urhadd_16b_16b_16b,
   4083                                   "urhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   4084 GEN_THREEVEC_TEST(urhadd_8b_8b_8b,"urhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   4085 
   /* Scalar sshl / ushl with a register shift operand; only the D-register
      form is instantiated here.  Registers used: 29, 28, 27. */
   4086 GEN_THREEVEC_TEST(sshl_d_d_d, "sshl d29, d28, d27", 29, 28, 27)
   4087 GEN_THREEVEC_TEST(ushl_d_d_d, "ushl d29, d28, d27", 29, 28, 27)
   4088 
   /* Vector sshl / ushl, register-shift form, on v29, v28, v27.  Each
      mnemonic is instantiated for all seven arrangements:
      2d, 4s, 2s, 8h, 4h, 16b, 8b. */
   4089 GEN_THREEVEC_TEST(sshl_2d_2d_2d,    "sshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4090 GEN_THREEVEC_TEST(sshl_4s_4s_4s,    "sshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4091 GEN_THREEVEC_TEST(sshl_2s_2s_2s,    "sshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4092 GEN_THREEVEC_TEST(sshl_8h_8h_8h,    "sshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4093 GEN_THREEVEC_TEST(sshl_4h_4h_4h,    "sshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4094 GEN_THREEVEC_TEST(sshl_16b_16b_16b, "sshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4095 GEN_THREEVEC_TEST(sshl_8b_8b_8b,    "sshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4096 GEN_THREEVEC_TEST(ushl_2d_2d_2d,    "ushl v29.2d, v28.2d, v27.2d", 29,28,27)
   4097 GEN_THREEVEC_TEST(ushl_4s_4s_4s,    "ushl v29.4s, v28.4s, v27.4s", 29,28,27)
   4098 GEN_THREEVEC_TEST(ushl_2s_2s_2s,    "ushl v29.2s, v28.2s, v27.2s", 29,28,27)
   4099 GEN_THREEVEC_TEST(ushl_8h_8h_8h,    "ushl v29.8h, v28.8h, v27.8h", 29,28,27)
   4100 GEN_THREEVEC_TEST(ushl_4h_4h_4h,    "ushl v29.4h, v28.4h, v27.4h", 29,28,27)
   4101 GEN_THREEVEC_TEST(ushl_16b_16b_16b, "ushl v29.16b, v28.16b, v27.16b", 29,28,27)
   4102 GEN_THREEVEC_TEST(ushl_8b_8b_8b,    "ushl v29.8b, v28.8b, v27.8b", 29,28,27)
   4103 
   /* Scalar (D-register) shifts by immediate on registers 5 and 28:
      shl with #0, #32, #63; sshr and ushr with #1, #32, #64.  Note the
      left-shift immediates start at 0 while the right-shift ones start at 1
      and go up to 64. */
   4104 GEN_TWOVEC_TEST(shl_d_d_0,  "shl d5, d28, #0",  5, 28)
   4105 GEN_TWOVEC_TEST(shl_d_d_32, "shl d5, d28, #32", 5, 28)
   4106 GEN_TWOVEC_TEST(shl_d_d_63, "shl d5, d28, #63", 5, 28)
   4107 GEN_TWOVEC_TEST(sshr_d_d_1,  "sshr d5, d28, #1",  5, 28)
   4108 GEN_TWOVEC_TEST(sshr_d_d_32, "sshr d5, d28, #32", 5, 28)
   4109 GEN_TWOVEC_TEST(sshr_d_d_64, "sshr d5, d28, #64", 5, 28)
   4110 GEN_TWOVEC_TEST(ushr_d_d_1,  "ushr d5, d28, #1",  5, 28)
   4111 GEN_TWOVEC_TEST(ushr_d_d_32, "ushr d5, d28, #32", 5, 28)
   4112 GEN_TWOVEC_TEST(ushr_d_d_64, "ushr d5, d28, #64", 5, 28)
   4113 
   /* Vector shl / sshr / ushr by immediate, via GEN_SHIFT_TEST.  The macro is
      defined outside this excerpt; its arguments here look like (mnemonic,
      first-operand arrangement, second-operand arrangement, shift amount)
      -- TODO confirm against the macro.
      shl amounts used:       2d 0/13/63, 4s+2s 0/13/31, 8h+4h 0/13/15,
                              16b+8b 0/7.
      sshr/ushr amounts used: 2d 1/13/64, 4s+2s 1/13/32, 8h+4h 1/13/16,
                              16b+8b 1/8. */
   4114 GEN_SHIFT_TEST(shl,  2d, 2d, 0)
   4115 GEN_SHIFT_TEST(shl,  2d, 2d, 13)
   4116 GEN_SHIFT_TEST(shl,  2d, 2d, 63)
   4117 GEN_SHIFT_TEST(shl,  4s, 4s, 0)
   4118 GEN_SHIFT_TEST(shl,  4s, 4s, 13)
   4119 GEN_SHIFT_TEST(shl,  4s, 4s, 31)
   4120 GEN_SHIFT_TEST(shl,  2s, 2s, 0)
   4121 GEN_SHIFT_TEST(shl,  2s, 2s, 13)
   4122 GEN_SHIFT_TEST(shl,  2s, 2s, 31)
   4123 GEN_SHIFT_TEST(shl,  8h, 8h, 0)
   4124 GEN_SHIFT_TEST(shl,  8h, 8h, 13)
   4125 GEN_SHIFT_TEST(shl,  8h, 8h, 15)
   4126 GEN_SHIFT_TEST(shl,  4h, 4h, 0)
   4127 GEN_SHIFT_TEST(shl,  4h, 4h, 13)
   4128 GEN_SHIFT_TEST(shl,  4h, 4h, 15)
   4129 GEN_SHIFT_TEST(shl,  16b, 16b, 0)
   4130 GEN_SHIFT_TEST(shl,  16b, 16b, 7)
   4131 GEN_SHIFT_TEST(shl,  8b, 8b, 0)
   4132 GEN_SHIFT_TEST(shl,  8b, 8b, 7)
   4133 GEN_SHIFT_TEST(sshr, 2d, 2d, 1)
   4134 GEN_SHIFT_TEST(sshr, 2d, 2d, 13)
   4135 GEN_SHIFT_TEST(sshr, 2d, 2d, 64)
   4136 GEN_SHIFT_TEST(sshr, 4s, 4s, 1)
   4137 GEN_SHIFT_TEST(sshr, 4s, 4s, 13)
   4138 GEN_SHIFT_TEST(sshr, 4s, 4s, 32)
   4139 GEN_SHIFT_TEST(sshr, 2s, 2s, 1)
   4140 GEN_SHIFT_TEST(sshr, 2s, 2s, 13)
   4141 GEN_SHIFT_TEST(sshr, 2s, 2s, 32)
   4142 GEN_SHIFT_TEST(sshr, 8h, 8h, 1)
   4143 GEN_SHIFT_TEST(sshr, 8h, 8h, 13)
   4144 GEN_SHIFT_TEST(sshr, 8h, 8h, 16)
   4145 GEN_SHIFT_TEST(sshr, 4h, 4h, 1)
   4146 GEN_SHIFT_TEST(sshr, 4h, 4h, 13)
   4147 GEN_SHIFT_TEST(sshr, 4h, 4h, 16)
   4148 GEN_SHIFT_TEST(sshr, 16b, 16b, 1)
   4149 GEN_SHIFT_TEST(sshr, 16b, 16b, 8)
   4150 GEN_SHIFT_TEST(sshr, 8b, 8b, 1)
   4151 GEN_SHIFT_TEST(sshr, 8b, 8b, 8)
   4152 GEN_SHIFT_TEST(ushr, 2d, 2d, 1)
   4153 GEN_SHIFT_TEST(ushr, 2d, 2d, 13)
   4154 GEN_SHIFT_TEST(ushr, 2d, 2d, 64)
   4155 GEN_SHIFT_TEST(ushr, 4s, 4s, 1)
   4156 GEN_SHIFT_TEST(ushr, 4s, 4s, 13)
   4157 GEN_SHIFT_TEST(ushr, 4s, 4s, 32)
   4158 GEN_SHIFT_TEST(ushr, 2s, 2s, 1)
   4159 GEN_SHIFT_TEST(ushr, 2s, 2s, 13)
   4160 GEN_SHIFT_TEST(ushr, 2s, 2s, 32)
   4161 GEN_SHIFT_TEST(ushr, 8h, 8h, 1)
   4162 GEN_SHIFT_TEST(ushr, 8h, 8h, 13)
   4163 GEN_SHIFT_TEST(ushr, 8h, 8h, 16)
   4164 GEN_SHIFT_TEST(ushr, 4h, 4h, 1)
   4165 GEN_SHIFT_TEST(ushr, 4h, 4h, 13)
   4166 GEN_SHIFT_TEST(ushr, 4h, 4h, 16)
   4167 GEN_SHIFT_TEST(ushr, 16b, 16b, 1)
   4168 GEN_SHIFT_TEST(ushr, 16b, 16b, 8)
   4169 GEN_SHIFT_TEST(ushr, 8b, 8b, 1)
   4170 GEN_SHIFT_TEST(ushr, 8b, 8b, 8)
   4171 
   /* Scalar (D-register) ssra / usra by immediate on registers 5 and 28,
      each with #1, #32 and #64. */
   4172 GEN_TWOVEC_TEST(ssra_d_d_1,  "ssra d5, d28, #1",  5, 28)
   4173 GEN_TWOVEC_TEST(ssra_d_d_32, "ssra d5, d28, #32", 5, 28)
   4174 GEN_TWOVEC_TEST(ssra_d_d_64, "ssra d5, d28, #64", 5, 28)
   4175 GEN_TWOVEC_TEST(usra_d_d_1,  "usra d5, d28, #1",  5, 28)
   4176 GEN_TWOVEC_TEST(usra_d_d_32, "usra d5, d28, #32", 5, 28)
   4177 GEN_TWOVEC_TEST(usra_d_d_64, "usra d5, d28, #64", 5, 28)
   4178 
   /* Vector ssra by immediate on registers v6 and v27, three immediates per
      arrangement:
        2d: #1, #32, #64      4s, 2s: #1, #16, #32
        8h, 4h: #1, #8, #16   16b, 8b: #1, #3, #8 */
   4179 GEN_TWOVEC_TEST(ssra_2d_2d_1,   "ssra v6.2d,  v27.2d, #1",  6, 27)
   4180 GEN_TWOVEC_TEST(ssra_2d_2d_32,  "ssra v6.2d,  v27.2d, #32", 6, 27)
   4181 GEN_TWOVEC_TEST(ssra_2d_2d_64,  "ssra v6.2d,  v27.2d, #64", 6, 27)
   4182 GEN_TWOVEC_TEST(ssra_4s_4s_1,   "ssra v6.4s,  v27.4s, #1",  6, 27)
   4183 GEN_TWOVEC_TEST(ssra_4s_4s_16,  "ssra v6.4s,  v27.4s, #16", 6, 27)
   4184 GEN_TWOVEC_TEST(ssra_4s_4s_32,  "ssra v6.4s,  v27.4s, #32", 6, 27)
   4185 GEN_TWOVEC_TEST(ssra_2s_2s_1,   "ssra v6.2s,  v27.2s, #1",  6, 27)
   4186 GEN_TWOVEC_TEST(ssra_2s_2s_16,  "ssra v6.2s,  v27.2s, #16", 6, 27)
   4187 GEN_TWOVEC_TEST(ssra_2s_2s_32,  "ssra v6.2s,  v27.2s, #32", 6, 27)
   4188 GEN_TWOVEC_TEST(ssra_8h_8h_1,   "ssra v6.8h,  v27.8h, #1",  6, 27)
   4189 GEN_TWOVEC_TEST(ssra_8h_8h_8,   "ssra v6.8h,  v27.8h, #8",  6, 27)
   4190 GEN_TWOVEC_TEST(ssra_8h_8h_16,  "ssra v6.8h,  v27.8h, #16", 6, 27)
   4191 GEN_TWOVEC_TEST(ssra_4h_4h_1,   "ssra v6.4h,  v27.4h, #1",  6, 27)
   4192 GEN_TWOVEC_TEST(ssra_4h_4h_8,   "ssra v6.4h,  v27.4h, #8",  6, 27)
   4193 GEN_TWOVEC_TEST(ssra_4h_4h_16,  "ssra v6.4h,  v27.4h, #16", 6, 27)
   4194 GEN_TWOVEC_TEST(ssra_16b_16b_1, "ssra v6.16b, v27.16b, #1", 6, 27)
   4195 GEN_TWOVEC_TEST(ssra_16b_16b_3, "ssra v6.16b, v27.16b, #3", 6, 27)
   4196 GEN_TWOVEC_TEST(ssra_16b_16b_8, "ssra v6.16b, v27.16b, #8", 6, 27)
   4197 GEN_TWOVEC_TEST(ssra_8b_8b_1,   "ssra v6.8b,  v27.8b, #1",  6, 27)
   4198 GEN_TWOVEC_TEST(ssra_8b_8b_3,   "ssra v6.8b,  v27.8b, #3",  6, 27)
   4199 GEN_TWOVEC_TEST(ssra_8b_8b_8,   "ssra v6.8b,  v27.8b, #8",  6, 27)
   4200 GEN_TWOVEC_TEST(usra_2d_2d_1,   "usra v6.2d,  v27.2d, #1",  6, 27)
   4201 GEN_TWOVEC_TEST(usra_2d_2d_32,  "usra v6.2d,  v27.2d, #32", 6, 27)
   4202 GEN_TWOVEC_TEST(usra_2d_2d_64,  "usra v6.2d,  v27.2d, #64", 6, 27)
   4203 GEN_TWOVEC_TEST(usra_4s_4s_1,   "usra v6.4s,  v27.4s, #1",  6, 27)
   4204 GEN_TWOVEC_TEST(usra_4s_4s_16,  "usra v6.4s,  v27.4s, #16", 6, 27)
   4205 GEN_TWOVEC_TEST(usra_4s_4s_32,  "usra v6.4s,  v27.4s, #32", 6, 27)
   4206 GEN_TWOVEC_TEST(usra_2s_2s_1,   "usra v6.2s,  v27.2s, #1",  6, 27)
   4207 GEN_TWOVEC_TEST(usra_2s_2s_16,  "usra v6.2s,  v27.2s, #16", 6, 27)
   4208 GEN_TWOVEC_TEST(usra_2s_2s_32,  "usra v6.2s,  v27.2s, #32", 6, 27)
   4209 GEN_TWOVEC_TEST(usra_8h_8h_1,   "usra v6.8h,  v27.8h, #1",  6, 27)
   4210 GEN_TWOVEC_TEST(usra_8h_8h_8,   "usra v6.8h,  v27.8h, #8",  6, 27)
   4211 GEN_TWOVEC_TEST(usra_8h_8h_16,  "usra v6.8h,  v27.8h, #16", 6, 27)
   4212 GEN_TWOVEC_TEST(usra_4h_4h_1,   "usra v6.4h,  v27.4h, #1",  6, 27)
   4213 GEN_TWOVEC_TEST(usra_4h_4h_8,   "usra v6.4h,  v27.4h, #8",  6, 27)
   4214 GEN_TWOVEC_TEST(usra_4h_4h_16,  "usra v6.4h,  v27.4h, #16", 6, 27)
   4215 GEN_TWOVEC_TEST(usra_16b_16b_1, "usra v6.16b, v27.16b, #1", 6, 27)
   4216 GEN_TWOVEC_TEST(usra_16b_16b_3, "usra v6.16b, v27.16b, #3", 6, 27)
   4217 GEN_TWOVEC_TEST(usra_16b_16b_8, "usra v6.16b, v27.16b, #8", 6, 27)
   4218 GEN_TWOVEC_TEST(usra_8b_8b_1,   "usra v6.8b,  v27.8b, #1",  6, 27)
   4219 GEN_TWOVEC_TEST(usra_8b_8b_3,   "usra v6.8b,  v27.8b, #3",  6, 27)
   4220 GEN_TWOVEC_TEST(usra_8b_8b_8,   "usra v6.8b,  v27.8b, #8",  6, 27)
   4221 
   // srshl / urshl (signed / unsigned rounding shift left, shift count taken
   // from a register), scalar D-register form.  The three trailing integers
   // match the registers named in the instruction string (d29, d28, d27).
   4222 GEN_THREEVEC_TEST(srshl_d_d_d, "srshl d29, d28, d27", 29, 28, 27)
   4223 GEN_THREEVEC_TEST(urshl_d_d_d, "urshl d29, d28, d27", 29, 28, 27)
   4224 
   // srshl / urshl, vector forms: one test per arrangement from .2d down to
   // .8b, always using v29 as destination and v28, v27 as sources.
   4225 GEN_THREEVEC_TEST(srshl_2d_2d_2d,   "srshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4226 GEN_THREEVEC_TEST(srshl_4s_4s_4s,   "srshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4227 GEN_THREEVEC_TEST(srshl_2s_2s_2s,   "srshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4228 GEN_THREEVEC_TEST(srshl_8h_8h_8h,   "srshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4229 GEN_THREEVEC_TEST(srshl_4h_4h_4h,   "srshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4230 GEN_THREEVEC_TEST(srshl_16b_16b_16b,"srshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4231 GEN_THREEVEC_TEST(srshl_8b_8b_8b,   "srshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4232 GEN_THREEVEC_TEST(urshl_2d_2d_2d,   "urshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4233 GEN_THREEVEC_TEST(urshl_4s_4s_4s,   "urshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4234 GEN_THREEVEC_TEST(urshl_2s_2s_2s,   "urshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4235 GEN_THREEVEC_TEST(urshl_8h_8h_8h,   "urshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4236 GEN_THREEVEC_TEST(urshl_4h_4h_4h,   "urshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4237 GEN_THREEVEC_TEST(urshl_16b_16b_16b,"urshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4238 GEN_THREEVEC_TEST(urshl_8b_8b_8b,   "urshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4239 
   // srshr / urshr (signed / unsigned rounding shift right by immediate),
   // scalar D-register form, with shift amounts 1, 32 and 64.
   4240 GEN_TWOVEC_TEST(srshr_d_d_1,  "srshr d5, d28, #1",  5, 28)
   4241 GEN_TWOVEC_TEST(srshr_d_d_32, "srshr d5, d28, #32", 5, 28)
   4242 GEN_TWOVEC_TEST(srshr_d_d_64, "srshr d5, d28, #64", 5, 28)
   4243 GEN_TWOVEC_TEST(urshr_d_d_1,  "urshr d5, d28, #1",  5, 28)
   4244 GEN_TWOVEC_TEST(urshr_d_d_32, "urshr d5, d28, #32", 5, 28)
   4245 GEN_TWOVEC_TEST(urshr_d_d_64, "urshr d5, d28, #64", 5, 28)
   4246 
   // srshr / urshr, vector forms, for every arrangement from .2d down to
   // .8b.  Each arrangement is tested with three shift amounts: 1, an
   // intermediate value, and the lane width in bits.  Destination is v6 and
   // source is v27 throughout.
   4247 GEN_TWOVEC_TEST(srshr_2d_2d_1,   "srshr v6.2d,  v27.2d, #1",  6, 27)
   4248 GEN_TWOVEC_TEST(srshr_2d_2d_32,  "srshr v6.2d,  v27.2d, #32", 6, 27)
   4249 GEN_TWOVEC_TEST(srshr_2d_2d_64,  "srshr v6.2d,  v27.2d, #64", 6, 27)
   4250 GEN_TWOVEC_TEST(srshr_4s_4s_1,   "srshr v6.4s,  v27.4s, #1",  6, 27)
   4251 GEN_TWOVEC_TEST(srshr_4s_4s_16,  "srshr v6.4s,  v27.4s, #16", 6, 27)
   4252 GEN_TWOVEC_TEST(srshr_4s_4s_32,  "srshr v6.4s,  v27.4s, #32", 6, 27)
   4253 GEN_TWOVEC_TEST(srshr_2s_2s_1,   "srshr v6.2s,  v27.2s, #1",  6, 27)
   4254 GEN_TWOVEC_TEST(srshr_2s_2s_16,  "srshr v6.2s,  v27.2s, #16", 6, 27)
   4255 GEN_TWOVEC_TEST(srshr_2s_2s_32,  "srshr v6.2s,  v27.2s, #32", 6, 27)
   4256 GEN_TWOVEC_TEST(srshr_8h_8h_1,   "srshr v6.8h,  v27.8h, #1",  6, 27)
   4257 GEN_TWOVEC_TEST(srshr_8h_8h_8,   "srshr v6.8h,  v27.8h, #8",  6, 27)
   4258 GEN_TWOVEC_TEST(srshr_8h_8h_16,  "srshr v6.8h,  v27.8h, #16", 6, 27)
   4259 GEN_TWOVEC_TEST(srshr_4h_4h_1,   "srshr v6.4h,  v27.4h, #1",  6, 27)
   4260 GEN_TWOVEC_TEST(srshr_4h_4h_8,   "srshr v6.4h,  v27.4h, #8",  6, 27)
   4261 GEN_TWOVEC_TEST(srshr_4h_4h_16,  "srshr v6.4h,  v27.4h, #16", 6, 27)
   4262 GEN_TWOVEC_TEST(srshr_16b_16b_1, "srshr v6.16b, v27.16b, #1", 6, 27)
   4263 GEN_TWOVEC_TEST(srshr_16b_16b_3, "srshr v6.16b, v27.16b, #3", 6, 27)
   4264 GEN_TWOVEC_TEST(srshr_16b_16b_8, "srshr v6.16b, v27.16b, #8", 6, 27)
   4265 GEN_TWOVEC_TEST(srshr_8b_8b_1,   "srshr v6.8b,  v27.8b, #1",  6, 27)
   4266 GEN_TWOVEC_TEST(srshr_8b_8b_3,   "srshr v6.8b,  v27.8b, #3",  6, 27)
   4267 GEN_TWOVEC_TEST(srshr_8b_8b_8,   "srshr v6.8b,  v27.8b, #8",  6, 27)
   4268 GEN_TWOVEC_TEST(urshr_2d_2d_1,   "urshr v6.2d,  v27.2d, #1",  6, 27)
   4269 GEN_TWOVEC_TEST(urshr_2d_2d_32,  "urshr v6.2d,  v27.2d, #32", 6, 27)
   4270 GEN_TWOVEC_TEST(urshr_2d_2d_64,  "urshr v6.2d,  v27.2d, #64", 6, 27)
   4271 GEN_TWOVEC_TEST(urshr_4s_4s_1,   "urshr v6.4s,  v27.4s, #1",  6, 27)
   4272 GEN_TWOVEC_TEST(urshr_4s_4s_16,  "urshr v6.4s,  v27.4s, #16", 6, 27)
   4273 GEN_TWOVEC_TEST(urshr_4s_4s_32,  "urshr v6.4s,  v27.4s, #32", 6, 27)
   4274 GEN_TWOVEC_TEST(urshr_2s_2s_1,   "urshr v6.2s,  v27.2s, #1",  6, 27)
   4275 GEN_TWOVEC_TEST(urshr_2s_2s_16,  "urshr v6.2s,  v27.2s, #16", 6, 27)
   4276 GEN_TWOVEC_TEST(urshr_2s_2s_32,  "urshr v6.2s,  v27.2s, #32", 6, 27)
   4277 GEN_TWOVEC_TEST(urshr_8h_8h_1,   "urshr v6.8h,  v27.8h, #1",  6, 27)
   4278 GEN_TWOVEC_TEST(urshr_8h_8h_8,   "urshr v6.8h,  v27.8h, #8",  6, 27)
   4279 GEN_TWOVEC_TEST(urshr_8h_8h_16,  "urshr v6.8h,  v27.8h, #16", 6, 27)
   4280 GEN_TWOVEC_TEST(urshr_4h_4h_1,   "urshr v6.4h,  v27.4h, #1",  6, 27)
   4281 GEN_TWOVEC_TEST(urshr_4h_4h_8,   "urshr v6.4h,  v27.4h, #8",  6, 27)
   4282 GEN_TWOVEC_TEST(urshr_4h_4h_16,  "urshr v6.4h,  v27.4h, #16", 6, 27)
   4283 GEN_TWOVEC_TEST(urshr_16b_16b_1, "urshr v6.16b, v27.16b, #1", 6, 27)
   4284 GEN_TWOVEC_TEST(urshr_16b_16b_3, "urshr v6.16b, v27.16b, #3", 6, 27)
   4285 GEN_TWOVEC_TEST(urshr_16b_16b_8, "urshr v6.16b, v27.16b, #8", 6, 27)
   4286 GEN_TWOVEC_TEST(urshr_8b_8b_1,   "urshr v6.8b,  v27.8b, #1",  6, 27)
   4287 GEN_TWOVEC_TEST(urshr_8b_8b_3,   "urshr v6.8b,  v27.8b, #3",  6, 27)
   4288 GEN_TWOVEC_TEST(urshr_8b_8b_8,   "urshr v6.8b,  v27.8b, #8",  6, 27)
   4289 
   // srsra / ursra (signed / unsigned rounding shift right and accumulate),
   // scalar D-register form, with shift amounts 1, 32 and 64.
   4290 GEN_TWOVEC_TEST(srsra_d_d_1,  "srsra d5, d28, #1",  5, 28)
   4291 GEN_TWOVEC_TEST(srsra_d_d_32, "srsra d5, d28, #32", 5, 28)
   4292 GEN_TWOVEC_TEST(srsra_d_d_64, "srsra d5, d28, #64", 5, 28)
   4293 GEN_TWOVEC_TEST(ursra_d_d_1,  "ursra d5, d28, #1",  5, 28)
   4294 GEN_TWOVEC_TEST(ursra_d_d_32, "ursra d5, d28, #32", 5, 28)
   4295 GEN_TWOVEC_TEST(ursra_d_d_64, "ursra d5, d28, #64", 5, 28)
   4296 
   // srsra / ursra, vector forms, for every arrangement from .2d down to
   // .8b.  Each arrangement is tested with three shift amounts: 1, an
   // intermediate value, and the lane width in bits.  Destination is v6 and
   // source is v27 throughout.
   4297 GEN_TWOVEC_TEST(srsra_2d_2d_1,   "srsra v6.2d,  v27.2d, #1",  6, 27)
   4298 GEN_TWOVEC_TEST(srsra_2d_2d_32,  "srsra v6.2d,  v27.2d, #32", 6, 27)
   4299 GEN_TWOVEC_TEST(srsra_2d_2d_64,  "srsra v6.2d,  v27.2d, #64", 6, 27)
   4300 GEN_TWOVEC_TEST(srsra_4s_4s_1,   "srsra v6.4s,  v27.4s, #1",  6, 27)
   4301 GEN_TWOVEC_TEST(srsra_4s_4s_16,  "srsra v6.4s,  v27.4s, #16", 6, 27)
   4302 GEN_TWOVEC_TEST(srsra_4s_4s_32,  "srsra v6.4s,  v27.4s, #32", 6, 27)
   4303 GEN_TWOVEC_TEST(srsra_2s_2s_1,   "srsra v6.2s,  v27.2s, #1",  6, 27)
   4304 GEN_TWOVEC_TEST(srsra_2s_2s_16,  "srsra v6.2s,  v27.2s, #16", 6, 27)
   4305 GEN_TWOVEC_TEST(srsra_2s_2s_32,  "srsra v6.2s,  v27.2s, #32", 6, 27)
   4306 GEN_TWOVEC_TEST(srsra_8h_8h_1,   "srsra v6.8h,  v27.8h, #1",  6, 27)
   4307 GEN_TWOVEC_TEST(srsra_8h_8h_8,   "srsra v6.8h,  v27.8h, #8",  6, 27)
   4308 GEN_TWOVEC_TEST(srsra_8h_8h_16,  "srsra v6.8h,  v27.8h, #16", 6, 27)
   4309 GEN_TWOVEC_TEST(srsra_4h_4h_1,   "srsra v6.4h,  v27.4h, #1",  6, 27)
   4310 GEN_TWOVEC_TEST(srsra_4h_4h_8,   "srsra v6.4h,  v27.4h, #8",  6, 27)
   4311 GEN_TWOVEC_TEST(srsra_4h_4h_16,  "srsra v6.4h,  v27.4h, #16", 6, 27)
   4312 GEN_TWOVEC_TEST(srsra_16b_16b_1, "srsra v6.16b, v27.16b, #1", 6, 27)
   4313 GEN_TWOVEC_TEST(srsra_16b_16b_3, "srsra v6.16b, v27.16b, #3", 6, 27)
   4314 GEN_TWOVEC_TEST(srsra_16b_16b_8, "srsra v6.16b, v27.16b, #8", 6, 27)
   4315 GEN_TWOVEC_TEST(srsra_8b_8b_1,   "srsra v6.8b,  v27.8b, #1",  6, 27)
   4316 GEN_TWOVEC_TEST(srsra_8b_8b_3,   "srsra v6.8b,  v27.8b, #3",  6, 27)
   4317 GEN_TWOVEC_TEST(srsra_8b_8b_8,   "srsra v6.8b,  v27.8b, #8",  6, 27)
   4318 GEN_TWOVEC_TEST(ursra_2d_2d_1,   "ursra v6.2d,  v27.2d, #1",  6, 27)
   4319 GEN_TWOVEC_TEST(ursra_2d_2d_32,  "ursra v6.2d,  v27.2d, #32", 6, 27)
   4320 GEN_TWOVEC_TEST(ursra_2d_2d_64,  "ursra v6.2d,  v27.2d, #64", 6, 27)
   4321 GEN_TWOVEC_TEST(ursra_4s_4s_1,   "ursra v6.4s,  v27.4s, #1",  6, 27)
   4322 GEN_TWOVEC_TEST(ursra_4s_4s_16,  "ursra v6.4s,  v27.4s, #16", 6, 27)
   4323 GEN_TWOVEC_TEST(ursra_4s_4s_32,  "ursra v6.4s,  v27.4s, #32", 6, 27)
   4324 GEN_TWOVEC_TEST(ursra_2s_2s_1,   "ursra v6.2s,  v27.2s, #1",  6, 27)
   4325 GEN_TWOVEC_TEST(ursra_2s_2s_16,  "ursra v6.2s,  v27.2s, #16", 6, 27)
   4326 GEN_TWOVEC_TEST(ursra_2s_2s_32,  "ursra v6.2s,  v27.2s, #32", 6, 27)
   4327 GEN_TWOVEC_TEST(ursra_8h_8h_1,   "ursra v6.8h,  v27.8h, #1",  6, 27)
   4328 GEN_TWOVEC_TEST(ursra_8h_8h_8,   "ursra v6.8h,  v27.8h, #8",  6, 27)
   4329 GEN_TWOVEC_TEST(ursra_8h_8h_16,  "ursra v6.8h,  v27.8h, #16", 6, 27)
   4330 GEN_TWOVEC_TEST(ursra_4h_4h_1,   "ursra v6.4h,  v27.4h, #1",  6, 27)
   4331 GEN_TWOVEC_TEST(ursra_4h_4h_8,   "ursra v6.4h,  v27.4h, #8",  6, 27)
   4332 GEN_TWOVEC_TEST(ursra_4h_4h_16,  "ursra v6.4h,  v27.4h, #16", 6, 27)
   4333 GEN_TWOVEC_TEST(ursra_16b_16b_1, "ursra v6.16b, v27.16b, #1", 6, 27)
   4334 GEN_TWOVEC_TEST(ursra_16b_16b_3, "ursra v6.16b, v27.16b, #3", 6, 27)
   4335 GEN_TWOVEC_TEST(ursra_16b_16b_8, "ursra v6.16b, v27.16b, #8", 6, 27)
   4336 GEN_TWOVEC_TEST(ursra_8b_8b_1,   "ursra v6.8b,  v27.8b, #1",  6, 27)
   4337 GEN_TWOVEC_TEST(ursra_8b_8b_3,   "ursra v6.8b,  v27.8b, #3",  6, 27)
   4338 GEN_TWOVEC_TEST(ursra_8b_8b_8,   "ursra v6.8b,  v27.8b, #8",  6, 27)
   4339 
   // sshll / ushll (signed / unsigned shift left long) and the "2" variants,
   // whose source arrangement is the full 128-bit one (4s, 8h, 16b).  The
   // destination lanes are twice as wide as the source lanes.  Each pairing
   // is tested with shift amounts 0, a mid value, and (source lane width - 1).
   // GEN_SHIFT_TEST is defined outside this excerpt; its arguments appear to
   // be (mnemonic, dest arrangement, source arrangement, shift amount).
   4340 GEN_SHIFT_TEST(sshll,  2d, 2s,  0)
   4341 GEN_SHIFT_TEST(sshll,  2d, 2s,  15)
   4342 GEN_SHIFT_TEST(sshll,  2d, 2s,  31)
   4343 GEN_SHIFT_TEST(sshll2, 2d, 4s,  0)
   4344 GEN_SHIFT_TEST(sshll2, 2d, 4s,  15)
   4345 GEN_SHIFT_TEST(sshll2, 2d, 4s,  31)
   4346 GEN_SHIFT_TEST(sshll,  4s, 4h,  0)
   4347 GEN_SHIFT_TEST(sshll,  4s, 4h,  7)
   4348 GEN_SHIFT_TEST(sshll,  4s, 4h,  15)
   4349 GEN_SHIFT_TEST(sshll2, 4s, 8h,  0)
   4350 GEN_SHIFT_TEST(sshll2, 4s, 8h,  7)
   4351 GEN_SHIFT_TEST(sshll2, 4s, 8h,  15)
   4352 GEN_SHIFT_TEST(sshll,  8h, 8b,  0)
   4353 GEN_SHIFT_TEST(sshll,  8h, 8b,  3)
   4354 GEN_SHIFT_TEST(sshll,  8h, 8b,  7)
   4355 GEN_SHIFT_TEST(sshll2, 8h, 16b, 0)
   4356 GEN_SHIFT_TEST(sshll2, 8h, 16b, 3)
   4357 GEN_SHIFT_TEST(sshll2, 8h, 16b, 7)
   4358 GEN_SHIFT_TEST(ushll,  2d, 2s, 0)
   4359 GEN_SHIFT_TEST(ushll,  2d, 2s, 15)
   4360 GEN_SHIFT_TEST(ushll,  2d, 2s, 31)
   4361 GEN_SHIFT_TEST(ushll2, 2d, 4s, 0)
   4362 GEN_SHIFT_TEST(ushll2, 2d, 4s, 15)
   4363 GEN_SHIFT_TEST(ushll2, 2d, 4s, 31)
   4364 GEN_SHIFT_TEST(ushll,  4s, 4h,  0)
   4365 GEN_SHIFT_TEST(ushll,  4s, 4h,  7)
   4366 GEN_SHIFT_TEST(ushll,  4s, 4h,  15)
   4367 GEN_SHIFT_TEST(ushll2, 4s, 8h,  0)
   4368 GEN_SHIFT_TEST(ushll2, 4s, 8h,  7)
   4369 GEN_SHIFT_TEST(ushll2, 4s, 8h,  15)
   4370 GEN_SHIFT_TEST(ushll,  8h, 8b,  0)
   4371 GEN_SHIFT_TEST(ushll,  8h, 8b,  3)
   4372 GEN_SHIFT_TEST(ushll,  8h, 8b,  7)
   4373 GEN_SHIFT_TEST(ushll2, 8h, 16b, 0)
   4374 GEN_SHIFT_TEST(ushll2, 8h, 16b, 3)
   4375 GEN_SHIFT_TEST(ushll2, 8h, 16b, 7)
   4376 
   // suqadd (signed saturating accumulate of an unsigned value) and usqadd
   // (unsigned saturating accumulate of a signed value), scalar forms for
   // the d, s, h and b register widths.  Registers 22 and 23 are used
   // throughout, matching the trailing (22, 23) arguments.
   4377 GEN_TWOVEC_TEST(suqadd_d_d,  "suqadd d22, d23",   22, 23)
   4378 GEN_TWOVEC_TEST(suqadd_s_s,  "suqadd s22, s23",   22, 23)
   4379 GEN_TWOVEC_TEST(suqadd_h_h,  "suqadd h22, h23",   22, 23)
   4380 GEN_TWOVEC_TEST(suqadd_b_b,  "suqadd b22, b23",   22, 23)
   4381 GEN_TWOVEC_TEST(usqadd_d_d,  "usqadd d22, d23",   22, 23)
   4382 GEN_TWOVEC_TEST(usqadd_s_s,  "usqadd s22, s23",   22, 23)
   4383 GEN_TWOVEC_TEST(usqadd_h_h,  "usqadd h22, h23",   22, 23)
   4384 GEN_TWOVEC_TEST(usqadd_b_b,  "usqadd b22, b23",   22, 23)
   4385 
   // suqadd / usqadd, vector forms: one test per arrangement from .2d down
   // to .8b, with v6 as the accumulator/destination and v27 as the source.
   4386 GEN_TWOVEC_TEST(suqadd_2d_2d,   "suqadd v6.2d,  v27.2d",  6, 27)
   4387 GEN_TWOVEC_TEST(suqadd_4s_4s,   "suqadd v6.4s,  v27.4s",  6, 27)
   4388 GEN_TWOVEC_TEST(suqadd_2s_2s,   "suqadd v6.2s,  v27.2s",  6, 27)
   4389 GEN_TWOVEC_TEST(suqadd_8h_8h,   "suqadd v6.8h,  v27.8h",  6, 27)
   4390 GEN_TWOVEC_TEST(suqadd_4h_4h,   "suqadd v6.4h,  v27.4h",  6, 27)
   4391 GEN_TWOVEC_TEST(suqadd_16b_16b, "suqadd v6.16b, v27.16b", 6, 27)
   4392 GEN_TWOVEC_TEST(suqadd_8b_8b,   "suqadd v6.8b,  v27.8b",  6, 27)
   4393 GEN_TWOVEC_TEST(usqadd_2d_2d,   "usqadd v6.2d,  v27.2d",  6, 27)
   4394 GEN_TWOVEC_TEST(usqadd_4s_4s,   "usqadd v6.4s,  v27.4s",  6, 27)
   4395 GEN_TWOVEC_TEST(usqadd_2s_2s,   "usqadd v6.2s,  v27.2s",  6, 27)
   4396 GEN_TWOVEC_TEST(usqadd_8h_8h,   "usqadd v6.8h,  v27.8h",  6, 27)
   4397 GEN_TWOVEC_TEST(usqadd_4h_4h,   "usqadd v6.4h,  v27.4h",  6, 27)
   4398 GEN_TWOVEC_TEST(usqadd_16b_16b, "usqadd v6.16b, v27.16b", 6, 27)
   4399 GEN_TWOVEC_TEST(usqadd_8b_8b,   "usqadd v6.8b,  v27.8b",  6, 27)
   4400 
   // tbl (table lookup) tests with 1 to 4 table registers.  The registers
   // passed to the macro are v21 (destination), v15 and v23 (index vector).
   // The extra table registers v16..v18 are not passed to the macro; each
   // test builds them with eor instructions placed ahead of the tbl in the
   // same instruction string.
   4401 // 1-register table: v15 is the first (and only) table entry
   4402 GEN_THREEVEC_TEST(
   4403    tbl_16b_1reg, "tbl v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
   4404 // 2-register table: adds v16 = v15 ^ v21 as the second table entry
   4405 GEN_THREEVEC_TEST(
   4406    tbl_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4407                  "tbl v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
   4408 // 3-register table: adds v17 = v15 ^ v23 as the third table entry
   4409 GEN_THREEVEC_TEST(
   4410    tbl_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4411                  "eor v17.16b, v15.16b, v23.16b ; "
   4412                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
   4413                  21, 15, 23)
   4414 // 4-register table: adds v18 = v21 ^ v23 as the fourth table entry
   4415 GEN_THREEVEC_TEST(
   4416    tbl_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4417                  "eor v17.16b, v15.16b, v23.16b ; "
   4418                  "eor v18.16b, v21.16b, v23.16b ; "
   4419                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
   4420                  21, 15, 23)
   4421 // Same register scheme for tbl .8b (and for the tbx .16b / tbx .8b tests)
   4422 GEN_THREEVEC_TEST(
   4423    tbl_8b_1reg, "tbl v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
   4424 GEN_THREEVEC_TEST(
   4425    tbl_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4426                 "tbl v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
   4427 GEN_THREEVEC_TEST(
   4428    tbl_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4429                 "eor v17.16b, v15.16b, v23.16b ; "
   4430                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
   4431                 21, 15, 23)
   4432 GEN_THREEVEC_TEST(
   4433    tbl_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4434                 "eor v17.16b, v15.16b, v23.16b ; "
   4435                 "eor v18.16b, v21.16b, v23.16b ; "
   4436                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
   4437                 21, 15, 23)
   4438 
   // tbx (table lookup extension) tests with 1 to 4 table registers, .16b
   // form.  Register scheme: v21 is the destination, v23 the index vector,
   // v15 the first table entry, and v16 = v15 ^ v21, v17 = v15 ^ v23,
   // v18 = v21 ^ v23 are built by the leading eor instructions as needed.
   // Per the Arm ISA, tbx leaves a destination byte unchanged when its index
   // is out of range, so the initial contents of v21 affect the result.
   4439 GEN_THREEVEC_TEST(
   4440    tbx_16b_1reg, "tbx v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
   4441 GEN_THREEVEC_TEST(
   4442    tbx_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4443                  "tbx v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
   4444 GEN_THREEVEC_TEST(
   4445    tbx_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4446                  "eor v17.16b, v15.16b, v23.16b ; "
   4447                  "tbx v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
   4448                  21, 15, 23)
   4449 GEN_THREEVEC_TEST(
   4450    tbx_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4451                  "eor v17.16b, v15.16b, v23.16b ; "
   4452                  "eor v18.16b, v21.16b, v23.16b ; "
   4453                  "tbx v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
   4454                  21, 15, 23)
   4455 // Same register scheme for tbx .8b
   4456 GEN_THREEVEC_TEST(
   4457    tbx_8b_1reg, "tbx v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
   4458 GEN_THREEVEC_TEST(
   4459    tbx_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4460                 "tbx v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
   4461 GEN_THREEVEC_TEST(
   4462    tbx_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4463                 "eor v17.16b, v15.16b, v23.16b ; "
   4464                 "tbx v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
   4465                 21, 15, 23)
   4466 GEN_THREEVEC_TEST(
   4467    tbx_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4468                 "eor v17.16b, v15.16b, v23.16b ; "
   4469                 "eor v18.16b, v21.16b, v23.16b ; "
   4470                 "tbx v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
   4471                 21, 15, 23)
   4472 
   // trn1 / trn2 (transpose vectors, primary / secondary): one test per
   // arrangement from .2d down to .8b, with v1 as destination and v2, v4 as
   // sources.
   4473 GEN_THREEVEC_TEST(trn1_2d_2d_2d,    "trn1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4474 GEN_THREEVEC_TEST(trn1_4s_4s_4s,    "trn1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4475 GEN_THREEVEC_TEST(trn1_2s_2s_2s,    "trn1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4476 GEN_THREEVEC_TEST(trn1_8h_8h_8h,    "trn1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4477 GEN_THREEVEC_TEST(trn1_4h_4h_4h,    "trn1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4478 GEN_THREEVEC_TEST(trn1_16b_16b_16b, "trn1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4479 GEN_THREEVEC_TEST(trn1_8b_8b_8b,    "trn1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4480 GEN_THREEVEC_TEST(trn2_2d_2d_2d,    "trn2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4481 GEN_THREEVEC_TEST(trn2_4s_4s_4s,    "trn2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4482 GEN_THREEVEC_TEST(trn2_2s_2s_2s,    "trn2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4483 GEN_THREEVEC_TEST(trn2_8h_8h_8h,    "trn2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4484 GEN_THREEVEC_TEST(trn2_4h_4h_4h,    "trn2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4485 GEN_THREEVEC_TEST(trn2_16b_16b_16b, "trn2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4486 GEN_THREEVEC_TEST(trn2_8b_8b_8b,    "trn2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4487 
   // urecpe (unsigned reciprocal estimate) and ursqrte (unsigned reciprocal
   // square root estimate).  Only the .4s and .2s arrangements are tested
   // here, with v6 as destination and v27 as source.
   4488 GEN_TWOVEC_TEST(urecpe_4s_4s,   "urecpe v6.4s,  v27.4s",  6, 27)
   4489 GEN_TWOVEC_TEST(urecpe_2s_2s,   "urecpe v6.2s,  v27.2s",  6, 27)
   4490 GEN_TWOVEC_TEST(ursqrte_4s_4s,   "ursqrte v6.4s,  v27.4s",  6, 27)
   4491 GEN_TWOVEC_TEST(ursqrte_2s_2s,   "ursqrte v6.2s,  v27.2s",  6, 27)
   4492 
   // uzp1 / uzp2 (unzip vectors) and zip1 / zip2 (zip vectors): one test per
   // arrangement from .2d down to .8b for each of the four mnemonics, with
   // v1 as destination and v2, v4 as sources.
   4493 GEN_THREEVEC_TEST(uzp1_2d_2d_2d,    "uzp1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4494 GEN_THREEVEC_TEST(uzp1_4s_4s_4s,    "uzp1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4495 GEN_THREEVEC_TEST(uzp1_2s_2s_2s,    "uzp1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4496 GEN_THREEVEC_TEST(uzp1_8h_8h_8h,    "uzp1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4497 GEN_THREEVEC_TEST(uzp1_4h_4h_4h,    "uzp1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4498 GEN_THREEVEC_TEST(uzp1_16b_16b_16b, "uzp1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4499 GEN_THREEVEC_TEST(uzp1_8b_8b_8b,    "uzp1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4500 GEN_THREEVEC_TEST(uzp2_2d_2d_2d,    "uzp2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4501 GEN_THREEVEC_TEST(uzp2_4s_4s_4s,    "uzp2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4502 GEN_THREEVEC_TEST(uzp2_2s_2s_2s,    "uzp2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4503 GEN_THREEVEC_TEST(uzp2_8h_8h_8h,    "uzp2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4504 GEN_THREEVEC_TEST(uzp2_4h_4h_4h,    "uzp2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4505 GEN_THREEVEC_TEST(uzp2_16b_16b_16b, "uzp2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4506 GEN_THREEVEC_TEST(uzp2_8b_8b_8b,    "uzp2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4507 GEN_THREEVEC_TEST(zip1_2d_2d_2d,    "zip1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4508 GEN_THREEVEC_TEST(zip1_4s_4s_4s,    "zip1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4509 GEN_THREEVEC_TEST(zip1_2s_2s_2s,    "zip1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4510 GEN_THREEVEC_TEST(zip1_8h_8h_8h,    "zip1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4511 GEN_THREEVEC_TEST(zip1_4h_4h_4h,    "zip1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4512 GEN_THREEVEC_TEST(zip1_16b_16b_16b, "zip1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4513 GEN_THREEVEC_TEST(zip1_8b_8b_8b,    "zip1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4514 GEN_THREEVEC_TEST(zip2_2d_2d_2d,    "zip2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4515 GEN_THREEVEC_TEST(zip2_4s_4s_4s,    "zip2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4516 GEN_THREEVEC_TEST(zip2_2s_2s_2s,    "zip2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4517 GEN_THREEVEC_TEST(zip2_8h_8h_8h,    "zip2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4518 GEN_THREEVEC_TEST(zip2_4h_4h_4h,    "zip2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4519 GEN_THREEVEC_TEST(zip2_16b_16b_16b, "zip2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4520 GEN_THREEVEC_TEST(zip2_8b_8b_8b,    "zip2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4521 
   // xtn / xtn2 (extract narrow): each source lane is narrowed to half its
   // width.  The xtn2 tests use the full 128-bit destination arrangement
   // (4s, 8h, 16b).  GEN_UNARY_TEST is defined outside this excerpt; its
   // arguments appear to be (mnemonic, dest arrangement, source arrangement).
   4522 GEN_UNARY_TEST(xtn,  2s, 2d)
   4523 GEN_UNARY_TEST(xtn2, 4s, 2d)
   4524 GEN_UNARY_TEST(xtn,  4h, 4s)
   4525 GEN_UNARY_TEST(xtn2, 8h, 4s)
   4526 GEN_UNARY_TEST(xtn,  8b, 8h)
   4527 GEN_UNARY_TEST(xtn2, 16b, 8h)
   4528 
   4529 // ======================== MEM ========================
   4530 
   4531 // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
   4532 
   4533 // ======================== CRYPTO ========================
   4534 
   4535 // These tests are believed to be correct but are disabled because
   4536 // GNU assembler (GNU Binutils) 2.24.0.20140311 Linaro 2014.03
   4537 // cannot be persuaded to accept those instructions (AFAICT).
   4538 
   4539 //GEN_TWOVEC_TEST(aesd_16b_16b,    "aesd v6.16b,  v27.16b",  6, 27)
   4540 //GEN_TWOVEC_TEST(aese_16b_16b,    "aese v6.16b,  v27.16b",  6, 27)
   4541 //GEN_TWOVEC_TEST(aesimc_16b_16b,  "aesimc v6.16b,  v27.16b",  6, 27)
   4542 //GEN_TWOVEC_TEST(aesmc_16b_16b,   "aesmc v6.16b,  v27.16b",  6, 27)
   4543 //
   4544 //GEN_THREEVEC_TEST(sha1c_q_s_4s,     "sha1c q29, s28, v27.4s", 29,28,27)
   4545 //GEN_TWOVEC_TEST(sha1h_s_s,          "sha1h s6,  s27",  6, 27)
   4546 //GEN_THREEVEC_TEST(sha1m_q_s_4s,     "sha1m q29, s28, v27.4s", 29,28,27)
   4547 //GEN_THREEVEC_TEST(sha1p_q_s_4s,     "sha1p q29, s28, v27.4s", 29,28,27)
   4548 //GEN_THREEVEC_TEST(sha1su0_4s_4s_4s, "sha1su0 v29.4s, v28.4s, v27.4s", 29,28,27)
   4549 //GEN_TWOVEC_TEST(sha1su1_4s_4s,      "sha1su1 v6.4s,  v27.4s",  6, 27)
   4550 //
   4551 //GEN_THREEVEC_TEST(sha256h2_q_q_4s,  "sha256h2 q29, q28, v27.4s", 29,28,27)
   4552 //GEN_THREEVEC_TEST(sha256h_q_q_4s,   "sha256h q29, q28, v27.4s", 29,28,27)
   4553 //GEN_TWOVEC_TEST(sha256su0_4s_4s,    "sha256su0 v6.4s,  v27.4s",  6, 27)
   4554 //GEN_THREEVEC_TEST(sha256su1_4s_4s_4s, "sha256su1 v29.4s, v28.4s, v27.4s",
   4555 //                                      29,28,27)
   4556 
   4557 
   4558 /* ---------------------------------------------------------------- */
   4559 /* -- main()                                                     -- */
   4560 /* ---------------------------------------------------------------- */
   4561 
   4562 int main ( void )
   4563 {
   4564    assert(sizeof(V128) == 16);
   4565 
   4566    // ======================== FP ========================
   4567 
   4568    // fabs      d,s
   4569    // fabs      2d,4s,2s
   4570    if (1) test_fabs_d_d(TyDF);
   4571    if (1) test_fabs_s_s(TySF);
   4572    if (1) test_fabs_2d_2d(TyDF);
   4573    if (1) test_fabs_4s_4s(TySF);
   4574    if (1) test_fabs_2s_2s(TyDF);
   4575 
   4576    // fneg      d,s
   4577    // fneg      2d,4s,2s
   4578    if (1) test_fneg_d_d(TyDF);
   4579    if (1) test_fneg_s_s(TySF);
   4580    if (1) test_fneg_2d_2d(TySF);
   4581    if (1) test_fneg_4s_4s(TyDF);
   4582    if (1) test_fneg_2s_2s(TySF);
   4583 
   4584    // fsqrt     d,s
   4585    // fsqrt     2d,4s,2s
   4586    if (1) test_fsqrt_d_d(TyDF);
   4587    if (1) test_fsqrt_s_s(TySF);
   4588    if (1) test_fsqrt_2d_2d(TySF);
   4589    if (1) test_fsqrt_4s_4s(TyDF);
   4590    if (1) test_fsqrt_2s_2s(TySF);
   4591 
   4592    // fadd      d,s
   4593    // fsub      d,s
   4594    if (1) test_fadd_d_d_d(TyDF);
   4595    if (1) test_fadd_s_s_s(TySF);
   4596    if (1) test_fsub_d_d_d(TyDF);
   4597    if (1) test_fsub_s_s_s(TySF);
   4598 
   4599    // fadd      2d,4s,2s
   4600    // fsub      2d,4s,2s
   4601    if (1) test_fadd_2d_2d_2d(TyDF);
   4602    if (1) test_fadd_4s_4s_4s(TySF);
   4603    if (1) test_fadd_2s_2s_2s(TySF);
   4604    if (1) test_fsub_2d_2d_2d(TyDF);
   4605    if (1) test_fsub_4s_4s_4s(TySF);
   4606    if (1) test_fsub_2s_2s_2s(TySF);
   4607 
   4608    // fabd      d,s
   4609    // fabd      2d,4s,2s
   4610    if (1) test_fabd_d_d_d(TyDF);
   4611    if (1) test_fabd_s_s_s(TySF);
   4612    if (1) test_fabd_2d_2d_2d(TyDF);
   4613    if (1) test_fabd_4s_4s_4s(TySF);
   4614    if (1) test_fabd_2s_2s_2s(TySF);
   4615 
   4616    // faddp     d,s (floating add pair)
   4617    // faddp     2d,4s,2s
   4618    if (1) test_faddp_d_2d(TyDF);
   4619    if (1) test_faddp_s_2s(TySF);
   4620    if (1) test_faddp_2d_2d_2d(TySF);
   4621    if (1) test_faddp_4s_4s_4s(TyDF);
   4622    if (1) test_faddp_2s_2s_2s(TySF);
   4623 
   4624    // fccmp     d,s (floating point conditional quiet compare)
   4625    // fccmpe    d,s (floating point conditional signaling compare)
   4626    if (1) DO50( test_FCCMP_D_D_0xF_EQ() );
   4627    if (1) DO50( test_FCCMP_D_D_0xF_NE() );
   4628    if (1) DO50( test_FCCMP_D_D_0x0_EQ() );
   4629    if (1) DO50( test_FCCMP_D_D_0x0_NE() );
   4630    if (1) DO50( test_FCCMP_S_S_0xF_EQ() );
   4631    if (1) DO50( test_FCCMP_S_S_0xF_NE() );
   4632    if (1) DO50( test_FCCMP_S_S_0x0_EQ() );
   4633    if (1) DO50( test_FCCMP_S_S_0x0_NE() );
   4634    if (1) DO50( test_FCCMPE_D_D_0xF_EQ() );
   4635    if (1) DO50( test_FCCMPE_D_D_0xF_NE() );
   4636    if (1) DO50( test_FCCMPE_D_D_0x0_EQ() );
   4637    if (1) DO50( test_FCCMPE_D_D_0x0_NE() );
   4638    if (1) DO50( test_FCCMPE_S_S_0xF_EQ() );
   4639    if (1) DO50( test_FCCMPE_S_S_0xF_NE() );
   4640    if (1) DO50( test_FCCMPE_S_S_0x0_EQ() );
   4641    if (1) DO50( test_FCCMPE_S_S_0x0_NE() );
   4642 
   4643    // fcmeq     d,s
   4644    // fcmge     d,s
   4645    // fcmgt     d,s
   4646    // facgt     d,s  (floating abs compare GT)
   4647    // facge     d,s  (floating abs compare GE)
   4648    if (1) DO50( test_FCMEQ_D_D_D() );
   4649    if (1) DO50( test_FCMEQ_S_S_S() );
   4650    if (1) DO50( test_FCMGE_D_D_D() );
   4651    if (1) DO50( test_FCMGE_S_S_S() );
   4652    if (1) DO50( test_FCMGT_D_D_D() );
   4653    if (1) DO50( test_FCMGT_S_S_S() );
   4654    if (1) DO50( test_FACGT_D_D_D() );
   4655    if (1) DO50( test_FACGT_S_S_S() );
   4656    if (1) DO50( test_FACGE_D_D_D() );
   4657    if (1) DO50( test_FACGE_S_S_S() );
   4658 
   4659    // fcmeq     2d,4s,2s
   4660    // fcmge     2d,4s,2s
   4661    // fcmgt     2d,4s,2s
   4662    // facge     2d,4s,2s
   4663    // facgt     2d,4s,2s
   4664    if (1) test_fcmeq_2d_2d_2d(TyDF);
   4665    if (1) test_fcmeq_4s_4s_4s(TySF);
   4666    if (1) test_fcmeq_2s_2s_2s(TySF);
   4667    if (1) test_fcmge_2d_2d_2d(TyDF);
   4668    if (1) test_fcmge_4s_4s_4s(TySF);
   4669    if (1) test_fcmge_2s_2s_2s(TySF);
   4670    if (1) test_fcmgt_2d_2d_2d(TyDF);
   4671    if (1) test_fcmgt_4s_4s_4s(TySF);
   4672    if (1) test_fcmgt_2s_2s_2s(TySF);
   4673    if (1) test_facge_2d_2d_2d(TyDF);
   4674    if (1) test_facge_4s_4s_4s(TySF);
   4675    if (1) test_facge_2s_2s_2s(TySF);
   4676    if (1) test_facgt_2d_2d_2d(TyDF);
   4677    if (1) test_facgt_4s_4s_4s(TySF);
   4678    if (1) test_facgt_2s_2s_2s(TySF);
   4679 
   4680    // fcmeq_z   d,s
   4681    // fcmge_z   d,s
   4682    // fcmgt_z   d,s
   4683    // fcmle_z   d,s
   4684    // fcmlt_z   d,s
   4685    if (1) DO50( test_FCMEQ_D_D_Z() );
   4686    if (1) DO50( test_FCMEQ_S_S_Z() );
   4687    if (1) DO50( test_FCMGE_D_D_Z() );
   4688    if (1) DO50( test_FCMGE_S_S_Z() );
   4689    if (1) DO50( test_FCMGT_D_D_Z() );
   4690    if (1) DO50( test_FCMGT_S_S_Z() );
   4691    if (1) DO50( test_FCMLE_D_D_Z() );
   4692    if (1) DO50( test_FCMLE_S_S_Z() );
   4693    if (1) DO50( test_FCMLT_D_D_Z() );
   4694    if (1) DO50( test_FCMLT_S_S_Z() );
   4695 
   4696    // fcmeq_z   2d,4s,2s
   4697    // fcmge_z   2d,4s,2s
   4698    // fcmgt_z   2d,4s,2s
   4699    // fcmle_z   2d,4s,2s
   4700    // fcmlt_z   2d,4s,2s
   4701    if (1) test_fcmeq_z_2d_2d(TyDF);
   4702    if (1) test_fcmeq_z_4s_4s(TySF);
   4703    if (1) test_fcmeq_z_2s_2s(TySF);
   4704    if (1) test_fcmge_z_2d_2d(TyDF);
   4705    if (1) test_fcmge_z_4s_4s(TySF);
   4706    if (1) test_fcmge_z_2s_2s(TySF);
   4707    if (1) test_fcmgt_z_2d_2d(TyDF);
   4708    if (1) test_fcmgt_z_4s_4s(TySF);
   4709    if (1) test_fcmgt_z_2s_2s(TySF);
   4710    if (1) test_fcmle_z_2d_2d(TyDF);
   4711    if (1) test_fcmle_z_4s_4s(TySF);
   4712    if (1) test_fcmle_z_2s_2s(TySF);
   4713    if (1) test_fcmlt_z_2d_2d(TyDF);
   4714    if (1) test_fcmlt_z_4s_4s(TySF);
   4715    if (1) test_fcmlt_z_2s_2s(TySF);
   4716 
   4717    // fcmp_z    d,s
   4718    // fcmpe_z   d,s
   4719    // fcmp      d,s (floating point quiet, set flags)
   4720    // fcmpe     d,s (floating point signaling, set flags)
   4721    if (1) DO50( test_FCMP_D_Z() );
   4722    if (1) DO50( test_FCMP_S_Z() );
   4723    if (1) DO50( test_FCMPE_D_Z() );
   4724    if (1) DO50( test_FCMPE_S_Z() );
   4725    if (1) DO50( test_FCMP_D_D() );
   4726    if (1) DO50( test_FCMP_S_S() );
   4727    if (1) DO50( test_FCMPE_D_D() );
   4728    if (1) DO50( test_FCMPE_S_S() );
   4729 
   4730    // fcsel     d,s (fp cond select)
   4731    if (1) DO50( test_FCSEL_D_D_D_EQ() );
   4732    if (1) DO50( test_FCSEL_D_D_D_NE() );
   4733    if (1) DO50( test_FCSEL_S_S_S_EQ() );
   4734    if (1) DO50( test_FCSEL_S_S_S_NE() );
   4735 
   4736    // fdiv      d,s
   4737    // fdiv      2d,4s,2s
   4738    if (1) test_fdiv_d_d_d(TyDF);
   4739    if (1) test_fdiv_s_s_s(TySF);
   4740    if (1) test_fdiv_2d_2d_2d(TyDF);
   4741    if (1) test_fdiv_4s_4s_4s(TySF);
   4742    if (1) test_fdiv_2s_2s_2s(TySF);
   4743 
   4744    // fmadd     d,s
   4745    // fnmadd    d,s
   4746    // fmsub     d,s
   4747    // fnmsub    d,s
   4748    if (1) test_fmadd_d_d_d_d(TyDF);
   4749    if (1) test_fmadd_s_s_s_s(TySF);
   4750    if (1) test_fnmadd_d_d_d_d(TyDF);
   4751    if (1) test_fnmadd_s_s_s_s(TySF);
   4752    if (1) test_fmsub_d_d_d_d(TyDF);
   4753    if (1) test_fmsub_s_s_s_s(TySF);
   4754    if (1) test_fnmsub_d_d_d_d(TyDF);
   4755    if (1) test_fnmsub_s_s_s_s(TySF);
   4756 
   4757    // fnmul     d,s
   4758    if (1) test_fnmul_d_d_d(TyDF);
   4759    if (1) test_fnmul_s_s_s(TySF);
   4760 
   4761    // fmax      d,s
   4762    // fmin      d,s
   4763    // fmaxnm    d,s ("max number")
   4764    // fminnm    d,s
   4765    if (1) test_fmax_d_d_d(TyDF);
   4766    if (1) test_fmax_s_s_s(TySF);
   4767    if (1) test_fmin_d_d_d(TyDF);
   4768    if (1) test_fmin_s_s_s(TySF);
   4769    if (1) test_fmaxnm_d_d_d(TyDF);
   4770    if (1) test_fmaxnm_s_s_s(TySF);
   4771    if (1) test_fminnm_d_d_d(TyDF);
   4772    if (1) test_fminnm_s_s_s(TySF);
   4773 
   4774    // fmax      2d,4s,2s
   4775    // fmin      2d,4s,2s
   4776    // fmaxnm    2d,4s,2s
   4777    // fminnm    2d,4s,2s
   4778    if (1) test_fmax_2d_2d_2d(TyDF);
   4779    if (1) test_fmax_4s_4s_4s(TySF);
   4780    if (1) test_fmax_2s_2s_2s(TySF);
   4781    if (1) test_fmin_2d_2d_2d(TyDF);
   4782    if (1) test_fmin_4s_4s_4s(TySF);
   4783    if (1) test_fmin_2s_2s_2s(TySF);
   4784    if (1) test_fmaxnm_2d_2d_2d(TyDF);
   4785    if (1) test_fmaxnm_4s_4s_4s(TySF);
   4786    if (1) test_fmaxnm_2s_2s_2s(TySF);
   4787    if (1) test_fminnm_2d_2d_2d(TyDF);
   4788    if (1) test_fminnm_4s_4s_4s(TySF);
   4789    if (1) test_fminnm_2s_2s_2s(TySF);
   4790 
   4791    // fmaxnmp   d_2d,s_2s ("max number pairwise")
   4792    // fminnmp   d_2d,s_2s
   4793    if (1) test_fmaxnmp_d_2d(TyDF);
   4794    if (1) test_fmaxnmp_s_2s(TySF);
   4795    if (1) test_fminnmp_d_2d(TyDF);
   4796    if (1) test_fminnmp_s_2s(TySF);
   4797 
   4798    // fmaxnmp   2d,4s,2s
   4799    // fminnmp   2d,4s,2s
   4800    if (1) test_fmaxnmp_2d_2d_2d(TyDF);
   4801    if (1) test_fmaxnmp_4s_4s_4s(TySF);
   4802    if (1) test_fmaxnmp_2s_2s_2s(TySF);
   4803    if (1) test_fminnmp_2d_2d_2d(TyDF);
   4804    if (1) test_fminnmp_4s_4s_4s(TySF);
   4805    if (1) test_fminnmp_2s_2s_2s(TySF);
   4806 
   4807    // fmaxnmv   s_4s (maxnum across vector)
   4808    // fminnmv   s_4s
   4809    if (1) test_fmaxnmv_s_4s(TySF);
   4810    if (1) test_fminnmv_s_4s(TySF);
   4811 
   4812    // fmaxp     d_2d,s_2s (max of a pair)
   4813    // fminp     d_2d,s_2s (min of a pair)
   4814    if (1) test_fmaxp_d_2d(TyDF);
   4815    if (1) test_fmaxp_s_2s(TySF);
   4816    if (1) test_fminp_d_2d(TyDF);
   4817    if (1) test_fminp_s_2s(TySF);
   4818 
   4819    // fmaxp     2d,4s,2s  (max pairwise)
   4820    // fminp     2d,4s,2s
   4821    if (1) test_fmaxp_2d_2d_2d(TyDF);
   4822    if (1) test_fmaxp_4s_4s_4s(TySF);
   4823    if (1) test_fmaxp_2s_2s_2s(TySF);
   4824    if (1) test_fminp_2d_2d_2d(TyDF);
   4825    if (1) test_fminp_4s_4s_4s(TySF);
   4826    if (1) test_fminp_2s_2s_2s(TySF);
   4827 
   4828    // fmaxv     s_4s (max across vector)
   4829    // fminv     s_4s
   4830    if (1) test_fmaxv_s_4s(TySF);
   4831    if (1) test_fminv_s_4s(TySF);
   4832 
   4833    // fmla      2d,4s,2s
   4834    // fmls      2d,4s,2s
   4835    if (1) test_fmla_2d_2d_2d(TyDF);
   4836    if (1) test_fmla_4s_4s_4s(TySF);
   4837    if (1) test_fmla_2s_2s_2s(TySF);
   4838    if (1) test_fmls_2d_2d_2d(TyDF);
   4839    if (1) test_fmls_4s_4s_4s(TySF);
   4840    if (1) test_fmls_2s_2s_2s(TySF);
   4841 
   4842    // fmla      d_d_d[],s_s_s[] (by element)
   4843    // fmls      d_d_d[],s_s_s[] (by element)
   4844    if (1) test_fmla_d_d_d0(TyDF);
   4845    if (1) test_fmla_d_d_d1(TyDF);
   4846    if (1) test_fmla_s_s_s0(TySF);
   4847    if (1) test_fmla_s_s_s3(TySF);
   4848    if (1) test_fmls_d_d_d0(TyDF);
   4849    if (1) test_fmls_d_d_d1(TyDF);
   4850    if (1) test_fmls_s_s_s0(TySF);
   4851    if (1) test_fmls_s_s_s3(TySF);
   4852 
   4853    // fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4854    // fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4855    if (1) test_fmla_2d_2d_d0(TyDF);
   4856    if (1) test_fmla_2d_2d_d1(TyDF);
   4857    if (1) test_fmla_4s_4s_s0(TySF);
   4858    if (1) test_fmla_4s_4s_s3(TySF);
   4859    if (1) test_fmla_2s_2s_s0(TySF);
   4860    if (1) test_fmla_2s_2s_s3(TySF);
   4861    if (1) test_fmls_2d_2d_d0(TyDF);
   4862    if (1) test_fmls_2d_2d_d1(TyDF);
   4863    if (1) test_fmls_4s_4s_s0(TySF);
   4864    if (1) test_fmls_4s_4s_s3(TySF);
   4865    if (1) test_fmls_2s_2s_s0(TySF);
   4866    if (1) test_fmls_2s_2s_s3(TySF);
   4867 
   4868    // fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   4869    if (1) test_fmov_2d_imm_01(TyD);
   4870    if (1) test_fmov_2d_imm_02(TyD);
   4871    if (1) test_fmov_2d_imm_03(TyD);
   4872    if (1) test_fmov_4s_imm_01(TyS);
   4873    if (1) test_fmov_4s_imm_02(TyS);
   4874    if (1) test_fmov_4s_imm_03(TyS);
   4875    if (1) test_fmov_2s_imm_01(TyS);
   4876    if (1) test_fmov_2s_imm_02(TyS);
   4877    if (1) test_fmov_2s_imm_03(TyS);
   4878 
   4879    // fmov      d_d,s_s
   4880    if (1) test_fmov_d_d(TyDF);
   4881    if (1) test_fmov_s_s(TySF);
   4882 
   4883    // fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   4884    if (1) test_fmov_s_w(TyS);
   4885    if (1) test_fmov_d_x(TyD);
   4886    if (1) test_fmov_d1_x(TyD);
   4887    if (1) test_fmov_w_s(TyS);
   4888    if (1) test_fmov_x_d(TyD);
   4889    if (1) test_fmov_x_d1(TyD);
   4890 
   4891    // fmov      d,s #imm
   4892    if (1) test_fmov_d_imm_01(TyNONE);
   4893    if (1) test_fmov_d_imm_02(TyNONE);
   4894    if (1) test_fmov_d_imm_03(TyNONE);
   4895    if (1) test_fmov_s_imm_01(TyNONE);
   4896    if (1) test_fmov_s_imm_02(TyNONE);
   4897    if (1) test_fmov_s_imm_03(TyNONE);
   4898 
   4899    // fmul      d_d_d[],s_s_s[]
   4900    if (1) test_fmul_d_d_d0(TyDF);
   4901    if (1) test_fmul_d_d_d1(TyDF);
   4902    if (1) test_fmul_s_s_s0(TySF);
   4903    if (1) test_fmul_s_s_s3(TySF);
   4904 
   4905    // fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4906    if (1) test_fmul_2d_2d_d0(TyDF);
   4907    if (1) test_fmul_2d_2d_d1(TyDF);
   4908    if (1) test_fmul_4s_4s_s0(TySF);
   4909    if (1) test_fmul_4s_4s_s3(TySF);
   4910    if (1) test_fmul_2s_2s_s0(TySF);
   4911    if (1) test_fmul_2s_2s_s3(TySF);
   4912 
   4913    // fmul      d,s
   4914    // fmul      2d,4s,2s
   4915    if (1) test_fmul_d_d_d(TyDF);
   4916    if (1) test_fmul_s_s_s(TySF);
   4917    if (1) test_fmul_2d_2d_2d(TyDF);
   4918    if (1) test_fmul_4s_4s_4s(TySF);
   4919    if (1) test_fmul_2s_2s_2s(TySF);
   4920 
   4921    // fmulx     d_d_d[],s_s_s[]
   4922    // fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4923    if (1) test_fmulx_d_d_d0(TyDF);
   4924    if (1) test_fmulx_d_d_d1(TyDF);
   4925    if (1) test_fmulx_s_s_s0(TySF);
   4926    if (1) test_fmulx_s_s_s3(TySF);
   4927    if (1) test_fmulx_2d_2d_d0(TyDF);
   4928    if (1) test_fmulx_2d_2d_d1(TyDF);
   4929    if (1) test_fmulx_4s_4s_s0(TySF);
   4930    if (1) test_fmulx_4s_4s_s3(TySF);
   4931    if (1) test_fmulx_2s_2s_s0(TySF);
   4932    if (1) test_fmulx_2s_2s_s3(TySF);
   4933 
   4934    // fmulx     d,s
   4935    // fmulx     2d,4s,2s
   4936    if (1) test_fmulx_d_d_d(TyDF);
   4937    if (1) test_fmulx_s_s_s(TySF);
   4938    if (1) test_fmulx_2d_2d_2d(TyDF);
   4939    if (1) test_fmulx_4s_4s_4s(TySF);
   4940    if (1) test_fmulx_2s_2s_2s(TySF);
   4941 
   4942    // frecpe    d,s (recip estimate)
   4943    // frecpe    2d,4s,2s
   4944    if (1) test_frecpe_d_d(TyDF);
   4945    if (1) test_frecpe_s_s(TySF);
   4946    if (1) test_frecpe_2d_2d(TyDF);
   4947    if (1) test_frecpe_4s_4s(TySF);
   4948    if (1) test_frecpe_2s_2s(TySF);
   4949 
   4950    // frecps    d,s (recip step)
   4951    // frecps    2d,4s,2s
   4952    if (1) test_frecps_d_d_d(TyDF);
   4953    if (1) test_frecps_s_s_s(TySF);
   4954    if (1) test_frecps_2d_2d_2d(TyDF);
   4955    if (1) test_frecps_4s_4s_4s(TySF);
   4956    if (1) test_frecps_2s_2s_2s(TySF);
   4957 
   4958    // frecpx    d,s (recip exponent)
   4959    if (1) test_frecpx_d_d(TyDF);
   4960    if (1) test_frecpx_s_s(TySF);
   4961 
   4962    // frinta    d,s
   4963    // frinti    d,s
   4964    // frintm    d,s
   4965    // frintn    d,s
   4966    // frintp    d,s
   4967    // frintx    d,s
   4968    // frintz    d,s
   4969    if (1) test_frinta_d_d(TyDF);
   4970    if (1) test_frinta_s_s(TySF);
   4971    if (1) test_frinti_d_d(TyDF);
   4972    if (1) test_frinti_s_s(TySF);
   4973    if (1) test_frintm_d_d(TyDF);
   4974    if (1) test_frintm_s_s(TySF);
   4975    if (1) test_frintn_d_d(TyDF);
   4976    if (1) test_frintn_s_s(TySF);
   4977    if (1) test_frintp_d_d(TyDF);
   4978    if (1) test_frintp_s_s(TySF);
   4979    if (1) test_frintx_d_d(TyDF);
   4980    if (1) test_frintx_s_s(TySF);
   4981    if (1) test_frintz_d_d(TyDF);
   4982    if (1) test_frintz_s_s(TySF);
   4983 
   4984    // frinta    2d,4s,2s (round to integral, nearest away)
   4985    // frinti    2d,4s,2s (round to integral, per FPCR)
   4986    // frintm    2d,4s,2s (round to integral, minus inf)
   4987    // frintn    2d,4s,2s (round to integral, nearest, to even)
   4988    // frintp    2d,4s,2s (round to integral, plus inf)
   4989    // frintx    2d,4s,2s (round to integral exact, per FPCR)
   4990    // frintz    2d,4s,2s (round to integral, zero)
   4991    if (1) test_frinta_2d_2d(TyDF);
   4992    if (1) test_frinta_4s_4s(TySF);
   4993    if (1) test_frinta_2s_2s(TySF);
   4994    if (1) test_frinti_2d_2d(TyDF);
   4995    if (1) test_frinti_4s_4s(TySF);
   4996    if (1) test_frinti_2s_2s(TySF);
   4997    if (1) test_frintm_2d_2d(TyDF);
   4998    if (1) test_frintm_4s_4s(TySF);
   4999    if (1) test_frintm_2s_2s(TySF);
   5000    if (1) test_frintn_2d_2d(TyDF);
   5001    if (1) test_frintn_4s_4s(TySF);
   5002    if (1) test_frintn_2s_2s(TySF);
   5003    if (1) test_frintp_2d_2d(TyDF);
   5004    if (1) test_frintp_4s_4s(TySF);
   5005    if (1) test_frintp_2s_2s(TySF);
   5006    if (1) test_frintx_2d_2d(TyDF);
   5007    if (1) test_frintx_4s_4s(TySF);
   5008    if (1) test_frintx_2s_2s(TySF);
   5009    if (1) test_frintz_2d_2d(TyDF);
   5010    if (1) test_frintz_4s_4s(TySF);
   5011    if (1) test_frintz_2s_2s(TySF);
   5012 
   5013    // frsqrte   d,s (est)
   5014    // frsqrte   2d,4s,2s
   5015    if (1) test_frsqrte_d_d(TyDF);
   5016    if (1) test_frsqrte_s_s(TySF);
   5017    if (1) test_frsqrte_2d_2d(TyDF);
   5018    if (1) test_frsqrte_4s_4s(TySF);
   5019    if (1) test_frsqrte_2s_2s(TySF);
   5020 
   5021    // frsqrts   d,s (step)
   5022    // frsqrts   2d,4s,2s
   5023    if (1) test_frsqrts_d_d_d(TyDF);
   5024    if (1) test_frsqrts_s_s_s(TySF);
   5025    if (1) test_frsqrts_2d_2d_2d(TyDF);
   5026    if (1) test_frsqrts_4s_4s_4s(TySF);
   5027    if (1) test_frsqrts_2s_2s_2s(TySF);
   5028 
   5029    // ======================== CONV ========================
   5030 
   5031    // fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   5032    if (1) test_fcvt_s_h(TyHF);
   5033    if (1) test_fcvt_d_h(TyHF);
   5034    if (1) test_fcvt_h_s(TySF);
   5035    if (1) test_fcvt_d_s(TySF);
   5036    if (1) test_fcvt_h_d(TyDF);
   5037    if (1) test_fcvt_s_d(TyDF);
   5038 
   5039    // fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   5040    if (1) test_fcvtl_4s_4h(TyHF);
   5041    if (1) test_fcvtl_4s_8h(TyHF);
   5042    if (1) test_fcvtl_2d_2s(TySF);
   5043    if (1) test_fcvtl_2d_4s(TySF);
   5044 
   5045    // fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   5046    if (1) test_fcvtn_4h_4s(TySF);
   5047    if (1) test_fcvtn_8h_4s(TySF);
   5048    if (1) test_fcvtn_2s_2d(TyDF);
   5049    if (1) test_fcvtn_4s_2d(TyDF);
   5050 
   5051    // fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
   5052    // fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
   5053    // fcvtas    2d,4s,2s
   5054    // fcvtau    2d,4s,2s
   5055    // fcvtas    w_s,x_s,w_d,x_d
   5056    // fcvtau    w_s,x_s,w_d,x_d
   5057    if (1) test_fcvtas_d_d(TyDF);
   5058    if (1) test_fcvtau_d_d(TyDF);
   5059    if (1) test_fcvtas_s_s(TySF);
   5060    if (1) test_fcvtau_s_s(TySF);
   5061    if (1) test_fcvtas_2d_2d(TyDF);
   5062    if (1) test_fcvtau_2d_2d(TyDF);
   5063    if (1) test_fcvtas_4s_4s(TySF);
   5064    if (1) test_fcvtau_4s_4s(TySF);
   5065    if (1) test_fcvtas_2s_2s(TySF);
   5066    if (1) test_fcvtau_2s_2s(TySF);
   5067    if (1) test_fcvtas_w_s(TySF);
   5068    if (1) test_fcvtau_w_s(TySF);
   5069    if (1) test_fcvtas_x_s(TySF);
   5070    if (1) test_fcvtau_x_s(TySF);
   5071    if (1) test_fcvtas_w_d(TyDF);
   5072    if (1) test_fcvtau_w_d(TyDF);
   5073    if (1) test_fcvtas_x_d(TyDF);
   5074    if (1) test_fcvtau_x_d(TyDF);
   5075 
   5076    // fcvtms    d,s  (fcvt to signed int,   minus inf)
   5077    // fcvtmu    d,s  (fcvt to unsigned int, minus inf)
   5078    // fcvtms    2d,4s,2s
   5079    // fcvtmu    2d,4s,2s
   5080    // fcvtms    w_s,x_s,w_d,x_d
   5081    // fcvtmu    w_s,x_s,w_d,x_d
   5082    if (1) test_fcvtms_d_d(TyDF);
   5083    if (1) test_fcvtmu_d_d(TyDF);
   5084    if (1) test_fcvtms_s_s(TySF);
   5085    if (1) test_fcvtmu_s_s(TySF);
   5086    if (1) test_fcvtms_2d_2d(TyDF);
   5087    if (1) test_fcvtmu_2d_2d(TyDF);
   5088    if (1) test_fcvtms_4s_4s(TySF);
   5089    if (1) test_fcvtmu_4s_4s(TySF);
   5090    if (1) test_fcvtms_2s_2s(TySF);
   5091    if (1) test_fcvtmu_2s_2s(TySF);
   5092    if (1) test_fcvtms_w_s(TySF);
   5093    if (1) test_fcvtmu_w_s(TySF);
   5094    if (1) test_fcvtms_x_s(TySF);
   5095    if (1) test_fcvtmu_x_s(TySF);
   5096    if (1) test_fcvtms_w_d(TyDF);
   5097    if (1) test_fcvtmu_w_d(TyDF);
   5098    if (1) test_fcvtms_x_d(TyDF);
   5099    if (1) test_fcvtmu_x_d(TyDF);
   5100 
   5101    // fcvtns    d,s  (fcvt to signed int,   nearest)
   5102    // fcvtnu    d,s  (fcvt to unsigned int, nearest)
   5103    // fcvtns    2d,4s,2s
   5104    // fcvtnu    2d,4s,2s
   5105    // fcvtns    w_s,x_s,w_d,x_d
   5106    // fcvtnu    w_s,x_s,w_d,x_d
   5107    if (1) test_fcvtns_d_d(TyDF);
   5108    if (1) test_fcvtnu_d_d(TyDF);
   5109    if (1) test_fcvtns_s_s(TySF);
   5110    if (1) test_fcvtnu_s_s(TySF);
   5111    if (1) test_fcvtns_2d_2d(TyDF);
   5112    if (1) test_fcvtnu_2d_2d(TyDF);
   5113    if (1) test_fcvtns_4s_4s(TySF);
   5114    if (1) test_fcvtnu_4s_4s(TySF);
   5115    if (1) test_fcvtns_2s_2s(TySF);
   5116    if (1) test_fcvtnu_2s_2s(TySF);
   5117    if (1) test_fcvtns_w_s(TySF);
   5118    if (1) test_fcvtnu_w_s(TySF);
   5119    if (1) test_fcvtns_x_s(TySF);
   5120    if (1) test_fcvtnu_x_s(TySF);
   5121    if (1) test_fcvtns_w_d(TyDF);
   5122    if (1) test_fcvtnu_w_d(TyDF);
   5123    if (1) test_fcvtns_x_d(TyDF);
   5124    if (1) test_fcvtnu_x_d(TyDF);
   5125 
   5126    // fcvtps    d,s  (fcvt to signed int,   plus inf)
   5127    // fcvtpu    d,s  (fcvt to unsigned int, plus inf)
   5128    // fcvtps    2d,4s,2s
   5129    // fcvtpu    2d,4s,2s
   5130    // fcvtps    w_s,x_s,w_d,x_d
   5131    // fcvtpu    w_s,x_s,w_d,x_d
   5132    if (1) test_fcvtps_d_d(TyDF);
   5133    if (1) test_fcvtpu_d_d(TyDF);
   5134    if (1) test_fcvtps_s_s(TySF);
   5135    if (1) test_fcvtpu_s_s(TySF);
   5136    if (1) test_fcvtps_2d_2d(TyDF);
   5137    if (1) test_fcvtpu_2d_2d(TyDF);
   5138    if (1) test_fcvtps_4s_4s(TySF);
   5139    if (1) test_fcvtpu_4s_4s(TySF);
   5140    if (1) test_fcvtps_2s_2s(TySF);
   5141    if (1) test_fcvtpu_2s_2s(TySF);
   5142    if (1) test_fcvtps_w_s(TySF);
   5143    if (1) test_fcvtpu_w_s(TySF);
   5144    if (1) test_fcvtps_x_s(TySF);
   5145    if (1) test_fcvtpu_x_s(TySF);
   5146    if (1) test_fcvtps_w_d(TyDF);
   5147    if (1) test_fcvtpu_w_d(TyDF);
   5148    if (1) test_fcvtps_x_d(TyDF);
   5149    if (1) test_fcvtpu_x_d(TyDF);
   5150 
   5151    // fcvtzs    d,s (fcvt to signed integer,   to zero)
   5152    // fcvtzu    d,s (fcvt to unsigned integer, to zero)
   5153    // fcvtzs    2d,4s,2s
   5154    // fcvtzu    2d,4s,2s
   5155    // fcvtzs    w_s,x_s,w_d,x_d
   5156    // fcvtzu    w_s,x_s,w_d,x_d
   5157    if (1) test_fcvtzs_d_d(TyDF);
   5158    if (1) test_fcvtzu_d_d(TyDF);
   5159    if (1) test_fcvtzs_s_s(TySF);
   5160    if (1) test_fcvtzu_s_s(TySF);
   5161    if (1) test_fcvtzs_2d_2d(TyDF);
   5162    if (1) test_fcvtzu_2d_2d(TyDF);
   5163    if (1) test_fcvtzs_4s_4s(TySF);
   5164    if (1) test_fcvtzu_4s_4s(TySF);
   5165    if (1) test_fcvtzs_2s_2s(TySF);
   5166    if (1) test_fcvtzu_2s_2s(TySF);
   5167    if (1) test_fcvtzs_w_s(TySF);
   5168    if (1) test_fcvtzu_w_s(TySF);
   5169    if (1) test_fcvtzs_x_s(TySF);
   5170    if (1) test_fcvtzu_x_s(TySF);
   5171    if (1) test_fcvtzs_w_d(TyDF);
   5172    if (1) test_fcvtzu_w_d(TyDF);
   5173    if (1) test_fcvtzs_x_d(TyDF);
   5174    if (1) test_fcvtzu_x_d(TyDF);
   5175 
   5176    // fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5177    // fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5178    // fcvtzs    2d,4s,2s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5179    // fcvtzu    2d,4s,2s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5180    // fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5181    // fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5182    if (1) test_fcvtzs_d_d_fbits1(TyDF);
   5183    if (1) test_fcvtzs_d_d_fbits32(TyDF);
   5184    if (1) test_fcvtzs_d_d_fbits64(TyDF);
   5185    if (1) test_fcvtzu_d_d_fbits1(TyDF);
   5186    if (1) test_fcvtzu_d_d_fbits32(TyDF);
   5187    if (1) test_fcvtzu_d_d_fbits64(TyDF);
   5188    if (1) test_fcvtzs_s_s_fbits1(TySF);
   5189    if (1) test_fcvtzs_s_s_fbits16(TySF);
   5190    if (1) test_fcvtzs_s_s_fbits32(TySF);
   5191    if (1) test_fcvtzu_s_s_fbits1(TySF);
   5192    if (1) test_fcvtzu_s_s_fbits16(TySF);
   5193    if (1) test_fcvtzu_s_s_fbits32(TySF);
   5194    if (1) test_fcvtzs_2d_2d_fbits1(TyDF);
   5195    if (1) test_fcvtzs_2d_2d_fbits32(TyDF);
   5196    if (1) test_fcvtzs_2d_2d_fbits64(TyDF);
   5197    if (1) test_fcvtzu_2d_2d_fbits1(TyDF);
   5198    if (1) test_fcvtzu_2d_2d_fbits32(TyDF);
   5199    if (1) test_fcvtzu_2d_2d_fbits64(TyDF);
   5200    if (1) test_fcvtzs_4s_4s_fbits1(TySF);
   5201    if (1) test_fcvtzs_4s_4s_fbits16(TySF);
   5202    if (1) test_fcvtzs_4s_4s_fbits32(TySF);
   5203    if (1) test_fcvtzu_4s_4s_fbits1(TySF);
   5204    if (1) test_fcvtzu_4s_4s_fbits16(TySF);
   5205    if (1) test_fcvtzu_4s_4s_fbits32(TySF);
   5206    if (1) test_fcvtzs_2s_2s_fbits1(TySF);
   5207    if (1) test_fcvtzs_2s_2s_fbits16(TySF);
   5208    if (1) test_fcvtzs_2s_2s_fbits32(TySF);
   5209    if (1) test_fcvtzu_2s_2s_fbits1(TySF);
   5210    if (1) test_fcvtzu_2s_2s_fbits16(TySF);
   5211    if (1) test_fcvtzu_2s_2s_fbits32(TySF);
   5212    if (1) test_fcvtzs_w_s_fbits1(TySF);
   5213    if (1) test_fcvtzs_w_s_fbits16(TySF);
   5214    if (1) test_fcvtzs_w_s_fbits32(TySF);
   5215    if (1) test_fcvtzu_w_s_fbits1(TySF);
   5216    if (1) test_fcvtzu_w_s_fbits16(TySF);
   5217    if (1) test_fcvtzu_w_s_fbits32(TySF);
   5218    if (1) test_fcvtzs_x_s_fbits1(TySF);
   5219    if (1) test_fcvtzs_x_s_fbits32(TySF);
   5220    if (1) test_fcvtzs_x_s_fbits64(TySF);
   5221    if (1) test_fcvtzu_x_s_fbits1(TySF);
   5222    if (1) test_fcvtzu_x_s_fbits32(TySF);
   5223    if (1) test_fcvtzu_x_s_fbits64(TySF);
   5224    if (1) test_fcvtzs_w_d_fbits1(TyDF);
   5225    if (1) test_fcvtzs_w_d_fbits16(TyDF);
   5226    if (1) test_fcvtzs_w_d_fbits32(TyDF);
   5227    if (1) test_fcvtzu_w_d_fbits1(TyDF);
   5228    if (1) test_fcvtzu_w_d_fbits16(TyDF);
   5229    if (1) test_fcvtzu_w_d_fbits32(TyDF);
   5230    if (1) test_fcvtzs_x_d_fbits1(TyDF);
   5231    if (1) test_fcvtzs_x_d_fbits32(TyDF);
   5232    if (1) test_fcvtzs_x_d_fbits64(TyDF);
   5233    if (1) test_fcvtzu_x_d_fbits1(TyDF);
   5234    if (1) test_fcvtzu_x_d_fbits32(TyDF);
   5235    if (1) test_fcvtzu_x_d_fbits64(TyDF);
   5236 
   5237    // fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
   5238    // fcvtxn    2s_2d,4s_2d
   5239    if (1) test_fcvtxn_s_d(TyDF);
   5240    if (1) test_fcvtxn_2s_2d(TyDF);
   5241    if (1) test_fcvtxn_4s_2d(TyDF);
   5242 
   5243    // scvtf     d,s        _#fbits
   5244    // ucvtf     d,s        _#fbits
   5245    // scvtf     2d,4s,2s   _#fbits
   5246    // ucvtf     2d,4s,2s   _#fbits
   5247    if (1) test_scvtf_d_d_fbits1(TyD);
   5248    if (1) test_scvtf_d_d_fbits32(TyD);
   5249    if (1) test_scvtf_d_d_fbits64(TyD);
   5250    if (1) test_ucvtf_d_d_fbits1(TyD);
   5251    if (1) test_ucvtf_d_d_fbits32(TyD);
   5252    if (1) test_ucvtf_d_d_fbits64(TyD);
   5253    if (1) test_scvtf_s_s_fbits1(TyS);
   5254    if (1) test_scvtf_s_s_fbits16(TyS);
   5255    if (1) test_scvtf_s_s_fbits32(TyS);
   5256    if (1) test_ucvtf_s_s_fbits1(TyS);
   5257    if (1) test_ucvtf_s_s_fbits16(TyS);
   5258    if (1) test_ucvtf_s_s_fbits32(TyS);
   5259    if (1) test_scvtf_2d_2d_fbits1(TyD);
   5260    if (1) test_scvtf_2d_2d_fbits32(TyD);
   5261    if (1) test_scvtf_2d_2d_fbits64(TyD);
   5262    if (1) test_ucvtf_2d_2d_fbits1(TyD);
   5263    if (1) test_ucvtf_2d_2d_fbits32(TyD);
   5264    if (1) test_ucvtf_2d_2d_fbits64(TyD);
   5265    if (1) test_scvtf_4s_4s_fbits1(TyS);
   5266    if (1) test_scvtf_4s_4s_fbits16(TyS);
   5267    if (1) test_scvtf_4s_4s_fbits32(TyS);
   5268    if (1) test_ucvtf_4s_4s_fbits1(TyS);
   5269    if (1) test_ucvtf_4s_4s_fbits16(TyS);
   5270    if (1) test_ucvtf_4s_4s_fbits32(TyS);
   5271    if (1) test_scvtf_2s_2s_fbits1(TyS);
   5272    if (1) test_scvtf_2s_2s_fbits16(TyS);
   5273    if (1) test_scvtf_2s_2s_fbits32(TyS);
   5274    if (1) test_ucvtf_2s_2s_fbits1(TyS);
   5275    if (1) test_ucvtf_2s_2s_fbits16(TyS);
   5276    if (1) test_ucvtf_2s_2s_fbits32(TyS);
   5277 
   5278    // scvtf     d,s
   5279    // ucvtf     d,s
   5280    // scvtf     2d,4s,2s
   5281    // ucvtf     2d,4s,2s
   5282    if (1) test_scvtf_d_d(TyD);
   5283    if (1) test_ucvtf_d_d(TyD);
   5284    if (1) test_scvtf_s_s(TyS);
   5285    if (1) test_ucvtf_s_s(TyS);
   5286    if (1) test_scvtf_2d_2d(TyD);
   5287    if (1) test_ucvtf_2d_2d(TyD);
   5288    if (1) test_scvtf_4s_4s(TyS);
   5289    if (1) test_ucvtf_4s_4s(TyS);
   5290    if (1) test_scvtf_2s_2s(TyS);
   5291    if (1) test_ucvtf_2s_2s(TyS);
   5292 
   5293    // scvtf     s_w, d_w, s_x, d_x,   _#fbits
   5294    // ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   5295    if (1) test_scvtf_s_w_fbits1(TyS);
   5296    if (1) test_scvtf_s_w_fbits16(TyS);
   5297    if (1) test_scvtf_s_w_fbits32(TyS);
   5298    if (1) test_scvtf_d_w_fbits1(TyS);
   5299    if (1) test_scvtf_d_w_fbits16(TyS);
   5300    if (1) test_scvtf_d_w_fbits32(TyS);
   5301    if (1) test_scvtf_s_x_fbits1(TyD);
   5302    if (1) test_scvtf_s_x_fbits32(TyD);
   5303    if (1) test_scvtf_s_x_fbits64(TyD);
   5304    if (1) test_scvtf_d_x_fbits1(TyD);
   5305    if (1) test_scvtf_d_x_fbits32(TyD);
   5306    if (1) test_scvtf_d_x_fbits64(TyD);
   5307    if (1) test_ucvtf_s_w_fbits1(TyS);
   5308    if (1) test_ucvtf_s_w_fbits16(TyS);
   5309    if (1) test_ucvtf_s_w_fbits32(TyS);
   5310    if (1) test_ucvtf_d_w_fbits1(TyS);
   5311    if (1) test_ucvtf_d_w_fbits16(TyS);
   5312    if (1) test_ucvtf_d_w_fbits32(TyS);
   5313    if (1) test_ucvtf_s_x_fbits1(TyD);
   5314    if (1) test_ucvtf_s_x_fbits32(TyD);
   5315    if (1) test_ucvtf_s_x_fbits64(TyD);
   5316    if (1) test_ucvtf_d_x_fbits1(TyD);
   5317    if (1) test_ucvtf_d_x_fbits32(TyD);
   5318    if (1) test_ucvtf_d_x_fbits64(TyD);
   5319 
   5320    // scvtf     s_w, d_w, s_x, d_x
   5321    // ucvtf     s_w, d_w, s_x, d_x
   5322    if (1) test_scvtf_s_w(TyS);
   5323    if (1) test_scvtf_d_w(TyS);
   5324    if (1) test_scvtf_s_x(TyD);
   5325    if (1) test_scvtf_d_x(TyD);
   5326    if (1) test_ucvtf_s_w(TyS);
   5327    if (1) test_ucvtf_d_w(TyS);
   5328    if (1) test_ucvtf_s_x(TyD);
   5329    if (1) test_ucvtf_d_x(TyD);
   5330 
   5331    // ======================== INT ========================
   5332 
   5333    // abs       d
   5334    // neg       d
   5335    if (1) test_abs_d_d(TyD);
   5336    if (1) test_neg_d_d(TyD);
   5337 
   5338    // abs       2d,4s,2s,8h,4h,16b,8b
   5339    // neg       2d,4s,2s,8h,4h,16b,8b
   5340    if (1) test_abs_2d_2d(TyD);
   5341    if (1) test_abs_4s_4s(TyS);
   5342    if (1) test_abs_2s_2s(TyS);
   5343    if (1) test_abs_8h_8h(TyH);
   5344    if (1) test_abs_4h_4h(TyH);
   5345    if (1) test_abs_16b_16b(TyB);
   5346    if (1) test_abs_8b_8b(TyB);
   5347    if (1) test_neg_2d_2d(TyD);
   5348    if (1) test_neg_4s_4s(TyS);
   5349    if (1) test_neg_2s_2s(TyS);
   5350    if (1) test_neg_8h_8h(TyH);
   5351    if (1) test_neg_4h_4h(TyH);
   5352    if (1) test_neg_16b_16b(TyB);
   5353    if (1) test_neg_8b_8b(TyB);
   5354 
   5355    // add       d
   5356    // sub       d
   5357    if (1) test_add_d_d_d(TyD);
   5358    if (1) test_sub_d_d_d(TyD);
   5359 
   5360    // add       2d,4s,2s,8h,4h,16b,8b
   5361    // sub       2d,4s,2s,8h,4h,16b,8b
   5362    if (1) test_add_2d_2d_2d(TyD);
   5363    if (1) test_add_4s_4s_4s(TyS);
   5364    if (1) test_add_2s_2s_2s(TyS);
   5365    if (1) test_add_8h_8h_8h(TyH);
   5366    if (1) test_add_4h_4h_4h(TyH);
   5367    if (1) test_add_16b_16b_16b(TyB);
   5368    if (1) test_add_8b_8b_8b(TyB);
   5369    if (1) test_sub_2d_2d_2d(TyD);
   5370    if (1) test_sub_4s_4s_4s(TyS);
   5371    if (1) test_sub_2s_2s_2s(TyS);
   5372    if (1) test_sub_8h_8h_8h(TyH);
   5373    if (1) test_sub_4h_4h_4h(TyH);
   5374    if (1) test_sub_16b_16b_16b(TyB);
   5375    if (1) test_sub_8b_8b_8b(TyB);
   5376 
   5377    // addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5378    // subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5379    // raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5380    // rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5381    if (1) test_addhn_2s_2d_2d(TyD);
   5382    if (1) test_addhn2_4s_2d_2d(TyD);
   5383    if (1) test_addhn_4h_4s_4s(TyS);
   5384    if (1) test_addhn2_8h_4s_4s(TyS);
   5385    if (1) test_addhn_8b_8h_8h(TyH);
   5386    if (1) test_addhn2_16b_8h_8h(TyH);
   5387    if (1) test_subhn_2s_2d_2d(TyD);
   5388    if (1) test_subhn2_4s_2d_2d(TyD);
   5389    if (1) test_subhn_4h_4s_4s(TyS);
   5390    if (1) test_subhn2_8h_4s_4s(TyS);
   5391    if (1) test_subhn_8b_8h_8h(TyH);
   5392    if (1) test_subhn2_16b_8h_8h(TyH);
   5393    if (1) test_raddhn_2s_2d_2d(TyD);
   5394    if (1) test_raddhn2_4s_2d_2d(TyD);
   5395    if (1) test_raddhn_4h_4s_4s(TyS);
   5396    if (1) test_raddhn2_8h_4s_4s(TyS);
   5397    if (1) test_raddhn_8b_8h_8h(TyH);
   5398    if (1) test_raddhn2_16b_8h_8h(TyH);
   5399    if (1) test_rsubhn_2s_2d_2d(TyD);
   5400    if (1) test_rsubhn2_4s_2d_2d(TyD);
   5401    if (1) test_rsubhn_4h_4s_4s(TyS);
   5402    if (1) test_rsubhn2_8h_4s_4s(TyS);
   5403    if (1) test_rsubhn_8b_8h_8h(TyH);
   5404    if (1) test_rsubhn2_16b_8h_8h(TyH);
   5405 
   5406    // addp     d (add pairs, across)
   5407    if (1) test_addp_d_2d(TyD);
   5408 
   5409    // addp     2d,4s,2s,8h,4h,16b,8b
   5410    if (1) test_addp_2d_2d_2d(TyD);
   5411    if (1) test_addp_4s_4s_4s(TyS);
   5412    if (1) test_addp_2s_2s_2s(TyS);
   5413    if (1) test_addp_8h_8h_8h(TyH);
   5414    if (1) test_addp_4h_4h_4h(TyH);
   5415    if (1) test_addp_16b_16b_16b(TyB);
   5416    if (1) test_addp_8b_8b_8b(TyB);
   5417 
   5418    // addv     4s,8h,4h,16b,8b (reduce across vector)
   5419    if (1) test_addv_s_4s(TyS);
   5420    if (1) test_addv_h_8h(TyH);
   5421    if (1) test_addv_h_4h(TyH);
   5422    if (1) test_addv_b_16b(TyB);
   5423    if (1) test_addv_b_8b(TyB);
   5424 
   5425    // and      16b,8b
   5426    // bic      16b,8b
   5427    // orn      16b,8b
   5428    // orr      16b,8b
   5429    if (1) test_and_16b_16b_16b(TyB);
   5430    if (1) test_and_8b_8b_8b(TyB);
   5431    if (1) test_bic_16b_16b_16b(TyB);
   5432    if (1) test_bic_8b_8b_8b(TyB);
   5433    if (1) test_orr_16b_16b_16b(TyB);
   5434    if (1) test_orr_8b_8b_8b(TyB);
   5435    if (1) test_orn_16b_16b_16b(TyB);
   5436    if (1) test_orn_8b_8b_8b(TyB);
   5437 
   5438    // orr      8h,4h   #imm8, LSL #0 or 8
   5439    // orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
   5440    // bic      8h,4h   #imm8, LSL #0 or 8
   5441    // bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
   5442    // movi and mvni are very similar, a superset of these.
   5443    // Cases are below.
   5444    if (1) test_orr_8h_0x5A_lsl0(TyH);
   5445    if (1) test_orr_8h_0xA5_lsl8(TyH);
   5446    if (1) test_orr_4h_0x5A_lsl0(TyH);
   5447    if (1) test_orr_4h_0xA5_lsl8(TyH);
   5448    if (1) test_orr_4s_0x5A_lsl0(TyS);
   5449    if (1) test_orr_4s_0x6B_lsl8(TyS);
   5450    if (1) test_orr_4s_0x49_lsl16(TyS);
   5451    if (1) test_orr_4s_0x3D_lsl24(TyS);
   5452    if (1) test_orr_2s_0x5A_lsl0(TyS);
   5453    if (1) test_orr_2s_0x6B_lsl8(TyS);
   5454    if (1) test_orr_2s_0x49_lsl16(TyS);
   5455    if (1) test_orr_2s_0x3D_lsl24(TyS);
   5456    if (1) test_bic_8h_0x5A_lsl0(TyH);
   5457    if (1) test_bic_8h_0xA5_lsl8(TyH);
   5458    if (1) test_bic_4h_0x5A_lsl0(TyH);
   5459    if (1) test_bic_4h_0xA5_lsl8(TyH);
   5460    if (1) test_bic_4s_0x5A_lsl0(TyS);
   5461    if (1) test_bic_4s_0x6B_lsl8(TyS);
   5462    if (1) test_bic_4s_0x49_lsl16(TyS);
   5463    if (1) test_bic_4s_0x3D_lsl24(TyS);
   5464    if (1) test_bic_2s_0x5A_lsl0(TyS);
   5465    if (1) test_bic_2s_0x6B_lsl8(TyS);
   5466    if (1) test_bic_2s_0x49_lsl16(TyS);
   5467    if (1) test_bic_2s_0x3D_lsl24(TyS);
   5468 
   5469    // bif      16b,8b (vector) (bit insert if false)
   5470    // bit      16b,8b (vector) (bit insert if true)
   5471    // bsl      16b,8b (vector) (bit select)
   5472    // eor      16b,8b (vector)
   5473    if (1) test_bif_16b_16b_16b(TyB);
   5474    if (1) test_bif_8b_8b_8b(TyB);
   5475    if (1) test_bit_16b_16b_16b(TyB);
   5476    if (1) test_bit_8b_8b_8b(TyB);
   5477    if (1) test_bsl_16b_16b_16b(TyB);
   5478    if (1) test_bsl_8b_8b_8b(TyB);
   5479    if (1) test_eor_16b_16b_16b(TyB);
   5480    if (1) test_eor_8b_8b_8b(TyB);
   5481 
   5482    // cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   5483    // clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   5484    if (1) test_cls_4s_4s(TyS);
   5485    if (1) test_cls_2s_2s(TyS);
   5486    if (1) test_cls_8h_8h(TyH);
   5487    if (1) test_cls_4h_4h(TyH);
   5488    if (1) test_cls_16b_16b(TyB);
   5489    if (1) test_cls_8b_8b(TyB);
   5490    if (1) test_clz_4s_4s(TyS);
   5491    if (1) test_clz_2s_2s(TyS);
   5492    if (1) test_clz_8h_8h(TyH);
   5493    if (1) test_clz_4h_4h(TyH);
   5494    if (1) test_clz_16b_16b(TyB);
   5495    if (1) test_clz_8b_8b(TyB);
   5496 
   5497    // cmeq     d
   5498    // cmge     d
   5499    // cmgt     d
   5500    // cmhi     d
   5501    // cmhs     d
   5502    // cmtst    d
   5503    if (1) test_cmeq_d_d_d(TyD);
   5504    if (1) test_cmge_d_d_d(TyD);
   5505    if (1) test_cmgt_d_d_d(TyD);
   5506    if (1) test_cmhi_d_d_d(TyD);
   5507    if (1) test_cmhs_d_d_d(TyD);
   5508    if (1) test_cmtst_d_d_d(TyD);
   5509 
   5510    // cmeq     2d,4s,2s,8h,4h,16b,8b
   5511    // cmge     2d,4s,2s,8h,4h,16b,8b
   5512    // cmgt     2d,4s,2s,8h,4h,16b,8b
   5513    // cmhi     2d,4s,2s,8h,4h,16b,8b
   5514    // cmhs     2d,4s,2s,8h,4h,16b,8b
   5515    // cmtst    2d,4s,2s,8h,4h,16b,8b
   5516    if (1) test_cmeq_2d_2d_2d(TyD);
   5517    if (1) test_cmeq_4s_4s_4s(TyS);
   5518    if (1) test_cmeq_2s_2s_2s(TyS);
   5519    if (1) test_cmeq_8h_8h_8h(TyH);
   5520    if (1) test_cmeq_4h_4h_4h(TyH);
   5521    if (1) test_cmeq_16b_16b_16b(TyB);
   5522    if (1) test_cmeq_8b_8b_8b(TyB);
   5523    if (1) test_cmge_2d_2d_2d(TyD);
   5524    if (1) test_cmge_4s_4s_4s(TyS);
   5525    if (1) test_cmge_2s_2s_2s(TyS);
   5526    if (1) test_cmge_8h_8h_8h(TyH);
   5527    if (1) test_cmge_4h_4h_4h(TyH);
   5528    if (1) test_cmge_16b_16b_16b(TyB);
   5529    if (1) test_cmge_8b_8b_8b(TyB);
   5530    if (1) test_cmgt_2d_2d_2d(TyD);
   5531    if (1) test_cmgt_4s_4s_4s(TyS);
   5532    if (1) test_cmgt_2s_2s_2s(TyS);
   5533    if (1) test_cmgt_8h_8h_8h(TyH);
   5534    if (1) test_cmgt_4h_4h_4h(TyH);
   5535    if (1) test_cmgt_16b_16b_16b(TyB);
   5536    if (1) test_cmgt_8b_8b_8b(TyB);
   5537    if (1) test_cmhi_2d_2d_2d(TyD);
   5538    if (1) test_cmhi_4s_4s_4s(TyS);
   5539    if (1) test_cmhi_2s_2s_2s(TyS);
   5540    if (1) test_cmhi_8h_8h_8h(TyH);
   5541    if (1) test_cmhi_4h_4h_4h(TyH);
   5542    if (1) test_cmhi_16b_16b_16b(TyB);
   5543    if (1) test_cmhi_8b_8b_8b(TyB);
   5544    if (1) test_cmhs_2d_2d_2d(TyD);
   5545    if (1) test_cmhs_4s_4s_4s(TyS);
   5546    if (1) test_cmhs_2s_2s_2s(TyS);
   5547    if (1) test_cmhs_8h_8h_8h(TyH);
   5548    if (1) test_cmhs_4h_4h_4h(TyH);
   5549    if (1) test_cmhs_16b_16b_16b(TyB);
   5550    if (1) test_cmhs_8b_8b_8b(TyB);
   5551    if (1) test_cmtst_2d_2d_2d(TyD);
   5552    if (1) test_cmtst_4s_4s_4s(TyS);
   5553    if (1) test_cmtst_2s_2s_2s(TyS);
   5554    if (1) test_cmtst_8h_8h_8h(TyH);
   5555    if (1) test_cmtst_4h_4h_4h(TyH);
   5556    if (1) test_cmtst_16b_16b_16b(TyB);
   5557    if (1) test_cmtst_8b_8b_8b(TyB);
   5558 
   5559    // cmeq_z   d
   5560    // cmge_z   d
   5561    // cmgt_z   d
   5562    // cmle_z   d
   5563    // cmlt_z   d
   5564    if (1) test_cmeq_zero_d_d(TyD);
   5565    if (1) test_cmge_zero_d_d(TyD);
   5566    if (1) test_cmgt_zero_d_d(TyD);
   5567    if (1) test_cmle_zero_d_d(TyD);
   5568    if (1) test_cmlt_zero_d_d(TyD);
   5569 
   5570    // cmeq_z   2d,4s,2s,8h,4h,16b,8b
   5571    // cmge_z   2d,4s,2s,8h,4h,16b,8b
   5572    // cmgt_z   2d,4s,2s,8h,4h,16b,8b
   5573    // cmle_z   2d,4s,2s,8h,4h,16b,8b
   5574    // cmlt_z   2d,4s,2s,8h,4h,16b,8b
   5575    if (1) test_cmeq_zero_2d_2d(TyD);
   5576    if (1) test_cmeq_zero_4s_4s(TyS);
   5577    if (1) test_cmeq_zero_2s_2s(TyS);
   5578    if (1) test_cmeq_zero_8h_8h(TyH);
   5579    if (1) test_cmeq_zero_4h_4h(TyH);
   5580    if (1) test_cmeq_zero_16b_16b(TyB);
   5581    if (1) test_cmeq_zero_8b_8b(TyB);
   5582    if (1) test_cmge_zero_2d_2d(TyD);
   5583    if (1) test_cmge_zero_4s_4s(TyS);
   5584    if (1) test_cmge_zero_2s_2s(TyS);
   5585    if (1) test_cmge_zero_8h_8h(TyH);
   5586    if (1) test_cmge_zero_4h_4h(TyH);
   5587    if (1) test_cmge_zero_16b_16b(TyB);
   5588    if (1) test_cmge_zero_8b_8b(TyB);
   5589    if (1) test_cmgt_zero_2d_2d(TyD);
   5590    if (1) test_cmgt_zero_4s_4s(TyS);
   5591    if (1) test_cmgt_zero_2s_2s(TyS);
   5592    if (1) test_cmgt_zero_8h_8h(TyH);
   5593    if (1) test_cmgt_zero_4h_4h(TyH);
   5594    if (1) test_cmgt_zero_16b_16b(TyB);
   5595    if (1) test_cmgt_zero_8b_8b(TyB);
   5596    if (1) test_cmle_zero_2d_2d(TyD);
   5597    if (1) test_cmle_zero_4s_4s(TyS);
   5598    if (1) test_cmle_zero_2s_2s(TyS);
   5599    if (1) test_cmle_zero_8h_8h(TyH);
   5600    if (1) test_cmle_zero_4h_4h(TyH);
   5601    if (1) test_cmle_zero_16b_16b(TyB);
   5602    if (1) test_cmle_zero_8b_8b(TyB);
   5603    if (1) test_cmlt_zero_2d_2d(TyD);
   5604    if (1) test_cmlt_zero_4s_4s(TyS);
   5605    if (1) test_cmlt_zero_2s_2s(TyS);
   5606    if (1) test_cmlt_zero_8h_8h(TyH);
   5607    if (1) test_cmlt_zero_4h_4h(TyH);
   5608    if (1) test_cmlt_zero_16b_16b(TyB);
   5609    if (1) test_cmlt_zero_8b_8b(TyB);
   5610 
   5611    // cnt      16b,8b (population count per byte)
   5612    if (1) test_cnt_16b_16b(TyB);
   5613    if (1) test_cnt_8b_8b(TyB);
   5614 
   5615    // dup      d,s,h,b (vec elem to scalar)
   5616    if (1) test_dup_d_d0(TyD);
   5617    if (1) test_dup_d_d1(TyD);
   5618    if (1) test_dup_s_s0(TyS);
   5619    if (1) test_dup_s_s3(TyS);
   5620    if (1) test_dup_h_h0(TyH);
   5621    if (1) test_dup_h_h6(TyH);
   5622    if (1) test_dup_b_b0(TyB);
   5623    if (1) test_dup_b_b13(TyB);
   5624 
   5625    // dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   5626    if (1) test_dup_2d_d0(TyD);
   5627    if (1) test_dup_2d_d1(TyD);
   5628    if (1) test_dup_4s_s0(TyS);
   5629    if (1) test_dup_4s_s3(TyS);
   5630    if (1) test_dup_2s_s0(TyS);
   5631    if (1) test_dup_2s_s2(TyS);
   5632    if (1) test_dup_8h_h0(TyH);
   5633    if (1) test_dup_8h_h6(TyH);
   5634    if (1) test_dup_4h_h1(TyH);
   5635    if (1) test_dup_4h_h5(TyH);
   5636    if (1) test_dup_16b_b2(TyB);
   5637    if (1) test_dup_16b_b12(TyB);
   5638    if (1) test_dup_8b_b3(TyB);
   5639    if (1) test_dup_8b_b13(TyB);
   5640 
   5641    // dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   5642    if (1) test_dup_2d_x(TyD);
   5643    if (1) test_dup_4s_w(TyS);
   5644    if (1) test_dup_2s_w(TyS);
   5645    if (1) test_dup_8h_w(TyH);
   5646    if (1) test_dup_4h_w(TyH);
   5647    if (1) test_dup_16b_w(TyB);
   5648    if (1) test_dup_8b_w(TyB);
   5649 
   5650    // ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   5651    if (1) test_ext_16b_16b_16b_0x0(TyB);
   5652    if (1) test_ext_16b_16b_16b_0x1(TyB);
   5653    if (1) test_ext_16b_16b_16b_0x2(TyB);
   5654    if (1) test_ext_16b_16b_16b_0x3(TyB);
   5655    if (1) test_ext_16b_16b_16b_0x4(TyB);
   5656    if (1) test_ext_16b_16b_16b_0x5(TyB);
   5657    if (1) test_ext_16b_16b_16b_0x6(TyB);
   5658    if (1) test_ext_16b_16b_16b_0x7(TyB);
   5659    if (1) test_ext_16b_16b_16b_0x8(TyB);
   5660    if (1) test_ext_16b_16b_16b_0x9(TyB);
   5661    if (1) test_ext_16b_16b_16b_0xA(TyB);
   5662    if (1) test_ext_16b_16b_16b_0xB(TyB);
   5663    if (1) test_ext_16b_16b_16b_0xC(TyB);
   5664    if (1) test_ext_16b_16b_16b_0xD(TyB);
   5665    if (1) test_ext_16b_16b_16b_0xE(TyB);
   5666    if (1) test_ext_16b_16b_16b_0xF(TyB);
   5667    if (1) test_ext_8b_8b_8b_0x0(TyB);
   5668    if (1) test_ext_8b_8b_8b_0x1(TyB);
   5669    if (1) test_ext_8b_8b_8b_0x2(TyB);
   5670    if (1) test_ext_8b_8b_8b_0x3(TyB);
   5671    if (1) test_ext_8b_8b_8b_0x4(TyB);
   5672    if (1) test_ext_8b_8b_8b_0x5(TyB);
   5673    if (1) test_ext_8b_8b_8b_0x6(TyB);
   5674    if (1) test_ext_8b_8b_8b_0x7(TyB);
   5675 
   5676    // ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   5677    if (1) test_ins_d0_d0(TyD);
   5678    if (1) test_ins_d0_d1(TyD);
   5679    if (1) test_ins_d1_d0(TyD);
   5680    if (1) test_ins_d1_d1(TyD);
   5681    if (1) test_ins_s0_s2(TyS);
   5682    if (1) test_ins_s3_s0(TyS);
   5683    if (1) test_ins_s2_s1(TyS);
   5684    if (1) test_ins_s1_s3(TyS);
   5685    if (1) test_ins_h0_h6(TyH);
   5686    if (1) test_ins_h7_h0(TyH);
   5687    if (1) test_ins_h6_h1(TyH);
   5688    if (1) test_ins_h1_h7(TyH);
   5689    if (1) test_ins_b0_b14(TyB);
   5690    if (1) test_ins_b15_b8(TyB);
   5691    if (1) test_ins_b13_b9(TyB);
   5692    if (1) test_ins_b5_b12(TyB);
   5693 
   5694    // ins      d[]_x, s[]_w, h[]_w, b[]_w
   5695    if (1) test_INS_general();
   5696 
   5697    // mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5698    // mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5699    // mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5700    if (1) test_mla_4s_4s_s0(TyS);
   5701    if (1) test_mla_4s_4s_s3(TyS);
   5702    if (1) test_mla_2s_2s_s0(TyS);
   5703    if (1) test_mla_2s_2s_s3(TyS);
   5704    if (1) test_mla_8h_8h_h1(TyH);
   5705    if (1) test_mla_8h_8h_h5(TyH);
   5706    if (1) test_mla_4h_4h_h2(TyH);
   5707    if (1) test_mla_4h_4h_h7(TyH);
   5708    if (1) test_mls_4s_4s_s0(TyS);
   5709    if (1) test_mls_4s_4s_s3(TyS);
   5710    if (1) test_mls_2s_2s_s0(TyS);
   5711    if (1) test_mls_2s_2s_s3(TyS);
   5712    if (1) test_mls_8h_8h_h1(TyH);
   5713    if (1) test_mls_8h_8h_h5(TyH);
   5714    if (1) test_mls_4h_4h_h2(TyH);
   5715    if (1) test_mls_4h_4h_h7(TyH);
   5716    if (1) test_mul_4s_4s_s0(TyS);
   5717    if (1) test_mul_4s_4s_s3(TyS);
   5718    if (1) test_mul_2s_2s_s0(TyS);
   5719    if (1) test_mul_2s_2s_s3(TyS);
   5720    if (1) test_mul_8h_8h_h1(TyH);
   5721    if (1) test_mul_8h_8h_h5(TyH);
   5722    if (1) test_mul_4h_4h_h2(TyH);
   5723    if (1) test_mul_4h_4h_h7(TyH);
   5724 
   5725    // mla   4s,2s,8h,4h,16b,8b
   5726    // mls   4s,2s,8h,4h,16b,8b
   5727    // mul   4s,2s,8h,4h,16b,8b
   5728    if (1) test_mla_4s_4s_4s(TyS);
   5729    if (1) test_mla_2s_2s_2s(TyS);
   5730    if (1) test_mla_8h_8h_8h(TyH);
   5731    if (1) test_mla_4h_4h_4h(TyH);
   5732    if (1) test_mla_16b_16b_16b(TyB);
   5733    if (1) test_mla_8b_8b_8b(TyB);
   5734    if (1) test_mls_4s_4s_4s(TyS);
   5735    if (1) test_mls_2s_2s_2s(TyS);
   5736    if (1) test_mls_8h_8h_8h(TyH);
   5737    if (1) test_mls_4h_4h_4h(TyH);
   5738    if (1) test_mls_16b_16b_16b(TyB);
   5739    if (1) test_mls_8b_8b_8b(TyB);
   5740    if (1) test_mul_4s_4s_4s(TyS);
   5741    if (1) test_mul_2s_2s_2s(TyS);
   5742    if (1) test_mul_8h_8h_8h(TyH);
   5743    if (1) test_mul_4h_4h_4h(TyH);
   5744    if (1) test_mul_16b_16b_16b(TyB);
   5745    if (1) test_mul_8b_8b_8b(TyB);
   5746 
   5747    // Some of these movi and mvni cases are similar to orr and bic
   5748    // cases with immediates.  Maybe they should be moved together.
   5749    // movi  16b,8b   #imm8, LSL #0
   5750    if (1) test_movi_16b_0x9C_lsl0(TyB);
   5751    if (1) test_movi_8b_0x8B_lsl0(TyB);
   5752 
   5753    // movi  8h,4h    #imm8, LSL #0 or 8
   5754    // mvni  8h,4h    #imm8, LSL #0 or 8
   5755    if (1) test_movi_8h_0x5A_lsl0(TyH);
   5756    if (1) test_movi_8h_0xA5_lsl8(TyH);
   5757    if (1) test_movi_4h_0x5A_lsl0(TyH);
   5758    if (1) test_movi_4h_0xA5_lsl8(TyH);
   5759    if (1) test_mvni_8h_0x5A_lsl0(TyH);
   5760    if (1) test_mvni_8h_0xA5_lsl8(TyH);
   5761    if (1) test_mvni_4h_0x5A_lsl0(TyH);
   5762    if (1) test_mvni_4h_0xA5_lsl8(TyH);
   5763 
   5764    // movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   5765    // mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   5766    if (1) test_movi_4s_0x5A_lsl0(TyS);
   5767    if (1) test_movi_4s_0x6B_lsl8(TyS);
   5768    if (1) test_movi_4s_0x49_lsl16(TyS);
   5769    if (1) test_movi_4s_0x3D_lsl24(TyS);
   5770    if (1) test_movi_2s_0x5A_lsl0(TyS);
   5771    if (1) test_movi_2s_0x6B_lsl8(TyS);
   5772    if (1) test_movi_2s_0x49_lsl16(TyS);
   5773    if (1) test_movi_2s_0x3D_lsl24(TyS);
   5774    if (1) test_mvni_4s_0x5A_lsl0(TyS);
   5775    if (1) test_mvni_4s_0x6B_lsl8(TyS);
   5776    if (1) test_mvni_4s_0x49_lsl16(TyS);
   5777    if (1) test_mvni_4s_0x3D_lsl24(TyS);
   5778    if (1) test_mvni_2s_0x5A_lsl0(TyS);
   5779    if (1) test_mvni_2s_0x6B_lsl8(TyS);
   5780    if (1) test_mvni_2s_0x49_lsl16(TyS);
   5781    if (1) test_mvni_2s_0x3D_lsl24(TyS);
   5782 
   5783    // movi  4s,2s    #imm8, MSL #8 or 16
   5784    // mvni  4s,2s    #imm8, MSL #8 or 16
   5785    if (1) test_movi_4s_0x6B_msl8(TyS);
   5786    if (1) test_movi_4s_0x94_msl16(TyS);
   5787    if (1) test_movi_2s_0x7A_msl8(TyS);
   5788    if (1) test_movi_2s_0xA5_msl16(TyS);
   5789    if (1) test_mvni_4s_0x6B_msl8(TyS);
   5790    if (1) test_mvni_4s_0x94_msl16(TyS);
   5791    if (1) test_mvni_2s_0x7A_msl8(TyS);
   5792    if (1) test_mvni_2s_0xA5_msl16(TyS);
   5793 
   5794    // movi  d,       #imm64
   5795    // movi  2d,      #imm64
   5796    if (1) test_movi_d_0xA5(TyD);
   5797    if (1) test_movi_2d_0xB4(TyD);
   5798 
   5799    // not   16b,8b
   5800    if (1) test_not_16b_16b(TyB);
   5801    if (1) test_not_8b_8b(TyB);
   5802 
   5803    // pmul  16b,8b
   5804    if (1) test_pmul_16b_16b_16b(TyB);
   5805    if (1) test_pmul_8b_8b_8b(TyB);
   5806 
   5807    // pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
   5808    if (1) test_pmull_8h_8b_8b(TyB);
   5809    if (1) test_pmull2_8h_16b_16b(TyB);
   5810    //if (0) test_pmull_1q_1d_1d(TyD);
   5811    //if (0) test_pmull_1q_2d_2d(TyD);
   5812 
   5813    // rbit    16b,8b
   5814    // rev16   16b,8b
   5815    // rev32   16b,8b,8h,4h
   5816    // rev64   16b,8b,8h,4h,4s,2s
   5817    if (1) test_rbit_16b_16b(TyB);
   5818    if (1) test_rbit_8b_8b(TyB);
   5819    if (1) test_rev16_16b_16b(TyB);
   5820    if (1) test_rev16_8b_8b(TyB);
   5821    if (1) test_rev32_16b_16b(TyB);
   5822    if (1) test_rev32_8b_8b(TyB);
   5823    if (1) test_rev32_8h_8h(TyH);
   5824    if (1) test_rev32_4h_4h(TyH);
   5825    if (1) test_rev64_16b_16b(TyB);
   5826    if (1) test_rev64_8b_8b(TyB);
   5827    if (1) test_rev64_8h_8h(TyH);
   5828    if (1) test_rev64_4h_4h(TyH);
   5829    if (1) test_rev64_4s_4s(TyS);
   5830    if (1) test_rev64_2s_2s(TyS);
   5831 
   5832    // saba      16b,8b,8h,4h,4s,2s
   5833    // uaba      16b,8b,8h,4h,4s,2s
   5834    if (1) test_saba_4s_4s_4s(TyS);
   5835    if (1) test_saba_2s_2s_2s(TyS);
   5836    if (1) test_saba_8h_8h_8h(TyH);
   5837    if (1) test_saba_4h_4h_4h(TyH);
   5838    if (1) test_saba_16b_16b_16b(TyB);
   5839    if (1) test_saba_8b_8b_8b(TyB);
   5840    if (1) test_uaba_4s_4s_4s(TyS);
   5841    if (1) test_uaba_2s_2s_2s(TyS);
   5842    if (1) test_uaba_8h_8h_8h(TyH);
   5843    if (1) test_uaba_4h_4h_4h(TyH);
   5844    if (1) test_uaba_16b_16b_16b(TyB);
   5845    if (1) test_uaba_8b_8b_8b(TyB);
   5846 
   5847    // sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5848    // uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5849    if (1) test_sabal_2d_2s_2s(TyS);
   5850    if (1) test_sabal2_2d_4s_4s(TyS);
   5851    if (1) test_sabal_4s_4h_4h(TyH);
   5852    if (1) test_sabal2_4s_8h_8h(TyH);
   5853    if (1) test_sabal_8h_8b_8b(TyB);
   5854    if (1) test_sabal2_8h_16b_16b(TyB);
   5855    if (1) test_uabal_2d_2s_2s(TyS);
   5856    if (1) test_uabal2_2d_4s_4s(TyS);
   5857    if (1) test_uabal_4s_4h_4h(TyH);
   5858    if (1) test_uabal2_4s_8h_8h(TyH);
   5859    if (1) test_uabal_8h_8b_8b(TyB);
   5860    if (1) test_uabal2_8h_16b_16b(TyB);
   5861 
   5862    // sabd      16b,8b,8h,4h,4s,2s
   5863    // uabd      16b,8b,8h,4h,4s,2s
   5864    if (1) test_sabd_4s_4s_4s(TyS);
   5865    if (1) test_sabd_2s_2s_2s(TyS);
   5866    if (1) test_sabd_8h_8h_8h(TyH);
   5867    if (1) test_sabd_4h_4h_4h(TyH);
   5868    if (1) test_sabd_16b_16b_16b(TyB);
   5869    if (1) test_sabd_8b_8b_8b(TyB);
   5870    if (1) test_uabd_4s_4s_4s(TyS);
   5871    if (1) test_uabd_2s_2s_2s(TyS);
   5872    if (1) test_uabd_8h_8h_8h(TyH);
   5873    if (1) test_uabd_4h_4h_4h(TyH);
   5874    if (1) test_uabd_16b_16b_16b(TyB);
   5875    if (1) test_uabd_8b_8b_8b(TyB);
   5876 
   5877    // sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5878    // uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5879    if (1) test_sabdl_2d_2s_2s(TyS);
   5880    if (1) test_sabdl2_2d_4s_4s(TyS);
   5881    if (1) test_sabdl_4s_4h_4h(TyH);
   5882    if (1) test_sabdl2_4s_8h_8h(TyH);
   5883    if (1) test_sabdl_8h_8b_8b(TyB);
   5884    if (1) test_sabdl2_8h_16b_16b(TyB);
   5885    if (1) test_uabdl_2d_2s_2s(TyS);
   5886    if (1) test_uabdl2_2d_4s_4s(TyS);
   5887    if (1) test_uabdl_4s_4h_4h(TyH);
   5888    if (1) test_uabdl2_4s_8h_8h(TyH);
   5889    if (1) test_uabdl_8h_8b_8b(TyB);
   5890    if (1) test_uabdl2_8h_16b_16b(TyB);
   5891 
   5892    // sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5893    // uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5894    if (1) test_sadalp_1d_2s(TyS);
   5895    if (1) test_sadalp_2d_4s(TyS);
   5896    if (1) test_sadalp_2s_4h(TyH);
   5897    if (1) test_sadalp_4s_8h(TyH);
   5898    if (1) test_sadalp_4h_8b(TyB);
   5899    if (1) test_sadalp_8h_16b(TyB);
   5900    if (1) test_uadalp_1d_2s(TyS);
   5901    if (1) test_uadalp_2d_4s(TyS);
   5902    if (1) test_uadalp_2s_4h(TyH);
   5903    if (1) test_uadalp_4s_8h(TyH);
   5904    if (1) test_uadalp_4h_8b(TyB);
   5905    if (1) test_uadalp_8h_16b(TyB);
   5906 
   5907    // saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5908    // uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5909    // ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5910    // usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5911    if (1) test_saddl_2d_2s_2s(TyS);
   5912    if (1) test_saddl2_2d_4s_4s(TyS);
   5913    if (1) test_saddl_4s_4h_4h(TyH);
   5914    if (1) test_saddl2_4s_8h_8h(TyH);
   5915    if (1) test_saddl_8h_8b_8b(TyB);
   5916    if (1) test_saddl2_8h_16b_16b(TyB);
   5917    if (1) test_uaddl_2d_2s_2s(TyS);
   5918    if (1) test_uaddl2_2d_4s_4s(TyS);
   5919    if (1) test_uaddl_4s_4h_4h(TyH);
   5920    if (1) test_uaddl2_4s_8h_8h(TyH);
   5921    if (1) test_uaddl_8h_8b_8b(TyB);
   5922    if (1) test_uaddl2_8h_16b_16b(TyB);
   5923    if (1) test_ssubl_2d_2s_2s(TyS);
   5924    if (1) test_ssubl2_2d_4s_4s(TyS);
   5925    if (1) test_ssubl_4s_4h_4h(TyH);
   5926    if (1) test_ssubl2_4s_8h_8h(TyH);
   5927    if (1) test_ssubl_8h_8b_8b(TyB);
   5928    if (1) test_ssubl2_8h_16b_16b(TyB);
   5929    if (1) test_usubl_2d_2s_2s(TyS);
   5930    if (1) test_usubl2_2d_4s_4s(TyS);
   5931    if (1) test_usubl_4s_4h_4h(TyH);
   5932    if (1) test_usubl2_4s_8h_8h(TyH);
   5933    if (1) test_usubl_8h_8b_8b(TyB);
   5934    if (1) test_usubl2_8h_16b_16b(TyB);
   5935 
   5936    // saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5937    // uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5938    if (1) test_saddlp_1d_2s(TyS);
   5939    if (1) test_saddlp_2d_4s(TyS);
   5940    if (1) test_saddlp_2s_4h(TyH);
   5941    if (1) test_saddlp_4s_8h(TyH);
   5942    if (1) test_saddlp_4h_8b(TyB);
   5943    if (1) test_saddlp_8h_16b(TyB);
   5944    if (1) test_uaddlp_1d_2s(TyS);
   5945    if (1) test_uaddlp_2d_4s(TyS);
   5946    if (1) test_uaddlp_2s_4h(TyH);
   5947    if (1) test_uaddlp_4s_8h(TyH);
   5948    if (1) test_uaddlp_4h_8b(TyB);
   5949    if (1) test_uaddlp_8h_16b(TyB);
   5950 
   5951    // saddlv    h_16b/8b, s_8h/4h, d_4s
   5952    // uaddlv    h_16b/8b, s_8h/4h, d_4s
   5953    if (1) test_saddlv_h_16b(TyB);
   5954    if (1) test_saddlv_h_8b(TyB);
   5955    if (1) test_saddlv_s_8h(TyH);
   5956    if (1) test_saddlv_s_4h(TyH);
   5957    if (1) test_saddlv_d_4s(TyH);
   5958    if (1) test_uaddlv_h_16b(TyB);
   5959    if (1) test_uaddlv_h_8b(TyB);
   5960    if (1) test_uaddlv_s_8h(TyH);
   5961    if (1) test_uaddlv_s_4h(TyH);
   5962    if (1) test_uaddlv_d_4s(TyH);
   5963 
   5964    // saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5965    // uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5966    // ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5967    // usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5968    if (1) test_saddw2_8h_8h_16b(TyB);
   5969    if (1) test_saddw_8h_8h_8b(TyB);
   5970    if (1) test_saddw2_4s_4s_8h(TyH);
   5971    if (1) test_saddw_4s_4s_4h(TyH);
   5972    if (1) test_saddw2_2d_2d_4s(TyS);
   5973    if (1) test_saddw_2d_2d_2s(TyS);
   5974    if (1) test_uaddw2_8h_8h_16b(TyB);
   5975    if (1) test_uaddw_8h_8h_8b(TyB);
   5976    if (1) test_uaddw2_4s_4s_8h(TyH);
   5977    if (1) test_uaddw_4s_4s_4h(TyH);
   5978    if (1) test_uaddw2_2d_2d_4s(TyS);
   5979    if (1) test_uaddw_2d_2d_2s(TyS);
   5980    if (1) test_ssubw2_8h_8h_16b(TyB);
   5981    if (1) test_ssubw_8h_8h_8b(TyB);
   5982    if (1) test_ssubw2_4s_4s_8h(TyH);
   5983    if (1) test_ssubw_4s_4s_4h(TyH);
   5984    if (1) test_ssubw2_2d_2d_4s(TyS);
   5985    if (1) test_ssubw_2d_2d_2s(TyS);
   5986    if (1) test_usubw2_8h_8h_16b(TyB);
   5987    if (1) test_usubw_8h_8h_8b(TyB);
   5988    if (1) test_usubw2_4s_4s_8h(TyH);
   5989    if (1) test_usubw_4s_4s_4h(TyH);
   5990    if (1) test_usubw2_2d_2d_4s(TyS);
   5991    if (1) test_usubw_2d_2d_2s(TyS);
   5992 
   5993    // shadd        16b,8b,8h,4h,4s,2s
   5994    // uhadd        16b,8b,8h,4h,4s,2s
   5995    // shsub        16b,8b,8h,4h,4s,2s
   5996    // uhsub        16b,8b,8h,4h,4s,2s
   5997    if (1) test_shadd_4s_4s_4s(TyS);
   5998    if (1) test_shadd_2s_2s_2s(TyS);
   5999    if (1) test_shadd_8h_8h_8h(TyH);
   6000    if (1) test_shadd_4h_4h_4h(TyH);
   6001    if (1) test_shadd_16b_16b_16b(TyB);
   6002    if (1) test_shadd_8b_8b_8b(TyB);
   6003    if (1) test_uhadd_4s_4s_4s(TyS);
   6004    if (1) test_uhadd_2s_2s_2s(TyS);
   6005    if (1) test_uhadd_8h_8h_8h(TyH);
   6006    if (1) test_uhadd_4h_4h_4h(TyH);
   6007    if (1) test_uhadd_16b_16b_16b(TyB);
   6008    if (1) test_uhadd_8b_8b_8b(TyB);
   6009    if (1) test_shsub_4s_4s_4s(TyS);
   6010    if (1) test_shsub_2s_2s_2s(TyS);
   6011    if (1) test_shsub_8h_8h_8h(TyH);
   6012    if (1) test_shsub_4h_4h_4h(TyH);
   6013    if (1) test_shsub_16b_16b_16b(TyB);
   6014    if (1) test_shsub_8b_8b_8b(TyB);
   6015    if (1) test_uhsub_4s_4s_4s(TyS);
   6016    if (1) test_uhsub_2s_2s_2s(TyS);
   6017    if (1) test_uhsub_8h_8h_8h(TyH);
   6018    if (1) test_uhsub_4h_4h_4h(TyH);
   6019    if (1) test_uhsub_16b_16b_16b(TyB);
   6020    if (1) test_uhsub_8b_8b_8b(TyB);
   6021 
   6022    // shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   6023    if (1) test_shll_8h_8b_8(TyB);
   6024    if (1) test_shll2_8h_16b_8(TyB);
   6025    if (1) test_shll_4s_4h_16(TyH);
   6026    if (1) test_shll2_4s_8h_16(TyH);
   6027    if (1) test_shll_2d_2s_32(TyS);
   6028    if (1) test_shll2_2d_4s_32(TyS);
   6029 
   6030    // shrn{2}      2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
   6031    // rshrn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
   6032    if (1) test_shrn_2s_2d_1(TyD);
   6033    if (1) test_shrn_2s_2d_32(TyD);
   6034    if (1) test_shrn2_4s_2d_1(TyD);
   6035    if (1) test_shrn2_4s_2d_32(TyD);
   6036    if (1) test_shrn_4h_4s_1(TyS);
   6037    if (1) test_shrn_4h_4s_16(TyS);
   6038    if (1) test_shrn2_8h_4s_1(TyS);
   6039    if (1) test_shrn2_8h_4s_16(TyS);
   6040    if (1) test_shrn_8b_8h_1(TyH);
   6041    if (1) test_shrn_8b_8h_8(TyH);
   6042    if (1) test_shrn2_16b_8h_1(TyH);
   6043    if (1) test_shrn2_16b_8h_8(TyH);
   6044    if (1) test_rshrn_2s_2d_1(TyD);
   6045    if (1) test_rshrn_2s_2d_32(TyD);
   6046    if (1) test_rshrn2_4s_2d_1(TyD);
   6047    if (1) test_rshrn2_4s_2d_32(TyD);
   6048    if (1) test_rshrn_4h_4s_1(TyS);
   6049    if (1) test_rshrn_4h_4s_16(TyS);
   6050    if (1) test_rshrn2_8h_4s_1(TyS);
   6051    if (1) test_rshrn2_8h_4s_16(TyS);
   6052    if (1) test_rshrn_8b_8h_1(TyH);
   6053    if (1) test_rshrn_8b_8h_8(TyH);
   6054    if (1) test_rshrn2_16b_8h_1(TyH);
   6055    if (1) test_rshrn2_16b_8h_8(TyH);
   6056 
   6057    // sli          d_#imm
   6058    // sri          d_#imm
   6059    if (1) test_sli_d_d_0(TyD);
   6060    if (1) test_sli_d_d_32(TyD);
   6061    if (1) test_sli_d_d_63(TyD);
   6062    if (1) test_sri_d_d_1(TyD);
   6063    if (1) test_sri_d_d_33(TyD);
   6064    if (1) test_sri_d_d_64(TyD);
   6065 
   6066    // sli          2d,4s,2s,8h,4h,16b,8b  _#imm
   6067    // sri          2d,4s,2s,8h,4h,16b,8b  _#imm
   6068    if (1) test_sli_2d_2d_0(TyD);
   6069    if (1) test_sli_2d_2d_32(TyD);
   6070    if (1) test_sli_2d_2d_63(TyD);
   6071    if (1) test_sli_4s_4s_0(TyS);
   6072    if (1) test_sli_4s_4s_16(TyS);
   6073    if (1) test_sli_4s_4s_31(TyS);
   6074    if (1) test_sli_2s_2s_0(TyS);
   6075    if (1) test_sli_2s_2s_16(TyS);
   6076    if (1) test_sli_2s_2s_31(TyS);
   6077    if (1) test_sli_8h_8h_0(TyH);
   6078    if (1) test_sli_8h_8h_8(TyH);
   6079    if (1) test_sli_8h_8h_15(TyH);
   6080    if (1) test_sli_4h_4h_0(TyH);
   6081    if (1) test_sli_4h_4h_8(TyH);
   6082    if (1) test_sli_4h_4h_15(TyH);
   6083    if (1) test_sli_16b_16b_0(TyB);
   6084    if (1) test_sli_16b_16b_3(TyB);
   6085    if (1) test_sli_16b_16b_7(TyB);
   6086    if (1) test_sli_8b_8b_0(TyB);
   6087    if (1) test_sli_8b_8b_3(TyB);
   6088    if (1) test_sli_8b_8b_7(TyB);
   6089    if (1) test_sri_2d_2d_1(TyD);
   6090    if (1) test_sri_2d_2d_33(TyD);
   6091    if (1) test_sri_2d_2d_64(TyD);
   6092    if (1) test_sri_4s_4s_1(TyS);
   6093    if (1) test_sri_4s_4s_17(TyS);
   6094    if (1) test_sri_4s_4s_32(TyS);
   6095    if (1) test_sri_2s_2s_1(TyS);
   6096    if (1) test_sri_2s_2s_17(TyS);
   6097    if (1) test_sri_2s_2s_32(TyS);
   6098    if (1) test_sri_8h_8h_1(TyH);
   6099    if (1) test_sri_8h_8h_8(TyH);
   6100    if (1) test_sri_8h_8h_16(TyH);
   6101    if (1) test_sri_4h_4h_1(TyH);
   6102    if (1) test_sri_4h_4h_8(TyH);
   6103    if (1) test_sri_4h_4h_16(TyH);
   6104    if (1) test_sri_16b_16b_1(TyB);
   6105    if (1) test_sri_16b_16b_4(TyB);
   6106    if (1) test_sri_16b_16b_8(TyB);
   6107    if (1) test_sri_8b_8b_1(TyB);
   6108    if (1) test_sri_8b_8b_4(TyB);
   6109    if (1) test_sri_8b_8b_8(TyB);
   6110 
   6111    // smax         4s,2s,8h,4h,16b,8b
   6112    // umax         4s,2s,8h,4h,16b,8b
   6113    // smin         4s,2s,8h,4h,16b,8b
   6114    // umin         4s,2s,8h,4h,16b,8b
   6115    if (1) test_smax_4s_4s_4s(TyS);
   6116    if (1) test_smax_2s_2s_2s(TyS);
   6117    if (1) test_smax_8h_8h_8h(TyH);
   6118    if (1) test_smax_4h_4h_4h(TyH);
   6119    if (1) test_smax_16b_16b_16b(TyB);
   6120    if (1) test_smax_8b_8b_8b(TyB);
   6121    if (1) test_umax_4s_4s_4s(TyS);
   6122    if (1) test_umax_2s_2s_2s(TyS);
   6123    if (1) test_umax_8h_8h_8h(TyH);
   6124    if (1) test_umax_4h_4h_4h(TyH);
   6125    if (1) test_umax_16b_16b_16b(TyB);
   6126    if (1) test_umax_8b_8b_8b(TyB);
   6127    if (1) test_smin_4s_4s_4s(TyS);
   6128    if (1) test_smin_2s_2s_2s(TyS);
   6129    if (1) test_smin_8h_8h_8h(TyH);
   6130    if (1) test_smin_4h_4h_4h(TyH);
   6131    if (1) test_smin_16b_16b_16b(TyB);
   6132    if (1) test_smin_8b_8b_8b(TyB);
   6133    if (1) test_umin_4s_4s_4s(TyS);
   6134    if (1) test_umin_2s_2s_2s(TyS);
   6135    if (1) test_umin_8h_8h_8h(TyH);
   6136    if (1) test_umin_4h_4h_4h(TyH);
   6137    if (1) test_umin_16b_16b_16b(TyB);
   6138    if (1) test_umin_8b_8b_8b(TyB);
   6139 
   6140    // smaxp        4s,2s,8h,4h,16b,8b
   6141    // umaxp        4s,2s,8h,4h,16b,8b
   6142    // sminp        4s,2s,8h,4h,16b,8b
   6143    // uminp        4s,2s,8h,4h,16b,8b
   6144    if (1) test_smaxp_4s_4s_4s(TyS);
   6145    if (1) test_smaxp_2s_2s_2s(TyS);
   6146    if (1) test_smaxp_8h_8h_8h(TyH);
   6147    if (1) test_smaxp_4h_4h_4h(TyH);
   6148    if (1) test_smaxp_16b_16b_16b(TyB);
   6149    if (1) test_smaxp_8b_8b_8b(TyB);
   6150    if (1) test_umaxp_4s_4s_4s(TyS);
   6151    if (1) test_umaxp_2s_2s_2s(TyS);
   6152    if (1) test_umaxp_8h_8h_8h(TyH);
   6153    if (1) test_umaxp_4h_4h_4h(TyH);
   6154    if (1) test_umaxp_16b_16b_16b(TyB);
   6155    if (1) test_umaxp_8b_8b_8b(TyB);
   6156    if (1) test_sminp_4s_4s_4s(TyS);
   6157    if (1) test_sminp_2s_2s_2s(TyS);
   6158    if (1) test_sminp_8h_8h_8h(TyH);
   6159    if (1) test_sminp_4h_4h_4h(TyH);
   6160    if (1) test_sminp_16b_16b_16b(TyB);
   6161    if (1) test_sminp_8b_8b_8b(TyB);
   6162    if (1) test_uminp_4s_4s_4s(TyS);
   6163    if (1) test_uminp_2s_2s_2s(TyS);
   6164    if (1) test_uminp_8h_8h_8h(TyH);
   6165    if (1) test_uminp_4h_4h_4h(TyH);
   6166    if (1) test_uminp_16b_16b_16b(TyB);
   6167    if (1) test_uminp_8b_8b_8b(TyB);
   6168 
   6169    // smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   6170    // umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   6171    // sminv        s_4s,h_8h,h_4h,b_16b,b_8b
   6172    // uminv        s_4s,h_8h,h_4h,b_16b,b_8b
   6173    if (1) test_SMAXV();
   6174    if (1) test_UMAXV();
   6175    if (1) test_SMINV();
   6176    if (1) test_UMINV();
   6177 
   6178    // smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6179    // umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6180    // smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6181    // umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6182    // smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6183    // umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6184    if (1) test_smlal_2d_2s_s0(TyS);
   6185    if (1) test_smlal_2d_2s_s3(TyS);
   6186    if (1) test_smlal2_2d_4s_s1(TyS);
   6187    if (1) test_smlal2_2d_4s_s2(TyS);
   6188    if (1) test_smlal_4s_4h_h0(TyH);
   6189    if (1) test_smlal_4s_4h_h7(TyH);
   6190    if (1) test_smlal2_4s_8h_h1(TyH);
   6191    if (1) test_smlal2_4s_8h_h4(TyH);
   6192    if (1) test_umlal_2d_2s_s0(TyS);
   6193    if (1) test_umlal_2d_2s_s3(TyS);
   6194    if (1) test_umlal2_2d_4s_s1(TyS);
   6195    if (1) test_umlal2_2d_4s_s2(TyS);
   6196    if (1) test_umlal_4s_4h_h0(TyH);
   6197    if (1) test_umlal_4s_4h_h7(TyH);
   6198    if (1) test_umlal2_4s_8h_h1(TyH);
   6199    if (1) test_umlal2_4s_8h_h4(TyH);
   6200    if (1) test_smlsl_2d_2s_s0(TyS);
   6201    if (1) test_smlsl_2d_2s_s3(TyS);
   6202    if (1) test_smlsl2_2d_4s_s1(TyS);
   6203    if (1) test_smlsl2_2d_4s_s2(TyS);
   6204    if (1) test_smlsl_4s_4h_h0(TyH);
   6205    if (1) test_smlsl_4s_4h_h7(TyH);
   6206    if (1) test_smlsl2_4s_8h_h1(TyH);
   6207    if (1) test_smlsl2_4s_8h_h4(TyH);
   6208    if (1) test_umlsl_2d_2s_s0(TyS);
   6209    if (1) test_umlsl_2d_2s_s3(TyS);
   6210    if (1) test_umlsl2_2d_4s_s1(TyS);
   6211    if (1) test_umlsl2_2d_4s_s2(TyS);
   6212    if (1) test_umlsl_4s_4h_h0(TyH);
   6213    if (1) test_umlsl_4s_4h_h7(TyH);
   6214    if (1) test_umlsl2_4s_8h_h1(TyH);
   6215    if (1) test_umlsl2_4s_8h_h4(TyH);
   6216    if (1) test_smull_2d_2s_s0(TyS);
   6217    if (1) test_smull_2d_2s_s3(TyS);
   6218    if (1) test_smull2_2d_4s_s1(TyS);
   6219    if (1) test_smull2_2d_4s_s2(TyS);
   6220    if (1) test_smull_4s_4h_h0(TyH);
   6221    if (1) test_smull_4s_4h_h7(TyH);
   6222    if (1) test_smull2_4s_8h_h1(TyH);
   6223    if (1) test_smull2_4s_8h_h4(TyH);
   6224    if (1) test_umull_2d_2s_s0(TyS);
   6225    if (1) test_umull_2d_2s_s3(TyS);
   6226    if (1) test_umull2_2d_4s_s1(TyS);
   6227    if (1) test_umull2_2d_4s_s2(TyS);
   6228    if (1) test_umull_4s_4h_h0(TyH);
   6229    if (1) test_umull_4s_4h_h7(TyH);
   6230    if (1) test_umull2_4s_8h_h1(TyH);
   6231    if (1) test_umull2_4s_8h_h4(TyH);
   6232 
   6233    // smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6234    // umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6235    // smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6236    // umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6237    // smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6238    // umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6239    if (1) test_smlal_2d_2s_2s(TyS);
   6240    if (1) test_smlal2_2d_4s_4s(TyS);
   6241    if (1) test_smlal_4s_4h_4h(TyH);
   6242    if (1) test_smlal2_4s_8h_8h(TyH);
   6243    if (1) test_smlal_8h_8b_8b(TyB);
   6244    if (1) test_smlal2_8h_16b_16b(TyB);
   6245    if (1) test_umlal_2d_2s_2s(TyS);
   6246    if (1) test_umlal2_2d_4s_4s(TyS);
   6247    if (1) test_umlal_4s_4h_4h(TyH);
   6248    if (1) test_umlal2_4s_8h_8h(TyH);
   6249    if (1) test_umlal_8h_8b_8b(TyB);
   6250    if (1) test_umlal2_8h_16b_16b(TyB);
   6251    if (1) test_smlsl_2d_2s_2s(TyS);
   6252    if (1) test_smlsl2_2d_4s_4s(TyS);
   6253    if (1) test_smlsl_4s_4h_4h(TyH);
   6254    if (1) test_smlsl2_4s_8h_8h(TyH);
   6255    if (1) test_smlsl_8h_8b_8b(TyB);
   6256    if (1) test_smlsl2_8h_16b_16b(TyB);
   6257    if (1) test_umlsl_2d_2s_2s(TyS);
   6258    if (1) test_umlsl2_2d_4s_4s(TyS);
   6259    if (1) test_umlsl_4s_4h_4h(TyH);
   6260    if (1) test_umlsl2_4s_8h_8h(TyH);
   6261    if (1) test_umlsl_8h_8b_8b(TyB);
   6262    if (1) test_umlsl2_8h_16b_16b(TyB);
   6263    if (1) test_smull_2d_2s_2s(TyS);
   6264    if (1) test_smull2_2d_4s_4s(TyS);
   6265    if (1) test_smull_4s_4h_4h(TyH);
   6266    if (1) test_smull2_4s_8h_8h(TyH);
   6267    if (1) test_smull_8h_8b_8b(TyB);
   6268    if (1) test_smull2_8h_16b_16b(TyB);
   6269    if (1) test_umull_2d_2s_2s(TyS);
   6270    if (1) test_umull2_2d_4s_4s(TyS);
   6271    if (1) test_umull_4s_4h_4h(TyH);
   6272    if (1) test_umull2_4s_8h_8h(TyH);
   6273    if (1) test_umull_8h_8b_8b(TyB);
   6274    if (1) test_umull2_8h_16b_16b(TyB);
   6275 
   6276    // smov         w_b[], w_h[], x_b[], x_h[], x_s[]
   6277    // umov         w_b[], w_h[],               w_s[], x_d[]
   6278    if (1) test_umov_x_d0(TyD);
   6279    if (1) test_umov_x_d1(TyD);
   6280    if (1) test_umov_w_s0(TyS);
   6281    if (1) test_umov_w_s3(TyS);
   6282    if (1) test_umov_w_h0(TyH);
   6283    if (1) test_umov_w_h7(TyH);
   6284    if (1) test_umov_w_b0(TyB);
   6285    if (1) test_umov_w_b15(TyB);
   6286    if (1) test_smov_x_s0(TyS);
   6287    if (1) test_smov_x_s3(TyS);
   6288    if (1) test_smov_x_h0(TyH);
   6289    if (1) test_smov_x_h7(TyH);
   6290    if (1) test_smov_w_h0(TyH);
   6291    if (1) test_smov_w_h7(TyH);
   6292    if (1) test_smov_x_b0(TyB);
   6293    if (1) test_smov_x_b15(TyB);
   6294    if (1) test_smov_w_b0(TyB);
   6295    if (1) test_smov_w_b15(TyB);
   6296 
   6297    // sqabs        d,s,h,b
   6298    // sqneg        d,s,h,b
   6299    if (1) test_sqabs_d_d(TyD);
   6300    if (1) test_sqabs_s_s(TyS);
   6301    if (1) test_sqabs_h_h(TyH);
   6302    if (1) test_sqabs_b_b(TyB);
   6303    if (1) test_sqneg_d_d(TyD);
   6304    if (1) test_sqneg_s_s(TyS);
   6305    if (1) test_sqneg_h_h(TyH);
   6306    if (1) test_sqneg_b_b(TyB);
   6307 
   6308    // sqabs        2d,4s,2s,8h,4h,16b,8b
   6309    // sqneg        2d,4s,2s,8h,4h,16b,8b
   6310    if (1) test_sqabs_2d_2d(TyD);
   6311    if (1) test_sqabs_4s_4s(TyS);
   6312    if (1) test_sqabs_2s_2s(TyS);
   6313    if (1) test_sqabs_8h_8h(TyH);
   6314    if (1) test_sqabs_4h_4h(TyH);
   6315    if (1) test_sqabs_16b_16b(TyB);
   6316    if (1) test_sqabs_8b_8b(TyB);
   6317    if (1) test_sqneg_2d_2d(TyD);
   6318    if (1) test_sqneg_4s_4s(TyS);
   6319    if (1) test_sqneg_2s_2s(TyS);
   6320    if (1) test_sqneg_8h_8h(TyH);
   6321    if (1) test_sqneg_4h_4h(TyH);
   6322    if (1) test_sqneg_16b_16b(TyB);
   6323    if (1) test_sqneg_8b_8b(TyB);
   6324 
   6325    // sqadd        d,s,h,b
   6326    // uqadd        d,s,h,b
   6327    // sqsub        d,s,h,b
   6328    // uqsub        d,s,h,b
   6329    if (1) test_sqadd_d_d_d(TyD);
   6330    if (1) test_sqadd_s_s_s(TyS);
   6331    if (1) test_sqadd_h_h_h(TyH);
   6332    if (1) test_sqadd_b_b_b(TyB);
   6333    if (1) test_uqadd_d_d_d(TyD);
   6334    if (1) test_uqadd_s_s_s(TyS);
   6335    if (1) test_uqadd_h_h_h(TyH);
   6336    if (1) test_uqadd_b_b_b(TyB);
   6337    if (1) test_sqsub_d_d_d(TyD);
   6338    if (1) test_sqsub_s_s_s(TyS);
   6339    if (1) test_sqsub_h_h_h(TyH);
   6340    if (1) test_sqsub_b_b_b(TyB);
   6341    if (1) test_uqsub_d_d_d(TyD);
   6342    if (1) test_uqsub_s_s_s(TyS);
   6343    if (1) test_uqsub_h_h_h(TyH);
   6344    if (1) test_uqsub_b_b_b(TyB);
   6345 
   6346    // sqadd        2d,4s,2s,8h,4h,16b,8b
   6347    // uqadd        2d,4s,2s,8h,4h,16b,8b
   6348    // sqsub        2d,4s,2s,8h,4h,16b,8b
   6349    // uqsub        2d,4s,2s,8h,4h,16b,8b
   6350    if (1) test_sqadd_2d_2d_2d(TyD);
   6351    if (1) test_sqadd_4s_4s_4s(TyS);
   6352    if (1) test_sqadd_2s_2s_2s(TyS);
   6353    if (1) test_sqadd_8h_8h_8h(TyH);
   6354    if (1) test_sqadd_4h_4h_4h(TyH);
   6355    if (1) test_sqadd_16b_16b_16b(TyB);
   6356    if (1) test_sqadd_8b_8b_8b(TyB);
   6357    if (1) test_uqadd_2d_2d_2d(TyD);
   6358    if (1) test_uqadd_4s_4s_4s(TyS);
   6359    if (1) test_uqadd_2s_2s_2s(TyS);
   6360    if (1) test_uqadd_8h_8h_8h(TyH);
   6361    if (1) test_uqadd_4h_4h_4h(TyH);
   6362    if (1) test_uqadd_16b_16b_16b(TyB);
   6363    if (1) test_uqadd_8b_8b_8b(TyB);
   6364    if (1) test_sqsub_2d_2d_2d(TyD);
   6365    if (1) test_sqsub_4s_4s_4s(TyS);
   6366    if (1) test_sqsub_2s_2s_2s(TyS);
   6367    if (1) test_sqsub_8h_8h_8h(TyH);
   6368    if (1) test_sqsub_4h_4h_4h(TyH);
   6369    if (1) test_sqsub_16b_16b_16b(TyB);
   6370    if (1) test_sqsub_8b_8b_8b(TyB);
   6371    if (1) test_uqsub_2d_2d_2d(TyD);
   6372    if (1) test_uqsub_4s_4s_4s(TyS);
   6373    if (1) test_uqsub_2s_2s_2s(TyS);
   6374    if (1) test_uqsub_8h_8h_8h(TyH);
   6375    if (1) test_uqsub_4h_4h_4h(TyH);
   6376    if (1) test_uqsub_16b_16b_16b(TyB);
   6377    if (1) test_uqsub_8b_8b_8b(TyB);
   6378 
   6379    // sqdmlal      d_s_s[], s_h_h[]
   6380    // sqdmlsl      d_s_s[], s_h_h[]
   6381    // sqdmull      d_s_s[], s_h_h[]
   6382    if (1) test_sqdmlal_d_s_s0(TyS);
   6383    if (1) test_sqdmlal_d_s_s3(TyS);
   6384    if (1) test_sqdmlal_s_h_h1(TyH);
   6385    if (1) test_sqdmlal_s_h_h5(TyH);
   6386    if (1) test_sqdmlsl_d_s_s0(TyS);
   6387    if (1) test_sqdmlsl_d_s_s3(TyS);
   6388    if (1) test_sqdmlsl_s_h_h1(TyH);
   6389    if (1) test_sqdmlsl_s_h_h5(TyH);
   6390    if (1) test_sqdmull_d_s_s0(TyS);
   6391    if (1) test_sqdmull_d_s_s3(TyS);
   6392    if (1) test_sqdmull_s_h_h1(TyH);
   6393    if (1) test_sqdmull_s_h_h5(TyH);
   6394 
   6395    // sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6396    // sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6397    // sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6398    if (1) test_sqdmlal_2d_2s_s0(TyS);
   6399    if (1) test_sqdmlal_2d_2s_s3(TyS);
   6400    if (1) test_sqdmlal2_2d_4s_s1(TyS);
   6401    if (1) test_sqdmlal2_2d_4s_s2(TyS);
   6402    if (1) test_sqdmlal_4s_4h_h0(TyH);
   6403    if (1) test_sqdmlal_4s_4h_h7(TyH);
   6404    if (1) test_sqdmlal2_4s_8h_h1(TyH);
   6405    if (1) test_sqdmlal2_4s_8h_h4(TyH);
   6406    if (1) test_sqdmlsl_2d_2s_s0(TyS);
   6407    if (1) test_sqdmlsl_2d_2s_s3(TyS);
   6408    if (1) test_sqdmlsl2_2d_4s_s1(TyS);
   6409    if (1) test_sqdmlsl2_2d_4s_s2(TyS);
   6410    if (1) test_sqdmlsl_4s_4h_h0(TyH);
   6411    if (1) test_sqdmlsl_4s_4h_h7(TyH);
   6412    if (1) test_sqdmlsl2_4s_8h_h1(TyH);
   6413    if (1) test_sqdmlsl2_4s_8h_h4(TyH);
   6414    if (1) test_sqdmull_2d_2s_s0(TyS);
   6415    if (1) test_sqdmull_2d_2s_s3(TyS);
   6416    if (1) test_sqdmull2_2d_4s_s1(TyS);
   6417    if (1) test_sqdmull2_2d_4s_s2(TyS);
   6418    if (1) test_sqdmull_4s_4h_h0(TyH);
   6419    if (1) test_sqdmull_4s_4h_h7(TyH);
   6420    if (1) test_sqdmull2_4s_8h_h1(TyH);
   6421    if (1) test_sqdmull2_4s_8h_h4(TyH);
   6422 
   6423    // sqdmlal      d_s_s, s_h_h
   6424    // sqdmlsl      d_s_s, s_h_h
   6425    // sqdmull      d_s_s, s_h_h
   6426    if (1) test_sqdmlal_d_s_s(TyS);
   6427    if (1) test_sqdmlal_s_h_h(TyH);
   6428    if (1) test_sqdmlsl_d_s_s(TyS);
   6429    if (1) test_sqdmlsl_s_h_h(TyH);
   6430    if (1) test_sqdmull_d_s_s(TyS);
   6431    if (1) test_sqdmull_s_h_h(TyH);
   6432 
   6433    // sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6434    // sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6435    // sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6436    if (1) test_sqdmlal_2d_2s_2s(TyS);
   6437    if (1) test_sqdmlal2_2d_4s_4s(TyS);
   6438    if (1) test_sqdmlal_4s_4h_4h(TyH);
   6439    if (1) test_sqdmlal2_4s_8h_8h(TyH);
   6440    if (1) test_sqdmlsl_2d_2s_2s(TyS);
   6441    if (1) test_sqdmlsl2_2d_4s_4s(TyS);
   6442    if (1) test_sqdmlsl_4s_4h_4h(TyH);
   6443    if (1) test_sqdmlsl2_4s_8h_8h(TyH);
   6444    if (1) test_sqdmull_2d_2s_2s(TyS);
   6445    if (1) test_sqdmull2_2d_4s_4s(TyS);
   6446    if (1) test_sqdmull_4s_4h_4h(TyH);
   6447    if (1) test_sqdmull2_4s_8h_8h(TyH);
   6448 
   6449    // sqdmulh      s_s_s[], h_h_h[]
   6450    // sqrdmulh     s_s_s[], h_h_h[]
   6451    if (1) test_sqdmulh_s_s_s1(TyS);
   6452    if (1) test_sqdmulh_s_s_s3(TyS);
   6453    if (1) test_sqdmulh_h_h_h2(TyH);
   6454    if (1) test_sqdmulh_h_h_h7(TyH);
   6455    if (1) test_sqrdmulh_s_s_s1(TyS);
   6456    if (1) test_sqrdmulh_s_s_s3(TyS);
   6457    if (1) test_sqrdmulh_h_h_h2(TyH);
   6458    if (1) test_sqrdmulh_h_h_h7(TyH);
   6459 
   6460    // sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   6461    // sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   6462    if (1) test_sqdmulh_4s_4s_s1(TyS);
   6463    if (1) test_sqdmulh_4s_4s_s3(TyS);
   6464    if (1) test_sqdmulh_2s_2s_s1(TyS);
   6465    if (1) test_sqdmulh_2s_2s_s3(TyS);
   6466    if (1) test_sqdmulh_8h_8h_h2(TyH);
   6467    if (1) test_sqdmulh_8h_8h_h7(TyH);
   6468    if (1) test_sqdmulh_4h_4h_h2(TyH);
   6469    if (1) test_sqdmulh_4h_4h_h7(TyH);
   6470    if (1) test_sqrdmulh_4s_4s_s1(TyS);
   6471    if (1) test_sqrdmulh_4s_4s_s3(TyS);
   6472    if (1) test_sqrdmulh_2s_2s_s1(TyS);
   6473    if (1) test_sqrdmulh_2s_2s_s3(TyS);
   6474    if (1) test_sqrdmulh_8h_8h_h2(TyH);
   6475    if (1) test_sqrdmulh_8h_8h_h7(TyH);
   6476    if (1) test_sqrdmulh_4h_4h_h2(TyH);
   6477    if (1) test_sqrdmulh_4h_4h_h7(TyH);
   6478 
   6479    // sqdmulh      h,s
   6480    // sqrdmulh     h,s
   6481    if (1) test_sqdmulh_s_s_s(TyS);
   6482    if (1) test_sqdmulh_h_h_h(TyH);
   6483    if (1) test_sqrdmulh_s_s_s(TyS);
   6484    if (1) test_sqrdmulh_h_h_h(TyH);
   6485 
   6486    // sqdmulh      4s,2s,8h,4h
   6487    // sqrdmulh     4s,2s,8h,4h
   6488    if (1) test_sqdmulh_4s_4s_4s(TyS);
   6489    if (1) test_sqdmulh_2s_2s_2s(TyS);
   6490    if (1) test_sqdmulh_8h_8h_8h(TyH);
   6491    if (1) test_sqdmulh_4h_4h_4h(TyH);
   6492    if (1) test_sqrdmulh_4s_4s_4s(TyS);
   6493    if (1) test_sqrdmulh_2s_2s_2s(TyS);
   6494    if (1) test_sqrdmulh_8h_8h_8h(TyH);
   6495    if (1) test_sqrdmulh_4h_4h_4h(TyH);
   6496 
   6497    // sqshl (reg)  d,s,h,b
   6498    // uqshl (reg)  d,s,h,b
   6499    // sqrshl (reg) d,s,h,b
   6500    // uqrshl (reg) d,s,h,b
   6501    if (1) test_sqshl_d_d_d(TyD);
   6502    if (1) test_sqshl_s_s_s(TyS);
   6503    if (1) test_sqshl_h_h_h(TyH);
   6504    if (1) test_sqshl_b_b_b(TyB);
   6505    if (1) test_uqshl_d_d_d(TyD);
   6506    if (1) test_uqshl_s_s_s(TyS);
   6507    if (1) test_uqshl_h_h_h(TyH);
   6508    if (1) test_uqshl_b_b_b(TyB);
   6509    if (1) test_sqrshl_d_d_d(TyD);
   6510    if (1) test_sqrshl_s_s_s(TyS);
   6511    if (1) test_sqrshl_h_h_h(TyH);
   6512    if (1) test_sqrshl_b_b_b(TyB);
   6513    if (1) test_uqrshl_d_d_d(TyD);
   6514    if (1) test_uqrshl_s_s_s(TyS);
   6515    if (1) test_uqrshl_h_h_h(TyH);
   6516    if (1) test_uqrshl_b_b_b(TyB);
   6517 
   6518    // sqshl (reg)  2d,4s,2s,8h,4h,16b,8b
   6519    // uqshl (reg)  2d,4s,2s,8h,4h,16b,8b
   6520    // sqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
   6521    // uqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
   6522    if (1) test_sqshl_2d_2d_2d(TyD);
   6523    if (1) test_sqshl_4s_4s_4s(TyS);
   6524    if (1) test_sqshl_2s_2s_2s(TyS);
   6525    if (1) test_sqshl_8h_8h_8h(TyH);
   6526    if (1) test_sqshl_4h_4h_4h(TyH);
   6527    if (1) test_sqshl_16b_16b_16b(TyB);
   6528    if (1) test_sqshl_8b_8b_8b(TyB);
   6529    if (1) test_uqshl_2d_2d_2d(TyD);
   6530    if (1) test_uqshl_4s_4s_4s(TyS);
   6531    if (1) test_uqshl_2s_2s_2s(TyS);
   6532    if (1) test_uqshl_8h_8h_8h(TyH);
   6533    if (1) test_uqshl_4h_4h_4h(TyH);
   6534    if (1) test_uqshl_16b_16b_16b(TyB);
   6535    if (1) test_uqshl_8b_8b_8b(TyB);
   6536    if (1) test_sqrshl_2d_2d_2d(TyD);
   6537    if (1) test_sqrshl_4s_4s_4s(TyS);
   6538    if (1) test_sqrshl_2s_2s_2s(TyS);
   6539    if (1) test_sqrshl_8h_8h_8h(TyH);
   6540    if (1) test_sqrshl_4h_4h_4h(TyH);
   6541    if (1) test_sqrshl_16b_16b_16b(TyB);
   6542    if (1) test_sqrshl_8b_8b_8b(TyB);
   6543    if (1) test_uqrshl_2d_2d_2d(TyD);
   6544    if (1) test_uqrshl_4s_4s_4s(TyS);
   6545    if (1) test_uqrshl_2s_2s_2s(TyS);
   6546    if (1) test_uqrshl_8h_8h_8h(TyH);
   6547    if (1) test_uqrshl_4h_4h_4h(TyH);
   6548    if (1) test_uqrshl_16b_16b_16b(TyB);
   6549    if (1) test_uqrshl_8b_8b_8b(TyB);
   6550 
   6551    // sqrshrn      s_d, h_s, b_h   #imm
   6552    // uqrshrn      s_d, h_s, b_h   #imm
   6553    // sqshrn       s_d, h_s, b_h   #imm
   6554    // uqshrn       s_d, h_s, b_h   #imm
   6555    // sqrshrun     s_d, h_s, b_h   #imm
   6556    // sqshrun      s_d, h_s, b_h   #imm
   6557    if (1) test_sqrshrn_s_d_1(TyD);
   6558    if (1) test_sqrshrn_s_d_17(TyD);
   6559    if (1) test_sqrshrn_s_d_32(TyD);
   6560    if (1) test_sqrshrn_h_s_1(TyS);
   6561    if (1) test_sqrshrn_h_s_9(TyS);
   6562    if (1) test_sqrshrn_h_s_16(TyS);
   6563    if (1) test_sqrshrn_b_h_1(TyH);
   6564    if (1) test_sqrshrn_b_h_4(TyH);
   6565    if (1) test_sqrshrn_b_h_8(TyH);
   6566    if (1) test_uqrshrn_s_d_1(TyD);
   6567    if (1) test_uqrshrn_s_d_17(TyD);
   6568    if (1) test_uqrshrn_s_d_32(TyD);
   6569    if (1) test_uqrshrn_h_s_1(TyS);
   6570    if (1) test_uqrshrn_h_s_9(TyS);
   6571    if (1) test_uqrshrn_h_s_16(TyS);
   6572    if (1) test_uqrshrn_b_h_1(TyH);
   6573    if (1) test_uqrshrn_b_h_4(TyH);
   6574    if (1) test_uqrshrn_b_h_8(TyH);
   6575    if (1) test_sqshrn_s_d_1(TyD);
   6576    if (1) test_sqshrn_s_d_17(TyD);
   6577    if (1) test_sqshrn_s_d_32(TyD);
   6578    if (1) test_sqshrn_h_s_1(TyS);
   6579    if (1) test_sqshrn_h_s_9(TyS);
   6580    if (1) test_sqshrn_h_s_16(TyS);
   6581    if (1) test_sqshrn_b_h_1(TyH);
   6582    if (1) test_sqshrn_b_h_4(TyH);
   6583    if (1) test_sqshrn_b_h_8(TyH);
   6584    if (1) test_uqshrn_s_d_1(TyD);
   6585    if (1) test_uqshrn_s_d_17(TyD);
   6586    if (1) test_uqshrn_s_d_32(TyD);
   6587    if (1) test_uqshrn_h_s_1(TyS);
   6588    if (1) test_uqshrn_h_s_9(TyS);
   6589    if (1) test_uqshrn_h_s_16(TyS);
   6590    if (1) test_uqshrn_b_h_1(TyH);
   6591    if (1) test_uqshrn_b_h_4(TyH);
   6592    if (1) test_uqshrn_b_h_8(TyH);
   6593    if (1) test_sqrshrun_s_d_1(TyD);
   6594    if (1) test_sqrshrun_s_d_17(TyD);
   6595    if (1) test_sqrshrun_s_d_32(TyD);
   6596    if (1) test_sqrshrun_h_s_1(TyS);
   6597    if (1) test_sqrshrun_h_s_9(TyS);
   6598    if (1) test_sqrshrun_h_s_16(TyS);
   6599    if (1) test_sqrshrun_b_h_1(TyH);
   6600    if (1) test_sqrshrun_b_h_4(TyH);
   6601    if (1) test_sqrshrun_b_h_8(TyH);
   6602    if (1) test_sqshrun_s_d_1(TyD);
   6603    if (1) test_sqshrun_s_d_17(TyD);
   6604    if (1) test_sqshrun_s_d_32(TyD);
   6605    if (1) test_sqshrun_h_s_1(TyS);
   6606    if (1) test_sqshrun_h_s_9(TyS);
   6607    if (1) test_sqshrun_h_s_16(TyS);
   6608    if (1) test_sqshrun_b_h_1(TyH);
   6609    if (1) test_sqshrun_b_h_4(TyH);
   6610    if (1) test_sqshrun_b_h_8(TyH);
   6611 
   6612    // sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6613    // uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6614    // sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6615    // uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6616    // sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6617    // sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6618    if (1) test_sqrshrn_2s_2d_1(TyD);
   6619    if (1) test_sqrshrn_2s_2d_17(TyD);
   6620    if (1) test_sqrshrn_2s_2d_32(TyD);
   6621    if (1) test_sqrshrn2_4s_2d_1(TyD);
   6622    if (1) test_sqrshrn2_4s_2d_17(TyD);
   6623    if (1) test_sqrshrn2_4s_2d_32(TyD);
   6624    if (1) test_sqrshrn_4h_4s_1(TyS);
   6625    if (1) test_sqrshrn_4h_4s_9(TyS);
   6626    if (1) test_sqrshrn_4h_4s_16(TyS);
   6627    if (1) test_sqrshrn2_8h_4s_1(TyS);
   6628    if (1) test_sqrshrn2_8h_4s_9(TyS);
   6629    if (1) test_sqrshrn2_8h_4s_16(TyS);
   6630    if (1) test_sqrshrn_8b_8h_1(TyH);
   6631    if (1) test_sqrshrn_8b_8h_4(TyH);
   6632    if (1) test_sqrshrn_8b_8h_8(TyH);
   6633    if (1) test_sqrshrn2_16b_8h_1(TyH);
   6634    if (1) test_sqrshrn2_16b_8h_4(TyH);
   6635    if (1) test_sqrshrn2_16b_8h_8(TyH);
   6636    if (1) test_uqrshrn_2s_2d_1(TyD);
   6637    if (1) test_uqrshrn_2s_2d_17(TyD);
   6638    if (1) test_uqrshrn_2s_2d_32(TyD);
   6639    if (1) test_uqrshrn2_4s_2d_1(TyD);
   6640    if (1) test_uqrshrn2_4s_2d_17(TyD);
   6641    if (1) test_uqrshrn2_4s_2d_32(TyD);
   6642    if (1) test_uqrshrn_4h_4s_1(TyS);
   6643    if (1) test_uqrshrn_4h_4s_9(TyS);
   6644    if (1) test_uqrshrn_4h_4s_16(TyS);
   6645    if (1) test_uqrshrn2_8h_4s_1(TyS);
   6646    if (1) test_uqrshrn2_8h_4s_9(TyS);
   6647    if (1) test_uqrshrn2_8h_4s_16(TyS);
   6648    if (1) test_uqrshrn_8b_8h_1(TyH);
   6649    if (1) test_uqrshrn_8b_8h_4(TyH);
   6650    if (1) test_uqrshrn_8b_8h_8(TyH);
   6651    if (1) test_uqrshrn2_16b_8h_1(TyH);
   6652    if (1) test_uqrshrn2_16b_8h_4(TyH);
   6653    if (1) test_uqrshrn2_16b_8h_8(TyH);
   6654    if (1) test_sqshrn_2s_2d_1(TyD);
   6655    if (1) test_sqshrn_2s_2d_17(TyD);
   6656    if (1) test_sqshrn_2s_2d_32(TyD);
   6657    if (1) test_sqshrn2_4s_2d_1(TyD);
   6658    if (1) test_sqshrn2_4s_2d_17(TyD);
   6659    if (1) test_sqshrn2_4s_2d_32(TyD);
   6660    if (1) test_sqshrn_4h_4s_1(TyS);
   6661    if (1) test_sqshrn_4h_4s_9(TyS);
   6662    if (1) test_sqshrn_4h_4s_16(TyS);
   6663    if (1) test_sqshrn2_8h_4s_1(TyS);
   6664    if (1) test_sqshrn2_8h_4s_9(TyS);
   6665    if (1) test_sqshrn2_8h_4s_16(TyS);
   6666    if (1) test_sqshrn_8b_8h_1(TyH);
   6667    if (1) test_sqshrn_8b_8h_4(TyH);
   6668    if (1) test_sqshrn_8b_8h_8(TyH);
   6669    if (1) test_sqshrn2_16b_8h_1(TyH);
   6670    if (1) test_sqshrn2_16b_8h_4(TyH);
   6671    if (1) test_sqshrn2_16b_8h_8(TyH);
   6672    if (1) test_uqshrn_2s_2d_1(TyD);
   6673    if (1) test_uqshrn_2s_2d_17(TyD);
   6674    if (1) test_uqshrn_2s_2d_32(TyD);
   6675    if (1) test_uqshrn2_4s_2d_1(TyD);
   6676    if (1) test_uqshrn2_4s_2d_17(TyD);
   6677    if (1) test_uqshrn2_4s_2d_32(TyD);
   6678    if (1) test_uqshrn_4h_4s_1(TyS);
   6679    if (1) test_uqshrn_4h_4s_9(TyS);
   6680    if (1) test_uqshrn_4h_4s_16(TyS);
   6681    if (1) test_uqshrn2_8h_4s_1(TyS);
   6682    if (1) test_uqshrn2_8h_4s_9(TyS);
   6683    if (1) test_uqshrn2_8h_4s_16(TyS);
   6684    if (1) test_uqshrn_8b_8h_1(TyH);
   6685    if (1) test_uqshrn_8b_8h_4(TyH);
   6686    if (1) test_uqshrn_8b_8h_8(TyH);
   6687    if (1) test_uqshrn2_16b_8h_1(TyH);
   6688    if (1) test_uqshrn2_16b_8h_4(TyH);
   6689    if (1) test_uqshrn2_16b_8h_8(TyH);
   6690    if (1) test_sqrshrun_2s_2d_1(TyD);
   6691    if (1) test_sqrshrun_2s_2d_17(TyD);
   6692    if (1) test_sqrshrun_2s_2d_32(TyD);
   6693    if (1) test_sqrshrun2_4s_2d_1(TyD);
   6694    if (1) test_sqrshrun2_4s_2d_17(TyD);
   6695    if (1) test_sqrshrun2_4s_2d_32(TyD);
   6696    if (1) test_sqrshrun_4h_4s_1(TyS);
   6697    if (1) test_sqrshrun_4h_4s_9(TyS);
   6698    if (1) test_sqrshrun_4h_4s_16(TyS);
   6699    if (1) test_sqrshrun2_8h_4s_1(TyS);
   6700    if (1) test_sqrshrun2_8h_4s_9(TyS);
   6701    if (1) test_sqrshrun2_8h_4s_16(TyS);
   6702    if (1) test_sqrshrun_8b_8h_1(TyH);
   6703    if (1) test_sqrshrun_8b_8h_4(TyH);
   6704    if (1) test_sqrshrun_8b_8h_8(TyH);
   6705    if (1) test_sqrshrun2_16b_8h_1(TyH);
   6706    if (1) test_sqrshrun2_16b_8h_4(TyH);
   6707    if (1) test_sqrshrun2_16b_8h_8(TyH);
   6708    if (1) test_sqshrun_2s_2d_1(TyD);
   6709    if (1) test_sqshrun_2s_2d_17(TyD);
   6710    if (1) test_sqshrun_2s_2d_32(TyD);
   6711    if (1) test_sqshrun2_4s_2d_1(TyD);
   6712    if (1) test_sqshrun2_4s_2d_17(TyD);
   6713    if (1) test_sqshrun2_4s_2d_32(TyD);
   6714    if (1) test_sqshrun_4h_4s_1(TyS);
   6715    if (1) test_sqshrun_4h_4s_9(TyS);
   6716    if (1) test_sqshrun_4h_4s_16(TyS);
   6717    if (1) test_sqshrun2_8h_4s_1(TyS);
   6718    if (1) test_sqshrun2_8h_4s_9(TyS);
   6719    if (1) test_sqshrun2_8h_4s_16(TyS);
   6720    if (1) test_sqshrun_8b_8h_1(TyH);
   6721    if (1) test_sqshrun_8b_8h_4(TyH);
   6722    if (1) test_sqshrun_8b_8h_8(TyH);
   6723    if (1) test_sqshrun2_16b_8h_1(TyH);
   6724    if (1) test_sqshrun2_16b_8h_4(TyH);
   6725    if (1) test_sqshrun2_16b_8h_8(TyH);
   6726 
   6727    // sqshl (imm)  d,s,h,b   _#imm
   6728    // uqshl (imm)  d,s,h,b   _#imm
   6729    // sqshlu (imm) d,s,h,b   _#imm
   6730    if (1) test_sqshl_d_d_0(TyD);
   6731    if (1) test_sqshl_d_d_32(TyD);
   6732    if (1) test_sqshl_d_d_63(TyD);
   6733    if (1) test_sqshl_s_s_0(TyS);
   6734    if (1) test_sqshl_s_s_16(TyS);
   6735    if (1) test_sqshl_s_s_31(TyS);
   6736    if (1) test_sqshl_h_h_0(TyH);
   6737    if (1) test_sqshl_h_h_8(TyH);
   6738    if (1) test_sqshl_h_h_15(TyH);
   6739    if (1) test_sqshl_b_b_0(TyB);
   6740    if (1) test_sqshl_b_b_1(TyB);
   6741    if (1) test_sqshl_b_b_4(TyB);
   6742    if (1) test_sqshl_b_b_6(TyB);
   6743    if (1) test_sqshl_b_b_7(TyB);
   6744    if (1) test_uqshl_d_d_0(TyD);
   6745    if (1) test_uqshl_d_d_32(TyD);
   6746    if (1) test_uqshl_d_d_63(TyD);
   6747    if (1) test_uqshl_s_s_0(TyS);
   6748    if (1) test_uqshl_s_s_16(TyS);
   6749    if (1) test_uqshl_s_s_31(TyS);
   6750    if (1) test_uqshl_h_h_0(TyH);
   6751    if (1) test_uqshl_h_h_8(TyH);
   6752    if (1) test_uqshl_h_h_15(TyH);
   6753    if (1) test_uqshl_b_b_0(TyB);
   6754    if (1) test_uqshl_b_b_1(TyB);
   6755    if (1) test_uqshl_b_b_4(TyB);
   6756    if (1) test_uqshl_b_b_6(TyB);
   6757    if (1) test_uqshl_b_b_7(TyB);
   6758    if (1) test_sqshlu_d_d_0(TyD);
   6759    if (1) test_sqshlu_d_d_32(TyD);
   6760    if (1) test_sqshlu_d_d_63(TyD);
   6761    if (1) test_sqshlu_s_s_0(TyS);
   6762    if (1) test_sqshlu_s_s_16(TyS);
   6763    if (1) test_sqshlu_s_s_31(TyS);
   6764    if (1) test_sqshlu_h_h_0(TyH);
   6765    if (1) test_sqshlu_h_h_8(TyH);
   6766    if (1) test_sqshlu_h_h_15(TyH);
   6767    if (1) test_sqshlu_b_b_0(TyB);
   6768    if (1) test_sqshlu_b_b_1(TyB);
   6769    if (1) test_sqshlu_b_b_2(TyB);
   6770    if (1) test_sqshlu_b_b_3(TyB);
   6771    if (1) test_sqshlu_b_b_4(TyB);
   6772    if (1) test_sqshlu_b_b_5(TyB);
   6773    if (1) test_sqshlu_b_b_6(TyB);
   6774    if (1) test_sqshlu_b_b_7(TyB);
   6775 
   6776    // sqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
   6777    // uqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
   6778    // sqshlu (imm) 2d,4s,2s,8h,4h,16b,8b   _#imm
   6779    if (1) test_sqshl_2d_2d_0(TyD);
   6780    if (1) test_sqshl_2d_2d_32(TyD);
   6781    if (1) test_sqshl_2d_2d_63(TyD);
   6782    if (1) test_sqshl_4s_4s_0(TyS);
   6783    if (1) test_sqshl_4s_4s_16(TyS);
   6784    if (1) test_sqshl_4s_4s_31(TyS);
   6785    if (1) test_sqshl_2s_2s_0(TyS);
   6786    if (1) test_sqshl_2s_2s_16(TyS);
   6787    if (1) test_sqshl_2s_2s_31(TyS);
   6788    if (1) test_sqshl_8h_8h_0(TyH);
   6789    if (1) test_sqshl_8h_8h_8(TyH);
   6790    if (1) test_sqshl_8h_8h_15(TyH);
   6791    if (1) test_sqshl_4h_4h_0(TyH);
   6792    if (1) test_sqshl_4h_4h_8(TyH);
   6793    if (1) test_sqshl_4h_4h_15(TyH);
   6794    if (1) test_sqshl_16b_16b_0(TyB);
   6795    if (1) test_sqshl_16b_16b_3(TyB);
   6796    if (1) test_sqshl_16b_16b_7(TyB);
   6797    if (1) test_sqshl_8b_8b_0(TyB);
   6798    if (1) test_sqshl_8b_8b_3(TyB);
   6799    if (1) test_sqshl_8b_8b_7(TyB);
   6800    if (1) test_uqshl_2d_2d_0(TyD);
   6801    if (1) test_uqshl_2d_2d_32(TyD);
   6802    if (1) test_uqshl_2d_2d_63(TyD);
   6803    if (1) test_uqshl_4s_4s_0(TyS);
   6804    if (1) test_uqshl_4s_4s_16(TyS);
   6805    if (1) test_uqshl_4s_4s_31(TyS);
   6806    if (1) test_uqshl_2s_2s_0(TyS);
   6807    if (1) test_uqshl_2s_2s_16(TyS);
   6808    if (1) test_uqshl_2s_2s_31(TyS);
   6809    if (1) test_uqshl_8h_8h_0(TyH);
   6810    if (1) test_uqshl_8h_8h_8(TyH);
   6811    if (1) test_uqshl_8h_8h_15(TyH);
   6812    if (1) test_uqshl_4h_4h_0(TyH);
   6813    if (1) test_uqshl_4h_4h_8(TyH);
   6814    if (1) test_uqshl_4h_4h_15(TyH);
   6815    if (1) test_uqshl_16b_16b_0(TyB);
   6816    if (1) test_uqshl_16b_16b_3(TyB);
   6817    if (1) test_uqshl_16b_16b_7(TyB);
   6818    if (1) test_uqshl_8b_8b_0(TyB);
   6819    if (1) test_uqshl_8b_8b_3(TyB);
   6820    if (1) test_uqshl_8b_8b_7(TyB);
   6821    if (1) test_sqshlu_2d_2d_0(TyD);
   6822    if (1) test_sqshlu_2d_2d_32(TyD);
   6823    if (1) test_sqshlu_2d_2d_63(TyD);
   6824    if (1) test_sqshlu_4s_4s_0(TyS);
   6825    if (1) test_sqshlu_4s_4s_16(TyS);
   6826    if (1) test_sqshlu_4s_4s_31(TyS);
   6827    if (1) test_sqshlu_2s_2s_0(TyS);
   6828    if (1) test_sqshlu_2s_2s_16(TyS);
   6829    if (1) test_sqshlu_2s_2s_31(TyS);
   6830    if (1) test_sqshlu_8h_8h_0(TyH);
   6831    if (1) test_sqshlu_8h_8h_8(TyH);
   6832    if (1) test_sqshlu_8h_8h_15(TyH);
   6833    if (1) test_sqshlu_4h_4h_0(TyH);
   6834    if (1) test_sqshlu_4h_4h_8(TyH);
   6835    if (1) test_sqshlu_4h_4h_15(TyH);
   6836    if (1) test_sqshlu_16b_16b_0(TyB);
   6837    if (1) test_sqshlu_16b_16b_3(TyB);
   6838    if (1) test_sqshlu_16b_16b_7(TyB);
   6839    if (1) test_sqshlu_8b_8b_0(TyB);
   6840    if (1) test_sqshlu_8b_8b_3(TyB);
   6841    if (1) test_sqshlu_8b_8b_7(TyB);
   6842 
   6843    // sqxtn        s_d,h_s,b_h
   6844    // uqxtn        s_d,h_s,b_h
   6845    // sqxtun       s_d,h_s,b_h
   6846    if (1) test_sqxtn_s_d(TyD);
   6847    if (1) test_sqxtn_h_s(TyS);
   6848    if (1) test_sqxtn_b_h(TyH);
   6849    if (1) test_uqxtn_s_d(TyD);
   6850    if (1) test_uqxtn_h_s(TyS);
   6851    if (1) test_uqxtn_b_h(TyH);
   6852    if (1) test_sqxtun_s_d(TyD);
   6853    if (1) test_sqxtun_h_s(TyS);
   6854    if (1) test_sqxtun_b_h(TyH);
   6855 
   6856    // sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6857    // uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6858    // sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6859    if (1) test_sqxtn_2s_2d(TyD);
   6860    if (1) test_sqxtn2_4s_2d(TyD);
   6861    if (1) test_sqxtn_4h_4s(TyS);
   6862    if (1) test_sqxtn2_8h_4s(TyS);
   6863    if (1) test_sqxtn_8b_8h(TyH);
   6864    if (1) test_sqxtn2_16b_8h(TyH);
   6865    if (1) test_uqxtn_2s_2d(TyD);
   6866    if (1) test_uqxtn2_4s_2d(TyD);
   6867    if (1) test_uqxtn_4h_4s(TyS);
   6868    if (1) test_uqxtn2_8h_4s(TyS);
   6869    if (1) test_uqxtn_8b_8h(TyH);
   6870    if (1) test_uqxtn2_16b_8h(TyH);
   6871    if (1) test_sqxtun_2s_2d(TyD);
   6872    if (1) test_sqxtun2_4s_2d(TyD);
   6873    if (1) test_sqxtun_4h_4s(TyS);
   6874    if (1) test_sqxtun2_8h_4s(TyS);
   6875    if (1) test_sqxtun_8b_8h(TyH);
   6876    if (1) test_sqxtun2_16b_8h(TyH);
   6877 
   6878    // srhadd       4s,2s,8h,4h,16b,8b
   6879    // urhadd       4s,2s,8h,4h,16b,8b
   6880    if (1) test_srhadd_4s_4s_4s(TyS);
   6881    if (1) test_srhadd_2s_2s_2s(TyS);
   6882    if (1) test_srhadd_8h_8h_8h(TyH);
   6883    if (1) test_srhadd_4h_4h_4h(TyH);
   6884    if (1) test_srhadd_16b_16b_16b(TyB);
   6885    if (1) test_srhadd_8b_8b_8b(TyB);
   6886    if (1) test_urhadd_4s_4s_4s(TyS);
   6887    if (1) test_urhadd_2s_2s_2s(TyS);
   6888    if (1) test_urhadd_8h_8h_8h(TyH);
   6889    if (1) test_urhadd_4h_4h_4h(TyH);
   6890    if (1) test_urhadd_16b_16b_16b(TyB);
   6891    if (1) test_urhadd_8b_8b_8b(TyB);
   6892 
   6893    // sshl (reg)   d
   6894    // ushl (reg)   d
   6895    if (1) test_sshl_d_d_d(TyD);
   6896    if (1) test_ushl_d_d_d(TyD);
   6897 
   6898    // sshl (reg)   2d,4s,2s,8h,4h,16b,8b
   6899    // ushl (reg)   2d,4s,2s,8h,4h,16b,8b
   6900    if (1) test_sshl_2d_2d_2d(TyD);
   6901    if (1) test_sshl_4s_4s_4s(TyS);
   6902    if (1) test_sshl_2s_2s_2s(TyS);
   6903    if (1) test_sshl_8h_8h_8h(TyH);
   6904    if (1) test_sshl_4h_4h_4h(TyH);
   6905    if (1) test_sshl_16b_16b_16b(TyB);
   6906    if (1) test_sshl_8b_8b_8b(TyB);
   6907    if (1) test_ushl_2d_2d_2d(TyD);
   6908    if (1) test_ushl_4s_4s_4s(TyS);
   6909    if (1) test_ushl_2s_2s_2s(TyS);
   6910    if (1) test_ushl_8h_8h_8h(TyH);
   6911    if (1) test_ushl_4h_4h_4h(TyH);
   6912    if (1) test_ushl_16b_16b_16b(TyB);
   6913    if (1) test_ushl_8b_8b_8b(TyB);
   6914 
   6915    // shl  (imm)   d
   6916    // sshr (imm)   d
   6917    // ushr (imm)   d
   6918    if (1) test_shl_d_d_0(TyD);
   6919    if (1) test_shl_d_d_32(TyD);
   6920    if (1) test_shl_d_d_63(TyD);
   6921    if (1) test_sshr_d_d_1(TyD);
   6922    if (1) test_sshr_d_d_32(TyD);
   6923    if (1) test_sshr_d_d_64(TyD);
   6924    if (1) test_ushr_d_d_1(TyD);
   6925    if (1) test_ushr_d_d_32(TyD);
   6926    if (1) test_ushr_d_d_64(TyD);
   6927 
   6928    // shl  (imm)   16b,8b,8h,4h,4s,2s,2d
   6929    // sshr (imm)   2d,4s,2s,8h,4h,16b,8b
   6930    // ushr (imm)   2d,4s,2s,8h,4h,16b,8b
   6931    if (1) test_shl_2d_2d_0(TyD);
   6932    if (1) test_shl_2d_2d_13(TyD);
   6933    if (1) test_shl_2d_2d_63(TyD);
   6934    if (1) test_shl_4s_4s_0(TyS);
   6935    if (1) test_shl_4s_4s_13(TyS);
   6936    if (1) test_shl_4s_4s_31(TyS);
   6937    if (1) test_shl_2s_2s_0(TyS);
   6938    if (1) test_shl_2s_2s_13(TyS);
   6939    if (1) test_shl_2s_2s_31(TyS);
   6940    if (1) test_shl_8h_8h_0(TyH);
   6941    if (1) test_shl_8h_8h_13(TyH);
   6942    if (1) test_shl_8h_8h_15(TyH);
   6943    if (1) test_shl_4h_4h_0(TyH);
   6944    if (1) test_shl_4h_4h_13(TyH);
   6945    if (1) test_shl_4h_4h_15(TyH);
   6946    if (1) test_shl_16b_16b_0(TyB);
   6947    if (1) test_shl_16b_16b_7(TyB);
   6948    if (1) test_shl_8b_8b_0(TyB);
   6949    if (1) test_shl_8b_8b_7(TyB);
   6950    if (1) test_sshr_2d_2d_1(TyD);
   6951    if (1) test_sshr_2d_2d_13(TyD);
   6952    if (1) test_sshr_2d_2d_64(TyD);
   6953    if (1) test_sshr_4s_4s_1(TyS);
   6954    if (1) test_sshr_4s_4s_13(TyS);
   6955    if (1) test_sshr_4s_4s_32(TyS);
   6956    if (1) test_sshr_2s_2s_1(TyS);
   6957    if (1) test_sshr_2s_2s_13(TyS);
   6958    if (1) test_sshr_2s_2s_32(TyS);
   6959    if (1) test_sshr_8h_8h_1(TyH);
   6960    if (1) test_sshr_8h_8h_13(TyH);
   6961    if (1) test_sshr_8h_8h_16(TyH);
   6962    if (1) test_sshr_4h_4h_1(TyH);
   6963    if (1) test_sshr_4h_4h_13(TyH);
   6964    if (1) test_sshr_4h_4h_16(TyH);
   6965    if (1) test_sshr_16b_16b_1(TyB);
   6966    if (1) test_sshr_16b_16b_8(TyB);
   6967    if (1) test_sshr_8b_8b_1(TyB);
   6968    if (1) test_sshr_8b_8b_8(TyB);
   6969    if (1) test_ushr_2d_2d_1(TyD);
   6970    if (1) test_ushr_2d_2d_13(TyD);
   6971    if (1) test_ushr_2d_2d_64(TyD);
   6972    if (1) test_ushr_4s_4s_1(TyS);
   6973    if (1) test_ushr_4s_4s_13(TyS);
   6974    if (1) test_ushr_4s_4s_32(TyS);
   6975    if (1) test_ushr_2s_2s_1(TyS);
   6976    if (1) test_ushr_2s_2s_13(TyS);
   6977    if (1) test_ushr_2s_2s_32(TyS);
   6978    if (1) test_ushr_8h_8h_1(TyH);
   6979    if (1) test_ushr_8h_8h_13(TyH);
   6980    if (1) test_ushr_8h_8h_16(TyH);
   6981    if (1) test_ushr_4h_4h_1(TyH);
   6982    if (1) test_ushr_4h_4h_13(TyH);
   6983    if (1) test_ushr_4h_4h_16(TyH);
   6984    if (1) test_ushr_16b_16b_1(TyB);
   6985    if (1) test_ushr_16b_16b_8(TyB);
   6986    if (1) test_ushr_8b_8b_1(TyB);
   6987    if (1) test_ushr_8b_8b_8(TyB);
   6988 
   6989    // ssra (imm)   d
   6990    // usra (imm)   d
   6991    if (1) test_ssra_d_d_1(TyD);
   6992    if (1) test_ssra_d_d_32(TyD);
   6993    if (1) test_ssra_d_d_64(TyD);
   6994    if (1) test_usra_d_d_1(TyD);
   6995    if (1) test_usra_d_d_32(TyD);
   6996    if (1) test_usra_d_d_64(TyD);
   6997 
   6998    // ssra (imm)   2d,4s,2s,8h,4h,16b,8b
   6999    // usra (imm)   2d,4s,2s,8h,4h,16b,8b
   7000    if (1) test_ssra_2d_2d_1(TyD);
   7001    if (1) test_ssra_2d_2d_32(TyD);
   7002    if (1) test_ssra_2d_2d_64(TyD);
   7003    if (1) test_ssra_4s_4s_1(TyS);
   7004    if (1) test_ssra_4s_4s_16(TyS);
   7005    if (1) test_ssra_4s_4s_32(TyS);
   7006    if (1) test_ssra_2s_2s_1(TyS);
   7007    if (1) test_ssra_2s_2s_16(TyS);
   7008    if (1) test_ssra_2s_2s_32(TyS);
   7009    if (1) test_ssra_8h_8h_1(TyH);
   7010    if (1) test_ssra_8h_8h_8(TyH);
   7011    if (1) test_ssra_8h_8h_16(TyH);
   7012    if (1) test_ssra_4h_4h_1(TyH);
   7013    if (1) test_ssra_4h_4h_8(TyH);
   7014    if (1) test_ssra_4h_4h_16(TyH);
   7015    if (1) test_ssra_16b_16b_1(TyB);
   7016    if (1) test_ssra_16b_16b_3(TyB);
   7017    if (1) test_ssra_16b_16b_8(TyB);
   7018    if (1) test_ssra_8b_8b_1(TyB);
   7019    if (1) test_ssra_8b_8b_3(TyB);
   7020    if (1) test_ssra_8b_8b_8(TyB);
   7021    if (1) test_usra_2d_2d_1(TyD);
   7022    if (1) test_usra_2d_2d_32(TyD);
   7023    if (1) test_usra_2d_2d_64(TyD);
   7024    if (1) test_usra_4s_4s_1(TyS);
   7025    if (1) test_usra_4s_4s_16(TyS);
   7026    if (1) test_usra_4s_4s_32(TyS);
   7027    if (1) test_usra_2s_2s_1(TyS);
   7028    if (1) test_usra_2s_2s_16(TyS);
   7029    if (1) test_usra_2s_2s_32(TyS);
   7030    if (1) test_usra_8h_8h_1(TyH);
   7031    if (1) test_usra_8h_8h_8(TyH);
   7032    if (1) test_usra_8h_8h_16(TyH);
   7033    if (1) test_usra_4h_4h_1(TyH);
   7034    if (1) test_usra_4h_4h_8(TyH);
   7035    if (1) test_usra_4h_4h_16(TyH);
   7036    if (1) test_usra_16b_16b_1(TyB);
   7037    if (1) test_usra_16b_16b_3(TyB);
   7038    if (1) test_usra_16b_16b_8(TyB);
   7039    if (1) test_usra_8b_8b_1(TyB);
   7040    if (1) test_usra_8b_8b_3(TyB);
   7041    if (1) test_usra_8b_8b_8(TyB);
   7042 
   7043    // srshl (reg)  d
   7044    // urshl (reg)  d
   7045    if (1) test_srshl_d_d_d(TyD);
   7046    if (1) test_urshl_d_d_d(TyD);
   7047 
   7048    // srshl (reg)  2d,4s,2s,8h,4h,16b,8b
   7049    // urshl (reg)  2d,4s,2s,8h,4h,16b,8b
   7050    if (1) test_srshl_2d_2d_2d(TyD);
   7051    if (1) test_srshl_4s_4s_4s(TyS);
   7052    if (1) test_srshl_2s_2s_2s(TyS);
   7053    if (1) test_srshl_8h_8h_8h(TyH);
   7054    if (1) test_srshl_4h_4h_4h(TyH);
   7055    if (1) test_srshl_16b_16b_16b(TyB);
   7056    if (1) test_srshl_8b_8b_8b(TyB);
   7057    if (1) test_urshl_2d_2d_2d(TyD);
   7058    if (1) test_urshl_4s_4s_4s(TyS);
   7059    if (1) test_urshl_2s_2s_2s(TyS);
   7060    if (1) test_urshl_8h_8h_8h(TyH);
   7061    if (1) test_urshl_4h_4h_4h(TyH);
   7062    if (1) test_urshl_16b_16b_16b(TyB);
   7063    if (1) test_urshl_8b_8b_8b(TyB);
   7064 
   7065    // srshr (imm)  d
   7066    // urshr (imm)  d
   7067    if (1) test_srshr_d_d_1(TyD);
   7068    if (1) test_srshr_d_d_32(TyD);
   7069    if (1) test_srshr_d_d_64(TyD);
   7070    if (1) test_urshr_d_d_1(TyD);
   7071    if (1) test_urshr_d_d_32(TyD);
   7072    if (1) test_urshr_d_d_64(TyD);
   7073 
   7074    // srshr (imm)  2d,4s,2s,8h,4h,16b,8b
   7075    // urshr (imm)  2d,4s,2s,8h,4h,16b,8b
   7076    if (1) test_srshr_2d_2d_1(TyD);
   7077    if (1) test_srshr_2d_2d_32(TyD);
   7078    if (1) test_srshr_2d_2d_64(TyD);
   7079    if (1) test_srshr_4s_4s_1(TyS);
   7080    if (1) test_srshr_4s_4s_16(TyS);
   7081    if (1) test_srshr_4s_4s_32(TyS);
   7082    if (1) test_srshr_2s_2s_1(TyS);
   7083    if (1) test_srshr_2s_2s_16(TyS);
   7084    if (1) test_srshr_2s_2s_32(TyS);
   7085    if (1) test_srshr_8h_8h_1(TyH);
   7086    if (1) test_srshr_8h_8h_8(TyH);
   7087    if (1) test_srshr_8h_8h_16(TyH);
   7088    if (1) test_srshr_4h_4h_1(TyH);
   7089    if (1) test_srshr_4h_4h_8(TyH);
   7090    if (1) test_srshr_4h_4h_16(TyH);
   7091    if (1) test_srshr_16b_16b_1(TyB);
   7092    if (1) test_srshr_16b_16b_3(TyB);
   7093    if (1) test_srshr_16b_16b_8(TyB);
   7094    if (1) test_srshr_8b_8b_1(TyB);
   7095    if (1) test_srshr_8b_8b_3(TyB);
   7096    if (1) test_srshr_8b_8b_8(TyB);
   7097    if (1) test_urshr_2d_2d_1(TyD);
   7098    if (1) test_urshr_2d_2d_32(TyD);
   7099    if (1) test_urshr_2d_2d_64(TyD);
   7100    if (1) test_urshr_4s_4s_1(TyS);
   7101    if (1) test_urshr_4s_4s_16(TyS);
   7102    if (1) test_urshr_4s_4s_32(TyS);
   7103    if (1) test_urshr_2s_2s_1(TyS);
   7104    if (1) test_urshr_2s_2s_16(TyS);
   7105    if (1) test_urshr_2s_2s_32(TyS);
   7106    if (1) test_urshr_8h_8h_1(TyH);
   7107    if (1) test_urshr_8h_8h_8(TyH);
   7108    if (1) test_urshr_8h_8h_16(TyH);
   7109    if (1) test_urshr_4h_4h_1(TyH);
   7110    if (1) test_urshr_4h_4h_8(TyH);
   7111    if (1) test_urshr_4h_4h_16(TyH);
   7112    if (1) test_urshr_16b_16b_1(TyB);
   7113    if (1) test_urshr_16b_16b_3(TyB);
   7114    if (1) test_urshr_16b_16b_8(TyB);
   7115    if (1) test_urshr_8b_8b_1(TyB);
   7116    if (1) test_urshr_8b_8b_3(TyB);
   7117    if (1) test_urshr_8b_8b_8(TyB);
   7118 
   7119    // srsra (imm)  d
   7120    // ursra (imm)  d
   7121    if (1) test_srsra_d_d_1(TyD);
   7122    if (1) test_srsra_d_d_32(TyD);
   7123    if (1) test_srsra_d_d_64(TyD);
   7124    if (1) test_ursra_d_d_1(TyD);
   7125    if (1) test_ursra_d_d_32(TyD);
   7126    if (1) test_ursra_d_d_64(TyD);
   7127 
   7128    // srsra (imm)  2d,4s,2s,8h,4h,16b,8b
   7129    // ursra (imm)  2d,4s,2s,8h,4h,16b,8b
   7130    if (1) test_srsra_2d_2d_1(TyD);
   7131    if (1) test_srsra_2d_2d_32(TyD);
   7132    if (1) test_srsra_2d_2d_64(TyD);
   7133    if (1) test_srsra_4s_4s_1(TyS);
   7134    if (1) test_srsra_4s_4s_16(TyS);
   7135    if (1) test_srsra_4s_4s_32(TyS);
   7136    if (1) test_srsra_2s_2s_1(TyS);
   7137    if (1) test_srsra_2s_2s_16(TyS);
   7138    if (1) test_srsra_2s_2s_32(TyS);
   7139    if (1) test_srsra_8h_8h_1(TyH);
   7140    if (1) test_srsra_8h_8h_8(TyH);
   7141    if (1) test_srsra_8h_8h_16(TyH);
   7142    if (1) test_srsra_4h_4h_1(TyH);
   7143    if (1) test_srsra_4h_4h_8(TyH);
   7144    if (1) test_srsra_4h_4h_16(TyH);
   7145    if (1) test_srsra_16b_16b_1(TyB);
   7146    if (1) test_srsra_16b_16b_3(TyB);
   7147    if (1) test_srsra_16b_16b_8(TyB);
   7148    if (1) test_srsra_8b_8b_1(TyB);
   7149    if (1) test_srsra_8b_8b_3(TyB);
   7150    if (1) test_srsra_8b_8b_8(TyB);
   7151    if (1) test_ursra_2d_2d_1(TyD);
   7152    if (1) test_ursra_2d_2d_32(TyD);
   7153    if (1) test_ursra_2d_2d_64(TyD);
   7154    if (1) test_ursra_4s_4s_1(TyS);
   7155    if (1) test_ursra_4s_4s_16(TyS);
   7156    if (1) test_ursra_4s_4s_32(TyS);
   7157    if (1) test_ursra_2s_2s_1(TyS);
   7158    if (1) test_ursra_2s_2s_16(TyS);
   7159    if (1) test_ursra_2s_2s_32(TyS);
   7160    if (1) test_ursra_8h_8h_1(TyH);
   7161    if (1) test_ursra_8h_8h_8(TyH);
   7162    if (1) test_ursra_8h_8h_16(TyH);
   7163    if (1) test_ursra_4h_4h_1(TyH);
   7164    if (1) test_ursra_4h_4h_8(TyH);
   7165    if (1) test_ursra_4h_4h_16(TyH);
   7166    if (1) test_ursra_16b_16b_1(TyB);
   7167    if (1) test_ursra_16b_16b_3(TyB);
   7168    if (1) test_ursra_16b_16b_8(TyB);
   7169    if (1) test_ursra_8b_8b_1(TyB);
   7170    if (1) test_ursra_8b_8b_3(TyB);
   7171    if (1) test_ursra_8b_8b_8(TyB);
   7172 
   7173    // sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7174    // ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7175    if (1) test_sshll_2d_2s_0(TyS);
   7176    if (1) test_sshll_2d_2s_15(TyS);
   7177    if (1) test_sshll_2d_2s_31(TyS);
   7178    if (1) test_sshll2_2d_4s_0(TyS);
   7179    if (1) test_sshll2_2d_4s_15(TyS);
   7180    if (1) test_sshll2_2d_4s_31(TyS);
   7181    if (1) test_sshll_4s_4h_0(TyH);
   7182    if (1) test_sshll_4s_4h_7(TyH);
   7183    if (1) test_sshll_4s_4h_15(TyH);
   7184    if (1) test_sshll2_4s_8h_0(TyH);
   7185    if (1) test_sshll2_4s_8h_7(TyH);
   7186    if (1) test_sshll2_4s_8h_15(TyH);
   7187    if (1) test_sshll_8h_8b_0(TyB);
   7188    if (1) test_sshll_8h_8b_3(TyB);
   7189    if (1) test_sshll_8h_8b_7(TyB);
   7190    if (1) test_sshll2_8h_16b_0(TyB);
   7191    if (1) test_sshll2_8h_16b_3(TyB);
   7192    if (1) test_sshll2_8h_16b_7(TyB);
   7193    if (1) test_ushll_2d_2s_0(TyS);
   7194    if (1) test_ushll_2d_2s_15(TyS);
   7195    if (1) test_ushll_2d_2s_31(TyS);
   7196    if (1) test_ushll2_2d_4s_0(TyS);
   7197    if (1) test_ushll2_2d_4s_15(TyS);
   7198    if (1) test_ushll2_2d_4s_31(TyS);
   7199    if (1) test_ushll_4s_4h_0(TyH);
   7200    if (1) test_ushll_4s_4h_7(TyH);
   7201    if (1) test_ushll_4s_4h_15(TyH);
   7202    if (1) test_ushll2_4s_8h_0(TyH);
   7203    if (1) test_ushll2_4s_8h_7(TyH);
   7204    if (1) test_ushll2_4s_8h_15(TyH);
   7205    if (1) test_ushll_8h_8b_0(TyB);
   7206    if (1) test_ushll_8h_8b_3(TyB);
   7207    if (1) test_ushll_8h_8b_7(TyB);
   7208    if (1) test_ushll2_8h_16b_0(TyB);
   7209    if (1) test_ushll2_8h_16b_3(TyB);
   7210    if (1) test_ushll2_8h_16b_7(TyB);
   7211 
   7212    // suqadd  d,s,h,b
   7213    // usqadd  d,s,h,b
   7214    if (1) test_suqadd_d_d(TyD);
   7215    if (1) test_suqadd_s_s(TyS);
   7216    if (1) test_suqadd_h_h(TyH);
   7217    if (1) test_suqadd_b_b(TyB);
   7218    if (1) test_usqadd_d_d(TyD);
   7219    if (1) test_usqadd_s_s(TyS);
   7220    if (1) test_usqadd_h_h(TyH);
   7221    if (1) test_usqadd_b_b(TyB);
   7222 
   7223    // suqadd  2d,4s,2s,8h,4h,16b,8b
   7224    // usqadd  2d,4s,2s,8h,4h,16b,8b
   7225    if (1) test_suqadd_2d_2d(TyD);
   7226    if (1) test_suqadd_4s_4s(TyS);
   7227    if (1) test_suqadd_2s_2s(TyS);
   7228    if (1) test_suqadd_8h_8h(TyH);
   7229    if (1) test_suqadd_4h_4h(TyH);
   7230    if (1) test_suqadd_16b_16b(TyB);
   7231    if (1) test_suqadd_8b_8b(TyB);
   7232    if (1) test_usqadd_2d_2d(TyD);
   7233    if (1) test_usqadd_4s_4s(TyS);
   7234    if (1) test_usqadd_2s_2s(TyS);
   7235    if (1) test_usqadd_8h_8h(TyH);
   7236    if (1) test_usqadd_4h_4h(TyH);
   7237    if (1) test_usqadd_16b_16b(TyB);
   7238    if (1) test_usqadd_8b_8b(TyB);
   7239 
   7240    // tbl     8b_{16b}_8b, 16b_{16b}_16b
   7241    // tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   7242    // tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   7243    // tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   7244    if (1) test_tbl_16b_1reg(TyB);
   7245    if (1) test_tbl_16b_2reg(TyB);
   7246    if (1) test_tbl_16b_3reg(TyB);
   7247    if (1) test_tbl_16b_4reg(TyB);
   7248    if (1) test_tbl_8b_1reg(TyB);
   7249    if (1) test_tbl_8b_2reg(TyB);
   7250    if (1) test_tbl_8b_3reg(TyB);
   7251    if (1) test_tbl_8b_4reg(TyB);
   7252 
   7253    // tbx     8b_{16b}_8b, 16b_{16b}_16b
   7254    // tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   7255    // tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   7256    // tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   7257    if (1) test_tbx_16b_1reg(TyB);
   7258    if (1) test_tbx_16b_2reg(TyB);
   7259    if (1) test_tbx_16b_3reg(TyB);
   7260    if (1) test_tbx_16b_4reg(TyB);
   7261    if (1) test_tbx_8b_1reg(TyB);
   7262    if (1) test_tbx_8b_2reg(TyB);
   7263    if (1) test_tbx_8b_3reg(TyB);
   7264    if (1) test_tbx_8b_4reg(TyB);
   7265 
   7266    // trn1    2d,4s,2s,8h,4h,16b,8b
   7267    // trn2    2d,4s,2s,8h,4h,16b,8b
   7268    if (1) test_trn1_2d_2d_2d(TyD);
   7269    if (1) test_trn1_4s_4s_4s(TyS);
   7270    if (1) test_trn1_2s_2s_2s(TyS);
   7271    if (1) test_trn1_8h_8h_8h(TyH);
   7272    if (1) test_trn1_4h_4h_4h(TyH);
   7273    if (1) test_trn1_16b_16b_16b(TyB);
   7274    if (1) test_trn1_8b_8b_8b(TyB);
   7275    if (1) test_trn2_2d_2d_2d(TyD);
   7276    if (1) test_trn2_4s_4s_4s(TyS);
   7277    if (1) test_trn2_2s_2s_2s(TyS);
   7278    if (1) test_trn2_8h_8h_8h(TyH);
   7279    if (1) test_trn2_4h_4h_4h(TyH);
   7280    if (1) test_trn2_16b_16b_16b(TyB);
   7281    if (1) test_trn2_8b_8b_8b(TyB);
   7282 
   7283    // urecpe      4s,2s
   7284    // ursqrte     4s,2s
   7285    if (1) test_urecpe_4s_4s(TyS);
   7286    if (1) test_urecpe_2s_2s(TyS);
   7287    if (1) test_ursqrte_4s_4s(TyS);
   7288    if (1) test_ursqrte_2s_2s(TyS);
   7289 
   7290    // uzp1      2d,4s,2s,8h,4h,16b,8b
   7291    // uzp2      2d,4s,2s,8h,4h,16b,8b
   7292    // zip1      2d,4s,2s,8h,4h,16b,8b
   7293    // zip2      2d,4s,2s,8h,4h,16b,8b
   7294    if (1) test_uzp1_2d_2d_2d(TyD);
   7295    if (1) test_uzp1_4s_4s_4s(TyS);
   7296    if (1) test_uzp1_2s_2s_2s(TyS);
   7297    if (1) test_uzp1_8h_8h_8h(TyH);
   7298    if (1) test_uzp1_4h_4h_4h(TyH);
   7299    if (1) test_uzp1_16b_16b_16b(TyB);
   7300    if (1) test_uzp1_8b_8b_8b(TyB);
   7301    if (1) test_uzp2_2d_2d_2d(TyD);
   7302    if (1) test_uzp2_4s_4s_4s(TyS);
   7303    if (1) test_uzp2_2s_2s_2s(TyS);
   7304    if (1) test_uzp2_8h_8h_8h(TyH);
   7305    if (1) test_uzp2_4h_4h_4h(TyH);
   7306    if (1) test_uzp2_16b_16b_16b(TyB);
   7307    if (1) test_uzp2_8b_8b_8b(TyB);
   7308    if (1) test_zip1_2d_2d_2d(TyD);
   7309    if (1) test_zip1_4s_4s_4s(TyS);
   7310    if (1) test_zip1_2s_2s_2s(TyS);
   7311    if (1) test_zip1_8h_8h_8h(TyH);
   7312    if (1) test_zip1_4h_4h_4h(TyH);
   7313    if (1) test_zip1_16b_16b_16b(TyB);
   7314    if (1) test_zip1_8b_8b_8b(TyB);
   7315    if (1) test_zip2_2d_2d_2d(TyD);
   7316    if (1) test_zip2_4s_4s_4s(TyS);
   7317    if (1) test_zip2_2s_2s_2s(TyS);
   7318    if (1) test_zip2_8h_8h_8h(TyH);
   7319    if (1) test_zip2_4h_4h_4h(TyH);
   7320    if (1) test_zip2_16b_16b_16b(TyB);
   7321    if (1) test_zip2_8b_8b_8b(TyB);
   7322 
   7323    // xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7324    if (1) test_xtn_2s_2d(TyD);
   7325    if (1) test_xtn2_4s_2d(TyD);
   7326    if (1) test_xtn_4h_4s(TyS);
   7327    if (1) test_xtn2_8h_4s(TyS);
   7328    if (1) test_xtn_8b_8h(TyH);
   7329    if (1) test_xtn2_16b_8h(TyH);
   7330 
   7331    // ======================== MEM ========================
   7332 
   7333    // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
   7334 
   7335    // ld1  (multiple 1-element structures to 1/2/3/4 regs)
   7336    // ld1  (single 1-element structure to one lane of 1 reg)
   7337    // ld1r (single 1-element structure and rep to all lanes of 1 reg)
   7338 
   7339    // ld2  (multiple 2-element structures to 2 regs)
   7340    // ld2  (single 2-element structure to one lane of 2 regs)
   7341    // ld2r (single 2-element structure and rep to all lanes of 2 regs)
   7342 
   7343    // ld3  (multiple 3-element structures to 3 regs)
   7344    // ld3  (single 3-element structure to one lane of 3 regs)
   7345    // ld3r (single 3-element structure and rep to all lanes of 3 regs)
   7346 
   7347    // ld4  (multiple 4-element structures to 4 regs)
   7348    // ld4  (single 4-element structure to one lane of 4 regs)
   7349    // ld4r (single 4-element structure and rep to all lanes of 4 regs)
   7350 
   7351    // ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   7352    //       addr = reg + uimm7 * reg_size
   7353 
   7354    // ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   7355    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7356 
   7357    // ldr   q,d,s,h,b from addr
   7358    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7359 
   7360    // ldr   q,d,s from  pc+#imm19
   7361 
   7362    // ldr   q,d,s,h,b from addr
   7363    //       addr = [Xn|SP, R <extend> <shift>]
   7364 
   7365    // ldur  q,d,s,h,b from addr
   7366    //       addr = [Xn|SP,#imm] (unscaled offset)
   7367 
   7368    // st1 (multiple 1-element structures from 1/2/3/4 regs)
   7369    // st1 (single 1-element structure for 1 lane of 1 reg)
   7370 
   7371    // st2 (multiple 2-element structures from 2 regs)
   7372    // st2 (single 2-element structure from 1 lane of 2 regs)
   7373 
   7374    // st3 (multiple 3-element structures from 3 regs)
   7375    // st3 (single 3-element structure from 1 lane of 3 regs)
   7376 
   7377    // st4 (multiple 4-element structures from 4 regs)
   7378    // st4 (single 4-element structure from one lane of 4 regs)
   7379 
   7380    // stnp q_q_addr, d_d_addr, s_s_addr
   7381    //      addr = [Xn|SP, #imm]
   7382 
   7383    // stp  q_q_addr, d_d_addr, s_s_addr
   7384    //      addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   7385 
   7386    // str  q,d,s,h,b_addr
   7387    //      addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   7388 
   7389    // str   q,d,s,h,b_addr
   7390    //       addr = [Xn|SP, R <extend> <shift>]
   7391 
   7392    // stur  q,d,s,h,b_addr
   7393    //       addr = [Xn|SP,#imm] (unscaled offset)
   7394 
   7395    // ======================== CRYPTO ========================
   7396 
   7397    // These tests are believed to be correct but are disabled because
   7398    // GNU assembler (GNU Binutils) 2.24.0.20140311 Linaro 2014.03
   7399    // cannot be persuaded to accept those instructions (AFAICT).
   7400 
   7401    // aesd       16b (aes single round decryption)
   7402    // aese       16b (aes single round encryption)
   7403    // aesimc     16b (aes inverse mix columns)
   7404    // aesmc      16b (aes mix columns)
   7405    //if (0) test_aesd_16b_16b(TyNONE);
   7406    //if (0) test_aese_16b_16b(TyNONE);
   7407    //if (0) test_aesimc_16b_16b(TyNONE);
   7408    //if (0) test_aesmc_16b_16b(TyNONE);
   7409 
   7410    // sha1c      q_s_4s
   7411    // sha1h      s_s
   7412    // sha1m      q_s_4s
   7413    // sha1p      q_s_4s
   7414    // sha1su0    4s_4s_4s
   7415    // sha1su1    4s_4s
   7416    //if (0) test_sha1c_q_s_4s(TyNONE);
   7417    //if (0) test_sha1h_s_s(TyNONE);
   7418    //if (0) test_sha1m_q_s_4s(TyNONE);
   7419    //if (0) test_sha1p_q_s_4s(TyNONE);
   7420    //if (0) test_sha1su0_4s_4s_4s(TyNONE);
   7421    //if (0) test_sha1su1_4s_4s(TyNONE);
   7422 
   7423    // sha256h2   q_q_4s
   7424    // sha256h    q_q_4s
   7425    // sha256su0  4s_4s
   7426    // sha256su1  4s_4s_4s
   7427    //if (0) test_sha256h2_q_q_4s(TyNONE);
   7428    //if (0) test_sha256h_q_q_4s(TyNONE);
   7429    //if (0) test_sha256su0_4s_4s(TyNONE);
   7430    //if (0) test_sha256su1_4s_4s_4s(TyNONE);
   7431 
   7432    return 0;
   7433 }
   7434 
   7435 
   7436 /* ---------------------------------------------------------------- */
   7437 /* -- Alphabetical list of insns                                 -- */
   7438 /* ---------------------------------------------------------------- */
   7439 /*
   7440    abs      d
   7441    abs      2d,4s,2s,8h,4h,16b,8b
   7442    add      d
   7443    add      2d,4s,2s,8h,4h,16b,8b
   7444    addhn    2s.2d.2d, 4s.2d.2d, h_from_s and b_from_h (add and get high half)
   7445    addp     d (add pairs, across)
   7446    addp     2d,4s,2s,8h,4h,16b,8b
   7447    addv     4s,8h,4h,16b,8b (reduce across vector)
   7448    aesd     16b (aes single round decryption)
   7449    aese     16b (aes single round encryption)
   7450    aesimc   16b (aes inverse mix columns)
   7451    aesmc    16b (aes mix columns)
   7452    and      16b,8b
   7453 
   7454    bic      4s,2s,8h,4h (vector, imm)
   7455    also movi, mvni, orr
   7456 
   7457    bic      16b,8b (vector,reg) (bit clear)
   7458    bif      16b,8b (vector) (bit insert if false)
   7459    bit      16b,8b (vector) (bit insert if true)
   7460    bsl      16b,8b (vector) (bit select)
   7461 
   7462    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   7463    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   7464 
   7465    cmeq     d
   7466    cmeq     2d,4s,2s,8h,4h,16b,8b
   7467    cmeq_z   d
   7468    cmeq_z   2d,4s,2s,8h,4h,16b,8b
   7469 
   7470    cmge     d
   7471    cmge     2d,4s,2s,8h,4h,16b,8b
   7472    cmge_z   d
   7473    cmge_z   2d,4s,2s,8h,4h,16b,8b
   7474 
   7475    cmgt     d
   7476    cmgt     2d,4s,2s,8h,4h,16b,8b
   7477    cmgt_z   d
   7478    cmgt_z   2d,4s,2s,8h,4h,16b,8b
   7479 
   7480    cmhi     d
   7481    cmhi     2d,4s,2s,8h,4h,16b,8b
   7482 
   7483    cmhs     d
   7484    cmhs     2d,4s,2s,8h,4h,16b,8b
   7485 
   7486    cmle_z   d
   7487    cmle_z   2d,4s,2s,8h,4h,16b,8b
   7488 
   7489    cmlt_z   d
   7490    cmlt_z   2d,4s,2s,8h,4h,16b,8b
   7491 
   7492    cmtst    d
   7493    cmtst    2d,4s,2s,8h,4h,16b,8b
   7494 
   7495    cnt      16b,8b (population count per byte)
   7496 
   7497    dup      d,s,h,b (vec elem to scalar)
   7498    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   7499    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   7500 
   7501    eor      16b,8b (vector)
   7502    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   7503 
   7504    fabd     d,s
   7505    fabd     2d,4s,2s
   7506 
   7507    fabs     d,s
   7508    fabs     2d,4s,2s
   7509 
   7510    facge    s,d  (floating abs compare GE)
   7511    facge    2d,4s,2s
   7512 
   7513    facgt    s,d  (floating abs compare GT)
   7514    facgt    2d,4s,2s
   7515 
   7516    fadd     d,s
   7517    fadd     2d,4s,2s
   7518 
   7519    faddp    d,s (floating add pair)
   7520    faddp    2d,4s,2s
   7521 
   7522    fccmp    d,s (floating point conditional quiet compare)
   7523    fccmpe   d,s (floating point conditional signaling compare)
   7524 
   7525    fcmeq    d,s
   7526    fcmeq    2d,4s,2s
   7527    fcmeq_z  d,s
   7528    fcmeq_z  2d,4s,2s
   7529 
   7530    fcmge    d,s
   7531    fcmge    2d,4s,2s
   7532    fcmge_z  d,s
   7533    fcmge_z  2d,4s,2s
   7534 
   7535    fcmgt    d,s
   7536    fcmgt    2d,4s,2s
   7537    fcmgt_z  d,s
   7538    fcmgt_z  2d,4s,2s
   7539 
   7540    fcmle_z  d,s
   7541    fcmle_z  2d,4s,2s
   7542 
   7543    fcmlt_z  d,s
   7544    fcmlt_z  2d,4s,2s
   7545 
   7546    fcmp     d,s (floating point quiet, set flags)
   7547    fcmp_z   d,s
   7548    fcmpe    d,s (floating point signaling, set flags)
   7549    fcmpe_z  d,s
   7550 
   7551    fcsel    d,s (fp cond select)
   7552 
   7553    fcvt     s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   7554 
   7555    fcvtas   d,s  (fcvt to signed int, nearest, ties away)
   7556    fcvtas   2d,4s,2s
   7557    fcvtas   w_s,x_s,w_d,x_d
   7558 
   7559    fcvtau   d,s  (fcvt to unsigned int, nearest, ties away)
   7560    fcvtau   2d,4s,2s
   7561    fcvtau   w_s,x_s,w_d,x_d
   7562 
   7563    fcvtl{2} 4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   7564 
   7565    fcvtms   d,s  (fcvt to signed int, minus inf)
   7566    fcvtms   2d,4s,2s
   7567    fcvtms   w_s,x_s,w_d,x_d
   7568 
   7569    fcvtmu   d,s  (fcvt to unsigned int, minus inf)
   7570    fcvtmu   2d,4s,2s
   7571    fcvtmu   w_s,x_s,w_d,x_d
   7572 
   7573    fcvtn{2} 4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   7574 
   7575    fcvtns   d,s  (fcvt to signed int, nearest)
   7576    fcvtns   2d,4s,2s
   7577    fcvtns   w_s,x_s,w_d,x_d
   7578 
   7579    fcvtnu   d,s  (fcvt to unsigned int, nearest)
   7580    fcvtnu   2d,4s,2s
   7581    fcvtnu   w_s,x_s,w_d,x_d
   7582 
   7583    fcvtps   d,s  (fcvt to signed int, plus inf)
   7584    fcvtps   2d,4s,2s
   7585    fcvtps   w_s,x_s,w_d,x_d
   7586 
   7587    fcvtpu   d,s  (fcvt to unsigned int, plus inf)
   7588    fcvtpu   2d,4s,2s
   7589    fcvtpu   w_s,x_s,w_d,x_d
   7590 
   7591    fcvtxn   s_d (fcvt to lower prec narrow, rounding to odd)
   7592    fcvtxn   2s_2d,4s_2d
   7593 
   7594    fcvtzs   s,d (fcvt to signed fixedpt, to zero) (w/ #fbits)
   7595    fcvtzs   2d,4s,2s
   7596 
   7597    fcvtzs   s,d (fcvt to signed integer, to zero)
   7598    fcvtzs   2d,4s,2s
   7599 
   7600    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed fixedpt, to zero) (w/ #fbits)
   7601 
   7602    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed integer, to zero)
   7603 
   7604    fcvtzu   s,d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   7605    fcvtzu   2d,4s,2s
   7606 
   7607    fcvtzu   s,d (fcvt to unsigned integer, to zero)
   7608    fcvtzu   2d,4s,2s
   7609 
   7610    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   7611 
   7612    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned integer, to zero)
   7613 
   7614    fdiv     d,s
   7615    fdiv     2d,4s,2s
   7616 
   7617    fmadd    d,s
   7618    fnmadd   d,s
   7619    fnmsub   d,s
   7620    fnmul    d,s
   7621 
   7622    fmax     d,s
   7623    fmin     d,s
   7624 
   7625    fmax     2d,4s,2s
   7626    fmin     2d,4s,2s
   7627 
   7628    fmaxnm   d,s ("max number")
   7629    fminnm   d,s
   7630 
   7631    fmaxnm   2d,4s,2s
   7632    fminnm   2d,4s,2s
   7633 
   7634    fmaxnmp  d_2d,s_2s ("max number pairwise")
   7635    fminnmp  d_2d,s_2s
   7636 
   7637    fmaxnmp  2d,4s,2s
   7638    fminnmp  2d,4s,2s
   7639 
   7640    fmaxnmv  s_4s (maxnum across vector)
   7641    fminnmv  s_4s
   7642 
   7643    fmaxp    d_2d,s_2s (max of a pair)
   7644    fminp    d_2d,s_2s (min of a pair)
   7645 
   7646    fmaxp    2d,4s,2s  (max pairwise)
   7647    fminp    2d,4s,2s
   7648 
   7649    fmaxv    s_4s (max across vector)
   7650    fminv    s_4s
   7651 
   7652    fmla     d_d_d[],s_s_s[] (by element)
   7653    fmla     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7654 
   7655    fmla     2d,4s,2s
   7656 
   7657    fmls     d_d_d[],s_s_s[] (by element)
   7658    fmls     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7659 
   7660    fmls     2d,4s,2s
   7661 
   7662    fmov     2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   7663 
   7664    fmov     d_d,s_s
   7665 
   7666    fmov     s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   7667 
   7668    fmov     d,s #imm
   7669 
   7670    fmsub    d,s
   7671 
   7672    fmul     d_d_d[],s_s_s[]
   7673    fmul     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7674 
   7675    fmul     2d,4s,2s
   7676    fmul     d,s
   7677 
   7678    fmulx    d_d_d[],s_s_s[]
   7679    fmulx    2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7680 
   7681    fmulx    d,s
   7682    fmulx    2d,4s,2s
   7683 
   7684    fneg     d,s
   7685    fneg     2d,4s,2s
   7686 
   7687    frecpe   d,s (recip estimate)
   7688    frecpe   2d,4s,2s
   7689 
   7690    frecps   d,s (recip step)
   7691    frecps   2d,4s,2s
   7692 
   7693    frecpx   d,s (recip exponent)
   7694 
   7695    frinta   2d,4s,2s (round to integral, nearest away)
   7696    frinta   d,s
   7697 
   7698    frinti   2d,4s,2s (round to integral, per FPCR)
   7699    frinti   d,s
   7700 
   7701    frintm   2d,4s,2s (round to integral, minus inf)
   7702    frintm   d,s
   7703 
   7704    frintn   2d,4s,2s (round to integral, nearest, to even)
   7705    frintn   d,s
   7706 
   7707    frintp   2d,4s,2s (round to integral, plus inf)
   7708    frintp   d,s
   7709 
   7710    frintx   2d,4s,2s (round to integral exact, per FPCR)
   7711    frintx   d,s
   7712 
   7713    frintz   2d,4s,2s (round to integral, zero)
   7714    frintz   d,s
   7715 
   7716    frsqrte  d,s (est)
   7717    frsqrte  2d,4s,2s
   7718 
   7719    frsqrts  d,s (step)
   7720    frsqrts  2d,4s,2s
   7721 
   7722    fsqrt    d,s
   7723    fsqrt    2d,4s,2s
   7724 
   7725    fsub     d,s
   7726    fsub     2d,4s,2s
   7727 
   7728    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   7729 
   7730    ins      d[]_x, s[]_w, h[]_w, b[]_w
   7731 
   7732    ld1  (multiple 1-element structures to 1/2/3/4 regs)
   7733    ld1  (single 1-element structure to one lane of 1 reg)
   7734    ld1r (single 1-element structure and rep to all lanes of 1 reg)
   7735 
   7736    ld2  (multiple 2-element structures to 2 regs)
   7737    ld2  (single 2-element structure to one lane of 2 regs)
   7738    ld2r (single 2-element structure and rep to all lanes of 2 regs)
   7739 
   7740    ld3  (multiple 3-element structures to 3 regs)
   7741    ld3  (single 3-element structure to one lane of 3 regs)
   7742    ld3r (single 3-element structure and rep to all lanes of 3 regs)
   7743 
   7744    ld4  (multiple 4-element structures to 4 regs)
   7745    ld4  (single 4-element structure to one lane of 4 regs)
   7746    ld4r (single 4-element structure and rep to all lanes of 4 regs)
   7747 
   7748    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   7749          addr = reg + uimm7 * reg_size
   7750 
   7751    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   7752          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7753 
   7754    ldr   q,d,s,h,b from addr
   7755          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7756 
   7757    ldr   q,d,s from  pc+#imm19
   7758 
   7759    ldr   q,d,s,h,b from addr
   7760          addr = [Xn|SP, R <extend> <shift>]
   7761 
   7762    ldur  q,d,s,h,b from addr
   7763          addr = [Xn|SP,#imm] (unscaled offset)
   7764 
   7765    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7766    mla   4s,2s,8h,4h,16b,8b
   7767 
   7768    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7769    mls   4s,2s,8h,4h,16b,8b
   7770 
   7771    movi  16b,8b   #imm8, LSL #0
   7772    movi  8h,4h    #imm8, LSL #0 or 8
   7773    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   7774    movi  4s,2s    #imm8, MSL #8 or 16
   7775    movi  d,       #imm64
   7776    movi  2d,      #imm64
   7777 
   7778    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7779    mul   4s,2s,8h,4h,16b,8b
   7780 
   7781    mvni  8h,4h    #imm8, LSL #0 or 8
   7782    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   7783    mvni  4s,2s    #imm8, MSL #8 or 16
   7784 
   7785    neg   d
   7786    neg   2d,4s,2s,8h,4h,16b,8b
   7787 
   7788    not   16b,8b
   7789 
   7790    orn   16b,8b
   7791 
   7792    orr   8h,4h   #imm8, LSL #0 or 8
   7793    orr   4s,2s   #imm8, LSL #0, 8, 16 or 24
   7794 
   7795    orr   16b,8b
   7796 
   7797    pmul  16b,8b
   7798 
   7799    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1d_2d_2d
   7800 
   7801    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   7802 
   7803    rbit    16b,8b
   7804    rev16   16b,8b
   7805    rev32   16b,8b,8h,4h
   7806    rev64   16b,8b,8h,4h,4s,2s
   7807 
   7808    rshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   7809 
   7810    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   7811 
   7812    saba      16b,8b,8h,4h,4s,2s
   7813    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7814 
   7815    sabd      16b,8b,8h,4h,4s,2s
   7816    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7817 
   7818    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   7819 
   7820    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7821 
   7822    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   7823 
   7824    saddlv    h_16b/8b, s_8h/4h, d_4s
   7825 
   7826    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   7827 
   7828    scvtf     d,s        _#fbits
   7829    scvtf     2d,4s,2s   _#fbits
   7830 
   7831    scvtf     d,s
   7832    scvtf     2d,4s,2s
   7833 
   7834    scvtf     s_w, d_w, s_x, d_x,   _#fbits
   7835    scvtf     s_w, d_w, s_x, d_x
   7836 
   7837    sha1c       q_s_4s
   7838    sha1h       s_s
   7839    sha1m       q_s_4s
   7840    sha1p       q_s_4s
   7841    sha1su0     4s_4s_4s
   7842    sha1su1     4s_4s
   7843    sha256h2    q_q_4s
   7844    sha256h     q_q_4s
   7845    sha256su0   4s_4s
   7846    sha256su1   4s_4s_4s
   7847 
   7848    shadd       16b,8b,8h,4h,4s,2s
   7849 
   7850    shl         d_#imm
   7851    shl         16b,8b,8h,4h,4s,2s,2d  _#imm
   7852 
   7853    shll{2}   8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   7854 
   7855    shrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   7856 
   7857    shsub       16b,8b,8h,4h,4s,2s
   7858 
   7859    sli         d_#imm
   7860    sli         2d,4s,2s,8h,4h,16b,8b  _#imm
   7861 
   7862    smax        4s,2s,8h,4h,16b,8b
   7863 
   7864    smaxp       4s,2s,8h,4h,16b,8b
   7865 
   7866    smaxv       s_4s,h_8h,h_4h,b_16b,b_8b
   7867 
   7868    smin        4s,2s,8h,4h,16b,8b
   7869 
   7870    sminp       4s,2s,8h,4h,16b,8b
   7871 
   7872    sminv       s_4s,h_8h,h_4h,b_16b,b_8b
   7873 
   7874    smlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7875    smlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7876 
   7877    smlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7878    smlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7879 
   7880    smov        w_b[], w_h[], x_b[], x_h[], x_s[]
   7881 
   7882    smull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7883    smull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7884 
   7885    sqabs       d,s,h,b
   7886    sqabs       2d,4s,2s,8h,4h,16b,8b
   7887 
   7888    sqadd       d,s,h,b
   7889    sqadd       2d,4s,2s,8h,4h,16b,8b
   7890 
   7891    sqdmlal     d_s_s[], s_h_h[]
   7892    sqdmlal{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7893 
   7894    sqdmlal     d_s_s, s_h_h
   7895    sqdmlal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7896 
   7897    sqdmlsl     d_s_s[], s_h_h[]
   7898    sqdmlsl{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7899 
   7900    sqdmlsl     d_s_s, s_h_h
   7901    sqdmlsl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7902 
   7903    sqdmulh     s_s_s[], h_h_h[]
   7904    sqdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   7905 
   7906    sqdmulh     h,s
   7907    sqdmulh     4s,2s,8h,4h
   7908 
   7909    sqdmull     d_s_s[], s_h_h[]
   7910    sqdmull{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7911 
   7912    sqdmull     d_s_s,s_h_h
   7913    sqdmull{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7914 
   7915    sqneg       d,s,h,b
   7916    sqneg       2d,4s,2s,8h,4h,16b,8b
   7917 
   7918    sqrdmulh    s_s_s[], h_h_h[]
   7919    sqrdmulh    4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   7920 
   7921    sqrdmulh    h,s
   7922    sqrdmulh    4s,2s,8h,4h
   7923 
   7924    sqrshl      d,s,h,b
   7925    sqrshl      2d,4s,2s,8h,4h,16b,8b
   7926 
   7927    sqrshrn     s_d, h_s, b_h   #imm
   7928    sqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7929 
   7930    sqrshrun     s_d, h_s, b_h   #imm
   7931    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7932 
   7933    sqshl        d,s,h,b   _#imm
   7934    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   7935 
   7936    sqshl        d,s,h,b
   7937    sqshl        2d,4s,2s,8h,4h,16b,8b
   7938 
   7939    sqshlu       d,s,h,b  _#imm
   7940    sqshlu       2d,4s,2s,8h,4h,16b,8b  _#imm
   7941 
   7942    sqshrn       s_d, h_s, b_h   #imm
   7943    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7944 
   7945    sqshrun      s_d, h_s, b_h   #imm
   7946    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7947 
   7948    sqsub       d,s,h,b
   7949    sqsub       2d,4s,2s,8h,4h,16b,8b
   7950 
   7951    sqxtn       s_d,h_s,b_h
   7952    sqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7953 
   7954    sqxtun      s_d,h_s,b_h
   7955    sqxtun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7956 
   7957    srhadd      4s,2s,8h,4h,16b,8b
   7958 
   7959    sri         d_#imm
   7960    sri         2d,4s,2s,8h,4h,16b,8b  _#imm
   7961 
   7962    srshl (reg) d
   7963    srshl       2d,4s,2s,8h,4h,16b,8b
   7964 
   7965    srshr (imm) d
   7966    srshr       2d,4s,2s,8h,4h,16b,8b
   7967 
   7968    srsra (imm) d
   7969    srsra       2d,4s,2s,8h,4h,16b,8b
   7970 
   7971    sshl (reg)  d
   7972    sshl        2d,4s,2s,8h,4h,16b,8b
   7973 
   7974    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7975 
   7976    sshr (imm)  d
   7977    sshr        2d,4s,2s,8h,4h,16b,8b
   7978 
   7979    ssra (imm)  d
   7980    ssra        2d,4s,2s,8h,4h,16b,8b
   7981 
   7982    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7983 
   7984    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   7985 
   7986    st1 (multiple 1-element structures from 1/2/3/4 regs)
   7987    st1 (single 1-element structure for 1 lane of 1 reg)
   7988 
   7989    st2 (multiple 2-element structures from 2 regs)
   7990    st2 (single 2-element structure from 1 lane of 2 regs)
   7991 
   7992    st3 (multiple 3-element structures from 3 regs)
   7993    st3 (single 3-element structure from 1 lane of 3 regs)
   7994 
   7995    st4 (multiple 4-element structures from 4 regs)
   7996    st4 (single 4-element structure from one lane of 4 regs)
   7997 
   7998    stnp q_q_addr, d_d_addr, s_s_addr
   7999         addr = [Xn|SP, #imm]
   8000 
   8001    stp  q_q_addr, d_d_addr, s_s_addr
   8002         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   8003 
   8004    str  q,d,s,h,b_addr
   8005         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   8006 
   8007    str   q,d,s,h,b_addr
   8008          addr = [Xn|SP, R <extend> <shift>]
   8009 
   8010    stur  q,d,s,h,b_addr
   8011          addr = [Xn|SP,#imm] (unscaled offset)
   8012 
   8013    sub   d
   8014    sub   2d,4s,2s,8h,4h,16b,8b
   8015 
   8016    subhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8017 
   8018    suqadd  d,s,h,b
   8019    suqadd  2d,4s,2s,8h,4h,16b,8b
   8020 
   8021    tbl     8b_{16b}_8b, 16b_{16b}_16b
   8022    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8023    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8024    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8025 
   8026    tbx     8b_{16b}_8b, 16b_{16b}_16b
   8027    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8028    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8029    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8030 
   8031    trn1    2d,4s,2s,8h,4h,16b,8b
   8032    trn2    2d,4s,2s,8h,4h,16b,8b
   8033 
   8034    uaba      16b,8b,8h,4h,4s,2s
   8035    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8036 
   8037    uabd      16b,8b,8h,4h,4s,2s
   8038    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8039 
   8040    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8041 
   8042    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8043 
   8044    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8045 
   8046    uaddlv    h_16b/8b, s_8h/4h, d_4s
   8047 
   8048    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8049 
   8050    ucvtf     d,s        _#fbits
   8051    ucvtf     2d,4s,2s   _#fbits
   8052 
   8053    ucvtf     d,s
   8054    ucvtf     2d,4s,2s
   8055 
   8056    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   8057    ucvtf     s_w, d_w, s_x, d_x
   8058 
   8059    uhadd       16b,8b,8h,4h,4s,2s
   8060 
   8061    uhsub       16b,8b,8h,4h,4s,2s
   8062 
   8063    umax        4s,2s,8h,4h,16b,8b
   8064 
   8065    umaxp       4s,2s,8h,4h,16b,8b
   8066 
   8067    umaxv       s_4s,h_8h,h_4h,b_16b,b_8b
   8068 
   8069    umin        4s,2s,8h,4h,16b,8b
   8070 
   8071    uminp       4s,2s,8h,4h,16b,8b
   8072 
   8073    uminv       s_4s,h_8h,h_4h,b_16b,b_8b
   8074 
   8075    umlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8076    umlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8077 
   8078    umlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8079    umlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8080 
   8081    umov        w_b[], w_h[], w_s[], x_d[]
   8082 
   8083    umull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8084    umull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8085 
   8086    uqadd       d,s,h,b
   8087    uqadd       2d,4s,2s,8h,4h,16b,8b
   8088 
   8089    uqrshl      d,s,h,b
   8090    uqrshl      2d,4s,2s,8h,4h,16b,8b
   8091 
   8092    uqrshrn     s_d, h_s, b_h   #imm
   8093    uqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8094 
   8095    uqshl        d,s,h,b   _#imm
   8096    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8097 
   8098    uqshl        d,s,h,b
   8099    uqshl        2d,4s,2s,8h,4h,16b,8b
   8100 
   8101    uqshrn       s_d, h_s, b_h   #imm
   8102    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8103 
   8104    uqsub       d,s,h,b
   8105    uqsub       2d,4s,2s,8h,4h,16b,8b
   8106 
   8107    uqxtn       s_d,h_s,b_h
   8108    uqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8109 
   8110    urecpe      4s,2s
   8111 
   8112    urhadd      4s,2s,8h,4h,16b,8b
   8113 
   8114    urshl (reg) d
   8115    urshl       2d,4s,2s,8h,4h,16b,8b
   8116 
   8117    urshr (imm) d
   8118    urshr       2d,4s,2s,8h,4h,16b,8b
   8119 
   8120    ursqrte     4s,2s
   8121 
   8122    ursra (imm) d
   8123    ursra       2d,4s,2s,8h,4h,16b,8b
   8124 
   8125    ushl (reg)  d
   8126    ushl        2d,4s,2s,8h,4h,16b,8b
   8127 
   8128    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8129 
   8130    ushr (imm)  d
   8131    ushr        2d,4s,2s,8h,4h,16b,8b
   8132 
   8133    usqadd      d,s,h,b
   8134    usqadd      2d,4s,2s,8h,4h,16b,8b
   8135 
   8136    usra (imm)  d
   8137    usra        2d,4s,2s,8h,4h,16b,8b
   8138 
   8139    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8140 
   8141    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8142 
   8143    uzp1      2d,4s,2s,8h,4h,16b,8b
   8144    uzp2      2d,4s,2s,8h,4h,16b,8b
   8145 
   8146    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8147 
   8148    zip1      2d,4s,2s,8h,4h,16b,8b
   8149    zip2      2d,4s,2s,8h,4h,16b,8b
   8150 */
   8151 
   8152 
   8153 /* ---------------------------------------------------------------- */
   8154 /* -- List of insns, grouped somewhat by laneage configuration   -- */
   8155 /* ---------------------------------------------------------------- */
   8156 /*
   8157    ======================== FP ========================
   8158 
   8159    fabs      d,s
   8160    fabs      2d,4s,2s
   8161 
   8162    fneg      d,s
   8163    fneg      2d,4s,2s
   8164 
   8165    fsqrt     d,s
   8166    fsqrt     2d,4s,2s
   8167 
   8168    fadd      d,s
   8169    fsub      d,s
   8170 
   8171    fadd      2d,4s,2s
   8172    fsub      2d,4s,2s
   8173 
   8174    fabd      d,s
   8175    fabd      2d,4s,2s
   8176 
   8177    faddp     d,s (floating add pair)
   8178    faddp     2d,4s,2s
   8179 
   8180    fccmp     d,s (floating point conditional quiet compare)
   8181    fccmpe    d,s (floating point conditional signaling compare)
   8182 
   8183    fcmeq     d,s
   8184    fcmge     d,s
   8185    fcmgt     d,s
   8186    facgt     d,s  (floating abs compare GT)
   8187    facge     d,s  (floating abs compare GE)
   8188 
   8189    fcmeq     2d,4s,2s
   8190    fcmge     2d,4s,2s
   8191    fcmgt     2d,4s,2s
   8192    facge     2d,4s,2s
   8193    facgt     2d,4s,2s
   8194 
   8195    fcmeq_z   d,s
   8196    fcmge_z   d,s
   8197    fcmgt_z   d,s
   8198    fcmle_z   d,s
   8199    fcmlt_z   d,s
   8200 
   8201    fcmeq_z   2d,4s,2s
   8202    fcmge_z   2d,4s,2s
   8203    fcmgt_z   2d,4s,2s
   8204    fcmle_z   2d,4s,2s
   8205    fcmlt_z   2d,4s,2s
   8206 
   8207    fcmp_z    d,s
   8208    fcmpe_z   d,s
   8209    fcmp      d,s (floating point quiet, set flags)
   8210    fcmpe     d,s (floating point signaling, set flags)
   8211 
   8212    fcsel     d,s (fp cond select)
   8213 
   8214    fdiv      d,s
   8215    fdiv      2d,4s,2s
   8216 
   8217    fmadd     d,s
   8218    fnmadd    d,s
   8219    fmsub     d,s
   8220    fnmsub    d,s
   8221 
   8222    fnmul     d,s
   8223 
   8224    fmax      d,s
   8225    fmin      d,s
   8226    fmaxnm    d,s ("max number")
   8227    fminnm    d,s
   8228 
   8229    fmax      2d,4s,2s
   8230    fmin      2d,4s,2s
   8231    fmaxnm    2d,4s,2s
   8232    fminnm    2d,4s,2s
   8233 
   8234    fmaxnmp   d_2d,s_2s ("max number pairwise")
   8235    fminnmp   d_2d,s_2s
   8236 
   8237    fmaxnmp   2d,4s,2s
   8238    fminnmp   2d,4s,2s
   8239 
   8240    fmaxnmv   s_4s (maxnum across vector)
   8241    fminnmv   s_4s
   8242 
   8243    fmaxp     d_2d,s_2s (max of a pair)
   8244    fminp     d_2d,s_2s (min of a pair)
   8245 
   8246    fmaxp     2d,4s,2s  (max pairwise)
   8247    fminp     2d,4s,2s
   8248 
   8249    fmaxv     s_4s (max across vector)
   8250    fminv     s_4s
   8251 
   8252    fmla      2d,4s,2s
   8253    fmls      2d,4s,2s
   8254 
   8255    fmla      d_d_d[],s_s_s[] (by element)
   8256    fmls      d_d_d[],s_s_s[] (by element)
   8257 
   8258    fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8259    fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8260 
   8261    fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   8262 
   8263    fmov      d_d,s_s
   8264 
   8265    fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   8266 
   8267    fmov      d,s #imm
   8268 
   8269    fmul      d_d_d[],s_s_s[]
   8270    fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8271 
   8272    fmul      2d,4s,2s
   8273    fmul      d,s
   8274 
   8275    fmulx     d_d_d[],s_s_s[]
   8276    fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8277 
   8278    fmulx     d,s
   8279    fmulx     2d,4s,2s
   8280 
   8281    frecpe    d,s (recip estimate)
   8282    frecpe    2d,4s,2s
   8283 
   8284    frecps    d,s (recip step)
   8285    frecps    2d,4s,2s
   8286 
   8287    frecpx    d,s (recip exponent)
   8288 
   8289    frinta    d,s
   8290    frinti    d,s
   8291    frintm    d,s
   8292    frintn    d,s
   8293    frintp    d,s
   8294    frintx    d,s
   8295    frintz    d,s
   8296 
   8297    frinta    2d,4s,2s (round to integral, nearest away)
   8298    frinti    2d,4s,2s (round to integral, per FPCR)
   8299    frintm    2d,4s,2s (round to integral, minus inf)
   8300    frintn    2d,4s,2s (round to integral, nearest, to even)
   8301    frintp    2d,4s,2s (round to integral, plus inf)
   8302    frintx    2d,4s,2s (round to integral exact, per FPCR)
   8303    frintz    2d,4s,2s (round to integral, zero)
   8304 
   8305    frsqrte   d,s (est)
   8306    frsqrte   2d,4s,2s
   8307 
   8308    frsqrts   d,s (step)
   8309    frsqrts   2d,4s,2s
   8310 
   8311    ======================== CONV ========================
   8312 
   8313    fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   8314 
   8315    fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   8316 
   8317    fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   8318 
   8319    fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
   8320    fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
   8321    fcvtas    2d,4s,2s
   8322    fcvtau    2d,4s,2s
   8323    fcvtas    w_s,x_s,w_d,x_d
   8324    fcvtau    w_s,x_s,w_d,x_d
   8325 
   8326    fcvtms    d,s  (fcvt to signed int,   minus inf)
   8327    fcvtmu    d,s  (fcvt to unsigned int, minus inf)
   8328    fcvtms    2d,4s,2s
   8329    fcvtmu    2d,4s,2s
   8330    fcvtms    w_s,x_s,w_d,x_d
   8331    fcvtmu    w_s,x_s,w_d,x_d
   8332 
   8333    fcvtns    d,s  (fcvt to signed int,   nearest)
   8334    fcvtnu    d,s  (fcvt to unsigned int, nearest)
   8335    fcvtns    2d,4s,2s
   8336    fcvtnu    2d,4s,2s
   8337    fcvtns    w_s,x_s,w_d,x_d
   8338    fcvtnu    w_s,x_s,w_d,x_d
   8339 
   8340    fcvtps    d,s  (fcvt to signed int,   plus inf)
   8341    fcvtpu    d,s  (fcvt to unsigned int, plus inf)
   8342    fcvtps    2d,4s,2s
   8343    fcvtpu    2d,4s,2s
   8344    fcvtps    w_s,x_s,w_d,x_d
   8345    fcvtpu    w_s,x_s,w_d,x_d
   8346 
   8347    fcvtzs    d,s (fcvt to signed integer,   to zero)
   8348    fcvtzu    d,s (fcvt to unsigned integer, to zero)
   8349    fcvtzs    2d,4s,2s
   8350    fcvtzu    2d,4s,2s
   8351    fcvtzs    w_s,x_s,w_d,x_d
   8352    fcvtzu    w_s,x_s,w_d,x_d
   8353 
   8354    fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   8355    fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   8356    fcvtzs    2d,4s,2s
   8357    fcvtzu    2d,4s,2s
   8358    fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   8359    fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   8360 
   8361    fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
   8362    fcvtxn    2s_2d,4s_2d
   8363 
   8364    scvtf     d,s        _#fbits
   8365    ucvtf     d,s        _#fbits
   8366 
   8367    scvtf     2d,4s,2s   _#fbits
   8368    ucvtf     2d,4s,2s   _#fbits
   8369 
   8370    scvtf     d,s
   8371    ucvtf     d,s
   8372 
   8373    scvtf     2d,4s,2s
   8374    ucvtf     2d,4s,2s
   8375 
   8376    scvtf     s_w, d_w, s_x, d_x,   _#fbits
   8377    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   8378 
   8379    scvtf     s_w, d_w, s_x, d_x
   8380    ucvtf     s_w, d_w, s_x, d_x
   8381 
   8382    ======================== INT ========================
   8383 
   8384    abs       d
   8385    neg       d
   8386 
   8387    abs       2d,4s,2s,8h,4h,16b,8b
   8388    neg       2d,4s,2s,8h,4h,16b,8b
   8389 
   8390    add       d
   8391    sub       d
   8392 
   8393    add       2d,4s,2s,8h,4h,16b,8b
   8394    sub       2d,4s,2s,8h,4h,16b,8b
   8395 
   8396    addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8397    subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8398    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8399    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8400 
   8401    addp     d (add pairs, across)
   8402    addp     2d,4s,2s,8h,4h,16b,8b
   8403    addv     4s,8h,4h,16b,8b (reduce across vector)
   8404 
   8405    and      16b,8b
   8406 
   8407    orr      8h,4h   #imm8, LSL #0 or 8
   8408    orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
   8409    bic      8h,4h   #imm8, LSL #0 or 8
   8410    bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
   8411    also movi, mvni
   8412 
   8413    bic      16b,8b (vector,reg) (bit clear)
   8414    bif      16b,8b (vector) (bit insert if false)
   8415    bit      16b,8b (vector) (bit insert if true)
   8416    bsl      16b,8b (vector) (bit select)
   8417 
   8418    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   8419    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   8420 
   8421    cmeq     d
   8422    cmge     d
   8423    cmgt     d
   8424    cmhi     d
   8425    cmhs     d
   8426    cmtst    d
   8427 
   8428    cmeq     2d,4s,2s,8h,4h,16b,8b
   8429    cmge     2d,4s,2s,8h,4h,16b,8b
   8430    cmgt     2d,4s,2s,8h,4h,16b,8b
   8431    cmhi     2d,4s,2s,8h,4h,16b,8b
   8432    cmhs     2d,4s,2s,8h,4h,16b,8b
   8433    cmtst    2d,4s,2s,8h,4h,16b,8b
   8434 
   8435    cmeq_z   d
   8436    cmge_z   d
   8437    cmgt_z   d
   8438    cmle_z   d
   8439    cmlt_z   d
   8440 
   8441    cmeq_z   2d,4s,2s,8h,4h,16b,8b
   8442    cmge_z   2d,4s,2s,8h,4h,16b,8b
   8443    cmgt_z   2d,4s,2s,8h,4h,16b,8b
   8444    cmle_z   2d,4s,2s,8h,4h,16b,8b
   8445    cmlt_z   2d,4s,2s,8h,4h,16b,8b
   8446 
   8447    cnt      16b,8b (population count per byte)
   8448 
   8449    dup      d,s,h,b (vec elem to scalar)
   8450    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   8451    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   8452 
   8453    eor      16b,8b (vector)
   8454    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   8455 
   8456    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   8457 
   8458    ins      d[]_x, s[]_w, h[]_w, b[]_w
   8459 
   8460    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8461    mla   4s,2s,8h,4h,16b,8b
   8462 
   8463    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8464    mls   4s,2s,8h,4h,16b,8b
   8465 
   8466    movi  16b,8b   #imm8, LSL #0
   8467    movi  8h,4h    #imm8, LSL #0 or 8
   8468    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   8469    movi  4s,2s    #imm8, MSL #8 or 16
   8470    movi  d,       #imm64
   8471    movi  2d,      #imm64
   8472 
   8473    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8474    mul   4s,2s,8h,4h,16b,8b
   8475 
   8476    mvni  8h,4h    #imm8, LSL #0 or 8
   8477    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   8478    mvni  4s,2s    #imm8, MSL #8 or 16
   8479 
   8480    not   16b,8b
   8481 
   8482    orn   16b,8b
   8483    orr   16b,8b
   8484 
   8485    pmul  16b,8b
   8486 
   8487    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
   8488 
   8489    rbit    16b,8b
   8490    rev16   16b,8b
   8491    rev32   16b,8b,8h,4h
   8492    rev64   16b,8b,8h,4h,4s,2s
   8493 
   8494    saba      16b,8b,8h,4h,4s,2s
   8495    uaba      16b,8b,8h,4h,4s,2s
   8496 
   8497    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8498    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8499 
   8500    sabd      16b,8b,8h,4h,4s,2s
   8501    uabd      16b,8b,8h,4h,4s,2s
   8502 
   8503    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8504    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8505 
   8506    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8507    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8508 
   8509    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8510    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8511    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8512    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8513 
   8514    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8515    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8516 
   8517    saddlv    h_16b/8b, s_8h/4h, d_4s
   8518    uaddlv    h_16b/8b, s_8h/4h, d_4s
   8519 
   8520    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8521    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8522    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8523    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8524 
   8525    shadd        16b,8b,8h,4h,4s,2s
   8526    uhadd        16b,8b,8h,4h,4s,2s
   8527    shsub        16b,8b,8h,4h,4s,2s
   8528    uhsub        16b,8b,8h,4h,4s,2s
   8529 
   8530    shl          d_#imm
   8531    shl          16b,8b,8h,4h,4s,2s,2d  _#imm
   8532 
   8533    shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   8534 
   8535    shrn{2}      2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
   8536    rshrn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
   8537 
   8538    sli          d_#imm
   8539    sri          d_#imm
   8540 
   8541    sli          2d,4s,2s,8h,4h,16b,8b  _#imm
   8542    sri          2d,4s,2s,8h,4h,16b,8b  _#imm
   8543 
   8544    smax         4s,2s,8h,4h,16b,8b
   8545    umax         4s,2s,8h,4h,16b,8b
   8546    smin         4s,2s,8h,4h,16b,8b
   8547    umin         4s,2s,8h,4h,16b,8b
   8548 
   8549    smaxp        4s,2s,8h,4h,16b,8b
   8550    umaxp        4s,2s,8h,4h,16b,8b
   8551    sminp        4s,2s,8h,4h,16b,8b
   8552    uminp        4s,2s,8h,4h,16b,8b
   8553 
   8554    smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   8555    umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   8556    sminv        s_4s,h_8h,h_4h,b_16b,b_8b
   8557    uminv        s_4s,h_8h,h_4h,b_16b,b_8b
   8558 
   8559    smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8560    umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8561    smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8562    umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8563    smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8564    umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8565 
   8566    smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8567    umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8568    smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8569    umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8570    smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8571    umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8572 
   8573    smov         w_b[], w_h[], x_b[], x_h[], x_s[]
   8574    umov         w_b[], w_h[], w_s[], x_d[]
   8575 
   8576    sqabs        d,s,h,b
   8577    sqneg        d,s,h,b
   8578 
   8579    sqabs        2d,4s,2s,8h,4h,16b,8b
   8580    sqneg        2d,4s,2s,8h,4h,16b,8b
   8581 
   8582    sqadd        d,s,h,b
   8583    uqadd        d,s,h,b
   8584    sqsub        d,s,h,b
   8585    uqsub        d,s,h,b
   8586 
   8587    sqadd        2d,4s,2s,8h,4h,16b,8b
   8588    uqadd        2d,4s,2s,8h,4h,16b,8b
   8589    sqsub        2d,4s,2s,8h,4h,16b,8b
   8590    uqsub        2d,4s,2s,8h,4h,16b,8b
   8591 
   8592    sqdmlal      d_s_s[], s_h_h[]
   8593    sqdmlsl      d_s_s[], s_h_h[]
   8594    sqdmull      d_s_s[], s_h_h[]
   8595 
   8596    sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8597    sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8598    sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8599 
   8600    sqdmlal      d_s_s, s_h_h
   8601    sqdmlsl      d_s_s, s_h_h
   8602    sqdmull      d_s_s, s_h_h
   8603 
   8604    sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8605    sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8606    sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8607 
   8608    sqdmulh      s_s_s[], h_h_h[]
   8609    sqrdmulh     s_s_s[], h_h_h[]
   8610 
   8611    sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   8612    sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   8613 
   8614    sqdmulh      h,s
   8615    sqrdmulh     h,s
   8616 
   8617    sqdmulh      4s,2s,8h,4h
   8618    sqrdmulh     4s,2s,8h,4h
   8619 
   8620    sqshl        d,s,h,b
   8621    uqshl        d,s,h,b
   8622    sqrshl       d,s,h,b
   8623    uqrshl       d,s,h,b
   8624 
   8625    sqshl        2d,4s,2s,8h,4h,16b,8b
   8626    uqshl        2d,4s,2s,8h,4h,16b,8b
   8627    sqrshl       2d,4s,2s,8h,4h,16b,8b
   8628    uqrshl       2d,4s,2s,8h,4h,16b,8b
   8629 
   8630    sqrshrn      s_d, h_s, b_h   #imm
   8631    uqrshrn      s_d, h_s, b_h   #imm
   8632    sqshrn       s_d, h_s, b_h   #imm
   8633    uqshrn       s_d, h_s, b_h   #imm
   8634 
   8635    sqrshrun     s_d, h_s, b_h   #imm
   8636    sqshrun      s_d, h_s, b_h   #imm
   8637 
   8638    sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8639    uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8640    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8641    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8642 
   8643    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8644    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8645 
   8646    sqshl        d,s,h,b   _#imm
   8647    uqshl        d,s,h,b   _#imm
   8648    sqshlu       d,s,h,b   _#imm
   8649 
   8650    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8651    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8652    sqshlu       2d,4s,2s,8h,4h,16b,8b   _#imm
   8653 
   8654    sqxtn        s_d,h_s,b_h
   8655    uqxtn        s_d,h_s,b_h
   8656    sqxtun       s_d,h_s,b_h
   8657 
   8658    sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8659    uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8660    sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8661 
   8662    srhadd       4s,2s,8h,4h,16b,8b
   8663    urhadd       4s,2s,8h,4h,16b,8b
   8664 
   8665    sshl (reg)   d
   8666    ushl (reg)   d
   8667    sshr (imm)   d
   8668    ushr (imm)   d
   8669    ssra (imm)   d
   8670    usra (imm)   d
   8671 
   8672    srshl (reg)  d
   8673    urshl (reg)  d
   8674    srshr (imm)  d
   8675    urshr (imm)  d
   8676    srsra (imm)  d
   8677    ursra (imm)  d
   8678 
   8679    sshl         2d,4s,2s,8h,4h,16b,8b
   8680    ushl         2d,4s,2s,8h,4h,16b,8b
   8681    sshr         2d,4s,2s,8h,4h,16b,8b
   8682    ushr         2d,4s,2s,8h,4h,16b,8b
   8683    ssra         2d,4s,2s,8h,4h,16b,8b
   8684    usra         2d,4s,2s,8h,4h,16b,8b
   8685 
   8686    srshl        2d,4s,2s,8h,4h,16b,8b
   8687    urshl        2d,4s,2s,8h,4h,16b,8b
   8688    srshr        2d,4s,2s,8h,4h,16b,8b
   8689    urshr        2d,4s,2s,8h,4h,16b,8b
   8690    srsra        2d,4s,2s,8h,4h,16b,8b
   8691    ursra        2d,4s,2s,8h,4h,16b,8b
   8692 
   8693    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8694    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8695 
   8696    suqadd  d,s,h,b
   8697    suqadd  2d,4s,2s,8h,4h,16b,8b
   8698 
   8699    tbl     8b_{16b}_8b, 16b_{16b}_16b
   8700    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8701    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8702    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8703 
   8704    tbx     8b_{16b}_8b, 16b_{16b}_16b
   8705    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8706    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8707    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8708 
   8709    trn1    2d,4s,2s,8h,4h,16b,8b
   8710    trn2    2d,4s,2s,8h,4h,16b,8b
   8711 
   8712    urecpe      4s,2s
   8713 
   8714    ursqrte     4s,2s
   8715 
   8716    usqadd      d,s,h,b
   8717    usqadd      2d,4s,2s,8h,4h,16b,8b
   8718 
   8719    uzp1      2d,4s,2s,8h,4h,16b,8b
   8720    uzp2      2d,4s,2s,8h,4h,16b,8b
   8721 
   8722    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8723 
   8724    zip1      2d,4s,2s,8h,4h,16b,8b
   8725    zip2      2d,4s,2s,8h,4h,16b,8b
   8726 
   8727    ======================== MEM ========================
   8728 
   8729    ld1  (multiple 1-element structures to 1/2/3/4 regs)
   8730    ld1  (single 1-element structure to one lane of 1 reg)
   8731    ld1r (single 1-element structure and rep to all lanes of 1 reg)
   8732 
   8733    ld2  (multiple 2-element structures to 2 regs)
   8734    ld2  (single 2-element structure to one lane of 2 regs)
   8735    ld2r (single 2-element structure and rep to all lanes of 2 regs)
   8736 
   8737    ld3  (multiple 3-element structures to 3 regs)
   8738    ld3  (single 3-element structure to one lane of 3 regs)
   8739    ld3r (single 3-element structure and rep to all lanes of 3 regs)
   8740 
   8741    ld4  (multiple 4-element structures to 4 regs)
   8742    ld4  (single 4-element structure to one lane of 4 regs)
   8743    ld4r (single 4-element structure and rep to all lanes of 4 regs)
   8744 
   8745    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   8746          addr = reg + simm7 * reg_size
   8747 
   8748    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   8749          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   8750 
   8751    ldr   q,d,s,h,b from addr
   8752          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   8753 
   8754    ldr   q,d,s from  pc+#imm19
   8755 
   8756    ldr   q,d,s,h,b from addr
   8757          addr = [Xn|SP, R <extend> <shift>]
   8758 
   8759    ldur  q,d,s,h,b from addr
   8760          addr = [Xn|SP,#imm] (unscaled offset)
   8761 
   8762    st1 (multiple 1-element structures from 1/2/3/4 regs)
   8763    st1 (single 1-element structure for 1 lane of 1 reg)
   8764 
   8765    st2 (multiple 2-element structures from 2 regs)
   8766    st2 (single 2-element structure from 1 lane of 2 regs)
   8767 
   8768    st3 (multiple 3-element structures from 3 regs)
   8769    st3 (single 3-element structure from 1 lane of 3 regs)
   8770 
   8771    st4 (multiple 4-element structures from 4 regs)
   8772    st4 (single 4-element structure from one lane of 4 regs)
   8773 
   8774    stnp q_q_addr, d_d_addr, s_s_addr
   8775         addr = [Xn|SP, #imm]
   8776 
   8777    stp  q_q_addr, d_d_addr, s_s_addr
   8778         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   8779 
   8780    str  q,d,s,h,b_addr
   8781         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   8782 
   8783    str   q,d,s,h,b_addr
   8784          addr = [Xn|SP, R <extend> <shift>]
   8785 
   8786    stur  q,d,s,h,b_addr
   8787          addr = [Xn|SP,#imm] (unscaled offset)
   8788 
   8789    ======================== CRYPTO ========================
   8790 
   8791    aesd       16b (aes single round decryption)
   8792    aese       16b (aes single round encryption)
   8793    aesimc     16b (aes inverse mix columns)
   8794    aesmc      16b (aes mix columns)
   8795 
   8796    sha1c      q_s_4s
   8797    sha1h      s_s
   8798    sha1m      q_s_4s
   8799    sha1p      q_s_4s
   8800    sha1su0    4s_4s_4s
   8801    sha1su1    4s_4s
   8802 
   8803    sha256h2   q_q_4s
   8804    sha256h    q_q_4s
   8805    sha256su0  4s_4s
   8806    sha256su1  4s_4s_4s
   8807 */
   8808