Home | History | Annotate | Download | only in arm64
      1 
      2 #include <stdio.h>
      3 #include <assert.h>
      4 #include <malloc.h>  // memalign
      5 #include <string.h>  // memset
      6 #include "tests/malloc.h"
      7 #include <math.h>    // isnormal
      8 
      9 typedef  unsigned char           UChar;
     10 typedef  unsigned short int      UShort;
     11 typedef  unsigned int            UInt;
     12 typedef  signed int              Int;
     13 typedef  unsigned char           UChar;
     14 typedef  unsigned long long int  ULong;
     15 typedef  signed long long int    Long;
     16 typedef  double                  Double;
     17 typedef  float                   Float;
     18 
     19 typedef  unsigned char           Bool;
     20 #define False ((Bool)0)
     21 #define True  ((Bool)1)
     22 
     23 
     24 #define ITERS 1
     25 
     26 typedef
     27   enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
     28   LaneTy;
     29 
     30 union _V128 {
     31    UChar  u8[16];
     32    UShort u16[8];
     33    UInt   u32[4];
     34    ULong  u64[2];
     35    Float  f32[4];
     36    Double f64[2];
     37 };
     38 typedef  union _V128   V128;
     39 
     40 static inline UChar randUChar ( void )
     41 {
     42    static UInt seed = 80021;
     43    seed = 1103515245 * seed + 12345;
     44    return (seed >> 17) & 0xFF;
     45 }
     46 
     47 static ULong randULong ( LaneTy ty )
     48 {
     49    Int i;
     50    ULong r = 0;
     51    for (i = 0; i < 8; i++) {
     52       r = (r << 8) | (ULong)(0xFF & randUChar());
     53    }
     54    return r;
     55 }
     56 
     57 /* Generates a random V128.  Ensures that that it contains normalised
     58    FP numbers when viewed as either F32x4 or F64x2, so that it is
     59    reasonable to use in FP test cases. */
     60 static void randV128 ( /*OUT*/V128* v, LaneTy ty )
     61 {
     62    static UInt nCalls = 0, nIters = 0;
     63    Int i;
     64    nCalls++;
     65    while (1) {
     66       nIters++;
     67       for (i = 0; i < 16; i++) {
     68          v->u8[i] = randUChar();
     69       }
     70       if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
     71           && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
     72         break;
     73    }
     74    if (0 == (nCalls & 0xFF))
     75       printf("randV128: %u calls, %u iters\n", nCalls, nIters);
     76 }
     77 
     78 static void showV128 ( V128* v )
     79 {
     80    Int i;
     81    for (i = 15; i >= 0; i--)
     82       printf("%02x", (Int)v->u8[i]);
     83 }
     84 
     85 static void showBlock ( const char* msg, V128* block, Int nBlock )
     86 {
     87    Int i;
     88    printf("%s\n", msg);
     89    for (i = 0; i < nBlock; i++) {
     90       printf("  ");
     91       showV128(&block[i]);
     92       printf("\n");
     93    }
     94 }
     95 
     96 static ULong dup4x16 ( UInt x )
     97 {
     98    ULong r = x & 0xF;
     99    r |= (r << 4);
    100    r |= (r << 8);
    101    r |= (r << 16);
    102    r |= (r << 32);
    103    return r;
    104 }
    105 
// Generate a random double-precision number.  About 1 time in 2,
// return a special value instead (+/-0, +/-1, +/-Inf, +/-NaN or a
// +/-denormal).  This ensures that many of the groups of 4 calls
// here will return a special value.
    110 
    111 static Double special_values[10];
    112 static Bool   special_values_initted = False;
    113 
    114 static __attribute__((noinline))
    115 Double negate ( Double d ) { return -d; }
    116 static __attribute__((noinline))
    117 Double divf64 ( Double x, Double y ) { return x/y; }
    118 
    119 static __attribute__((noinline))
    120 Double plusZero  ( void ) { return 0.0; }
    121 static __attribute__((noinline))
    122 Double minusZero ( void ) { return negate(plusZero()); }
    123 
    124 static __attribute__((noinline))
    125 Double plusOne  ( void ) { return 1.0; }
    126 static __attribute__((noinline))
    127 Double minusOne ( void ) { return negate(plusOne()); }
    128 
    129 static __attribute__((noinline))
    130 Double plusInf   ( void ) { return 1.0 / 0.0; }
    131 static __attribute__((noinline))
    132 Double minusInf  ( void ) { return negate(plusInf()); }
    133 
    134 static __attribute__((noinline))
    135 Double plusNaN  ( void ) { return divf64(plusInf(),plusInf()); }
    136 static __attribute__((noinline))
    137 Double minusNaN ( void ) { return negate(plusNaN()); }
    138 
    139 static __attribute__((noinline))
    140 Double plusDenorm  ( void ) { return 1.23e-315 / 1e3; }
    141 static __attribute__((noinline))
    142 Double minusDenorm ( void ) { return negate(plusDenorm()); }
    143 
    144 
    145 static void ensure_special_values_initted ( void )
    146 {
    147    if (special_values_initted) return;
    148    special_values[0] = plusZero();
    149    special_values[1] = minusZero();
    150    special_values[2] = plusOne();
    151    special_values[3] = minusOne();
    152    special_values[4] = plusInf();
    153    special_values[5] = minusInf();
    154    special_values[6] = plusNaN();
    155    special_values[7] = minusNaN();
    156    special_values[8] = plusDenorm();
    157    special_values[9] = minusDenorm();
    158    special_values_initted = True;
    159    int i;
    160    printf("\n");
    161    for (i = 0; i < 10; i++) {
    162       printf("special value %d = %e\n", i, special_values[i]);
    163    }
    164    printf("\n");
    165 }
    166 
    167 static Double randDouble ( void )
    168 {
    169    ensure_special_values_initted();
    170    UChar c = randUChar();
    171    if (c >= 128) {
    172       // return a normal number most of the time.
    173       // 0 .. 2^63-1
    174       ULong u64 = randULong(TyDF);
    175       // -2^62 .. 2^62-1
    176       Long s64 = (Long)u64;
    177       // -2^55 .. 2^55-1
    178       s64 >>= (62-55);
    179       // and now as a float
    180       return (Double)s64;
    181    }
    182    c = randUChar() % 10;
    183    return special_values[c];
    184 }
    185 
    186 static Float randFloat ( void )
    187 {
    188    ensure_special_values_initted();
    189    UChar c = randUChar();
    190    if (c >= 128) {
    191       // return a normal number most of the time.
    192       // 0 .. 2^63-1
    193       ULong u64 = randULong(TyDF);
    194       // -2^62 .. 2^62-1
    195       Long s64 = (Long)u64;
    196       // -2^25 .. 2^25-1
    197       s64 >>= (62-25);
    198       // and now as a float
    199       return (Float)s64;
    200    }
    201    c = randUChar() % 10;
    202    return special_values[c];
    203 }
    204 
    205 void randBlock_Doubles ( V128* block, Int nBlock )
    206 {
    207    Int i;
    208    for (i = 0; i < nBlock; i++) {
    209       block[i].f64[0] = randDouble();
    210       block[i].f64[1] = randDouble();
    211    }
    212 }
    213 
    214 void randBlock_Floats ( V128* block, Int nBlock )
    215 {
    216    Int i;
    217    for (i = 0; i < nBlock; i++) {
    218       block[i].f32[0] = randFloat();
    219       block[i].f32[1] = randFloat();
    220       block[i].f32[2] = randFloat();
    221       block[i].f32[3] = randFloat();
    222    }
    223 }
    224 
    225 
    226 /* ---------------------------------------------------------------- */
    227 /* -- Parameterisable test macros                                -- */
    228 /* ---------------------------------------------------------------- */
    229 
    230 #define DO50(_action) \
    231    do { \
    232       Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
    233    } while (0)
    234 
    235 
    236 /* Note this also sets the destination register to a known value (0x55..55)
    237    since it can sometimes be an input to the instruction too. */
    238 #define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
    239   __attribute__((noinline)) \
    240   static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( LaneTy ty ) { \
    241      Int i; \
    242      for (i = 0; i < ITERS; i++) { \
    243         V128 block[2+1]; \
    244         memset(block, 0x55, sizeof(block)); \
    245         randV128(&block[0], ty); \
    246         randV128(&block[1], ty); \
    247         __asm__ __volatile__( \
    248            "mov   x30, #0 ; msr fpsr, x30 ; " \
    249            "ldr   q7, [%0, #0]   ; " \
    250            "ldr   q8, [%0, #16]   ; " \
    251            #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
    252            "str   q8, [%0, #16] ; " \
    253            "mrs   x30, fpsr ; str x30, [%0, #32] " \
    254            : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
    255         ); \
    256         printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN); \
    257         UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
    258         showV128(&block[0]); printf("  "); \
    259         showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
    260      } \
    261   }
    262 
    263 
    264 /* Note this also sets the destination register to a known value (0x55..55)
    265    since it can sometimes be an input to the instruction too. */
    266 #define GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM)  \
    267   __attribute__((noinline)) \
    268   static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##SUFFIXM ( LaneTy ty ) { \
    269      Int i; \
    270      for (i = 0; i < ITERS; i++) { \
    271         V128 block[3+1]; \
    272         memset(block, 0x55, sizeof(block)); \
    273         randV128(&block[0], ty); \
    274         randV128(&block[1], ty); \
    275         randV128(&block[2], ty); \
    276         __asm__ __volatile__( \
    277            "mov   x30, #0 ; msr fpsr, x30 ; " \
    278            "ldr   q7, [%0, #0]   ; " \
    279            "ldr   q8, [%0, #16]   ; " \
    280            "ldr   q9, [%0, #32]   ; " \
    281            #INSN " v9." #SUFFIXD ", v7." #SUFFIXN ", v8." #SUFFIXM " ; " \
    282            "str   q9, [%0, #32] ; " \
    283            "mrs   x30, fpsr ; str x30, [%0, #48] " \
    284            : : "r"(&block[0]) : "memory", "v7", "v8", "v9", "x30" \
    285         ); \
    286         printf(#INSN   " v9." #SUFFIXD \
    287                ", v7." #SUFFIXN ", v8." #SUFFIXM "  ");   \
    288         UInt fpsr = 0xFFFFFF60 & block[3].u32[0]; \
    289         showV128(&block[0]); printf("  "); \
    290         showV128(&block[1]); printf("  "); \
    291         showV128(&block[2]); printf(" fpsr=%08x\n", fpsr); \
    292      } \
    293   }
    294 
    295 
    296 /* Note this also sets the destination register to a known value (0x55..55)
    297    since it can sometimes be an input to the instruction too. */
    298 #define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
    299   __attribute__((noinline)) \
    300   static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( LaneTy ty ) { \
    301      Int i; \
    302      for (i = 0; i < ITERS; i++) { \
    303         V128 block[2+1]; \
    304         memset(block, 0x55, sizeof(block)); \
    305         randV128(&block[0], ty); \
    306         randV128(&block[1], ty); \
    307         __asm__ __volatile__( \
    308            "mov   x30, #0 ; msr fpsr, x30 ; " \
    309            "ldr   q7, [%0, #0]   ; " \
    310            "ldr   q8, [%0, #16]   ; " \
    311            #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
    312            "str   q8, [%0, #16] ; " \
    313            "mrs   x30, fpsr ; str x30, [%0, #32] " \
    314            : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
    315         ); \
    316         printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT "  "); \
    317         UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
    318         showV128(&block[0]); printf("  "); \
    319         showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
    320      } \
    321   }
    322 
    323 
    324 /* Generate a test that involves one integer reg and one vector reg,
    325    with no bias as towards which is input or output. */
    326 #define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
    327   __attribute__((noinline)) \
    328   static void test_##TESTNAME ( LaneTy ty ) { \
    329      Int i; \
    330      assert(INTREGNO != 30); \
    331      for (i = 0; i < ITERS; i++) { \
    332         V128 block[4+1]; \
    333         memset(block, 0x55, sizeof(block)); \
    334         randV128(&block[0], ty); \
    335         randV128(&block[1], ty); \
    336         randV128(&block[2], ty); \
    337         randV128(&block[3], ty); \
    338         __asm__ __volatile__( \
    339            "mov   x30, #0 ; msr fpsr, x30 ; " \
    340            "ldr   q"#VECREGNO", [%0, #0]  ; " \
    341            "ldr   x"#INTREGNO", [%0, #16] ; " \
    342            INSN " ; " \
    343            "str   q"#VECREGNO", [%0, #32] ; " \
    344            "str   x"#INTREGNO", [%0, #48] ; " \
    345            "mrs   x30, fpsr ; str x30, [%0, #64] " \
    346            : : "r"(&block[0]) : "memory", "v"#VECREGNO, "x"#INTREGNO, "x30" \
    347         ); \
    348         printf(INSN   "   "); \
    349         UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
    350         showV128(&block[0]); printf("  "); \
    351         showV128(&block[1]); printf("  "); \
    352         showV128(&block[2]); printf("  "); \
    353         showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
    354      } \
    355   }
    356 
    357 
    358 /* Generate a test that involves two vector regs,
    359    with no bias as towards which is input or output.
    360    It's OK to use x10 as scratch.*/
    361 #define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
    362   __attribute__((noinline)) \
    363   static void test_##TESTNAME ( LaneTy ty ) { \
    364      Int i; \
    365      for (i = 0; i < ITERS; i++) { \
    366         V128 block[4+1]; \
    367         memset(block, 0x55, sizeof(block)); \
    368         randV128(&block[0], ty); \
    369         randV128(&block[1], ty); \
    370         randV128(&block[2], ty); \
    371         randV128(&block[3], ty); \
    372         __asm__ __volatile__( \
    373            "mov   x30, #0 ; msr fpsr, x30 ; " \
    374            "ldr   q"#VECREG1NO", [%0, #0]  ; " \
    375            "ldr   q"#VECREG2NO", [%0, #16] ; " \
    376            INSN " ; " \
    377            "str   q"#VECREG1NO", [%0, #32] ; " \
    378            "str   q"#VECREG2NO", [%0, #48] ; " \
    379            "mrs   x30, fpsr ; str x30, [%0, #64] " \
    380            : : "r"(&block[0]) \
    381              : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "x10", "x30" \
    382         ); \
    383         printf(INSN   "   "); \
    384         UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
    385         showV128(&block[0]); printf("  "); \
    386         showV128(&block[1]); printf("  "); \
    387         showV128(&block[2]); printf("  "); \
    388         showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
    389      } \
    390   }
    391 
    392 
    393 /* Generate a test that involves three vector regs,
    394    with no bias as towards which is input or output.  It's also OK
    395    to use v16, v17, v18 as scratch. */
    396 #define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO)  \
    397   __attribute__((noinline)) \
    398   static void test_##TESTNAME ( LaneTy ty ) { \
    399      Int i; \
    400      for (i = 0; i < ITERS; i++) { \
    401         V128 block[6+1]; \
    402         memset(block, 0x55, sizeof(block)); \
    403         randV128(&block[0], ty); \
    404         randV128(&block[1], ty); \
    405         randV128(&block[2], ty); \
    406         randV128(&block[3], ty); \
    407         randV128(&block[4], ty); \
    408         randV128(&block[5], ty); \
    409         __asm__ __volatile__( \
    410            "mov   x30, #0 ; msr fpsr, x30 ; " \
    411            "ldr   q"#VECREG1NO", [%0, #0]  ; " \
    412            "ldr   q"#VECREG2NO", [%0, #16] ; " \
    413            "ldr   q"#VECREG3NO", [%0, #32] ; " \
    414            INSN " ; " \
    415            "str   q"#VECREG1NO", [%0, #48] ; " \
    416            "str   q"#VECREG2NO", [%0, #64] ; " \
    417            "str   q"#VECREG3NO", [%0, #80] ; " \
    418            "mrs   x30, fpsr ; str x30, [%0, #96] " \
    419            : : "r"(&block[0]) \
    420            : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
    421              "v16", "v17", "v18", "x30" \
    422         ); \
    423         printf(INSN   "   "); \
    424         UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
    425         showV128(&block[0]); printf("  "); \
    426         showV128(&block[1]); printf("  "); \
    427         showV128(&block[2]); printf("  "); \
    428         showV128(&block[3]); printf("  "); \
    429         showV128(&block[4]); printf("  "); \
    430         showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
    431      } \
    432   }
    433 
    434 
    435 /* Generate a test that involves four vector regs,
    436    with no bias as towards which is input or output.  It's also OK
    437    to use v16, v17, v18 as scratch. */
    438 #define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
    439                                        VECREG3NO,VECREG4NO)  \
    440   __attribute__((noinline)) \
    441   static void test_##TESTNAME ( LaneTy ty ) { \
    442      Int i; \
    443      for (i = 0; i < ITERS; i++) { \
    444         V128 block[8+1]; \
    445         memset(block, 0x55, sizeof(block)); \
    446         randV128(&block[0], ty); \
    447         randV128(&block[1], ty); \
    448         randV128(&block[2], ty); \
    449         randV128(&block[3], ty); \
    450         randV128(&block[4], ty); \
    451         randV128(&block[5], ty); \
    452         randV128(&block[6], ty); \
    453         randV128(&block[7], ty); \
    454         __asm__ __volatile__( \
    455            "mov   x30, #0 ; msr fpsr, x30 ; " \
    456            "ldr   q"#VECREG1NO", [%0, #0]  ; " \
    457            "ldr   q"#VECREG2NO", [%0, #16] ; " \
    458            "ldr   q"#VECREG3NO", [%0, #32] ; " \
    459            "ldr   q"#VECREG4NO", [%0, #48] ; " \
    460            INSN " ; " \
    461            "str   q"#VECREG1NO", [%0, #64] ; " \
    462            "str   q"#VECREG2NO", [%0, #80] ; " \
    463            "str   q"#VECREG3NO", [%0, #96] ; " \
    464            "str   q"#VECREG4NO", [%0, #112] ; " \
    465            "mrs   x30, fpsr ; str x30, [%0, #128] " \
    466            : : "r"(&block[0]) \
    467            : "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
    468                        "v"#VECREG3NO, "v"#VECREG4NO, \
    469              "v16", "v17", "v18", "x30" \
    470         ); \
    471         printf(INSN   "   "); \
    472         UInt fpsr = 0xFFFFFF60 & block[8].u32[0]; \
    473         showV128(&block[0]); printf("  "); \
    474         showV128(&block[1]); printf("  "); \
    475         showV128(&block[2]); printf("  "); \
    476         showV128(&block[3]); printf("  "); \
    477         showV128(&block[4]); printf("  "); \
    478         showV128(&block[5]); printf("  "); \
    479         showV128(&block[6]); printf("  "); \
    480         showV128(&block[7]); printf(" fpsr=%08x\n", fpsr); \
    481      } \
    482   }
    483 
    484 
    485 /* ---------------------------------------------------------------- */
    486 /* -- Test functions and non-parameterisable test macros         -- */
    487 /* ---------------------------------------------------------------- */
    488 
    489 void test_UMINV ( void )
    490 {
    491   int i;
    492   V128 block[2];
    493 
    494   /* -- 4s -- */
    495 
    496   for (i = 0; i < 10; i++) {
    497     memset(&block, 0x55, sizeof(block));
    498     randV128(&block[0], TyS);
    499     randV128(&block[1], TyS);
    500     __asm__ __volatile__(
    501        "ldr   q7, [%0, #0]   ; "
    502        "uminv s8, v7.4s   ; "
    503        "str   q8, [%0, #16] "
    504        : : "r"(&block[0]) : "memory", "v7", "v8"
    505                          );
    506     printf("UMINV v8, v7.4s  ");
    507     showV128(&block[0]); printf("  ");
    508     showV128(&block[1]); printf("\n");
    509   }
    510 
    511   /* -- 8h -- */
    512 
    513   for (i = 0; i < 10; i++) {
    514     memset(&block, 0x55, sizeof(block));
    515     randV128(&block[0], TyH);
    516     randV128(&block[1], TyH);
    517     __asm__ __volatile__(
    518        "ldr   q7, [%0, #0]   ; "
    519        "uminv h8, v7.8h   ; "
    520        "str   q8, [%0, #16] "
    521        : : "r"(&block[0]) : "memory", "v7", "v8"
    522                          );
    523     printf("UMINV h8, v7.8h  ");
    524     showV128(&block[0]); printf("  ");
    525     showV128(&block[1]); printf("\n");
    526   }
    527 
    528   /* -- 4h -- */
    529 
    530   for (i = 0; i < 10; i++) {
    531     memset(&block, 0x55, sizeof(block));
    532     randV128(&block[0], TyH);
    533     randV128(&block[1], TyH);
    534     __asm__ __volatile__(
    535        "ldr   q7, [%0, #0]   ; "
    536        "uminv h8, v7.4h   ; "
    537        "str   q8, [%0, #16] "
    538        : : "r"(&block[0]) : "memory", "v7", "v8"
    539                          );
    540     printf("UMINV h8, v7.4h  ");
    541     showV128(&block[0]); printf("  ");
    542     showV128(&block[1]); printf("\n");
    543   }
    544 
    545   /* -- 16b -- */
    546 
    547   for (i = 0; i < 10; i++) {
    548     memset(&block, 0x55, sizeof(block));
    549     randV128(&block[0], TyB);
    550     randV128(&block[1], TyB);
    551     __asm__ __volatile__(
    552        "ldr   q7, [%0, #0]   ; "
    553        "uminv b8, v7.16b   ; "
    554        "str   q8, [%0, #16] "
    555        : : "r"(&block[0]) : "memory", "v7", "v8"
    556                          );
    557     printf("UMINV b8, v7.16b  ");
    558     showV128(&block[0]); printf("  ");
    559     showV128(&block[1]); printf("\n");
    560   }
    561 
    562   /* -- 8b -- */
    563 
    564   for (i = 0; i < 10; i++) {
    565     memset(&block, 0x55, sizeof(block));
    566     randV128(&block[0], TyB);
    567     randV128(&block[1], TyB);
    568     __asm__ __volatile__(
    569        "ldr   q7, [%0, #0]   ; "
    570        "uminv b8, v7.8b   ; "
    571        "str   q8, [%0, #16] "
    572        : : "r"(&block[0]) : "memory", "v7", "v8"
    573                          );
    574     printf("UMINV b8, v7.8b  ");
    575     showV128(&block[0]); printf("  ");
    576     showV128(&block[1]); printf("\n");
    577   }
    578 
    579 }
    580 
    581 
    582 void test_UMAXV ( void )
    583 {
    584   int i;
    585   V128 block[2];
    586 
    587   /* -- 4s -- */
    588 
    589   for (i = 0; i < 10; i++) {
    590     memset(&block, 0x55, sizeof(block));
    591     randV128(&block[0], TyS);
    592     randV128(&block[1], TyS);
    593     __asm__ __volatile__(
    594        "ldr   q7, [%0, #0]   ; "
    595        "umaxv s8, v7.4s   ; "
    596        "str   q8, [%0, #16] "
    597        : : "r"(&block[0]) : "memory", "v7", "v8"
    598                          );
    599     printf("UMAXV v8, v7.4s  ");
    600     showV128(&block[0]); printf("  ");
    601     showV128(&block[1]); printf("\n");
    602   }
    603 
    604   /* -- 8h -- */
    605 
    606   for (i = 0; i < 10; i++) {
    607     memset(&block, 0x55, sizeof(block));
    608     randV128(&block[0], TyH);
    609     randV128(&block[1], TyH);
    610     __asm__ __volatile__(
    611        "ldr   q7, [%0, #0]   ; "
    612        "umaxv h8, v7.8h   ; "
    613        "str   q8, [%0, #16] "
    614        : : "r"(&block[0]) : "memory", "v7", "v8"
    615                          );
    616     printf("UMAXV h8, v7.8h  ");
    617     showV128(&block[0]); printf("  ");
    618     showV128(&block[1]); printf("\n");
    619   }
    620 
    621   /* -- 4h -- */
    622 
    623   for (i = 0; i < 10; i++) {
    624     memset(&block, 0x55, sizeof(block));
    625     randV128(&block[0], TyH);
    626     randV128(&block[1], TyH);
    627     __asm__ __volatile__(
    628        "ldr   q7, [%0, #0]   ; "
    629        "umaxv h8, v7.4h   ; "
    630        "str   q8, [%0, #16] "
    631        : : "r"(&block[0]) : "memory", "v7", "v8"
    632                          );
    633     printf("UMAXV h8, v7.4h  ");
    634     showV128(&block[0]); printf("  ");
    635     showV128(&block[1]); printf("\n");
    636   }
    637 
    638   /* -- 16b -- */
    639 
    640   for (i = 0; i < 10; i++) {
    641     memset(&block, 0x55, sizeof(block));
    642     randV128(&block[0], TyB);
    643     randV128(&block[1], TyB);
    644     __asm__ __volatile__(
    645        "ldr   q7, [%0, #0]   ; "
    646        "umaxv b8, v7.16b   ; "
    647        "str   q8, [%0, #16] "
    648        : : "r"(&block[0]) : "memory", "v7", "v8"
    649                          );
    650     printf("UMAXV b8, v7.16b  ");
    651     showV128(&block[0]); printf("  ");
    652     showV128(&block[1]); printf("\n");
    653   }
    654 
    655   /* -- 8b -- */
    656 
    657   for (i = 0; i < 10; i++) {
    658     memset(&block, 0x55, sizeof(block));
    659     randV128(&block[0], TyB);
    660     randV128(&block[1], TyB);
    661     __asm__ __volatile__(
    662        "ldr   q7, [%0, #0]   ; "
    663        "umaxv b8, v7.8b   ; "
    664        "str   q8, [%0, #16] "
    665        : : "r"(&block[0]) : "memory", "v7", "v8"
    666                          );
    667     printf("UMAXV b8, v7.8b  ");
    668     showV128(&block[0]); printf("  ");
    669     showV128(&block[1]); printf("\n");
    670   }
    671 
    672 }
    673 
    674 
    675 void test_INS_general ( void )
    676 {
    677   V128 block[3];
    678 
    679   /* -- D[0..1] -- */
    680 
    681   memset(&block, 0x55, sizeof(block));
    682   block[1].u64[0] = randULong(TyD);
    683   __asm__ __volatile__(
    684      "ldr q7, [%0, #0]   ; "
    685      "ldr x19, [%0, #16] ; "
    686      "ins v7.d[0], x19   ; "
    687      "str q7, [%0, #32] "
    688      : : "r"(&block[0]) : "memory", "x19", "v7"
    689   );
    690   printf("INS v7.u64[0],x19  ");
    691   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    692   showV128(&block[2]); printf("\n");
    693 
    694   memset(&block, 0x55, sizeof(block));
    695   block[1].u64[0] = randULong(TyD);
    696   __asm__ __volatile__(
    697      "ldr q7, [%0, #0]   ; "
    698      "ldr x19, [%0, #16] ; "
    699      "ins v7.d[1], x19   ; "
    700      "str q7, [%0, #32] "
    701      : : "r"(&block[0]) : "memory", "x19", "v7"
    702   );
    703   printf("INS v7.d[1],x19  ");
    704   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    705   showV128(&block[2]); printf("\n");
    706 
    707   /* -- S[0..3] -- */
    708 
    709   memset(&block, 0x55, sizeof(block));
    710   block[1].u64[0] = randULong(TyS);
    711   __asm__ __volatile__(
    712      "ldr q7, [%0, #0]   ; "
    713      "ldr x19, [%0, #16] ; "
    714      "ins v7.s[0], w19   ; "
    715      "str q7, [%0, #32] "
    716      : : "r"(&block[0]) : "memory", "x19", "v7"
    717   );
    718   printf("INS v7.s[0],x19  ");
    719   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    720   showV128(&block[2]); printf("\n");
    721 
    722   memset(&block, 0x55, sizeof(block));
    723   block[1].u64[0] = randULong(TyS);
    724   __asm__ __volatile__(
    725      "ldr q7, [%0, #0]   ; "
    726      "ldr x19, [%0, #16] ; "
    727      "ins v7.s[1], w19   ; "
    728      "str q7, [%0, #32] "
    729      : : "r"(&block[0]) : "memory", "x19", "v7"
    730   );
    731   printf("INS v7.s[1],x19  ");
    732   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    733   showV128(&block[2]); printf("\n");
    734 
    735   memset(&block, 0x55, sizeof(block));
    736   block[1].u64[0] = randULong(TyS);
    737   __asm__ __volatile__(
    738      "ldr q7, [%0, #0]   ; "
    739      "ldr x19, [%0, #16] ; "
    740      "ins v7.s[2], w19   ; "
    741      "str q7, [%0, #32] "
    742      : : "r"(&block[0]) : "memory", "x19", "v7"
    743   );
    744   printf("INS v7.s[2],x19  ");
    745   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    746   showV128(&block[2]); printf("\n");
    747 
    748   memset(&block, 0x55, sizeof(block));
    749   block[1].u64[0] = randULong(TyS);
    750   __asm__ __volatile__(
    751      "ldr q7, [%0, #0]   ; "
    752      "ldr x19, [%0, #16] ; "
    753      "ins v7.s[3], w19   ; "
    754      "str q7, [%0, #32] "
    755      : : "r"(&block[0]) : "memory", "x19", "v7"
    756   );
    757   printf("INS v7.s[3],x19  ");
    758   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    759   showV128(&block[2]); printf("\n");
    760 
    761   /* -- H[0..7] -- */
    762 
    763   memset(&block, 0x55, sizeof(block));
    764   block[1].u64[0] = randULong(TyH);
    765   __asm__ __volatile__(
    766      "ldr q7, [%0, #0]   ; "
    767      "ldr x19, [%0, #16] ; "
    768      "ins v7.h[0], w19   ; "
    769      "str q7, [%0, #32] "
    770      : : "r"(&block[0]) : "memory", "x19", "v7"
    771   );
    772   printf("INS v7.h[0],x19  ");
    773   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    774   showV128(&block[2]); printf("\n");
    775 
    776   memset(&block, 0x55, sizeof(block));
    777   block[1].u64[0] = randULong(TyH);
    778   __asm__ __volatile__(
    779      "ldr q7, [%0, #0]   ; "
    780      "ldr x19, [%0, #16] ; "
    781      "ins v7.h[1], w19   ; "
    782      "str q7, [%0, #32] "
    783      : : "r"(&block[0]) : "memory", "x19", "v7"
    784   );
    785   printf("INS v7.h[1],x19  ");
    786   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    787   showV128(&block[2]); printf("\n");
    788 
    789   memset(&block, 0x55, sizeof(block));
    790   block[1].u64[0] = randULong(TyH);
    791   __asm__ __volatile__(
    792      "ldr q7, [%0, #0]   ; "
    793      "ldr x19, [%0, #16] ; "
    794      "ins v7.h[2], w19   ; "
    795      "str q7, [%0, #32] "
    796      : : "r"(&block[0]) : "memory", "x19", "v7"
    797   );
    798   printf("INS v7.h[2],x19  ");
    799   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    800   showV128(&block[2]); printf("\n");
    801 
    802   memset(&block, 0x55, sizeof(block));
    803   block[1].u64[0] = randULong(TyH);
    804   __asm__ __volatile__(
    805      "ldr q7, [%0, #0]   ; "
    806      "ldr x19, [%0, #16] ; "
    807      "ins v7.h[3], w19   ; "
    808      "str q7, [%0, #32] "
    809      : : "r"(&block[0]) : "memory", "x19", "v7"
    810   );
    811   printf("INS v7.h[3],x19  ");
    812   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    813   showV128(&block[2]); printf("\n");
    814 
    815   memset(&block, 0x55, sizeof(block));
    816   block[1].u64[0] = randULong(TyH);
    817   __asm__ __volatile__(
    818      "ldr q7, [%0, #0]   ; "
    819      "ldr x19, [%0, #16] ; "
    820      "ins v7.h[4], w19   ; "
    821      "str q7, [%0, #32] "
    822      : : "r"(&block[0]) : "memory", "x19", "v7"
    823   );
    824   printf("INS v7.h[4],x19  ");
    825   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    826   showV128(&block[2]); printf("\n");
    827 
    828   memset(&block, 0x55, sizeof(block));
    829   block[1].u64[0] = randULong(TyH);
    830   __asm__ __volatile__(
    831      "ldr q7, [%0, #0]   ; "
    832      "ldr x19, [%0, #16] ; "
    833      "ins v7.h[5], w19   ; "
    834      "str q7, [%0, #32] "
    835      : : "r"(&block[0]) : "memory", "x19", "v7"
    836   );
    837   printf("INS v7.h[5],x19  ");
    838   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    839   showV128(&block[2]); printf("\n");
    840 
    841   memset(&block, 0x55, sizeof(block));
    842   block[1].u64[0] = randULong(TyH);
    843   __asm__ __volatile__(
    844      "ldr q7, [%0, #0]   ; "
    845      "ldr x19, [%0, #16] ; "
    846      "ins v7.h[6], w19   ; "
    847      "str q7, [%0, #32] "
    848      : : "r"(&block[0]) : "memory", "x19", "v7"
    849   );
    850   printf("INS v7.h[6],x19  ");
    851   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    852   showV128(&block[2]); printf("\n");
    853 
    854   memset(&block, 0x55, sizeof(block));
    855   block[1].u64[0] = randULong(TyH);
    856   __asm__ __volatile__(
    857      "ldr q7, [%0, #0]   ; "
    858      "ldr x19, [%0, #16] ; "
    859      "ins v7.h[7], w19   ; "
    860      "str q7, [%0, #32] "
    861      : : "r"(&block[0]) : "memory", "x19", "v7"
    862   );
    863   printf("INS v7.h[7],x19  ");
    864   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    865   showV128(&block[2]); printf("\n");
    866 
    867   /* -- B[0,15] -- */
    868 
    869   memset(&block, 0x55, sizeof(block));
    870   block[1].u64[0] = randULong(TyB);
    871   __asm__ __volatile__(
    872      "ldr q7, [%0, #0]   ; "
    873      "ldr x19, [%0, #16] ; "
    874      "ins v7.b[0], w19   ; "
    875      "str q7, [%0, #32] "
    876      : : "r"(&block[0]) : "memory", "x19", "v7"
    877   );
    878   printf("INS v7.b[0],x19  ");
    879   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    880   showV128(&block[2]); printf("\n");
    881 
    882   memset(&block, 0x55, sizeof(block));
    883   block[1].u64[0] = randULong(TyB);
    884   __asm__ __volatile__(
    885      "ldr q7, [%0, #0]   ; "
    886      "ldr x19, [%0, #16] ; "
    887      "ins v7.b[15], w19   ; "
    888      "str q7, [%0, #32] "
    889      : : "r"(&block[0]) : "memory", "x19", "v7"
    890   );
    891   printf("INS v7.b[15],x19 ");
    892   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    893   showV128(&block[2]); printf("\n");
    894 }
    895 
    896 
    897 
    898 void test_SMINV ( void )
    899 {
    900   int i;
    901   V128 block[2];
    902 
    903   /* -- 4s -- */
    904 
    905   for (i = 0; i < 10; i++) {
    906     memset(&block, 0x55, sizeof(block));
    907     randV128(&block[0], TyS);
    908     randV128(&block[1], TyS);
    909     __asm__ __volatile__(
    910        "ldr   q7, [%0, #0]   ; "
    911        "sminv s8, v7.4s   ; "
    912        "str   q8, [%0, #16] "
    913        : : "r"(&block[0]) : "memory", "v7", "v8"
    914                          );
    915     printf("SMINV v8, v7.4s  ");
    916     showV128(&block[0]); printf("  ");
    917     showV128(&block[1]); printf("\n");
    918   }
    919 
    920   /* -- 8h -- */
    921 
    922   for (i = 0; i < 10; i++) {
    923     memset(&block, 0x55, sizeof(block));
    924     randV128(&block[0], TyH);
    925     randV128(&block[1], TyH);
    926     __asm__ __volatile__(
    927        "ldr   q7, [%0, #0]   ; "
    928        "sminv h8, v7.8h   ; "
    929        "str   q8, [%0, #16] "
    930        : : "r"(&block[0]) : "memory", "v7", "v8"
    931                          );
    932     printf("SMINV h8, v7.8h  ");
    933     showV128(&block[0]); printf("  ");
    934     showV128(&block[1]); printf("\n");
    935   }
    936 
    937   /* -- 4h -- */
    938 
    939   for (i = 0; i < 10; i++) {
    940     memset(&block, 0x55, sizeof(block));
    941     randV128(&block[0], TyH);
    942     randV128(&block[1], TyH);
    943     __asm__ __volatile__(
    944        "ldr   q7, [%0, #0]   ; "
    945        "sminv h8, v7.4h   ; "
    946        "str   q8, [%0, #16] "
    947        : : "r"(&block[0]) : "memory", "v7", "v8"
    948                          );
    949     printf("SMINV h8, v7.4h  ");
    950     showV128(&block[0]); printf("  ");
    951     showV128(&block[1]); printf("\n");
    952   }
    953 
    954   /* -- 16b -- */
    955 
    956   for (i = 0; i < 10; i++) {
    957     memset(&block, 0x55, sizeof(block));
    958     randV128(&block[0], TyB);
    959     randV128(&block[1], TyB);
    960     __asm__ __volatile__(
    961        "ldr   q7, [%0, #0]   ; "
    962        "sminv b8, v7.16b   ; "
    963        "str   q8, [%0, #16] "
    964        : : "r"(&block[0]) : "memory", "v7", "v8"
    965                          );
    966     printf("SMINV b8, v7.16b  ");
    967     showV128(&block[0]); printf("  ");
    968     showV128(&block[1]); printf("\n");
    969   }
    970 
    971   /* -- 8b -- */
    972 
    973   for (i = 0; i < 10; i++) {
    974     memset(&block, 0x55, sizeof(block));
    975     randV128(&block[0], TyB);
    976     randV128(&block[1], TyB);
    977     __asm__ __volatile__(
    978        "ldr   q7, [%0, #0]   ; "
    979        "sminv b8, v7.8b   ; "
    980        "str   q8, [%0, #16] "
    981        : : "r"(&block[0]) : "memory", "v7", "v8"
    982                          );
    983     printf("SMINV b8, v7.8b  ");
    984     showV128(&block[0]); printf("  ");
    985     showV128(&block[1]); printf("\n");
    986   }
    987 
    988 }
    989 
    990 
    991 void test_SMAXV ( void )
    992 {
    993   int i;
    994   V128 block[2];
    995 
    996   /* -- 4s -- */
    997 
    998   for (i = 0; i < 10; i++) {
    999     memset(&block, 0x55, sizeof(block));
   1000     randV128(&block[0], TyS);
   1001     randV128(&block[1], TyS);
   1002     __asm__ __volatile__(
   1003        "ldr   q7, [%0, #0]   ; "
   1004        "smaxv s8, v7.4s   ; "
   1005        "str   q8, [%0, #16] "
   1006        : : "r"(&block[0]) : "memory", "v7", "v8"
   1007                          );
   1008     printf("SMAXV v8, v7.4s  ");
   1009     showV128(&block[0]); printf("  ");
   1010     showV128(&block[1]); printf("\n");
   1011   }
   1012 
   1013   /* -- 8h -- */
   1014 
   1015   for (i = 0; i < 10; i++) {
   1016     memset(&block, 0x55, sizeof(block));
   1017     randV128(&block[0], TyH);
   1018     randV128(&block[1], TyH);
   1019     __asm__ __volatile__(
   1020        "ldr   q7, [%0, #0]   ; "
   1021        "smaxv h8, v7.8h   ; "
   1022        "str   q8, [%0, #16] "
   1023        : : "r"(&block[0]) : "memory", "v7", "v8"
   1024                          );
   1025     printf("SMAXV h8, v7.8h  ");
   1026     showV128(&block[0]); printf("  ");
   1027     showV128(&block[1]); printf("\n");
   1028   }
   1029 
   1030   /* -- 4h -- */
   1031 
   1032   for (i = 0; i < 10; i++) {
   1033     memset(&block, 0x55, sizeof(block));
   1034     randV128(&block[0], TyH);
   1035     randV128(&block[1], TyH);
   1036     __asm__ __volatile__(
   1037        "ldr   q7, [%0, #0]   ; "
   1038        "smaxv h8, v7.4h   ; "
   1039        "str   q8, [%0, #16] "
   1040        : : "r"(&block[0]) : "memory", "v7", "v8"
   1041                          );
   1042     printf("SMAXV h8, v7.4h  ");
   1043     showV128(&block[0]); printf("  ");
   1044     showV128(&block[1]); printf("\n");
   1045   }
   1046 
   1047   /* -- 16b -- */
   1048 
   1049   for (i = 0; i < 10; i++) {
   1050     memset(&block, 0x55, sizeof(block));
   1051     randV128(&block[0], TyB);
   1052     randV128(&block[1], TyB);
   1053     __asm__ __volatile__(
   1054        "ldr   q7, [%0, #0]   ; "
   1055        "smaxv b8, v7.16b   ; "
   1056        "str   q8, [%0, #16] "
   1057        : : "r"(&block[0]) : "memory", "v7", "v8"
   1058                          );
   1059     printf("SMAXV b8, v7.16b  ");
   1060     showV128(&block[0]); printf("  ");
   1061     showV128(&block[1]); printf("\n");
   1062   }
   1063 
   1064   /* -- 8b -- */
   1065 
   1066   for (i = 0; i < 10; i++) {
   1067     memset(&block, 0x55, sizeof(block));
   1068     randV128(&block[0], TyB);
   1069     randV128(&block[1], TyB);
   1070     __asm__ __volatile__(
   1071        "ldr   q7, [%0, #0]   ; "
   1072        "smaxv b8, v7.8b   ; "
   1073        "str   q8, [%0, #16] "
   1074        : : "r"(&block[0]) : "memory", "v7", "v8"
   1075                          );
   1076     printf("SMAXV b8, v7.8b  ");
   1077     showV128(&block[0]); printf("  ");
   1078     showV128(&block[1]); printf("\n");
   1079   }
   1080 
   1081 }
   1082 
   1083 
   1084 //======== FCCMP_D ========//
   1085 
   1086 #define GEN_test_FCCMP_D_D_0xF_EQ \
   1087   __attribute__((noinline)) static void test_FCCMP_D_D_0xF_EQ ( void ) \
   1088   { \
   1089      V128 block[4]; \
   1090      randBlock_Doubles(&block[0], 3); \
   1091      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1092      showBlock("FCCMP_D_D_0xF_EQ before", &block[0], 4); \
   1093      __asm__ __volatile__( \
   1094         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1095         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1096         "fccmp d29, d11, #0xf, eq; " \
   1097         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1098         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1099         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1100      ); \
   1101      showBlock("FCCMP_D_D_0xF_EQ after", &block[0], 4); \
   1102      printf("\n"); \
   1103   }
   1104 
   1105 #define GEN_test_FCCMP_D_D_0xF_NE \
   1106   __attribute__((noinline)) static void test_FCCMP_D_D_0xF_NE ( void ) \
   1107   { \
   1108      V128 block[4]; \
   1109      randBlock_Doubles(&block[0], 3); \
   1110      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1111      showBlock("FCCMP_D_D_0xF_NE before", &block[0], 4); \
   1112      __asm__ __volatile__( \
   1113         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1114         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1115         "fccmp d29, d11, #0xf, ne; " \
   1116         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1117         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1118         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1119      ); \
   1120      showBlock("FCCMP_D_D_0xF_NE after", &block[0], 4); \
   1121      printf("\n"); \
   1122   }
   1123 
   1124 #define GEN_test_FCCMP_D_D_0x0_EQ \
   1125   __attribute__((noinline)) static void test_FCCMP_D_D_0x0_EQ ( void ) \
   1126   { \
   1127      V128 block[4]; \
   1128      randBlock_Doubles(&block[0], 3); \
   1129      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1130      showBlock("FCCMP_D_D_0x0_EQ before", &block[0], 4); \
   1131      __asm__ __volatile__( \
   1132         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1133         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1134         "fccmp d29, d11, #0x0, eq; " \
   1135         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1136         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1137         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1138      ); \
   1139      showBlock("FCCMP_D_D_0x0_EQ after", &block[0], 4); \
   1140      printf("\n"); \
   1141   }
   1142 
   1143 #define GEN_test_FCCMP_D_D_0x0_NE \
   1144   __attribute__((noinline)) static void test_FCCMP_D_D_0x0_NE ( void ) \
   1145   { \
   1146      V128 block[4]; \
   1147      randBlock_Doubles(&block[0], 3); \
   1148      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1149      showBlock("FCCMP_D_D_0x0_NE before", &block[0], 4); \
   1150      __asm__ __volatile__( \
   1151         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1152         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1153         "fccmp d29, d11, #0x0, ne; " \
   1154         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1155         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1156         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1157      ); \
   1158      showBlock("FCCMP_D_D_0x0_NE after", &block[0], 4); \
   1159      printf("\n"); \
   1160   }
   1161 
   1162 //======== FCCMP_S ========//
   1163 
   1164 #define GEN_test_FCCMP_S_S_0xF_EQ \
   1165   __attribute__((noinline)) static void test_FCCMP_S_S_0xF_EQ ( void ) \
   1166   { \
   1167      V128 block[4]; \
   1168      randBlock_Floats(&block[0], 3); \
   1169      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1170      showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
   1171      __asm__ __volatile__( \
   1172         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1173         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1174         "fccmp s29, s11, #0xf, eq; " \
   1175         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1176         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1177         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1178      ); \
   1179      showBlock("FCCMP_S_S_0xF_EQ after", &block[0], 4); \
   1180      printf("\n"); \
   1181   }
   1182 
   1183 #define GEN_test_FCCMP_S_S_0xF_NE \
   1184   __attribute__((noinline)) static void test_FCCMP_S_S_0xF_NE ( void ) \
   1185   { \
   1186      V128 block[4]; \
   1187      randBlock_Floats(&block[0], 3); \
   1188      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1189      showBlock("FCCMP_S_S_0xF_NE before", &block[0], 4); \
   1190      __asm__ __volatile__( \
   1191         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1192         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1193         "fccmp s29, s11, #0xf, ne; " \
   1194         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1195         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1196         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1197      ); \
   1198      showBlock("FCCMP_S_S_0xF_NE after", &block[0], 4); \
   1199      printf("\n"); \
   1200   }
   1201 
   1202 #define GEN_test_FCCMP_S_S_0x0_EQ \
   1203   __attribute__((noinline)) static void test_FCCMP_S_S_0x0_EQ ( void ) \
   1204   { \
   1205      V128 block[4]; \
   1206      randBlock_Floats(&block[0], 3); \
   1207      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1208      showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
   1209      __asm__ __volatile__( \
   1210         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1211         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1212         "fccmp s29, s11, #0x0, eq; " \
   1213         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1214         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1215         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1216      ); \
   1217      showBlock("FCCMP_S_S_0x0_EQ after", &block[0], 4); \
   1218      printf("\n"); \
   1219   }
   1220 
   1221 #define GEN_test_FCCMP_S_S_0x0_NE \
   1222   __attribute__((noinline)) static void test_FCCMP_S_S_0x0_NE ( void ) \
   1223   { \
   1224      V128 block[4]; \
   1225      randBlock_Floats(&block[0], 3); \
   1226      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1227      showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
   1228      __asm__ __volatile__( \
   1229         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1230         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1231         "fccmp s29, s11, #0x0, ne; " \
   1232         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1233         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1234         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1235      ); \
   1236      showBlock("FCCMP_S_S_0x0_NE after", &block[0], 4); \
   1237      printf("\n"); \
   1238   }
   1239 
   1240 //======== FCCMPE_D ========//
   1241 
   1242 #define GEN_test_FCCMPE_D_D_0xF_EQ \
   1243   __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_EQ ( void ) \
   1244   { \
   1245      V128 block[4]; \
   1246      randBlock_Doubles(&block[0], 3); \
   1247      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1248      showBlock("FCCMPE_D_D_0xF_EQ before", &block[0], 4); \
   1249      __asm__ __volatile__( \
   1250         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1251         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1252         "fccmpe d29, d11, #0xf, eq; " \
   1253         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1254         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1255         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1256      ); \
   1257      showBlock("FCCMPE_D_D_0xF_EQ after", &block[0], 4); \
   1258      printf("\n"); \
   1259   }
   1260 
   1261 #define GEN_test_FCCMPE_D_D_0xF_NE \
   1262   __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_NE ( void ) \
   1263   { \
   1264      V128 block[4]; \
   1265      randBlock_Doubles(&block[0], 3); \
   1266      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1267      showBlock("FCCMPE_D_D_0xF_NE before", &block[0], 4); \
   1268      __asm__ __volatile__( \
   1269         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1270         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1271         "fccmpe d29, d11, #0xf, ne; " \
   1272         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1273         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1274         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1275      ); \
   1276      showBlock("FCCMPE_D_D_0xF_NE after", &block[0], 4); \
   1277      printf("\n"); \
   1278   }
   1279 
   1280 #define GEN_test_FCCMPE_D_D_0x0_EQ \
   1281   __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_EQ ( void ) \
   1282   { \
   1283      V128 block[4]; \
   1284      randBlock_Doubles(&block[0], 3); \
   1285      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1286      showBlock("FCCMPE_D_D_0x0_EQ before", &block[0], 4); \
   1287      __asm__ __volatile__( \
   1288         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1289         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1290         "fccmpe d29, d11, #0x0, eq; " \
   1291         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1292         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1293         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1294      ); \
   1295      showBlock("FCCMPE_D_D_0x0_EQ after", &block[0], 4); \
   1296      printf("\n"); \
   1297   }
   1298 
   1299 #define GEN_test_FCCMPE_D_D_0x0_NE \
   1300   __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_NE ( void ) \
   1301   { \
   1302      V128 block[4]; \
   1303      randBlock_Doubles(&block[0], 3); \
   1304      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1305      showBlock("FCCMPE_D_D_0x0_NE before", &block[0], 4); \
   1306      __asm__ __volatile__( \
   1307         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1308         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1309         "fccmpe d29, d11, #0x0, ne; " \
   1310         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1311         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1312         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1313      ); \
   1314      showBlock("FCCMPE_D_D_0x0_NE after", &block[0], 4); \
   1315      printf("\n"); \
   1316   }
   1317 
   1318 //======== FCCMPE_S ========//
   1319 
   1320 #define GEN_test_FCCMPE_S_S_0xF_EQ \
   1321   __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_EQ ( void ) \
   1322   { \
   1323      V128 block[4]; \
   1324      randBlock_Floats(&block[0], 3); \
   1325      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1326      showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
   1327      __asm__ __volatile__( \
   1328         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1329         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1330         "fccmpe s29, s11, #0xf, eq; " \
   1331         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1332         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1333         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1334      ); \
   1335      showBlock("FCCMPE_S_S_0xF_EQ after", &block[0], 4); \
   1336      printf("\n"); \
   1337   }
   1338 
   1339 #define GEN_test_FCCMPE_S_S_0xF_NE \
   1340   __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_NE ( void ) \
   1341   { \
   1342      V128 block[4]; \
   1343      randBlock_Floats(&block[0], 3); \
   1344      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1345      showBlock("FCCMPE_S_S_0xF_NE before", &block[0], 4); \
   1346      __asm__ __volatile__( \
   1347         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1348         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1349         "fccmpe s29, s11, #0xf, ne; " \
   1350         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1351         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1352         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1353      ); \
   1354      showBlock("FCCMPE_S_S_0xF_NE after", &block[0], 4); \
   1355      printf("\n"); \
   1356   }
   1357 
   1358 #define GEN_test_FCCMPE_S_S_0x0_EQ \
   1359   __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_EQ ( void ) \
   1360   { \
   1361      V128 block[4]; \
   1362      randBlock_Floats(&block[0], 3); \
   1363      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1364      showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
   1365      __asm__ __volatile__( \
   1366         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1367         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1368         "fccmpe s29, s11, #0x0, eq; " \
   1369         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1370         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1371         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1372      ); \
   1373      showBlock("FCCMPE_S_S_0x0_EQ after", &block[0], 4); \
   1374      printf("\n"); \
   1375   }
   1376 
   1377 #define GEN_test_FCCMPE_S_S_0x0_NE \
   1378   __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_NE ( void ) \
   1379   { \
   1380      V128 block[4]; \
   1381      randBlock_Floats(&block[0], 3); \
   1382      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1383      showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
   1384      __asm__ __volatile__( \
   1385         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1386         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1387         "fccmpe s29, s11, #0x0, ne; " \
   1388         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1389         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1390         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1391      ); \
   1392      showBlock("FCCMPE_S_S_0x0_NE after", &block[0], 4); \
   1393      printf("\n"); \
   1394   }
   1395 
   1396 //======== FCMEQ_D_D_D ========//
   1397 
   1398 #define GEN_test_FCMEQ_D_D_D \
   1399   __attribute__((noinline)) static void test_FCMEQ_D_D_D ( void ) \
   1400   { \
   1401      V128 block[4]; \
   1402      randBlock_Doubles(&block[0], 3); \
   1403      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1404      showBlock("FCMEQ_D_D_D before", &block[0], 4); \
   1405      __asm__ __volatile__( \
   1406         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1407         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1408         "fcmeq d29, d11, d9; " \
   1409         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1410         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1411         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1412      ); \
   1413      showBlock("FCMEQ_D_D_D after", &block[0], 4); \
   1414      printf("\n"); \
   1415   }
   1416 
   1417 //======== FCMEQ_S_S_S ========//
   1418 
   1419 #define GEN_test_FCMEQ_S_S_S \
   1420   __attribute__((noinline)) static void test_FCMEQ_S_S_S ( void ) \
   1421   { \
   1422      V128 block[4]; \
   1423      randBlock_Floats(&block[0], 3); \
   1424      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1425      showBlock("FCMEQ_S_S_S before", &block[0], 4); \
   1426      __asm__ __volatile__( \
   1427         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1428         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1429         "fcmeq s29, s11, s9; " \
   1430         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1431         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1432         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1433      ); \
   1434      showBlock("FCMEQ_S_S_S after", &block[0], 4); \
   1435      printf("\n"); \
   1436   }
   1437 
   1438 //======== FCMGE_D_D_D ========//
   1439 
   1440 #define GEN_test_FCMGE_D_D_D \
   1441   __attribute__((noinline)) static void test_FCMGE_D_D_D ( void ) \
   1442   { \
   1443      V128 block[4]; \
   1444      randBlock_Doubles(&block[0], 3); \
   1445      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1446      showBlock("FCMGE_D_D_D before", &block[0], 4); \
   1447      __asm__ __volatile__( \
   1448         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1449         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1450         "fcmge d29, d11, d9; " \
   1451         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1452         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1453         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1454      ); \
   1455      showBlock("FCMGE_D_D_D after", &block[0], 4); \
   1456      printf("\n"); \
   1457   }
   1458 
   1459 //======== FCMGE_S_S_S ========//
   1460 
   1461 #define GEN_test_FCMGE_S_S_S \
   1462   __attribute__((noinline)) static void test_FCMGE_S_S_S ( void ) \
   1463   { \
   1464      V128 block[4]; \
   1465      randBlock_Floats(&block[0], 3); \
   1466      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1467      showBlock("FCMGE_S_S_S before", &block[0], 4); \
   1468      __asm__ __volatile__( \
   1469         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1470         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1471         "fcmge s29, s11, s9; " \
   1472         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1473         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1474         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1475      ); \
   1476      showBlock("FCMGE_S_S_S after", &block[0], 4); \
   1477      printf("\n"); \
   1478   }
   1479 
   1480 //======== FCMGT_D_D_D ========//
   1481 
   1482 #define GEN_test_FCMGT_D_D_D \
   1483   __attribute__((noinline)) static void test_FCMGT_D_D_D ( void ) \
   1484   { \
   1485      V128 block[4]; \
   1486      randBlock_Doubles(&block[0], 3); \
   1487      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1488      showBlock("FCMGT_D_D_D before", &block[0], 4); \
   1489      __asm__ __volatile__( \
   1490         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1491         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1492         "fcmgt d29, d11, d9; " \
   1493         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1494         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1495         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1496      ); \
   1497      showBlock("FCMGT_D_D_D after", &block[0], 4); \
   1498      printf("\n"); \
   1499   }
   1500 
   1501 //======== FCMGT_S_S_S ========//
   1502 
   1503 #define GEN_test_FCMGT_S_S_S \
   1504   __attribute__((noinline)) static void test_FCMGT_S_S_S ( void ) \
   1505   { \
   1506      V128 block[4]; \
   1507      randBlock_Floats(&block[0], 3); \
   1508      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1509      showBlock("FCMGT_S_S_S before", &block[0], 4); \
   1510      __asm__ __volatile__( \
   1511         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1512         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1513         "fcmgt s29, s11, s9; " \
   1514         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1515         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1516         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1517      ); \
   1518      showBlock("FCMGT_S_S_S after", &block[0], 4); \
   1519      printf("\n"); \
   1520   }
   1521 
   1522 //======== FACGT_D_D_D ========//
   1523 
   1524 #define GEN_test_FACGT_D_D_D \
   1525   __attribute__((noinline)) static void test_FACGT_D_D_D ( void ) \
   1526   { \
   1527      V128 block[4]; \
   1528      randBlock_Doubles(&block[0], 3); \
   1529      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1530      showBlock("FACGT_D_D_D before", &block[0], 4); \
   1531      __asm__ __volatile__( \
   1532         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1533         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1534         "facgt d29, d11, d9; " \
   1535         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1536         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1537         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1538      ); \
   1539      showBlock("FACGT_D_D_D after", &block[0], 4); \
   1540      printf("\n"); \
   1541   }
   1542 
   1543 //======== FACGT_S_S_S ========//
   1544 
   1545 #define GEN_test_FACGT_S_S_S \
   1546   __attribute__((noinline)) static void test_FACGT_S_S_S ( void ) \
   1547   { \
   1548      V128 block[4]; \
   1549      randBlock_Floats(&block[0], 3); \
   1550      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1551      showBlock("FACGT_S_S_S before", &block[0], 4); \
   1552      __asm__ __volatile__( \
   1553         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1554         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1555         "facgt s29, s11, s9; " \
   1556         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1557         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1558         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1559      ); \
   1560      showBlock("FACGT_S_S_S after", &block[0], 4); \
   1561      printf("\n"); \
   1562   }
   1563 
   1564 //======== FACGE_D_D_D ========//
   1565 
   1566 #define GEN_test_FACGE_D_D_D \
   1567   __attribute__((noinline)) static void test_FACGE_D_D_D ( void ) \
   1568   { \
   1569      V128 block[4]; \
   1570      randBlock_Doubles(&block[0], 3); \
   1571      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1572      showBlock("FACGE_D_D_D before", &block[0], 4); \
   1573      __asm__ __volatile__( \
   1574         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1575         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1576         "facge d29, d11, d9; " \
   1577         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1578         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1579         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1580      ); \
   1581      showBlock("FACGE_D_D_D after", &block[0], 4); \
   1582      printf("\n"); \
   1583   }
   1584 
   1585 //======== FACGE_S_S_S ========//
   1586 
   1587 #define GEN_test_FACGE_S_S_S \
   1588   __attribute__((noinline)) static void test_FACGE_S_S_S ( void ) \
   1589   { \
   1590      V128 block[4]; \
   1591      randBlock_Floats(&block[0], 3); \
   1592      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1593      showBlock("FACGE_S_S_S before", &block[0], 4); \
   1594      __asm__ __volatile__( \
   1595         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1596         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1597         "facge s29, s11, s9; " \
   1598         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1599         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1600         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1601      ); \
   1602      showBlock("FACGE_S_S_S after", &block[0], 4); \
   1603      printf("\n"); \
   1604   }
   1605 
   1606 //======== FCMEQ_D_D_Z ========//
   1607 
   1608 #define GEN_test_FCMEQ_D_D_Z \
   1609   __attribute__((noinline)) static void test_FCMEQ_D_D_Z ( void ) \
   1610   { \
   1611      V128 block[4]; \
   1612      randBlock_Doubles(&block[0], 3); \
   1613      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1614      showBlock("FCMEQ_D_D_Z before", &block[0], 4); \
   1615      __asm__ __volatile__( \
   1616         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1617         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1618         "fcmeq d29, d11, #0; " \
   1619         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1620         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1621         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1622      ); \
   1623      showBlock("FCMEQ_D_D_Z after", &block[0], 4); \
   1624      printf("\n"); \
   1625   }
   1626 
   1627 //======== FCMEQ_S_S_Z ========//
   1628 
   1629 #define GEN_test_FCMEQ_S_S_Z \
   1630   __attribute__((noinline)) static void test_FCMEQ_S_S_Z ( void ) \
   1631   { \
   1632      V128 block[4]; \
   1633      randBlock_Floats(&block[0], 3); \
   1634      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1635      showBlock("FCMEQ_S_S_Z before", &block[0], 4); \
   1636      __asm__ __volatile__( \
   1637         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1638         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1639         "fcmeq s29, s11, #0; " \
   1640         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1641         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1642         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1643      ); \
   1644      showBlock("FCMEQ_S_S_Z after", &block[0], 4); \
   1645      printf("\n"); \
   1646   }
   1647 
   1648 //======== FCMGE_D_D_Z ========//
   1649 
   1650 #define GEN_test_FCMGE_D_D_Z \
   1651   __attribute__((noinline)) static void test_FCMGE_D_D_Z ( void ) \
   1652   { \
   1653      V128 block[4]; \
   1654      randBlock_Doubles(&block[0], 3); \
   1655      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1656      showBlock("FCMGE_D_D_Z before", &block[0], 4); \
   1657      __asm__ __volatile__( \
   1658         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1659         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1660         "fcmge d29, d11, #0; " \
   1661         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1662         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1663         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1664      ); \
   1665      showBlock("FCMGE_D_D_Z after", &block[0], 4); \
   1666      printf("\n"); \
   1667   }
   1668 
   1669 //======== FCMGE_S_S_Z ========//
   1670 
   1671 #define GEN_test_FCMGE_S_S_Z \
   1672   __attribute__((noinline)) static void test_FCMGE_S_S_Z ( void ) \
   1673   { \
   1674      V128 block[4]; \
   1675      randBlock_Floats(&block[0], 3); \
   1676      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1677      showBlock("FCMGE_S_S_Z before", &block[0], 4); \
   1678      __asm__ __volatile__( \
   1679         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1680         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1681         "fcmge s29, s11, #0; " \
   1682         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1683         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1684         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1685      ); \
   1686      showBlock("FCMGE_S_S_Z after", &block[0], 4); \
   1687      printf("\n"); \
   1688   }
   1689 
   1690 //======== FCMGT_D_D_Z ========//
   1691 
   1692 #define GEN_test_FCMGT_D_D_Z \
   1693   __attribute__((noinline)) static void test_FCMGT_D_D_Z ( void ) \
   1694   { \
   1695      V128 block[4]; \
   1696      randBlock_Doubles(&block[0], 3); \
   1697      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1698      showBlock("FCMGT_D_D_Z before", &block[0], 4); \
   1699      __asm__ __volatile__( \
   1700         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1701         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1702         "fcmgt d29, d11, #0; " \
   1703         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1704         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1705         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1706      ); \
   1707      showBlock("FCMGT_D_D_Z after", &block[0], 4); \
   1708      printf("\n"); \
   1709   }
   1710 
   1711 //======== FCMGT_S_S_Z ========//
   1712 
   1713 #define GEN_test_FCMGT_S_S_Z \
   1714   __attribute__((noinline)) static void test_FCMGT_S_S_Z ( void ) \
   1715   { \
   1716      V128 block[4]; \
   1717      randBlock_Floats(&block[0], 3); \
   1718      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1719      showBlock("FCMGT_S_S_Z before", &block[0], 4); \
   1720      __asm__ __volatile__( \
   1721         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1722         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1723         "fcmgt s29, s11, #0; " \
   1724         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1725         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1726         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1727      ); \
   1728      showBlock("FCMGT_S_S_Z after", &block[0], 4); \
   1729      printf("\n"); \
   1730   }
   1731 
   1732 //======== FCMLE_D_D_Z ========//
   1733 
   1734 #define GEN_test_FCMLE_D_D_Z \
   1735   __attribute__((noinline)) static void test_FCMLE_D_D_Z ( void ) \
   1736   { \
   1737      V128 block[4]; \
   1738      randBlock_Doubles(&block[0], 3); \
   1739      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1740      showBlock("FCMLE_D_D_Z before", &block[0], 4); \
   1741      __asm__ __volatile__( \
   1742         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1743         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1744         "fcmle d29, d11, #0; " \
   1745         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1746         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1747         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1748      ); \
   1749      showBlock("FCMLE_D_D_Z after", &block[0], 4); \
   1750      printf("\n"); \
   1751   }
   1752 
   1753 //======== FCMLE_S_S_Z ========//
   1754 
   1755 #define GEN_test_FCMLE_S_S_Z \
   1756   __attribute__((noinline)) static void test_FCMLE_S_S_Z ( void ) \
   1757   { \
   1758      V128 block[4]; \
   1759      randBlock_Floats(&block[0], 3); \
   1760      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1761      showBlock("FCMLE_S_S_Z before", &block[0], 4); \
   1762      __asm__ __volatile__( \
   1763         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1764         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1765         "fcmle s29, s11, #0; " \
   1766         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1767         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1768         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1769      ); \
   1770      showBlock("FCMLE_S_S_Z after", &block[0], 4); \
   1771      printf("\n"); \
   1772   }
   1773 
   1774 //======== FCMLT_D_D_Z ========//
   1775 
   1776 #define GEN_test_FCMLT_D_D_Z \
   1777   __attribute__((noinline)) static void test_FCMLT_D_D_Z ( void ) \
   1778   { \
   1779      V128 block[4]; \
   1780      randBlock_Doubles(&block[0], 3); \
   1781      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1782      showBlock("FCMLT_D_D_Z before", &block[0], 4); \
   1783      __asm__ __volatile__( \
   1784         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1785         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1786         "fcmlt d29, d11, #0; " \
   1787         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1788         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1789         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1790      ); \
   1791      showBlock("FCMLT_D_D_Z after", &block[0], 4); \
   1792      printf("\n"); \
   1793   }
   1794 
   1795 //======== FCMLT_S_S_Z ========//
   1796 
   1797 #define GEN_test_FCMLT_S_S_Z \
   1798   __attribute__((noinline)) static void test_FCMLT_S_S_Z ( void ) \
   1799   { \
   1800      V128 block[4]; \
   1801      randBlock_Floats(&block[0], 3); \
   1802      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1803      showBlock("FCMLT_S_S_Z before", &block[0], 4); \
   1804      __asm__ __volatile__( \
   1805         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1806         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1807         "fcmlt s29, s11, #0; " \
   1808         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1809         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1810         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1811      ); \
   1812      showBlock("FCMLT_S_S_Z after", &block[0], 4); \
   1813      printf("\n"); \
   1814   }
   1815 
   1816 //======== FCMP_D_D ========//
   1817 
   1818 #define GEN_test_FCMP_D_D \
   1819   __attribute__((noinline)) static void test_FCMP_D_D ( void ) \
   1820   { \
   1821      V128 block[4]; \
   1822      randBlock_Doubles(&block[0], 3); \
   1823      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1824      showBlock("FCMP_D_D before", &block[0], 4); \
   1825      __asm__ __volatile__( \
   1826         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1827         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1828         "fcmp d29, d11; " \
   1829         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1830         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1831         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1832      ); \
   1833      showBlock("FCMP_D_D after", &block[0], 4); \
   1834      printf("\n"); \
   1835   }
   1836 
   1837 //======== FCMP_S_S ========//
   1838 
   1839 #define GEN_test_FCMP_S_S \
   1840   __attribute__((noinline)) static void test_FCMP_S_S ( void ) \
   1841   { \
   1842      V128 block[4]; \
   1843      randBlock_Floats(&block[0], 3); \
   1844      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1845      showBlock("FCMP_S_S before", &block[0], 4); \
   1846      __asm__ __volatile__( \
   1847         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1848         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1849         "fcmp s29, s11; " \
   1850         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1851         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1852         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1853      ); \
   1854      showBlock("FCMP_S_S after", &block[0], 4); \
   1855      printf("\n"); \
   1856   }
   1857 
   1858 //======== FCMPE_D_D ========//
   1859 
   1860 #define GEN_test_FCMPE_D_D \
   1861   __attribute__((noinline)) static void test_FCMPE_D_D ( void ) \
   1862   { \
   1863      V128 block[4]; \
   1864      randBlock_Doubles(&block[0], 3); \
   1865      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1866      showBlock("FCMPE_D_D before", &block[0], 4); \
   1867      __asm__ __volatile__( \
   1868         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1869         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1870         "fcmpe d29, d11; " \
   1871         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1872         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1873         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1874      ); \
   1875      showBlock("FCMPE_D_D after", &block[0], 4); \
   1876      printf("\n"); \
   1877   }
   1878 
   1879 //======== FCMPE_S_S ========//
   1880 
   1881 #define GEN_test_FCMPE_S_S \
   1882   __attribute__((noinline)) static void test_FCMPE_S_S ( void ) \
   1883   { \
   1884      V128 block[4]; \
   1885      randBlock_Floats(&block[0], 3); \
   1886      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1887      showBlock("FCMPE_S_S before", &block[0], 4); \
   1888      __asm__ __volatile__( \
   1889         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1890         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1891         "fcmpe s29, s11; " \
   1892         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1893         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1894         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1895      ); \
   1896      showBlock("FCMPE_S_S after", &block[0], 4); \
   1897      printf("\n"); \
   1898   }
   1899 
   1900 //======== FCMP_D_Z ========//
   1901 
   1902 #define GEN_test_FCMP_D_Z \
   1903   __attribute__((noinline)) static void test_FCMP_D_Z ( void ) \
   1904   { \
   1905      V128 block[4]; \
   1906      randBlock_Doubles(&block[0], 3); \
   1907      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1908      showBlock("FCMP_D_Z before", &block[0], 4); \
   1909      __asm__ __volatile__( \
   1910         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1911         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1912         "fcmp d29, #0; " \
   1913         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1914         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1915         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1916      ); \
   1917      showBlock("FCMP_D_Z after", &block[0], 4); \
   1918      printf("\n"); \
   1919   }
   1920 
   1921 //======== FCMP_S_Z ========//
   1922 
   1923 #define GEN_test_FCMP_S_Z \
   1924   __attribute__((noinline)) static void test_FCMP_S_Z ( void ) \
   1925   { \
   1926      V128 block[4]; \
   1927      randBlock_Floats(&block[0], 3); \
   1928      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1929      showBlock("FCMP_S_Z before", &block[0], 4); \
   1930      __asm__ __volatile__( \
   1931         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1932         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1933         "fcmp s29, #0; " \
   1934         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1935         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1936         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1937      ); \
   1938      showBlock("FCMP_S_Z after", &block[0], 4); \
   1939      printf("\n"); \
   1940   }
   1941 
   1942 //======== FCMPE_D_Z ========//
   1943 
   1944 #define GEN_test_FCMPE_D_Z \
   1945   __attribute__((noinline)) static void test_FCMPE_D_Z ( void ) \
   1946   { \
   1947      V128 block[4]; \
   1948      randBlock_Doubles(&block[0], 3); \
   1949      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1950      showBlock("FCMPE_D_Z before", &block[0], 4); \
   1951      __asm__ __volatile__( \
   1952         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1953         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1954         "fcmpe d29, #0; " \
   1955         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1956         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1957         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1958      ); \
   1959      showBlock("FCMPE_D_Z after", &block[0], 4); \
   1960      printf("\n"); \
   1961   }
   1962 
   1963 //======== FCMPE_S_Z ========//
   1964 
   1965 #define GEN_test_FCMPE_S_Z \
   1966   __attribute__((noinline)) static void test_FCMPE_S_Z ( void ) \
   1967   { \
   1968      V128 block[4]; \
   1969      randBlock_Floats(&block[0], 3); \
   1970      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1971      showBlock("FCMPE_S_Z before", &block[0], 4); \
   1972      __asm__ __volatile__( \
   1973         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1974         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1975         "fcmpe s29, #0; " \
   1976         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1977         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1978         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1979      ); \
   1980      showBlock("FCMPE_S_Z after", &block[0], 4); \
   1981      printf("\n"); \
   1982   }
   1983 
   1984 //======== FCSEL_D_D_D_EQ ========//
   1985 
   1986 #define GEN_test_FCSEL_D_D_D_EQ \
   1987   __attribute__((noinline)) static void test_FCSEL_D_D_D_EQ ( void ) \
   1988   { \
   1989      V128 block[4]; \
   1990      randBlock_Doubles(&block[0], 3); \
   1991      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1992      showBlock("FCSEL_D_D_D_EQ before", &block[0], 4); \
   1993      __asm__ __volatile__( \
   1994         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1995         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1996         "fcsel d29, d11, d9, eq; " \
   1997         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1998         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1999         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2000      ); \
   2001      showBlock("FCSEL_D_D_D_EQ after", &block[0], 4); \
   2002      printf("\n"); \
   2003   }
   2004 
   2005 //======== FCSEL_D_D_D_NE ========//
   2006 
   2007 #define GEN_test_FCSEL_D_D_D_NE \
   2008   __attribute__((noinline)) static void test_FCSEL_D_D_D_NE ( void ) \
   2009   { \
   2010      V128 block[4]; \
   2011      randBlock_Doubles(&block[0], 3); \
   2012      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   2013      showBlock("FCSEL_D_D_D_NE before", &block[0], 4); \
   2014      __asm__ __volatile__( \
   2015         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   2016         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   2017         "fcsel d29, d11, d9, ne; " \
   2018         "mrs x9, nzcv; str x9, [%0, 48]; " \
   2019         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   2020         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2021      ); \
   2022      showBlock("FCSEL_D_D_D_NE after", &block[0], 4); \
   2023      printf("\n"); \
   2024   }
   2025 
   2026 //======== FCSEL_S_S_S_EQ ========//
   2027 
   2028 #define GEN_test_FCSEL_S_S_S_EQ \
   2029   __attribute__((noinline)) static void test_FCSEL_S_S_S_EQ ( void ) \
   2030   { \
   2031      V128 block[4]; \
   2032      randBlock_Doubles(&block[0], 3); \
   2033      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   2034      showBlock("FCSEL_S_S_S_EQ before", &block[0], 4); \
   2035      __asm__ __volatile__( \
   2036         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   2037         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   2038         "fcsel s29, s11, s9, eq; " \
   2039         "mrs x9, nzcv; str x9, [%0, 48]; " \
   2040         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   2041         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2042      ); \
   2043      showBlock("FCSEL_S_S_S_EQ after", &block[0], 4); \
   2044      printf("\n"); \
   2045   }
   2046 
   2047 //======== FCSEL_S_S_S_NE ========//
   2048 
   2049 #define GEN_test_FCSEL_S_S_S_NE \
   2050   __attribute__((noinline)) static void test_FCSEL_S_S_S_NE ( void ) \
   2051   { \
   2052      V128 block[4]; \
   2053      randBlock_Doubles(&block[0], 3); \
   2054      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   2055      showBlock("FCSEL_S_S_S_NE before", &block[0], 4); \
   2056      __asm__ __volatile__( \
   2057         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   2058         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   2059         "fcsel s29, s11, s9, ne; " \
   2060         "mrs x9, nzcv; str x9, [%0, 48]; " \
   2061         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   2062         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2063      ); \
   2064      showBlock("FCSEL_S_S_S_NE after", &block[0], 4); \
   2065      printf("\n"); \
   2066   }
   2067 
   2068 
   2069 /* ---------------------------------------------------------------- */
   2070 /* -- Tests, in the same order that they appear in main()        -- */
   2071 /* ---------------------------------------------------------------- */
   2072 
   2073 // ======================== FP ========================
   2074 
   2075 GEN_TWOVEC_TEST(fabs_d_d,   "fabs d22,    d23",    22, 23)
   2076 GEN_TWOVEC_TEST(fabs_s_s,   "fabs s22,    s23",    22, 23)
   2077 GEN_TWOVEC_TEST(fabs_2d_2d, "fabs v22.2d, v23.2d", 22, 23)
   2078 GEN_TWOVEC_TEST(fabs_4s_4s, "fabs v22.4s, v23.4s", 22, 23)
   2079 GEN_TWOVEC_TEST(fabs_2s_2s, "fabs v22.2s, v23.2s", 22, 23)
   2080 
   2081 GEN_TWOVEC_TEST(fneg_d_d,   "fneg d22, d23",       22, 23)
   2082 GEN_TWOVEC_TEST(fneg_s_s,   "fneg s22, s23",       22, 23)
   2083 GEN_TWOVEC_TEST(fneg_2d_2d, "fneg v22.2d, v23.2d", 22, 23)
   2084 GEN_TWOVEC_TEST(fneg_4s_4s, "fneg v22.4s, v23.4s", 22, 23)
   2085 GEN_TWOVEC_TEST(fneg_2s_2s, "fneg v22.2s, v23.2s", 22, 23)
   2086 
   2087 GEN_TWOVEC_TEST(fsqrt_d_d,   "fsqrt d22, d23",       22, 23)
   2088 GEN_TWOVEC_TEST(fsqrt_s_s,   "fsqrt s22, s23",       22, 23)
   2089 GEN_TWOVEC_TEST(fsqrt_2d_2d, "fsqrt v22.2d, v23.2d", 22, 23)
   2090 GEN_TWOVEC_TEST(fsqrt_4s_4s, "fsqrt v22.4s, v23.4s", 22, 23)
   2091 GEN_TWOVEC_TEST(fsqrt_2s_2s, "fsqrt v22.2s, v23.2s", 22, 23)
   2092 
   2093 GEN_THREEVEC_TEST(fadd_d_d_d,  "fadd d2, d11, d29", 2, 11, 29)
   2094 GEN_THREEVEC_TEST(fadd_s_s_s,  "fadd s2, s11, s29", 2, 11, 29)
   2095 GEN_THREEVEC_TEST(fsub_d_d_d,  "fsub d2, d11, d29", 2, 11, 29)
   2096 GEN_THREEVEC_TEST(fsub_s_s_s,  "fsub s2, s11, s29", 2, 11, 29)
   2097 
   2098 GEN_BINARY_TEST(fadd, 2d, 2d, 2d)
   2099 GEN_BINARY_TEST(fadd, 4s, 4s, 4s)
   2100 GEN_BINARY_TEST(fadd, 2s, 2s, 2s)
   2101 GEN_BINARY_TEST(fsub, 2d, 2d, 2d)
   2102 GEN_BINARY_TEST(fsub, 4s, 4s, 4s)
   2103 GEN_BINARY_TEST(fsub, 2s, 2s, 2s)
   2104 
   2105 GEN_THREEVEC_TEST(fabd_d_d_d,  "fabd d2, d11, d29", 2, 11, 29)
   2106 GEN_THREEVEC_TEST(fabd_s_s_s,  "fabd s2, s11, s29", 2, 11, 29)
   2107 GEN_BINARY_TEST(fabd, 2d, 2d, 2d)
   2108 GEN_BINARY_TEST(fabd, 4s, 4s, 4s)
   2109 GEN_BINARY_TEST(fabd, 2s, 2s, 2s)
   2110 
   2111 GEN_TWOVEC_TEST(faddp_d_2d,     "faddp d2, v23.2d",    2, 23)
   2112 GEN_TWOVEC_TEST(faddp_s_2s,     "faddp s2, v23.2s",    2, 23)
   2113 GEN_THREEVEC_TEST(faddp_2d_2d_2d, "faddp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2114 GEN_THREEVEC_TEST(faddp_4s_4s_4s, "faddp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2115 GEN_THREEVEC_TEST(faddp_2s_2s_2s, "faddp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2116 
   2117 GEN_test_FCCMP_D_D_0xF_EQ
   2118 GEN_test_FCCMP_D_D_0xF_NE
   2119 GEN_test_FCCMP_D_D_0x0_EQ
   2120 GEN_test_FCCMP_D_D_0x0_NE
   2121 GEN_test_FCCMP_S_S_0xF_EQ
   2122 GEN_test_FCCMP_S_S_0xF_NE
   2123 GEN_test_FCCMP_S_S_0x0_EQ
   2124 GEN_test_FCCMP_S_S_0x0_NE
   2125 GEN_test_FCCMPE_D_D_0xF_EQ
   2126 GEN_test_FCCMPE_D_D_0xF_NE
   2127 GEN_test_FCCMPE_D_D_0x0_EQ
   2128 GEN_test_FCCMPE_D_D_0x0_NE
   2129 GEN_test_FCCMPE_S_S_0xF_EQ
   2130 GEN_test_FCCMPE_S_S_0xF_NE
   2131 GEN_test_FCCMPE_S_S_0x0_EQ
   2132 GEN_test_FCCMPE_S_S_0x0_NE
   2133 
   2134 GEN_test_FCMEQ_D_D_D
   2135 GEN_test_FCMEQ_S_S_S
   2136 GEN_test_FCMGE_D_D_D
   2137 GEN_test_FCMGE_S_S_S
   2138 GEN_test_FCMGT_D_D_D
   2139 GEN_test_FCMGT_S_S_S
   2140 GEN_test_FACGT_D_D_D
   2141 GEN_test_FACGT_S_S_S
   2142 GEN_test_FACGE_D_D_D
   2143 GEN_test_FACGE_S_S_S
   2144 
   2145 GEN_THREEVEC_TEST(fcmeq_2d_2d_2d, "fcmeq v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2146 GEN_THREEVEC_TEST(fcmeq_4s_4s_4s, "fcmeq v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2147 GEN_THREEVEC_TEST(fcmeq_2s_2s_2s, "fcmeq v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2148 GEN_THREEVEC_TEST(fcmge_2d_2d_2d, "fcmge v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2149 GEN_THREEVEC_TEST(fcmge_4s_4s_4s, "fcmge v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2150 GEN_THREEVEC_TEST(fcmge_2s_2s_2s, "fcmge v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2151 GEN_THREEVEC_TEST(fcmgt_2d_2d_2d, "fcmgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2152 GEN_THREEVEC_TEST(fcmgt_4s_4s_4s, "fcmgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2153 GEN_THREEVEC_TEST(fcmgt_2s_2s_2s, "fcmgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2154 GEN_THREEVEC_TEST(facge_2d_2d_2d, "facge v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2155 GEN_THREEVEC_TEST(facge_4s_4s_4s, "facge v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2156 GEN_THREEVEC_TEST(facge_2s_2s_2s, "facge v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2157 GEN_THREEVEC_TEST(facgt_2d_2d_2d, "facgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2158 GEN_THREEVEC_TEST(facgt_4s_4s_4s, "facgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2159 GEN_THREEVEC_TEST(facgt_2s_2s_2s, "facgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2160 
   2161 GEN_test_FCMEQ_D_D_Z
   2162 GEN_test_FCMEQ_S_S_Z
   2163 GEN_test_FCMGE_D_D_Z
   2164 GEN_test_FCMGE_S_S_Z
   2165 GEN_test_FCMGT_D_D_Z
   2166 GEN_test_FCMGT_S_S_Z
   2167 GEN_test_FCMLE_D_D_Z
   2168 GEN_test_FCMLE_S_S_Z
   2169 GEN_test_FCMLT_D_D_Z
   2170 GEN_test_FCMLT_S_S_Z
   2171 
   2172 GEN_TWOVEC_TEST(fcmeq_z_2d_2d, "fcmeq v2.2d, v23.2d, #0", 2, 23)
   2173 GEN_TWOVEC_TEST(fcmeq_z_4s_4s, "fcmeq v2.4s, v23.4s, #0", 2, 23)
   2174 GEN_TWOVEC_TEST(fcmeq_z_2s_2s, "fcmeq v2.2s, v23.2s, #0", 2, 23)
   2175 GEN_TWOVEC_TEST(fcmge_z_2d_2d, "fcmge v2.2d, v23.2d, #0", 2, 23)
   2176 GEN_TWOVEC_TEST(fcmge_z_4s_4s, "fcmge v2.4s, v23.4s, #0", 2, 23)
   2177 GEN_TWOVEC_TEST(fcmge_z_2s_2s, "fcmge v2.2s, v23.2s, #0", 2, 23)
   2178 GEN_TWOVEC_TEST(fcmgt_z_2d_2d, "fcmgt v2.2d, v23.2d, #0", 2, 23)
   2179 GEN_TWOVEC_TEST(fcmgt_z_4s_4s, "fcmgt v2.4s, v23.4s, #0", 2, 23)
   2180 GEN_TWOVEC_TEST(fcmgt_z_2s_2s, "fcmgt v2.2s, v23.2s, #0", 2, 23)
   2181 GEN_TWOVEC_TEST(fcmle_z_2d_2d, "fcmle v2.2d, v23.2d, #0", 2, 23)
   2182 GEN_TWOVEC_TEST(fcmle_z_4s_4s, "fcmle v2.4s, v23.4s, #0", 2, 23)
   2183 GEN_TWOVEC_TEST(fcmle_z_2s_2s, "fcmle v2.2s, v23.2s, #0", 2, 23)
   2184 GEN_TWOVEC_TEST(fcmlt_z_2d_2d, "fcmlt v2.2d, v23.2d, #0", 2, 23)
   2185 GEN_TWOVEC_TEST(fcmlt_z_4s_4s, "fcmlt v2.4s, v23.4s, #0", 2, 23)
   2186 GEN_TWOVEC_TEST(fcmlt_z_2s_2s, "fcmlt v2.2s, v23.2s, #0", 2, 23)
   2187 
   2188 GEN_test_FCMP_D_Z
   2189 GEN_test_FCMP_S_Z
   2190 GEN_test_FCMPE_D_Z
   2191 GEN_test_FCMPE_S_Z
   2192 GEN_test_FCMP_D_D
   2193 GEN_test_FCMP_S_S
   2194 GEN_test_FCMPE_D_D
   2195 GEN_test_FCMPE_S_S
   2196 
   2197 GEN_test_FCSEL_D_D_D_EQ
   2198 GEN_test_FCSEL_D_D_D_NE
   2199 GEN_test_FCSEL_S_S_S_EQ
   2200 GEN_test_FCSEL_S_S_S_NE
   2201 
   2202 GEN_THREEVEC_TEST(fdiv_d_d_d,  "fdiv d2, d11, d29", 2, 11, 29)
   2203 GEN_THREEVEC_TEST(fdiv_s_s_s,  "fdiv s2, s11, s29", 2, 11, 29)
   2204 GEN_BINARY_TEST(fdiv, 2d, 2d, 2d)
   2205 GEN_BINARY_TEST(fdiv, 4s, 4s, 4s)
   2206 GEN_BINARY_TEST(fdiv, 2s, 2s, 2s)
   2207 
   2208 GEN_FOURVEC_TEST(fmadd_d_d_d_d,  "fmadd  d2, d11, d29, d3", 2, 11, 29, 3)
   2209 GEN_FOURVEC_TEST(fmadd_s_s_s_s,  "fmadd  s2, s11, s29, s3", 2, 11, 29, 3)
   2210 GEN_FOURVEC_TEST(fnmadd_d_d_d_d, "fnmadd d2, d11, d29, d3", 2, 11, 29, 3)
   2211 GEN_FOURVEC_TEST(fnmadd_s_s_s_s, "fnmadd s2, s11, s29, s3", 2, 11, 29, 3)
   2212 GEN_FOURVEC_TEST(fmsub_d_d_d_d,  "fmsub  d2, d11, d29, d3", 2, 11, 29, 3)
   2213 GEN_FOURVEC_TEST(fmsub_s_s_s_s,  "fmsub  s2, s11, s29, s3", 2, 11, 29, 3)
   2214 GEN_FOURVEC_TEST(fnmsub_d_d_d_d, "fnmsub d2, d11, d29, d3", 2, 11, 29, 3)
   2215 GEN_FOURVEC_TEST(fnmsub_s_s_s_s, "fnmsub s2, s11, s29, s3", 2, 11, 29, 3)
   2216 
   2217 GEN_THREEVEC_TEST(fnmul_d_d_d, "fnmul d2, d11, d29", 2, 11, 29)
   2218 GEN_THREEVEC_TEST(fnmul_s_s_s, "fnmul s2, s11, s29", 2, 11, 29)
   2219 
   2220 GEN_THREEVEC_TEST(fmax_d_d_d,  "fmax d2, d11, d29", 2, 11, 29)
   2221 GEN_THREEVEC_TEST(fmax_s_s_s,  "fmax s2, s11, s29", 2, 11, 29)
   2222 GEN_THREEVEC_TEST(fmin_d_d_d,  "fmin d2, d11, d29", 2, 11, 29)
   2223 GEN_THREEVEC_TEST(fmin_s_s_s,  "fmin s2, s11, s29", 2, 11, 29)
   2224 GEN_THREEVEC_TEST(fmaxnm_d_d_d,  "fmaxnm d2, d11, d29", 2, 11, 29)
   2225 GEN_THREEVEC_TEST(fmaxnm_s_s_s,  "fmaxnm s2, s11, s29", 2, 11, 29)
   2226 GEN_THREEVEC_TEST(fminnm_d_d_d,  "fminnm d2, d11, d29", 2, 11, 29)
   2227 GEN_THREEVEC_TEST(fminnm_s_s_s,  "fminnm s2, s11, s29", 2, 11, 29)
   2228 
   2229 GEN_THREEVEC_TEST(fmax_2d_2d_2d, "fmax v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2230 GEN_THREEVEC_TEST(fmax_4s_4s_4s, "fmax v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2231 GEN_THREEVEC_TEST(fmax_2s_2s_2s, "fmax v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2232 GEN_THREEVEC_TEST(fmin_2d_2d_2d, "fmin v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2233 GEN_THREEVEC_TEST(fmin_4s_4s_4s, "fmin v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2234 GEN_THREEVEC_TEST(fmin_2s_2s_2s, "fmin v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2235 GEN_THREEVEC_TEST(fmaxnm_2d_2d_2d, "fmaxnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2236 GEN_THREEVEC_TEST(fmaxnm_4s_4s_4s, "fmaxnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2237 GEN_THREEVEC_TEST(fmaxnm_2s_2s_2s, "fmaxnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2238 GEN_THREEVEC_TEST(fminnm_2d_2d_2d, "fminnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2239 GEN_THREEVEC_TEST(fminnm_4s_4s_4s, "fminnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2240 GEN_THREEVEC_TEST(fminnm_2s_2s_2s, "fminnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2241 
   2242 GEN_TWOVEC_TEST(fmaxnmp_d_2d, "fmaxnmp d2, v23.2d", 2, 23)
   2243 GEN_TWOVEC_TEST(fmaxnmp_s_2s, "fmaxnmp s2, v23.2s", 2, 23)
   2244 GEN_TWOVEC_TEST(fminnmp_d_2d, "fminnmp d2, v23.2d", 2, 23)
   2245 GEN_TWOVEC_TEST(fminnmp_s_2s, "fminnmp s2, v23.2s", 2, 23)
   2246 
   2247 GEN_THREEVEC_TEST(fmaxnmp_2d_2d_2d, "fmaxnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2248 GEN_THREEVEC_TEST(fmaxnmp_4s_4s_4s, "fmaxnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2249 GEN_THREEVEC_TEST(fmaxnmp_2s_2s_2s, "fmaxnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2250 GEN_THREEVEC_TEST(fminnmp_2d_2d_2d, "fminnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2251 GEN_THREEVEC_TEST(fminnmp_4s_4s_4s, "fminnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2252 GEN_THREEVEC_TEST(fminnmp_2s_2s_2s, "fminnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2253 
   2254 GEN_TWOVEC_TEST(fmaxnmv_s_4s, "fmaxnmv s2, v23.4s", 2, 23)
   2255 GEN_TWOVEC_TEST(fminnmv_s_4s, "fminnmv s2, v23.4s", 2, 23)
   2256 
   /* Scalar-destination FMAXP / FMINP: a two-element source vector
      (v23.2d or v23.2s) with the result written to d2 or s2. */
   2257 GEN_TWOVEC_TEST(fmaxp_d_2d, "fmaxp d2, v23.2d", 2, 23)
   2258 GEN_TWOVEC_TEST(fmaxp_s_2s, "fmaxp s2, v23.2s", 2, 23)
   2259 GEN_TWOVEC_TEST(fminp_d_2d, "fminp d2, v23.2d", 2, 23)
   2260 GEN_TWOVEC_TEST(fminp_s_2s, "fminp s2, v23.2s", 2, 23)
   2261 
   /* Vector three-operand FMAXP and FMINP in the 2d, 4s and 2s
      arrangements, using v2, v23 and v11. */
   2262 GEN_THREEVEC_TEST(fmaxp_2d_2d_2d, "fmaxp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2263 GEN_THREEVEC_TEST(fmaxp_4s_4s_4s, "fmaxp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2264 GEN_THREEVEC_TEST(fmaxp_2s_2s_2s, "fmaxp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2265 GEN_THREEVEC_TEST(fminp_2d_2d_2d, "fminp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2266 GEN_THREEVEC_TEST(fminp_4s_4s_4s, "fminp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2267 GEN_THREEVEC_TEST(fminp_2s_2s_2s, "fminp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2268 
   /* FMAXV / FMINV: source is v23.4s, result is written to scalar s2. */
   2269 GEN_TWOVEC_TEST(fmaxv_s_4s, "fmaxv s2, v23.4s", 2, 23)
   2270 GEN_TWOVEC_TEST(fminv_s_4s, "fminv s2, v23.4s", 2, 23)
   2271 
   /* Vector FMLA and FMLS (all three operands are full vectors) in the 2d,
      4s and 2s arrangements.  v2 is the first-listed operand in every
      case. */
   2272 GEN_THREEVEC_TEST(fmla_2d_2d_2d, "fmla v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2273 GEN_THREEVEC_TEST(fmla_4s_4s_4s, "fmla v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2274 GEN_THREEVEC_TEST(fmla_2s_2s_2s, "fmla v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2275 GEN_THREEVEC_TEST(fmls_2d_2d_2d, "fmls v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2276 GEN_THREEVEC_TEST(fmls_4s_4s_4s, "fmls v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2277 GEN_THREEVEC_TEST(fmls_2s_2s_2s, "fmls v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2278 
   /* Scalar FMLA / FMLS by element: scalar operands d2/d11 or s2/s11 with
      one lane of v29 as the last operand.  Lane indices used are 0 and 1
      for d, and 0 and 3 for s. */
   2279 GEN_THREEVEC_TEST(fmla_d_d_d0, "fmla d2, d11, v29.d[0]", 2, 11, 29)
   2280 GEN_THREEVEC_TEST(fmla_d_d_d1, "fmla d2, d11, v29.d[1]", 2, 11, 29)
   2281 GEN_THREEVEC_TEST(fmla_s_s_s0, "fmla s2, s11, v29.s[0]", 2, 11, 29)
   2282 GEN_THREEVEC_TEST(fmla_s_s_s3, "fmla s2, s11, v29.s[3]", 2, 11, 29)
   2283 GEN_THREEVEC_TEST(fmls_d_d_d0, "fmls d2, d11, v29.d[0]", 2, 11, 29)
   2284 GEN_THREEVEC_TEST(fmls_d_d_d1, "fmls d2, d11, v29.d[1]", 2, 11, 29)
   2285 GEN_THREEVEC_TEST(fmls_s_s_s0, "fmls s2, s11, v29.s[0]", 2, 11, 29)
   2286 GEN_THREEVEC_TEST(fmls_s_s_s3, "fmls s2, s11, v29.s[3]", 2, 11, 29)
   2287 
   /* Vector FMLA by element: v2 and v11 in the 2d, 4s and 2s arrangements,
      with one lane of v29 (d[0]/d[1] or s[0]/s[3]) as the last operand. */
   2288 GEN_THREEVEC_TEST(fmla_2d_2d_d0, "fmla v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2289 GEN_THREEVEC_TEST(fmla_2d_2d_d1, "fmla v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2290 GEN_THREEVEC_TEST(fmla_4s_4s_s0, "fmla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2291 GEN_THREEVEC_TEST(fmla_4s_4s_s3, "fmla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2292 GEN_THREEVEC_TEST(fmla_2s_2s_s0, "fmla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2293 GEN_THREEVEC_TEST(fmla_2s_2s_s3, "fmla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2294 
   /* Vector FMLS by element: v2 and v11 in the 2d, 4s and 2s arrangements,
      with one lane of v29 (d[0]/d[1] or s[0]/s[3]) as the last operand. */
   2295 GEN_THREEVEC_TEST(fmls_2d_2d_d0, "fmls v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2296 GEN_THREEVEC_TEST(fmls_2d_2d_d1, "fmls v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2297 GEN_THREEVEC_TEST(fmls_4s_4s_s0, "fmls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2298 GEN_THREEVEC_TEST(fmls_4s_4s_s3, "fmls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2299 GEN_THREEVEC_TEST(fmls_2s_2s_s0, "fmls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2300 GEN_THREEVEC_TEST(fmls_2s_2s_s3, "fmls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2301 
   /* FMOV (vector, immediate) into v22 for the 2d, 4s and 2s arrangements,
      each with the immediates 0.125, -4.0 and 1.0.  The instruction names
      only v22; register 23 is passed to the two-vector generator but does
      not appear in the instruction string. */
   2302 GEN_TWOVEC_TEST(fmov_2d_imm_01, "fmov v22.2d, #0.125", 22, 23)
   2303 GEN_TWOVEC_TEST(fmov_2d_imm_02, "fmov v22.2d, #-4.0",  22, 23)
   2304 GEN_TWOVEC_TEST(fmov_2d_imm_03, "fmov v22.2d, #1.0",   22, 23)
   2305 GEN_TWOVEC_TEST(fmov_4s_imm_01, "fmov v22.4s, #0.125", 22, 23)
   2306 GEN_TWOVEC_TEST(fmov_4s_imm_02, "fmov v22.4s, #-4.0",  22, 23)
   2307 GEN_TWOVEC_TEST(fmov_4s_imm_03, "fmov v22.4s, #1.0",   22, 23)
   2308 GEN_TWOVEC_TEST(fmov_2s_imm_01, "fmov v22.2s, #0.125", 22, 23)
   2309 GEN_TWOVEC_TEST(fmov_2s_imm_02, "fmov v22.2s, #-4.0",  22, 23)
   2310 GEN_TWOVEC_TEST(fmov_2s_imm_03, "fmov v22.2s, #1.0",   22, 23)
   2311 
   /* FMOV register-to-register between scalar FP registers (d23 -> d22 and
      s23 -> s22). */
   2312 GEN_TWOVEC_TEST(fmov_d_d,  "fmov d22, d23",   22, 23)
   2313 GEN_TWOVEC_TEST(fmov_s_s,  "fmov s22, s23",   22, 23)
   2314 
   /* FMOV between a general-purpose register (w15/x15) and an FP/SIMD
      register (s7, d7, or the upper 64-bit lane v7.d[1]), in both
      directions.  In every case the numeric arguments are given as
      (integer register number, vector register number) = (15, 7),
      regardless of which one the instruction writes. */
   2315 GEN_ONEINT_ONEVEC_TEST(fmov_s_w,  "fmov s7,      w15", 15, 7)
   2316 GEN_ONEINT_ONEVEC_TEST(fmov_d_x,  "fmov d7,      x15", 15, 7)
   2317 GEN_ONEINT_ONEVEC_TEST(fmov_d1_x, "fmov v7.d[1], x15", 15, 7)
   2318 GEN_ONEINT_ONEVEC_TEST(fmov_w_s,  "fmov w15,      s7", 15, 7)
   2319 GEN_ONEINT_ONEVEC_TEST(fmov_x_d,  "fmov x15,      d7", 15, 7)
   2320 GEN_ONEINT_ONEVEC_TEST(fmov_x_d1, "fmov x15, v7.d[1]", 15, 7)
   2321 
   2322 /* Overkill: FMOV (scalar, immediate) names only one vector register (22), so the two-vector generator is more than these cases need. */
   /* NOTE(review): fmov_s_imm_03 uses #-1.0 whereas fmov_d_imm_03 uses #1.0;
      it is not clear from here whether the difference is intentional. */
   2323 GEN_TWOVEC_TEST(fmov_d_imm_01, "fmov d22, #0.125", 22, 23)
   2324 GEN_TWOVEC_TEST(fmov_d_imm_02, "fmov d22, #-4.0",  22, 23)
   2325 GEN_TWOVEC_TEST(fmov_d_imm_03, "fmov d22, #1.0",   22, 23)
   2326 GEN_TWOVEC_TEST(fmov_s_imm_01, "fmov s22, #0.125", 22, 23)
   2327 GEN_TWOVEC_TEST(fmov_s_imm_02, "fmov s22, #-4.0",  22, 23)
   2328 GEN_TWOVEC_TEST(fmov_s_imm_03, "fmov s22, #-1.0",   22, 23)
   2329 
   /* Scalar FMUL by element: d2/d11 or s2/s11 with one lane of v29
      (d[0], d[1], s[0] or s[3]) as the last operand. */
   2330 GEN_THREEVEC_TEST(fmul_d_d_d0, "fmul d2, d11, v29.d[0]", 2, 11, 29)
   2331 GEN_THREEVEC_TEST(fmul_d_d_d1, "fmul d2, d11, v29.d[1]", 2, 11, 29)
   2332 GEN_THREEVEC_TEST(fmul_s_s_s0, "fmul s2, s11, v29.s[0]", 2, 11, 29)
   2333 GEN_THREEVEC_TEST(fmul_s_s_s3, "fmul s2, s11, v29.s[3]", 2, 11, 29)
   2334 
   /* Vector FMUL by element: v2 and v11 in the 2d, 4s and 2s arrangements,
      with one lane of v29 (d[0]/d[1] or s[0]/s[3]) as the last operand. */
   2335 GEN_THREEVEC_TEST(fmul_2d_2d_d0, "fmul v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2336 GEN_THREEVEC_TEST(fmul_2d_2d_d1, "fmul v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2337 GEN_THREEVEC_TEST(fmul_4s_4s_s0, "fmul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2338 GEN_THREEVEC_TEST(fmul_4s_4s_s3, "fmul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2339 GEN_THREEVEC_TEST(fmul_2s_2s_s0, "fmul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2340 GEN_THREEVEC_TEST(fmul_2s_2s_s3, "fmul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2341 
   /* FMUL with plain register operands: scalar d and s forms, then the
      vector 2d, 4s and 2s arrangements.  Registers are 2, 11 and 29. */
   2342 GEN_THREEVEC_TEST(fmul_d_d_d,    "fmul d2, d11, d29", 2, 11, 29)
   2343 GEN_THREEVEC_TEST(fmul_s_s_s,    "fmul s2, s11, s29", 2, 11, 29)
   2344 GEN_THREEVEC_TEST(fmul_2d_2d_2d, "fmul v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2345 GEN_THREEVEC_TEST(fmul_4s_4s_4s, "fmul v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2346 GEN_THREEVEC_TEST(fmul_2s_2s_2s, "fmul v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2347 
   /* FMULX by element: first the scalar forms (d2/d11, s2/s11), then the
      vector forms (2d, 4s, 2s), each with one lane of v29 (d[0]/d[1] or
      s[0]/s[3]) as the last operand. */
   2348 GEN_THREEVEC_TEST(fmulx_d_d_d0, "fmulx d2, d11, v29.d[0]", 2, 11, 29)
   2349 GEN_THREEVEC_TEST(fmulx_d_d_d1, "fmulx d2, d11, v29.d[1]", 2, 11, 29)
   2350 GEN_THREEVEC_TEST(fmulx_s_s_s0, "fmulx s2, s11, v29.s[0]", 2, 11, 29)
   2351 GEN_THREEVEC_TEST(fmulx_s_s_s3, "fmulx s2, s11, v29.s[3]", 2, 11, 29)
   2352 GEN_THREEVEC_TEST(fmulx_2d_2d_d0, "fmulx v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2353 GEN_THREEVEC_TEST(fmulx_2d_2d_d1, "fmulx v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2354 GEN_THREEVEC_TEST(fmulx_4s_4s_s0, "fmulx v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2355 GEN_THREEVEC_TEST(fmulx_4s_4s_s3, "fmulx v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2356 GEN_THREEVEC_TEST(fmulx_2s_2s_s0, "fmulx v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2357 GEN_THREEVEC_TEST(fmulx_2s_2s_s3, "fmulx v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2358 
   /* FMULX with plain register operands: scalar d and s forms, then the
      vector 2d, 4s and 2s arrangements.  Registers are 2, 11 and 29. */
   2359 GEN_THREEVEC_TEST(fmulx_d_d_d,    "fmulx d2, d11, d29", 2, 11, 29)
   2360 GEN_THREEVEC_TEST(fmulx_s_s_s,    "fmulx s2, s11, s29", 2, 11, 29)
   2361 GEN_THREEVEC_TEST(fmulx_2d_2d_2d, "fmulx v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2362 GEN_THREEVEC_TEST(fmulx_4s_4s_4s, "fmulx v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2363 GEN_THREEVEC_TEST(fmulx_2s_2s_2s, "fmulx v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2364 
   /* FRECPE (reciprocal estimate): scalar d and s forms, then the vector
      2d, 4s and 2s arrangements, all as register 23 -> register 22. */
   2365 GEN_TWOVEC_TEST(frecpe_d_d,   "frecpe d22, d23",       22, 23)
   2366 GEN_TWOVEC_TEST(frecpe_s_s,   "frecpe s22, s23",       22, 23)
   2367 GEN_TWOVEC_TEST(frecpe_2d_2d, "frecpe v22.2d, v23.2d", 22, 23)
   2368 GEN_TWOVEC_TEST(frecpe_4s_4s, "frecpe v22.4s, v23.4s", 22, 23)
   2369 GEN_TWOVEC_TEST(frecpe_2s_2s, "frecpe v22.2s, v23.2s", 22, 23)
   2370 
   /* FRECPS (reciprocal step): scalar d and s forms, then the vector 2d,
      4s and 2s arrangements.  Registers are 2, 11 and 29. */
   2371 GEN_THREEVEC_TEST(frecps_d_d_d,    "frecps d2, d11, d29", 2, 11, 29)
   2372 GEN_THREEVEC_TEST(frecps_s_s_s,    "frecps s2, s11, s29", 2, 11, 29)
   2373 GEN_THREEVEC_TEST(frecps_2d_2d_2d, "frecps v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2374 GEN_THREEVEC_TEST(frecps_4s_4s_4s, "frecps v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2375 GEN_THREEVEC_TEST(frecps_2s_2s_2s, "frecps v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2376 
   /* FRECPX: only the scalar d and s forms are exercised here. */
   2377 GEN_TWOVEC_TEST(frecpx_d_d,   "frecpx d22, d23",       22, 23)
   2378 GEN_TWOVEC_TEST(frecpx_s_s,   "frecpx s22, s23",       22, 23)
   2379 
   /* Scalar FRINT{A,I,M,N,P,X,Z} (round to integral, one mnemonic per
      rounding behaviour), each in its d and s form, as register 23 ->
      register 22. */
   2380 GEN_TWOVEC_TEST(frinta_d_d,   "frinta d22, d23",       22, 23)
   2381 GEN_TWOVEC_TEST(frinta_s_s,   "frinta s22, s23",       22, 23)
   2382 GEN_TWOVEC_TEST(frinti_d_d,   "frinti d22, d23",       22, 23)
   2383 GEN_TWOVEC_TEST(frinti_s_s,   "frinti s22, s23",       22, 23)
   2384 GEN_TWOVEC_TEST(frintm_d_d,   "frintm d22, d23",       22, 23)
   2385 GEN_TWOVEC_TEST(frintm_s_s,   "frintm s22, s23",       22, 23)
   2386 GEN_TWOVEC_TEST(frintn_d_d,   "frintn d22, d23",       22, 23)
   2387 GEN_TWOVEC_TEST(frintn_s_s,   "frintn s22, s23",       22, 23)
   2388 GEN_TWOVEC_TEST(frintp_d_d,   "frintp d22, d23",       22, 23)
   2389 GEN_TWOVEC_TEST(frintp_s_s,   "frintp s22, s23",       22, 23)
   2390 GEN_TWOVEC_TEST(frintx_d_d,   "frintx d22, d23",       22, 23)
   2391 GEN_TWOVEC_TEST(frintx_s_s,   "frintx s22, s23",       22, 23)
   2392 GEN_TWOVEC_TEST(frintz_d_d,   "frintz d22, d23",       22, 23)
   2393 GEN_TWOVEC_TEST(frintz_s_s,   "frintz s22, s23",       22, 23)
   2394 
   /* Vector FRINT{A,I,M,N,P,X,Z}, each in the 2d, 4s and 2s arrangements,
      as v11 -> v2. */
   2395 GEN_TWOVEC_TEST(frinta_2d_2d, "frinta v2.2d, v11.2d", 2, 11)
   2396 GEN_TWOVEC_TEST(frinta_4s_4s, "frinta v2.4s, v11.4s", 2, 11)
   2397 GEN_TWOVEC_TEST(frinta_2s_2s, "frinta v2.2s, v11.2s", 2, 11)
   2398 GEN_TWOVEC_TEST(frinti_2d_2d, "frinti v2.2d, v11.2d", 2, 11)
   2399 GEN_TWOVEC_TEST(frinti_4s_4s, "frinti v2.4s, v11.4s", 2, 11)
   2400 GEN_TWOVEC_TEST(frinti_2s_2s, "frinti v2.2s, v11.2s", 2, 11)
   2401 GEN_TWOVEC_TEST(frintm_2d_2d, "frintm v2.2d, v11.2d", 2, 11)
   2402 GEN_TWOVEC_TEST(frintm_4s_4s, "frintm v2.4s, v11.4s", 2, 11)
   2403 GEN_TWOVEC_TEST(frintm_2s_2s, "frintm v2.2s, v11.2s", 2, 11)
   2404 GEN_TWOVEC_TEST(frintn_2d_2d, "frintn v2.2d, v11.2d", 2, 11)
   2405 GEN_TWOVEC_TEST(frintn_4s_4s, "frintn v2.4s, v11.4s", 2, 11)
   2406 GEN_TWOVEC_TEST(frintn_2s_2s, "frintn v2.2s, v11.2s", 2, 11)
   2407 GEN_TWOVEC_TEST(frintp_2d_2d, "frintp v2.2d, v11.2d", 2, 11)
   2408 GEN_TWOVEC_TEST(frintp_4s_4s, "frintp v2.4s, v11.4s", 2, 11)
   2409 GEN_TWOVEC_TEST(frintp_2s_2s, "frintp v2.2s, v11.2s", 2, 11)
   2410 GEN_TWOVEC_TEST(frintx_2d_2d, "frintx v2.2d, v11.2d", 2, 11)
   2411 GEN_TWOVEC_TEST(frintx_4s_4s, "frintx v2.4s, v11.4s", 2, 11)
   2412 GEN_TWOVEC_TEST(frintx_2s_2s, "frintx v2.2s, v11.2s", 2, 11)
   2413 GEN_TWOVEC_TEST(frintz_2d_2d, "frintz v2.2d, v11.2d", 2, 11)
   2414 GEN_TWOVEC_TEST(frintz_4s_4s, "frintz v2.4s, v11.4s", 2, 11)
   2415 GEN_TWOVEC_TEST(frintz_2s_2s, "frintz v2.2s, v11.2s", 2, 11)
   2416 
   /* FRSQRTE (reciprocal square root estimate): scalar d and s forms, then
      the vector 2d, 4s and 2s arrangements, as register 23 -> register 22. */
   2417 GEN_TWOVEC_TEST(frsqrte_d_d,   "frsqrte d22, d23",       22, 23)
   2418 GEN_TWOVEC_TEST(frsqrte_s_s,   "frsqrte s22, s23",       22, 23)
   2419 GEN_TWOVEC_TEST(frsqrte_2d_2d, "frsqrte v22.2d, v23.2d", 22, 23)
   2420 GEN_TWOVEC_TEST(frsqrte_4s_4s, "frsqrte v22.4s, v23.4s", 22, 23)
   2421 GEN_TWOVEC_TEST(frsqrte_2s_2s, "frsqrte v22.2s, v23.2s", 22, 23)
   2422 
   /* FRSQRTS (reciprocal square root step): scalar d and s forms, then the
      vector 2d, 4s and 2s arrangements.  Registers are 2, 11 and 29. */
   2423 GEN_THREEVEC_TEST(frsqrts_d_d_d,    "frsqrts d2, d11, d29", 2, 11, 29)
   2424 GEN_THREEVEC_TEST(frsqrts_s_s_s,    "frsqrts s2, s11, s29", 2, 11, 29)
   2425 GEN_THREEVEC_TEST(frsqrts_2d_2d_2d, "frsqrts v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2426 GEN_THREEVEC_TEST(frsqrts_4s_4s_4s, "frsqrts v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2427 GEN_THREEVEC_TEST(frsqrts_2s_2s_2s, "frsqrts v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2428 
   2429 // ======================== CONV ========================
   2430 
   /* Scalar FCVT between the h, s and d register views: all six ordered
      (destination, source) pairs, always register 16 -> register 7.  Test
      names are fcvt_<dst>_<src>. */
   2431 GEN_TWOVEC_TEST(fcvt_s_h, "fcvt s7, h16", 7, 16)
   2432 GEN_TWOVEC_TEST(fcvt_d_h, "fcvt d7, h16", 7, 16)
   2433 GEN_TWOVEC_TEST(fcvt_h_s, "fcvt h7, s16", 7, 16)
   2434 GEN_TWOVEC_TEST(fcvt_d_s, "fcvt d7, s16", 7, 16)
   2435 GEN_TWOVEC_TEST(fcvt_h_d, "fcvt h7, d16", 7, 16)
   2436 GEN_TWOVEC_TEST(fcvt_s_d, "fcvt s7, d16", 7, 16)
   2437 
   /* FCVTL / FCVTL2 (widening conversions) from v29 into v11.  The test
      names do not carry the "2" suffix; the FCVTL2 cases are told apart by
      their wider source arrangement (8h, 4s) in the name. */
   2438 GEN_TWOVEC_TEST(fcvtl_4s_4h, "fcvtl  v11.4s, v29.4h", 11, 29)
   2439 GEN_TWOVEC_TEST(fcvtl_4s_8h, "fcvtl2 v11.4s, v29.8h", 11, 29)
   2440 GEN_TWOVEC_TEST(fcvtl_2d_2s, "fcvtl  v11.2d, v29.2s", 11, 29)
   2441 GEN_TWOVEC_TEST(fcvtl_2d_4s, "fcvtl2 v11.2d, v29.4s", 11, 29)
   2442 
   /* FCVTN / FCVTN2 (narrowing conversions) from v23 into v22.  The test
      names do not carry the "2" suffix; the FCVTN2 cases are told apart by
      their wider destination arrangement (8h, 4s) in the name. */
   2443 GEN_TWOVEC_TEST(fcvtn_4h_4s, "fcvtn  v22.4h, v23.4s", 22, 23)
   2444 GEN_TWOVEC_TEST(fcvtn_8h_4s, "fcvtn2 v22.8h, v23.4s", 22, 23)
   2445 GEN_TWOVEC_TEST(fcvtn_2s_2d, "fcvtn  v22.2s, v23.2d", 22, 23)
   2446 GEN_TWOVEC_TEST(fcvtn_4s_2d, "fcvtn2 v22.4s, v23.2d", 22, 23)
   2447 
   /* FCVTAS / FCVTAU (FP to signed/unsigned integer, "A" rounding variant).
      Covers: scalar SIMD d and s forms, vector 2d/4s/2s forms, and forms
      that write a general-purpose w21/x21 from s10/d10.  In the scalar
      SIMD forms the unsigned tests swap the register roles (21 <- 10)
      relative to the signed ones (10 <- 21).  For the general-purpose
      forms the numeric arguments are (integer reg, vector reg). */
   2448 GEN_TWOVEC_TEST(fcvtas_d_d,   "fcvtas d10, d21",       10, 21)
   2449 GEN_TWOVEC_TEST(fcvtau_d_d,   "fcvtau d21, d10",       21, 10)
   2450 GEN_TWOVEC_TEST(fcvtas_s_s,   "fcvtas s10, s21",       10, 21)
   2451 GEN_TWOVEC_TEST(fcvtau_s_s,   "fcvtau s21, s10",       21, 10)
   2452 GEN_TWOVEC_TEST(fcvtas_2d_2d, "fcvtas v10.2d, v21.2d", 10, 21)
   2453 GEN_TWOVEC_TEST(fcvtau_2d_2d, "fcvtau v10.2d, v21.2d", 10, 21)
   2454 GEN_TWOVEC_TEST(fcvtas_4s_4s, "fcvtas v10.4s, v21.4s", 10, 21)
   2455 GEN_TWOVEC_TEST(fcvtau_4s_4s, "fcvtau v10.4s, v21.4s", 10, 21)
   2456 GEN_TWOVEC_TEST(fcvtas_2s_2s, "fcvtas v10.2s, v21.2s", 10, 21)
   2457 GEN_TWOVEC_TEST(fcvtau_2s_2s, "fcvtau v10.2s, v21.2s", 10, 21)
   2458 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_s, "fcvtas w21, s10", 21, 10)
   2459 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_s, "fcvtau w21, s10", 21, 10)
   2460 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_s, "fcvtas x21, s10", 21, 10)
   2461 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_s, "fcvtau x21, s10", 21, 10)
   2462 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_d, "fcvtas w21, d10", 21, 10)
   2463 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_d, "fcvtau w21, d10", 21, 10)
   2464 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_d, "fcvtas x21, d10", 21, 10)
   2465 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_d, "fcvtau x21, d10", 21, 10)
   2466 
   /* FCVTMS / FCVTMU (FP to signed/unsigned integer, "M" rounding variant).
      Same layout as the other FCVT{A,N,P,Z} groups: scalar SIMD d and s
      forms, vector 2d/4s/2s forms, then general-purpose w21/x21
      destinations from s10/d10 with arguments (integer reg, vector reg). */
   2467 GEN_TWOVEC_TEST(fcvtms_d_d,   "fcvtms d10, d21",       10, 21)
   2468 GEN_TWOVEC_TEST(fcvtmu_d_d,   "fcvtmu d21, d10",       21, 10)
   2469 GEN_TWOVEC_TEST(fcvtms_s_s,   "fcvtms s10, s21",       10, 21)
   2470 GEN_TWOVEC_TEST(fcvtmu_s_s,   "fcvtmu s21, s10",       21, 10)
   2471 GEN_TWOVEC_TEST(fcvtms_2d_2d, "fcvtms v10.2d, v21.2d", 10, 21)
   2472 GEN_TWOVEC_TEST(fcvtmu_2d_2d, "fcvtmu v10.2d, v21.2d", 10, 21)
   2473 GEN_TWOVEC_TEST(fcvtms_4s_4s, "fcvtms v10.4s, v21.4s", 10, 21)
   2474 GEN_TWOVEC_TEST(fcvtmu_4s_4s, "fcvtmu v10.4s, v21.4s", 10, 21)
   2475 GEN_TWOVEC_TEST(fcvtms_2s_2s, "fcvtms v10.2s, v21.2s", 10, 21)
   2476 GEN_TWOVEC_TEST(fcvtmu_2s_2s, "fcvtmu v10.2s, v21.2s", 10, 21)
   2477 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_s, "fcvtms w21, s10", 21, 10)
   2478 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_s, "fcvtmu w21, s10", 21, 10)
   2479 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_s, "fcvtms x21, s10", 21, 10)
   2480 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_s, "fcvtmu x21, s10", 21, 10)
   2481 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_d, "fcvtms w21, d10", 21, 10)
   2482 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_d, "fcvtmu w21, d10", 21, 10)
   2483 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_d, "fcvtms x21, d10", 21, 10)
   2484 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_d, "fcvtmu x21, d10", 21, 10)
   2485 
   /* FCVTNS / FCVTNU (FP to signed/unsigned integer, "N" rounding variant).
      Scalar SIMD d and s forms, vector 2d/4s/2s forms, then
      general-purpose w21/x21 destinations from s10/d10 with arguments
      (integer reg, vector reg). */
   2486 GEN_TWOVEC_TEST(fcvtns_d_d,   "fcvtns d10, d21",       10, 21)
   2487 GEN_TWOVEC_TEST(fcvtnu_d_d,   "fcvtnu d21, d10",       21, 10)
   2488 GEN_TWOVEC_TEST(fcvtns_s_s,   "fcvtns s10, s21",       10, 21)
   2489 GEN_TWOVEC_TEST(fcvtnu_s_s,   "fcvtnu s21, s10",       21, 10)
   2490 GEN_TWOVEC_TEST(fcvtns_2d_2d, "fcvtns v10.2d, v21.2d", 10, 21)
   2491 GEN_TWOVEC_TEST(fcvtnu_2d_2d, "fcvtnu v10.2d, v21.2d", 10, 21)
   2492 GEN_TWOVEC_TEST(fcvtns_4s_4s, "fcvtns v10.4s, v21.4s", 10, 21)
   2493 GEN_TWOVEC_TEST(fcvtnu_4s_4s, "fcvtnu v10.4s, v21.4s", 10, 21)
   2494 GEN_TWOVEC_TEST(fcvtns_2s_2s, "fcvtns v10.2s, v21.2s", 10, 21)
   2495 GEN_TWOVEC_TEST(fcvtnu_2s_2s, "fcvtnu v10.2s, v21.2s", 10, 21)
   2496 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_s, "fcvtns w21, s10", 21, 10)
   2497 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_s, "fcvtnu w21, s10", 21, 10)
   2498 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_s, "fcvtns x21, s10", 21, 10)
   2499 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_s, "fcvtnu x21, s10", 21, 10)
   2500 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_d, "fcvtns w21, d10", 21, 10)
   2501 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_d, "fcvtnu w21, d10", 21, 10)
   2502 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_d, "fcvtns x21, d10", 21, 10)
   2503 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_d, "fcvtnu x21, d10", 21, 10)
   2504 
   /* FCVTPS / FCVTPU (FP to signed/unsigned integer, "P" rounding variant).
      Scalar SIMD d and s forms, vector 2d/4s/2s forms, then
      general-purpose w21/x21 destinations from s10/d10 with arguments
      (integer reg, vector reg). */
   2505 GEN_TWOVEC_TEST(fcvtps_d_d,   "fcvtps d10, d21",       10, 21)
   2506 GEN_TWOVEC_TEST(fcvtpu_d_d,   "fcvtpu d21, d10",       21, 10)
   2507 GEN_TWOVEC_TEST(fcvtps_s_s,   "fcvtps s10, s21",       10, 21)
   2508 GEN_TWOVEC_TEST(fcvtpu_s_s,   "fcvtpu s21, s10",       21, 10)
   2509 GEN_TWOVEC_TEST(fcvtps_2d_2d, "fcvtps v10.2d, v21.2d", 10, 21)
   2510 GEN_TWOVEC_TEST(fcvtpu_2d_2d, "fcvtpu v10.2d, v21.2d", 10, 21)
   2511 GEN_TWOVEC_TEST(fcvtps_4s_4s, "fcvtps v10.4s, v21.4s", 10, 21)
   2512 GEN_TWOVEC_TEST(fcvtpu_4s_4s, "fcvtpu v10.4s, v21.4s", 10, 21)
   2513 GEN_TWOVEC_TEST(fcvtps_2s_2s, "fcvtps v10.2s, v21.2s", 10, 21)
   2514 GEN_TWOVEC_TEST(fcvtpu_2s_2s, "fcvtpu v10.2s, v21.2s", 10, 21)
   2515 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_s, "fcvtps w21, s10", 21, 10)
   2516 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_s, "fcvtpu w21, s10", 21, 10)
   2517 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_s, "fcvtps x21, s10", 21, 10)
   2518 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_s, "fcvtpu x21, s10", 21, 10)
   2519 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_d, "fcvtps w21, d10", 21, 10)
   2520 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_d, "fcvtpu w21, d10", 21, 10)
   2521 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_d, "fcvtps x21, d10", 21, 10)
   2522 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_d, "fcvtpu x21, d10", 21, 10)
   2523 
   /* FCVTZS / FCVTZU (FP to signed/unsigned integer, "Z" rounding variant),
      integer-result forms without a fractional-bits operand.  Scalar SIMD
      d and s forms, vector 2d/4s/2s forms, then general-purpose w21/x21
      destinations from s10/d10 with arguments (integer reg, vector reg). */
   2524 GEN_TWOVEC_TEST(fcvtzs_d_d,   "fcvtzs d10, d21",       10, 21)
   2525 GEN_TWOVEC_TEST(fcvtzu_d_d,   "fcvtzu d21, d10",       21, 10)
   2526 GEN_TWOVEC_TEST(fcvtzs_s_s,   "fcvtzs s10, s21",       10, 21)
   2527 GEN_TWOVEC_TEST(fcvtzu_s_s,   "fcvtzu s21, s10",       21, 10)
   2528 GEN_TWOVEC_TEST(fcvtzs_2d_2d, "fcvtzs v10.2d, v21.2d", 10, 21)
   2529 GEN_TWOVEC_TEST(fcvtzu_2d_2d, "fcvtzu v10.2d, v21.2d", 10, 21)
   2530 GEN_TWOVEC_TEST(fcvtzs_4s_4s, "fcvtzs v10.4s, v21.4s", 10, 21)
   2531 GEN_TWOVEC_TEST(fcvtzu_4s_4s, "fcvtzu v10.4s, v21.4s", 10, 21)
   2532 GEN_TWOVEC_TEST(fcvtzs_2s_2s, "fcvtzs v10.2s, v21.2s", 10, 21)
   2533 GEN_TWOVEC_TEST(fcvtzu_2s_2s, "fcvtzu v10.2s, v21.2s", 10, 21)
   2534 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s, "fcvtzs w21, s10", 21, 10)
   2535 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s, "fcvtzu w21, s10", 21, 10)
   2536 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s, "fcvtzs x21, s10", 21, 10)
   2537 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s, "fcvtzu x21, s10", 21, 10)
   2538 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d, "fcvtzs w21, d10", 21, 10)
   2539 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d, "fcvtzu w21, d10", 21, 10)
   2540 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d, "fcvtzs x21, d10", 21, 10)
   2541 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d, "fcvtzu x21, d10", 21, 10)
   2542 
   /* FCVTZS / FCVTZU with a fractional-bits immediate (#fbits), i.e. the
      FP to fixed-point forms.  Three #fbits values are used per form:
      1, 32 and 64 where a 64-bit integer is produced (d, 2d, x
      destinations) and 1, 16 and 32 where a 32-bit integer is produced
      (s, 4s, 2s, w destinations).  SIMD forms use registers 10 <- 21;
      general-purpose forms write w21/x21 from s10/d10 with arguments
      (integer reg, vector reg). */
   2543 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits1,    "fcvtzs d10, d21, #1",   10, 21)
   2544 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits32,   "fcvtzs d10, d21, #32",  10, 21)
   2545 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits64,   "fcvtzs d10, d21, #64",  10, 21)
   2546 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits1,    "fcvtzu d10, d21, #1",   10, 21)
   2547 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits32,   "fcvtzu d10, d21, #32",  10, 21)
   2548 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits64,   "fcvtzu d10, d21, #64",  10, 21)
   2549 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits1,    "fcvtzs s10, s21, #1",   10, 21)
   2550 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits16,   "fcvtzs s10, s21, #16",  10, 21)
   2551 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits32,   "fcvtzs s10, s21, #32",  10, 21)
   2552 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits1,    "fcvtzu s10, s21, #1",   10, 21)
   2553 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits16,   "fcvtzu s10, s21, #16",  10, 21)
   2554 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits32,   "fcvtzu s10, s21, #32",  10, 21)
   2555 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits1,  "fcvtzs v10.2d, v21.2d, #1",  10, 21)
   2556 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits32, "fcvtzs v10.2d, v21.2d, #32", 10, 21)
   2557 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits64, "fcvtzs v10.2d, v21.2d, #64", 10, 21)
   2558 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits1,  "fcvtzu v10.2d, v21.2d, #1",  10, 21)
   2559 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits32, "fcvtzu v10.2d, v21.2d, #32", 10, 21)
   2560 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits64, "fcvtzu v10.2d, v21.2d, #64", 10, 21)
   2561 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits1,  "fcvtzs v10.4s, v21.4s, #1",  10, 21)
   2562 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits16, "fcvtzs v10.4s, v21.4s, #16", 10, 21)
   2563 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits32, "fcvtzs v10.4s, v21.4s, #32", 10, 21)
   2564 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits1,  "fcvtzu v10.4s, v21.4s, #1",  10, 21)
   2565 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits16, "fcvtzu v10.4s, v21.4s, #16", 10, 21)
   2566 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits32, "fcvtzu v10.4s, v21.4s, #32", 10, 21)
   2567 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits1,  "fcvtzs v10.2s, v21.2s, #1",  10, 21)
   2568 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits16, "fcvtzs v10.2s, v21.2s, #16", 10, 21)
   2569 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits32, "fcvtzs v10.2s, v21.2s, #32", 10, 21)
   2570 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits1,  "fcvtzu v10.2s, v21.2s, #1",  10, 21)
   2571 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits16, "fcvtzu v10.2s, v21.2s, #16", 10, 21)
   2572 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits32, "fcvtzu v10.2s, v21.2s, #32", 10, 21)
   2573 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits1,  "fcvtzs w21, s10, #1",  21, 10)
   2574 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits16, "fcvtzs w21, s10, #16", 21, 10)
   2575 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits32, "fcvtzs w21, s10, #32", 21, 10)
   2576 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits1,  "fcvtzu w21, s10, #1",  21, 10)
   2577 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits16, "fcvtzu w21, s10, #16", 21, 10)
   2578 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits32, "fcvtzu w21, s10, #32", 21, 10)
   2579 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits1,  "fcvtzs x21, s10, #1",  21, 10)
   2580 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits32, "fcvtzs x21, s10, #32", 21, 10)
   2581 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits64, "fcvtzs x21, s10, #64", 21, 10)
   2582 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits1,  "fcvtzu x21, s10, #1",  21, 10)
   2583 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits32, "fcvtzu x21, s10, #32", 21, 10)
   2584 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits64, "fcvtzu x21, s10, #64", 21, 10)
   2585 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits1,  "fcvtzs w21, d10, #1",  21, 10)
   2586 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits16, "fcvtzs w21, d10, #16", 21, 10)
   2587 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits32, "fcvtzs w21, d10, #32", 21, 10)
   2588 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits1,  "fcvtzu w21, d10, #1",  21, 10)
   2589 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits16, "fcvtzu w21, d10, #16", 21, 10)
   2590 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits32, "fcvtzu w21, d10, #32", 21, 10)
   2591 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits1,  "fcvtzs x21, d10, #1",  21, 10)
   2592 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits32, "fcvtzs x21, d10, #32", 21, 10)
   2593 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits64, "fcvtzs x21, d10, #64", 21, 10)
   2594 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits1,  "fcvtzu x21, d10, #1",  21, 10)
   2595 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits32, "fcvtzu x21, d10, #32", 21, 10)
   2596 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits64, "fcvtzu x21, d10, #64", 21, 10)
   2597 
   /* FCVTXN / FCVTXN2 (double -> single narrowing): the scalar s <- d form
      and the vector 2s <- 2d and 4s <- 2d forms, register 21 -> register
      10.  The FCVTXN2 test name omits the "2" and is distinguished by its
      4s destination. */
   2598 GEN_TWOVEC_TEST(fcvtxn_s_d,   "fcvtxn s10, d21", 10, 21)
   2599 GEN_TWOVEC_TEST(fcvtxn_2s_2d, "fcvtxn  v10.2s, v21.2d", 10, 21)
   2600 GEN_TWOVEC_TEST(fcvtxn_4s_2d, "fcvtxn2 v10.4s, v21.2d", 10, 21)
   2601 
   /* SCVTF / UCVTF within the SIMD/FP register file, with a
      fractional-bits immediate (fixed-point to FP).  #fbits values are
      1, 32, 64 for 64-bit lanes (d, 2d) and 1, 16, 32 for 32-bit lanes
      (s, 4s, 2s).  In the scalar forms the unsigned tests swap the
      register roles (21 <- 10) relative to the signed ones (10 <- 21);
      the vector forms all use v10 <- v21. */
   2602 GEN_TWOVEC_TEST(scvtf_d_d_fbits1,    "scvtf d10, d21      , #1",  10, 21)
   2603 GEN_TWOVEC_TEST(scvtf_d_d_fbits32,   "scvtf d10, d21      , #32", 10, 21)
   2604 GEN_TWOVEC_TEST(scvtf_d_d_fbits64,   "scvtf d10, d21      , #64", 10, 21)
   2605 GEN_TWOVEC_TEST(ucvtf_d_d_fbits1,    "ucvtf d21, d10      , #1",  21, 10)
   2606 GEN_TWOVEC_TEST(ucvtf_d_d_fbits32,   "ucvtf d21, d10      , #32", 21, 10)
   2607 GEN_TWOVEC_TEST(ucvtf_d_d_fbits64,   "ucvtf d21, d10      , #64", 21, 10)
   2608 GEN_TWOVEC_TEST(scvtf_s_s_fbits1,    "scvtf s10, s21      , #1",  10, 21)
   2609 GEN_TWOVEC_TEST(scvtf_s_s_fbits16,   "scvtf s10, s21      , #16", 10, 21)
   2610 GEN_TWOVEC_TEST(scvtf_s_s_fbits32,   "scvtf s10, s21      , #32", 10, 21)
   2611 GEN_TWOVEC_TEST(ucvtf_s_s_fbits1,    "ucvtf s21, s10      , #1",  21, 10)
   2612 GEN_TWOVEC_TEST(ucvtf_s_s_fbits16,   "ucvtf s21, s10      , #16", 21, 10)
   2613 GEN_TWOVEC_TEST(ucvtf_s_s_fbits32,   "ucvtf s21, s10      , #32", 21, 10)
   2614 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits1,  "scvtf v10.2d, v21.2d, #1",  10, 21)
   2615 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits32, "scvtf v10.2d, v21.2d, #32", 10, 21)
   2616 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits64, "scvtf v10.2d, v21.2d, #64", 10, 21)
   2617 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits1,  "ucvtf v10.2d, v21.2d, #1",  10, 21)
   2618 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits32, "ucvtf v10.2d, v21.2d, #32", 10, 21)
   2619 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits64, "ucvtf v10.2d, v21.2d, #64", 10, 21)
   2620 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits1,  "scvtf v10.4s, v21.4s, #1",  10, 21)
   2621 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits16, "scvtf v10.4s, v21.4s, #16", 10, 21)
   2622 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits32, "scvtf v10.4s, v21.4s, #32", 10, 21)
   2623 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits1,  "ucvtf v10.4s, v21.4s, #1",  10, 21)
   2624 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits16, "ucvtf v10.4s, v21.4s, #16", 10, 21)
   2625 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits32, "ucvtf v10.4s, v21.4s, #32", 10, 21)
   2626 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits1,  "scvtf v10.2s, v21.2s, #1",  10, 21)
   2627 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits16, "scvtf v10.2s, v21.2s, #16", 10, 21)
   2628 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits32, "scvtf v10.2s, v21.2s, #32", 10, 21)
   2629 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits1,  "ucvtf v10.2s, v21.2s, #1",  10, 21)
   2630 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits16, "ucvtf v10.2s, v21.2s, #16", 10, 21)
   2631 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits32, "ucvtf v10.2s, v21.2s, #32", 10, 21)
   2632 
   /* SCVTF / UCVTF within the SIMD/FP register file, without a
      fractional-bits operand: scalar d and s forms, then the vector 2d, 4s
      and 2s arrangements.  The scalar unsigned tests use 21 <- 10; all
      others use 10 <- 21. */
   2633 GEN_TWOVEC_TEST(scvtf_d_d,   "scvtf d10, d21",       10, 21)
   2634 GEN_TWOVEC_TEST(ucvtf_d_d,   "ucvtf d21, d10",       21, 10)
   2635 GEN_TWOVEC_TEST(scvtf_s_s,   "scvtf s10, s21",       10, 21)
   2636 GEN_TWOVEC_TEST(ucvtf_s_s,   "ucvtf s21, s10",       21, 10)
   2637 GEN_TWOVEC_TEST(scvtf_2d_2d, "scvtf v10.2d, v21.2d", 10, 21)
   2638 GEN_TWOVEC_TEST(ucvtf_2d_2d, "ucvtf v10.2d, v21.2d", 10, 21)
   2639 GEN_TWOVEC_TEST(scvtf_4s_4s, "scvtf v10.4s, v21.4s", 10, 21)
   2640 GEN_TWOVEC_TEST(ucvtf_4s_4s, "ucvtf v10.4s, v21.4s", 10, 21)
   2641 GEN_TWOVEC_TEST(scvtf_2s_2s, "scvtf v10.2s, v21.2s", 10, 21)
   2642 GEN_TWOVEC_TEST(ucvtf_2s_2s, "ucvtf v10.2s, v21.2s", 10, 21)
   2643 
   /* SCVTF / UCVTF from a general-purpose register (w15/x15) into s7/d7,
      with a fractional-bits immediate.  #fbits values are 1, 16, 32 for a
      w source and 1, 32, 64 for an x source.  Numeric arguments are
      (integer reg, vector reg) = (15, 7). */
   2644 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits1,  "scvtf s7, w15, #1",  15, 7)
   2645 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits16, "scvtf s7, w15, #16", 15, 7)
   2646 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits32, "scvtf s7, w15, #32", 15, 7)
   2647 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits1,  "scvtf d7, w15, #1",  15, 7)
   2648 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits16, "scvtf d7, w15, #16", 15, 7)
   2649 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits32, "scvtf d7, w15, #32", 15, 7)
   2650 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits1,  "scvtf s7, x15, #1",  15, 7)
   2651 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits32, "scvtf s7, x15, #32", 15, 7)
   2652 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits64, "scvtf s7, x15, #64", 15, 7)
   2653 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits1,  "scvtf d7, x15, #1",  15, 7)
   2654 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits32, "scvtf d7, x15, #32", 15, 7)
   2655 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits64, "scvtf d7, x15, #64", 15, 7)
   2656 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits1,  "ucvtf s7, w15, #1",  15, 7)
   2657 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits16, "ucvtf s7, w15, #16", 15, 7)
   2658 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits32, "ucvtf s7, w15, #32", 15, 7)
   2659 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits1,  "ucvtf d7, w15, #1",  15, 7)
   2660 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits16, "ucvtf d7, w15, #16", 15, 7)
   2661 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits32, "ucvtf d7, w15, #32", 15, 7)
   2662 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits1,  "ucvtf s7, x15, #1",  15, 7)
   2663 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits32, "ucvtf s7, x15, #32", 15, 7)
   2664 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits64, "ucvtf s7, x15, #64", 15, 7)
   2665 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits1,  "ucvtf d7, x15, #1",  15, 7)
   2666 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits32, "ucvtf d7, x15, #32", 15, 7)
   2667 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits64, "ucvtf d7, x15, #64", 15, 7)
   2668 
   /* SCVTF / UCVTF from a general-purpose register (w15/x15) into s7/d7,
      without a fractional-bits operand: all four (s|d) x (w|x)
      combinations for each mnemonic.  Numeric arguments are
      (integer reg, vector reg) = (15, 7). */
   2669 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w, "scvtf s7, w15", 15, 7)
   2670 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w, "scvtf d7, w15", 15, 7)
   2671 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x, "scvtf s7, x15", 15, 7)
   2672 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x, "scvtf d7, x15", 15, 7)
   2673 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w, "ucvtf s7, w15", 15, 7)
   2674 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w, "ucvtf d7, w15", 15, 7)
   2675 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x, "ucvtf s7, x15", 15, 7)
   2676 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x, "ucvtf d7, x15", 15, 7)
   2677 
   2678 // ======================== INT ========================
   2679 
   /* Scalar integer ABS and NEG on the d register view (d23 -> d22). */
   2680 GEN_TWOVEC_TEST(abs_d_d,  "abs d22, d23",   22, 23)
   2681 GEN_TWOVEC_TEST(neg_d_d,  "neg d22, d23",   22, 23)
   2682 
   /* Vector ABS and NEG for every arrangement: 2d, 4s, 2s, 8h, 4h, 16b, 8b.
      GEN_UNARY_TEST takes (mnemonic, arrangement, arrangement); its
      definition, including which registers it uses, is outside this
      excerpt.  Presumably the two suffixes are destination then source --
      confirm against the macro. */
   2683 GEN_UNARY_TEST(abs, 2d, 2d)
   2684 GEN_UNARY_TEST(abs, 4s, 4s)
   2685 GEN_UNARY_TEST(abs, 2s, 2s)
   2686 GEN_UNARY_TEST(abs, 8h, 8h)
   2687 GEN_UNARY_TEST(abs, 4h, 4h)
   2688 GEN_UNARY_TEST(abs, 16b, 16b)
   2689 GEN_UNARY_TEST(abs, 8b, 8b)
   2690 GEN_UNARY_TEST(neg, 2d, 2d)
   2691 GEN_UNARY_TEST(neg, 4s, 4s)
   2692 GEN_UNARY_TEST(neg, 2s, 2s)
   2693 GEN_UNARY_TEST(neg, 8h, 8h)
   2694 GEN_UNARY_TEST(neg, 4h, 4h)
   2695 GEN_UNARY_TEST(neg, 16b, 16b)
   2696 GEN_UNARY_TEST(neg, 8b,  8b)
   2697 
   /* Scalar integer ADD and SUB on the d register view (d21 <- d22, d23). */
   2698 GEN_THREEVEC_TEST(add_d_d_d, "add d21, d22, d23", 21, 22, 23)
   2699 GEN_THREEVEC_TEST(sub_d_d_d, "sub d21, d22, d23", 21, 22, 23)
   2700 
   /* Vector ADD and SUB for every arrangement: 2d, 4s, 2s, 8h, 4h, 16b, 8b.
      GEN_BINARY_TEST takes (mnemonic, arrangement x3); its definition,
      including which registers it uses, is outside this excerpt. */
   2701 GEN_BINARY_TEST(add, 2d, 2d, 2d)
   2702 GEN_BINARY_TEST(add, 4s, 4s, 4s)
   2703 GEN_BINARY_TEST(add, 2s, 2s, 2s)
   2704 GEN_BINARY_TEST(add, 8h, 8h, 8h)
   2705 GEN_BINARY_TEST(add, 4h, 4h, 4h)
   2706 GEN_BINARY_TEST(add, 16b, 16b, 16b)
   2707 GEN_BINARY_TEST(add, 8b, 8b, 8b)
   2708 GEN_BINARY_TEST(sub, 2d, 2d, 2d)
   2709 GEN_BINARY_TEST(sub, 4s, 4s, 4s)
   2710 GEN_BINARY_TEST(sub, 2s, 2s, 2s)
   2711 GEN_BINARY_TEST(sub, 8h, 8h, 8h)
   2712 GEN_BINARY_TEST(sub, 4h, 4h, 4h)
   2713 GEN_BINARY_TEST(sub, 16b, 16b, 16b)
   2714 GEN_BINARY_TEST(sub, 8b, 8b, 8b)
   2715 
   /* Narrowing high-half ADDHN, SUBHN, RADDHN and RSUBHN, plus their "2"
      variants.  The first arrangement has elements half the width of the
      other two (2d -> 2s/4s, 4s -> 4h/8h, 8h -> 8b/16b); the non-"2"
      mnemonics use the 64-bit arrangement (2s, 4h, 8b) and the "2"
      mnemonics use the 128-bit one (4s, 8h, 16b). */
   2716 GEN_BINARY_TEST(addhn,   2s, 2d, 2d)
   2717 GEN_BINARY_TEST(addhn2,  4s, 2d, 2d)
   2718 GEN_BINARY_TEST(addhn,   4h, 4s, 4s)
   2719 GEN_BINARY_TEST(addhn2,  8h, 4s, 4s)
   2720 GEN_BINARY_TEST(addhn,   8b, 8h, 8h)
   2721 GEN_BINARY_TEST(addhn2,  16b, 8h, 8h)
   2722 GEN_BINARY_TEST(subhn,   2s, 2d, 2d)
   2723 GEN_BINARY_TEST(subhn2,  4s, 2d, 2d)
   2724 GEN_BINARY_TEST(subhn,   4h, 4s, 4s)
   2725 GEN_BINARY_TEST(subhn2,  8h, 4s, 4s)
   2726 GEN_BINARY_TEST(subhn,   8b, 8h, 8h)
   2727 GEN_BINARY_TEST(subhn2,  16b, 8h, 8h)
   2728 GEN_BINARY_TEST(raddhn,  2s, 2d, 2d)
   2729 GEN_BINARY_TEST(raddhn2, 4s, 2d, 2d)
   2730 GEN_BINARY_TEST(raddhn,  4h, 4s, 4s)
   2731 GEN_BINARY_TEST(raddhn2, 8h, 4s, 4s)
   2732 GEN_BINARY_TEST(raddhn,  8b, 8h, 8h)
   2733 GEN_BINARY_TEST(raddhn2, 16b, 8h, 8h)
   2734 GEN_BINARY_TEST(rsubhn,  2s, 2d, 2d)
   2735 GEN_BINARY_TEST(rsubhn2, 4s, 2d, 2d)
   2736 GEN_BINARY_TEST(rsubhn,  4h, 4s, 4s)
   2737 GEN_BINARY_TEST(rsubhn2, 8h, 4s, 4s)
   2738 GEN_BINARY_TEST(rsubhn,  8b, 8h, 8h)
   2739 GEN_BINARY_TEST(rsubhn2, 16b, 8h, 8h)
   2740 
   /* Scalar-destination ADDP: source v23.2d, result written to d22. */
   2741 GEN_TWOVEC_TEST(addp_d_2d,  "addp d22, v23.2d",   22, 23)
   2742 
   /* Vector ADDP for every arrangement: 2d, 4s, 2s, 8h, 4h, 16b, 8b. */
   2743 GEN_BINARY_TEST(addp, 2d, 2d, 2d)
   2744 GEN_BINARY_TEST(addp, 4s, 4s, 4s)
   2745 GEN_BINARY_TEST(addp, 2s, 2s, 2s)
   2746 GEN_BINARY_TEST(addp, 8h, 8h, 8h)
   2747 GEN_BINARY_TEST(addp, 4h, 4h, 4h)
   2748 GEN_BINARY_TEST(addp, 16b, 16b, 16b)
   2749 GEN_BINARY_TEST(addp, 8b, 8b, 8b)
   2750 
   /* ADDV: source v23 in the 4s, 8h, 4h, 16b and 8b arrangements, with the
      result written to the matching scalar view (s22, h22 or b22). */
   2751 GEN_TWOVEC_TEST(addv_s_4s,  "addv s22, v23.4s",  22, 23)
   2752 GEN_TWOVEC_TEST(addv_h_8h,  "addv h22, v23.8h",  22, 23)
   2753 GEN_TWOVEC_TEST(addv_h_4h,  "addv h22, v23.4h",  22, 23)
   2754 GEN_TWOVEC_TEST(addv_b_16b, "addv b22, v23.16b", 22, 23)
   2755 GEN_TWOVEC_TEST(addv_b_8b,  "addv b22, v23.8b",  22, 23)
   2756 
   /* Bitwise AND / BIC / ORR / ORN, register forms.  Only the byte
      arrangements (16b and 8b) are instantiated. */
   2757 GEN_BINARY_TEST(and, 16b, 16b, 16b)
   2758 GEN_BINARY_TEST(and, 8b, 8b, 8b)
   2759 GEN_BINARY_TEST(bic, 16b, 16b, 16b)
   2760 GEN_BINARY_TEST(bic, 8b, 8b, 8b)
   2761 GEN_BINARY_TEST(orr, 16b, 16b, 16b)
   2762 GEN_BINARY_TEST(orr, 8b, 8b, 8b)
   2763 GEN_BINARY_TEST(orn, 16b, 16b, 16b)
   2764 GEN_BINARY_TEST(orn, 8b, 8b, 8b)
   2765 
   /* ORR / BIC (vector, immediate).  Each instruction names only v22; the
      second register number (23) is passed to the two-vector harness but
      does not appear in the instruction, which is what the "overkill" remark
      below refers to.  16-bit lanes are tested with LSL #0/#8, 32-bit lanes
      with LSL #0/#8/#16/#24. */
   2766 /* overkill -- don't need two vecs, only one */
   2767 GEN_TWOVEC_TEST(orr_8h_0x5A_lsl0, "orr v22.8h, #0x5A, LSL #0", 22, 23)
   2768 GEN_TWOVEC_TEST(orr_8h_0xA5_lsl8, "orr v22.8h, #0xA5, LSL #8", 22, 23)
   2769 GEN_TWOVEC_TEST(orr_4h_0x5A_lsl0, "orr v22.4h, #0x5A, LSL #0", 22, 23)
   2770 GEN_TWOVEC_TEST(orr_4h_0xA5_lsl8, "orr v22.4h, #0xA5, LSL #8", 22, 23)
   2771 GEN_TWOVEC_TEST(orr_4s_0x5A_lsl0,  "orr v22.4s, #0x5A, LSL #0",  22, 23)
   2772 GEN_TWOVEC_TEST(orr_4s_0x6B_lsl8,  "orr v22.4s, #0x6B, LSL #8",  22, 23)
   2773 GEN_TWOVEC_TEST(orr_4s_0x49_lsl16, "orr v22.4s, #0x49, LSL #16", 22, 23)
   2774 GEN_TWOVEC_TEST(orr_4s_0x3D_lsl24, "orr v22.4s, #0x3D, LSL #24", 22, 23)
   2775 GEN_TWOVEC_TEST(orr_2s_0x5A_lsl0,  "orr v22.2s, #0x5A, LSL #0",  22, 23)
   2776 GEN_TWOVEC_TEST(orr_2s_0x6B_lsl8,  "orr v22.2s, #0x6B, LSL #8",  22, 23)
   2777 GEN_TWOVEC_TEST(orr_2s_0x49_lsl16, "orr v22.2s, #0x49, LSL #16", 22, 23)
   2778 GEN_TWOVEC_TEST(orr_2s_0x3D_lsl24, "orr v22.2s, #0x3D, LSL #24", 22, 23)
   2779 GEN_TWOVEC_TEST(bic_8h_0x5A_lsl0, "bic v22.8h, #0x5A, LSL #0", 22, 23)
   2780 GEN_TWOVEC_TEST(bic_8h_0xA5_lsl8, "bic v22.8h, #0xA5, LSL #8", 22, 23)
   2781 GEN_TWOVEC_TEST(bic_4h_0x5A_lsl0, "bic v22.4h, #0x5A, LSL #0", 22, 23)
   2782 GEN_TWOVEC_TEST(bic_4h_0xA5_lsl8, "bic v22.4h, #0xA5, LSL #8", 22, 23)
   2783 GEN_TWOVEC_TEST(bic_4s_0x5A_lsl0,  "bic v22.4s, #0x5A, LSL #0",  22, 23)
   2784 GEN_TWOVEC_TEST(bic_4s_0x6B_lsl8,  "bic v22.4s, #0x6B, LSL #8",  22, 23)
   2785 GEN_TWOVEC_TEST(bic_4s_0x49_lsl16, "bic v22.4s, #0x49, LSL #16", 22, 23)
   2786 GEN_TWOVEC_TEST(bic_4s_0x3D_lsl24, "bic v22.4s, #0x3D, LSL #24", 22, 23)
   2787 GEN_TWOVEC_TEST(bic_2s_0x5A_lsl0,  "bic v22.2s, #0x5A, LSL #0",  22, 23)
   2788 GEN_TWOVEC_TEST(bic_2s_0x6B_lsl8,  "bic v22.2s, #0x6B, LSL #8",  22, 23)
   2789 GEN_TWOVEC_TEST(bic_2s_0x49_lsl16, "bic v22.2s, #0x49, LSL #16", 22, 23)
   2790 GEN_TWOVEC_TEST(bic_2s_0x3D_lsl24, "bic v22.2s, #0x3D, LSL #24", 22, 23)
   2791 
   /* BIF / BIT / BSL (bitwise insert/select) and EOR, byte arrangements
      only.  BIF, BIT and BSL also read their destination register. */
   2792 GEN_BINARY_TEST(bif, 16b, 16b, 16b)
   2793 GEN_BINARY_TEST(bif, 8b, 8b, 8b)
   2794 GEN_BINARY_TEST(bit, 16b, 16b, 16b)
   2795 GEN_BINARY_TEST(bit, 8b, 8b, 8b)
   2796 GEN_BINARY_TEST(bsl, 16b, 16b, 16b)
   2797 GEN_BINARY_TEST(bsl, 8b, 8b, 8b)
   2798 GEN_BINARY_TEST(eor, 16b, 16b, 16b)
   2799 GEN_BINARY_TEST(eor, 8b, 8b, 8b)
   2800 
   /* CLS (count leading sign bits) and CLZ (count leading zero bits), one
      test per arrangement.  GEN_UNARY_TEST is defined outside this chunk;
      its arguments appear to be the mnemonic followed by the destination
      and source arrangements. */
   2801 GEN_UNARY_TEST(cls, 4s, 4s)
   2802 GEN_UNARY_TEST(cls, 2s, 2s)
   2803 GEN_UNARY_TEST(cls, 8h, 8h)
   2804 GEN_UNARY_TEST(cls, 4h, 4h)
   2805 GEN_UNARY_TEST(cls, 16b, 16b)
   2806 GEN_UNARY_TEST(cls, 8b, 8b)
   2807 GEN_UNARY_TEST(clz, 4s, 4s)
   2808 GEN_UNARY_TEST(clz, 2s, 2s)
   2809 GEN_UNARY_TEST(clz, 8h, 8h)
   2810 GEN_UNARY_TEST(clz, 4h, 4h)
   2811 GEN_UNARY_TEST(clz, 16b, 16b)
   2812 GEN_UNARY_TEST(clz, 8b, 8b)
   2813 
   /* Scalar (64-bit, D-register) integer compares: CMEQ, CMGE, CMGT
      (signed), CMHI, CMHS (unsigned) and CMTST.  All use d2 as the
      destination and d11, d29 as sources. */
   2814 GEN_THREEVEC_TEST(cmeq_d_d_d,  "cmeq  d2, d11, d29", 2, 11, 29)
   2815 GEN_THREEVEC_TEST(cmge_d_d_d,  "cmge  d2, d11, d29", 2, 11, 29)
   2816 GEN_THREEVEC_TEST(cmgt_d_d_d,  "cmgt  d2, d11, d29", 2, 11, 29)
   2817 GEN_THREEVEC_TEST(cmhi_d_d_d,  "cmhi  d2, d11, d29", 2, 11, 29)
   2818 GEN_THREEVEC_TEST(cmhs_d_d_d,  "cmhs  d2, d11, d29", 2, 11, 29)
   2819 GEN_THREEVEC_TEST(cmtst_d_d_d, "cmtst d2, d11, d29", 2, 11, 29)
   2820 
   /* Vector register-register integer compares (CMEQ, CMGE, CMGT, CMHI,
      CMHS, CMTST), each instantiated for all seven arrangements:
      2d, 4s, 2s, 8h, 4h, 16b, 8b. */
   2821 GEN_BINARY_TEST(cmeq, 2d, 2d, 2d)
   2822 GEN_BINARY_TEST(cmeq, 4s, 4s, 4s)
   2823 GEN_BINARY_TEST(cmeq, 2s, 2s, 2s)
   2824 GEN_BINARY_TEST(cmeq, 8h, 8h, 8h)
   2825 GEN_BINARY_TEST(cmeq, 4h, 4h, 4h)
   2826 GEN_BINARY_TEST(cmeq, 16b, 16b, 16b)
   2827 GEN_BINARY_TEST(cmeq, 8b, 8b, 8b)
   2828 GEN_BINARY_TEST(cmge, 2d, 2d, 2d)
   2829 GEN_BINARY_TEST(cmge, 4s, 4s, 4s)
   2830 GEN_BINARY_TEST(cmge, 2s, 2s, 2s)
   2831 GEN_BINARY_TEST(cmge, 8h, 8h, 8h)
   2832 GEN_BINARY_TEST(cmge, 4h, 4h, 4h)
   2833 GEN_BINARY_TEST(cmge, 16b, 16b, 16b)
   2834 GEN_BINARY_TEST(cmge, 8b, 8b, 8b)
   2835 GEN_BINARY_TEST(cmgt, 2d, 2d, 2d)
   2836 GEN_BINARY_TEST(cmgt, 4s, 4s, 4s)
   2837 GEN_BINARY_TEST(cmgt, 2s, 2s, 2s)
   2838 GEN_BINARY_TEST(cmgt, 8h, 8h, 8h)
   2839 GEN_BINARY_TEST(cmgt, 4h, 4h, 4h)
   2840 GEN_BINARY_TEST(cmgt, 16b, 16b, 16b)
   2841 GEN_BINARY_TEST(cmgt, 8b, 8b, 8b)
   2842 GEN_BINARY_TEST(cmhi, 2d, 2d, 2d)
   2843 GEN_BINARY_TEST(cmhi, 4s, 4s, 4s)
   2844 GEN_BINARY_TEST(cmhi, 2s, 2s, 2s)
   2845 GEN_BINARY_TEST(cmhi, 8h, 8h, 8h)
   2846 GEN_BINARY_TEST(cmhi, 4h, 4h, 4h)
   2847 GEN_BINARY_TEST(cmhi, 16b, 16b, 16b)
   2848 GEN_BINARY_TEST(cmhi, 8b, 8b, 8b)
   2849 GEN_BINARY_TEST(cmhs, 2d, 2d, 2d)
   2850 GEN_BINARY_TEST(cmhs, 4s, 4s, 4s)
   2851 GEN_BINARY_TEST(cmhs, 2s, 2s, 2s)
   2852 GEN_BINARY_TEST(cmhs, 8h, 8h, 8h)
   2853 GEN_BINARY_TEST(cmhs, 4h, 4h, 4h)
   2854 GEN_BINARY_TEST(cmhs, 16b, 16b, 16b)
   2855 GEN_BINARY_TEST(cmhs, 8b, 8b, 8b)
   2856 GEN_BINARY_TEST(cmtst, 2d, 2d, 2d)
   2857 GEN_BINARY_TEST(cmtst, 4s, 4s, 4s)
   2858 GEN_BINARY_TEST(cmtst, 2s, 2s, 2s)
   2859 GEN_BINARY_TEST(cmtst, 8h, 8h, 8h)
   2860 GEN_BINARY_TEST(cmtst, 4h, 4h, 4h)
   2861 GEN_BINARY_TEST(cmtst, 16b, 16b, 16b)
   2862 GEN_BINARY_TEST(cmtst, 8b, 8b, 8b)
   2863 
   /* Scalar (D-register) compares against the immediate #0:
      CMEQ, CMGE, CMGT, CMLE, CMLT.  Destination d2, source d11. */
   2864 GEN_TWOVEC_TEST(cmeq_zero_d_d,  "cmeq  d2, d11, #0", 2, 11)
   2865 GEN_TWOVEC_TEST(cmge_zero_d_d,  "cmge  d2, d11, #0", 2, 11)
   2866 GEN_TWOVEC_TEST(cmgt_zero_d_d,  "cmgt  d2, d11, #0", 2, 11)
   2867 GEN_TWOVEC_TEST(cmle_zero_d_d,  "cmle  d2, d11, #0", 2, 11)
   2868 GEN_TWOVEC_TEST(cmlt_zero_d_d,  "cmlt  d2, d11, #0", 2, 11)
   2869 
   /* Vector compares against the immediate #0 (CMEQ, CMGE, CMGT, CMLE,
      CMLT), each for all seven arrangements.  Destination v5, source v22. */
   2870 GEN_TWOVEC_TEST(cmeq_zero_2d_2d,   "cmeq v5.2d,  v22.2d,  #0", 5, 22)
   2871 GEN_TWOVEC_TEST(cmeq_zero_4s_4s,   "cmeq v5.4s,  v22.4s,  #0", 5, 22)
   2872 GEN_TWOVEC_TEST(cmeq_zero_2s_2s,   "cmeq v5.2s,  v22.2s,  #0", 5, 22)
   2873 GEN_TWOVEC_TEST(cmeq_zero_8h_8h,   "cmeq v5.8h,  v22.8h,  #0", 5, 22)
   2874 GEN_TWOVEC_TEST(cmeq_zero_4h_4h,   "cmeq v5.4h,  v22.4h,  #0", 5, 22)
   2875 GEN_TWOVEC_TEST(cmeq_zero_16b_16b, "cmeq v5.16b, v22.16b, #0", 5, 22)
   2876 GEN_TWOVEC_TEST(cmeq_zero_8b_8b,   "cmeq v5.8b,  v22.8b,  #0", 5, 22)
   2877 GEN_TWOVEC_TEST(cmge_zero_2d_2d,   "cmge v5.2d,  v22.2d,  #0", 5, 22)
   2878 GEN_TWOVEC_TEST(cmge_zero_4s_4s,   "cmge v5.4s,  v22.4s,  #0", 5, 22)
   2879 GEN_TWOVEC_TEST(cmge_zero_2s_2s,   "cmge v5.2s,  v22.2s,  #0", 5, 22)
   2880 GEN_TWOVEC_TEST(cmge_zero_8h_8h,   "cmge v5.8h,  v22.8h,  #0", 5, 22)
   2881 GEN_TWOVEC_TEST(cmge_zero_4h_4h,   "cmge v5.4h,  v22.4h,  #0", 5, 22)
   2882 GEN_TWOVEC_TEST(cmge_zero_16b_16b, "cmge v5.16b, v22.16b, #0", 5, 22)
   2883 GEN_TWOVEC_TEST(cmge_zero_8b_8b,   "cmge v5.8b,  v22.8b,  #0", 5, 22)
   2884 GEN_TWOVEC_TEST(cmgt_zero_2d_2d,   "cmgt v5.2d,  v22.2d,  #0", 5, 22)
   2885 GEN_TWOVEC_TEST(cmgt_zero_4s_4s,   "cmgt v5.4s,  v22.4s,  #0", 5, 22)
   2886 GEN_TWOVEC_TEST(cmgt_zero_2s_2s,   "cmgt v5.2s,  v22.2s,  #0", 5, 22)
   2887 GEN_TWOVEC_TEST(cmgt_zero_8h_8h,   "cmgt v5.8h,  v22.8h,  #0", 5, 22)
   2888 GEN_TWOVEC_TEST(cmgt_zero_4h_4h,   "cmgt v5.4h,  v22.4h,  #0", 5, 22)
   2889 GEN_TWOVEC_TEST(cmgt_zero_16b_16b, "cmgt v5.16b, v22.16b, #0", 5, 22)
   2890 GEN_TWOVEC_TEST(cmgt_zero_8b_8b,   "cmgt v5.8b,  v22.8b,  #0", 5, 22)
   2891 GEN_TWOVEC_TEST(cmle_zero_2d_2d,   "cmle v5.2d,  v22.2d,  #0", 5, 22)
   2892 GEN_TWOVEC_TEST(cmle_zero_4s_4s,   "cmle v5.4s,  v22.4s,  #0", 5, 22)
   2893 GEN_TWOVEC_TEST(cmle_zero_2s_2s,   "cmle v5.2s,  v22.2s,  #0", 5, 22)
   2894 GEN_TWOVEC_TEST(cmle_zero_8h_8h,   "cmle v5.8h,  v22.8h,  #0", 5, 22)
   2895 GEN_TWOVEC_TEST(cmle_zero_4h_4h,   "cmle v5.4h,  v22.4h,  #0", 5, 22)
   2896 GEN_TWOVEC_TEST(cmle_zero_16b_16b, "cmle v5.16b, v22.16b, #0", 5, 22)
   2897 GEN_TWOVEC_TEST(cmle_zero_8b_8b,   "cmle v5.8b,  v22.8b,  #0", 5, 22)
   2898 GEN_TWOVEC_TEST(cmlt_zero_2d_2d,   "cmlt v5.2d,  v22.2d,  #0", 5, 22)
   2899 GEN_TWOVEC_TEST(cmlt_zero_4s_4s,   "cmlt v5.4s,  v22.4s,  #0", 5, 22)
   2900 GEN_TWOVEC_TEST(cmlt_zero_2s_2s,   "cmlt v5.2s,  v22.2s,  #0", 5, 22)
   2901 GEN_TWOVEC_TEST(cmlt_zero_8h_8h,   "cmlt v5.8h,  v22.8h,  #0", 5, 22)
   2902 GEN_TWOVEC_TEST(cmlt_zero_4h_4h,   "cmlt v5.4h,  v22.4h,  #0", 5, 22)
   2903 GEN_TWOVEC_TEST(cmlt_zero_16b_16b, "cmlt v5.16b, v22.16b, #0", 5, 22)
   2904 GEN_TWOVEC_TEST(cmlt_zero_8b_8b,   "cmlt v5.8b,  v22.8b,  #0", 5, 22)
   2905 
   /* CNT: per-byte population count, byte arrangements only. */
   2906 GEN_UNARY_TEST(cnt, 16b, 16b)
   2907 GEN_UNARY_TEST(cnt, 8b, 8b)
   2908 
   /* DUP (element), scalar form: copy one lane of v23 into the scalar view
      of register 22.  The test name encodes <size>_<source lane>.
      The destination must be register 22 in every variant, because 22 and 23
      are the register numbers handed to the two-vector harness.  The byte
      variants previously wrote b0 / b13 (the lane index had leaked into the
      destination register name), so v22 was never written and v0 / v13 were
      written instead. */
   2909 GEN_TWOVEC_TEST(dup_d_d0,  "dup d22, v23.d[0]", 22, 23)
   2910 GEN_TWOVEC_TEST(dup_d_d1,  "dup d22, v23.d[1]", 22, 23)
   2911 GEN_TWOVEC_TEST(dup_s_s0,  "dup s22, v23.s[0]", 22, 23)
   2912 GEN_TWOVEC_TEST(dup_s_s3,  "dup s22, v23.s[3]", 22, 23)
   2913 GEN_TWOVEC_TEST(dup_h_h0,  "dup h22, v23.h[0]", 22, 23)
   2914 GEN_TWOVEC_TEST(dup_h_h6,  "dup h22, v23.h[6]", 22, 23)
   2915 GEN_TWOVEC_TEST(dup_b_b0,  "dup b22, v23.b[0]",  22, 23)
   2916 GEN_TWOVEC_TEST(dup_b_b13, "dup b22, v23.b[13]", 22, 23)
   2917 
   /* DUP (element), vector form: broadcast one lane of v17 to every lane of
      v9.  Two source lanes are tested per arrangement; for the 64-bit
      arrangements (2s, 4h, 8b) one of them lies in the upper half of v17
      (s[2], h[5], b[13]). */
   2918 GEN_TWOVEC_TEST(dup_2d_d0,  "dup v9.2d, v17.d[0]", 9, 17)
   2919 GEN_TWOVEC_TEST(dup_2d_d1,  "dup v9.2d, v17.d[1]", 9, 17)
   2920 GEN_TWOVEC_TEST(dup_4s_s0,  "dup v9.4s, v17.s[0]", 9, 17)
   2921 GEN_TWOVEC_TEST(dup_4s_s3,  "dup v9.4s, v17.s[3]", 9, 17)
   2922 GEN_TWOVEC_TEST(dup_2s_s0,  "dup v9.2s, v17.s[0]", 9, 17)
   2923 GEN_TWOVEC_TEST(dup_2s_s2,  "dup v9.2s, v17.s[2]", 9, 17)
   2924 GEN_TWOVEC_TEST(dup_8h_h0,  "dup v9.8h, v17.h[0]", 9, 17)
   2925 GEN_TWOVEC_TEST(dup_8h_h6,  "dup v9.8h, v17.h[6]", 9, 17)
   2926 GEN_TWOVEC_TEST(dup_4h_h1,  "dup v9.4h, v17.h[1]", 9, 17)
   2927 GEN_TWOVEC_TEST(dup_4h_h5,  "dup v9.4h, v17.h[5]", 9, 17)
   2928 GEN_TWOVEC_TEST(dup_16b_b2,  "dup v9.16b, v17.b[2]", 9, 17)
   2929 GEN_TWOVEC_TEST(dup_16b_b12, "dup v9.16b, v17.b[12]", 9, 17)
   2930 GEN_TWOVEC_TEST(dup_8b_b3,  "dup v9.8b, v17.b[3]", 9, 17)
   2931 GEN_TWOVEC_TEST(dup_8b_b13, "dup v9.8b, v17.b[13]", 9, 17)
   2932 
   /* DUP (general): broadcast a general-purpose register to every lane of
      v9.  Each test first moves the low 64 bits of v17 into x10 so the GPR
      holds the harness-supplied vector data, then duplicates x10 / w10.
      NOTE(review): x10 is overwritten inside the instruction string; confirm
      that the GEN_TWOVEC_TEST asm clobber list (not visible here) covers it. */
   2933 GEN_TWOVEC_TEST(dup_2d_x,  "mov x10, v17.d[0];  dup v9.2d,  x10", 9, 17)
   2934 GEN_TWOVEC_TEST(dup_4s_w,  "mov x10, v17.d[0];  dup v9.4s,  w10", 9, 17)
   2935 GEN_TWOVEC_TEST(dup_2s_w,  "mov x10, v17.d[0];  dup v9.2s,  w10", 9, 17)
   2936 GEN_TWOVEC_TEST(dup_8h_w,  "mov x10, v17.d[0];  dup v9.8h,  w10",  9, 17)
   2937 GEN_TWOVEC_TEST(dup_4h_w,  "mov x10, v17.d[0];  dup v9.4h,  w10",  9, 17)
   2938 GEN_TWOVEC_TEST(dup_16b_w, "mov x10, v17.d[0];  dup v9.16b, w10", 9, 17)
   2939 GEN_TWOVEC_TEST(dup_8b_w,  "mov x10, v17.d[0];  dup v9.8b,  w10",  9, 17)
   2940 
   /* EXT: extract a vector from the byte-wise concatenation of v11 and v29
      into v2.  Every byte index is covered: #0..#15 for the 16b form and
      #0..#7 for the 8b form.  The test name carries the index in hex, the
      instruction string in decimal. */
   2941 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x0,
   2942                   "ext  v2.16b, v11.16b, v29.16b, #0", 2, 11, 29)
   2943 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x1,
   2944                   "ext  v2.16b, v11.16b, v29.16b, #1", 2, 11, 29)
   2945 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x2,
   2946                   "ext  v2.16b, v11.16b, v29.16b, #2", 2, 11, 29)
   2947 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x3,
   2948                   "ext  v2.16b, v11.16b, v29.16b, #3", 2, 11, 29)
   2949 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x4,
   2950                   "ext  v2.16b, v11.16b, v29.16b, #4", 2, 11, 29)
   2951 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x5,
   2952                   "ext  v2.16b, v11.16b, v29.16b, #5", 2, 11, 29)
   2953 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x6,
   2954                   "ext  v2.16b, v11.16b, v29.16b, #6", 2, 11, 29)
   2955 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x7,
   2956                   "ext  v2.16b, v11.16b, v29.16b, #7", 2, 11, 29)
   2957 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x8,
   2958                   "ext  v2.16b, v11.16b, v29.16b, #8", 2, 11, 29)
   2959 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x9,
   2960                   "ext  v2.16b, v11.16b, v29.16b, #9", 2, 11, 29)
   2961 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xA,
   2962                   "ext  v2.16b, v11.16b, v29.16b, #10", 2, 11, 29)
   2963 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xB,
   2964                   "ext  v2.16b, v11.16b, v29.16b, #11", 2, 11, 29)
   2965 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xC,
   2966                   "ext  v2.16b, v11.16b, v29.16b, #12", 2, 11, 29)
   2967 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xD,
   2968                   "ext  v2.16b, v11.16b, v29.16b, #13", 2, 11, 29)
   2969 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xE,
   2970                   "ext  v2.16b, v11.16b, v29.16b, #14", 2, 11, 29)
   2971 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xF,
   2972                   "ext  v2.16b, v11.16b, v29.16b, #15", 2, 11, 29)
   2973 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x0,
   2974                   "ext  v2.8b, v11.8b, v29.8b, #0", 2, 11, 29)
   2975 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x1,
   2976                   "ext  v2.8b, v11.8b, v29.8b, #1", 2, 11, 29)
   2977 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x2,
   2978                   "ext  v2.8b, v11.8b, v29.8b, #2", 2, 11, 29)
   2979 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x3,
   2980                   "ext  v2.8b, v11.8b, v29.8b, #3", 2, 11, 29)
   2981 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x4,
   2982                   "ext  v2.8b, v11.8b, v29.8b, #4", 2, 11, 29)
   2983 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x5,
   2984                   "ext  v2.8b, v11.8b, v29.8b, #5", 2, 11, 29)
   2985 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x6,
   2986                   "ext  v2.8b, v11.8b, v29.8b, #6", 2, 11, 29)
   2987 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x7,
   2988                   "ext  v2.8b, v11.8b, v29.8b, #7", 2, 11, 29)
   2989 
   /* INS (element): copy one lane of v24 into one lane of v3, leaving the
      other lanes of v3 untouched.  The test name encodes
      <dest lane>_<source lane>; four lane pairings are tested per element
      size (d, s, h, b). */
   2990 GEN_TWOVEC_TEST(ins_d0_d0, "ins v3.d[0], v24.d[0]", 3, 24)
   2991 GEN_TWOVEC_TEST(ins_d0_d1, "ins v3.d[0], v24.d[1]", 3, 24)
   2992 GEN_TWOVEC_TEST(ins_d1_d0, "ins v3.d[1], v24.d[0]", 3, 24)
   2993 GEN_TWOVEC_TEST(ins_d1_d1, "ins v3.d[1], v24.d[1]", 3, 24)
   2994 GEN_TWOVEC_TEST(ins_s0_s2, "ins v3.s[0], v24.s[2]", 3, 24)
   2995 GEN_TWOVEC_TEST(ins_s3_s0, "ins v3.s[3], v24.s[0]", 3, 24)
   2996 GEN_TWOVEC_TEST(ins_s2_s1, "ins v3.s[2], v24.s[1]", 3, 24)
   2997 GEN_TWOVEC_TEST(ins_s1_s3, "ins v3.s[1], v24.s[3]", 3, 24)
   2998 GEN_TWOVEC_TEST(ins_h0_h6, "ins v3.h[0], v24.h[6]", 3, 24)
   2999 GEN_TWOVEC_TEST(ins_h7_h0, "ins v3.h[7], v24.h[0]", 3, 24)
   3000 GEN_TWOVEC_TEST(ins_h6_h1, "ins v3.h[6], v24.h[1]", 3, 24)
   3001 GEN_TWOVEC_TEST(ins_h1_h7, "ins v3.h[1], v24.h[7]", 3, 24)
   3002 GEN_TWOVEC_TEST(ins_b0_b14, "ins v3.b[0],  v24.b[14]", 3, 24)
   3003 GEN_TWOVEC_TEST(ins_b15_b8, "ins v3.b[15], v24.b[8]",  3, 24)
   3004 GEN_TWOVEC_TEST(ins_b13_b9, "ins v3.b[13], v24.b[9]",  3, 24)
   3005 GEN_TWOVEC_TEST(ins_b5_b12, "ins v3.b[5],  v24.b[12]", 3, 24)
   3006 
   3007 // test_INS_general is a handwritten function
   3008 
   /* MLA / MLS / MUL (by element): multiply every lane of v11 by one
      selected lane of the third register, optionally accumulating into or
      subtracting from v2.  The test name encodes
      <dest arrangement>_<source arrangement>_<element lane>.
      The 32-bit variants take the element from v29.  The 16-bit variants must
      take it from a register numbered <= 15, so register 9 is passed as the
      third harness register and the instruction names v9 to match.
      Previously these strings named v2.h[i], which left v9 unused and made
      the destination double as the multiplier source. */
   3009 GEN_THREEVEC_TEST(mla_4s_4s_s0, "mla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3010 GEN_THREEVEC_TEST(mla_4s_4s_s3, "mla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3011 GEN_THREEVEC_TEST(mla_2s_2s_s0, "mla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3012 GEN_THREEVEC_TEST(mla_2s_2s_s3, "mla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3013 // For the 'h' version of these, Rm can only be <= 15 (!)
   3014 GEN_THREEVEC_TEST(mla_8h_8h_h1, "mla v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3015 GEN_THREEVEC_TEST(mla_8h_8h_h5, "mla v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3016 GEN_THREEVEC_TEST(mla_4h_4h_h2, "mla v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3017 GEN_THREEVEC_TEST(mla_4h_4h_h7, "mla v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3018 GEN_THREEVEC_TEST(mls_4s_4s_s0, "mls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3019 GEN_THREEVEC_TEST(mls_4s_4s_s3, "mls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3020 GEN_THREEVEC_TEST(mls_2s_2s_s0, "mls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3021 GEN_THREEVEC_TEST(mls_2s_2s_s3, "mls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3022 // For the 'h' version of these, Rm can only be <= 15 (!)
   3023 GEN_THREEVEC_TEST(mls_8h_8h_h1, "mls v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3024 GEN_THREEVEC_TEST(mls_8h_8h_h5, "mls v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3025 GEN_THREEVEC_TEST(mls_4h_4h_h2, "mls v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3026 GEN_THREEVEC_TEST(mls_4h_4h_h7, "mls v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3027 GEN_THREEVEC_TEST(mul_4s_4s_s0, "mul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3028 GEN_THREEVEC_TEST(mul_4s_4s_s3, "mul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3029 GEN_THREEVEC_TEST(mul_2s_2s_s0, "mul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3030 GEN_THREEVEC_TEST(mul_2s_2s_s3, "mul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3031 // For the 'h' version of these, Rm can only be <= 15 (!)
   3032 GEN_THREEVEC_TEST(mul_8h_8h_h1, "mul v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3033 GEN_THREEVEC_TEST(mul_8h_8h_h5, "mul v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3034 GEN_THREEVEC_TEST(mul_4h_4h_h2, "mul v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3035 GEN_THREEVEC_TEST(mul_4h_4h_h7, "mul v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3036 
   /* MLA / MLS / MUL (vector): lane-wise multiply, multiply-accumulate and
      multiply-subtract.  Instantiated for 32-, 16- and 8-bit lanes only; no
      2d arrangement is listed. */
   3037 GEN_BINARY_TEST(mla, 4s, 4s, 4s)
   3038 GEN_BINARY_TEST(mla, 2s, 2s, 2s)
   3039 GEN_BINARY_TEST(mla, 8h, 8h, 8h)
   3040 GEN_BINARY_TEST(mla, 4h, 4h, 4h)
   3041 GEN_BINARY_TEST(mla, 16b, 16b, 16b)
   3042 GEN_BINARY_TEST(mla, 8b, 8b, 8b)
   3043 GEN_BINARY_TEST(mls, 4s, 4s, 4s)
   3044 GEN_BINARY_TEST(mls, 2s, 2s, 2s)
   3045 GEN_BINARY_TEST(mls, 8h, 8h, 8h)
   3046 GEN_BINARY_TEST(mls, 4h, 4h, 4h)
   3047 GEN_BINARY_TEST(mls, 16b, 16b, 16b)
   3048 GEN_BINARY_TEST(mls, 8b, 8b, 8b)
   3049 GEN_BINARY_TEST(mul, 4s, 4s, 4s)
   3050 GEN_BINARY_TEST(mul, 2s, 2s, 2s)
   3051 GEN_BINARY_TEST(mul, 8h, 8h, 8h)
   3052 GEN_BINARY_TEST(mul, 4h, 4h, 4h)
   3053 GEN_BINARY_TEST(mul, 16b, 16b, 16b)
   3054 GEN_BINARY_TEST(mul, 8b, 8b, 8b)
   3055 
   /* MOVI with an 8-bit immediate replicated into every byte lane.  Only v22
      is named by the instruction; register 23 is passed to the two-vector
      harness but not used by it (the "overkill" noted below). */
   3056 /* overkill -- don't need two vecs, only one */
   3057 GEN_TWOVEC_TEST(movi_16b_0x9C_lsl0, "movi v22.16b, #0x9C, LSL #0", 22, 23)
   3058 GEN_TWOVEC_TEST(movi_8b_0x8B_lsl0,  "movi v22.8b,  #0x8B, LSL #0", 22, 23)
   3059 
   /* MOVI / MVNI with 16-bit lanes: immediate shifted left by 0 or 8 bits.
      Only v22 is named by the instruction. */
   3060 GEN_TWOVEC_TEST(movi_8h_0x5A_lsl0,  "movi v22.8h,  #0x5A, LSL #0", 22, 23)
   3061 GEN_TWOVEC_TEST(movi_8h_0xA5_lsl8,  "movi v22.8h,  #0xA5, LSL #8", 22, 23)
   3062 GEN_TWOVEC_TEST(movi_4h_0x5A_lsl0,  "movi v22.4h,  #0x5A, LSL #0", 22, 23)
   3063 GEN_TWOVEC_TEST(movi_4h_0xA5_lsl8,  "movi v22.4h,  #0xA5, LSL #8", 22, 23)
   3064 GEN_TWOVEC_TEST(mvni_8h_0x5A_lsl0,  "mvni v22.8h,  #0x5A, LSL #0", 22, 23)
   3065 GEN_TWOVEC_TEST(mvni_8h_0xA5_lsl8,  "mvni v22.8h,  #0xA5, LSL #8", 22, 23)
   3066 GEN_TWOVEC_TEST(mvni_4h_0x5A_lsl0,  "mvni v22.4h,  #0x5A, LSL #0", 22, 23)
   3067 GEN_TWOVEC_TEST(mvni_4h_0xA5_lsl8,  "mvni v22.4h,  #0xA5, LSL #8", 22, 23)
   3068 
   /* MOVI / MVNI with 32-bit lanes: immediate shifted left by 0, 8, 16 or
      24 bits, for both the 4s and 2s arrangements.  Only v22 is named by the
      instruction. */
   3069 GEN_TWOVEC_TEST(movi_4s_0x5A_lsl0,  "movi v22.4s,  #0x5A, LSL #0",  22, 23)
   3070 GEN_TWOVEC_TEST(movi_4s_0x6B_lsl8,  "movi v22.4s,  #0x6B, LSL #8",  22, 23)
   3071 GEN_TWOVEC_TEST(movi_4s_0x49_lsl16, "movi v22.4s,  #0x49, LSL #16", 22, 23)
   3072 GEN_TWOVEC_TEST(movi_4s_0x3D_lsl24, "movi v22.4s,  #0x3D, LSL #24", 22, 23)
   3073 GEN_TWOVEC_TEST(movi_2s_0x5A_lsl0,  "movi v22.2s,  #0x5A, LSL #0",  22, 23)
   3074 GEN_TWOVEC_TEST(movi_2s_0x6B_lsl8,  "movi v22.2s,  #0x6B, LSL #8",  22, 23)
   3075 GEN_TWOVEC_TEST(movi_2s_0x49_lsl16, "movi v22.2s,  #0x49, LSL #16", 22, 23)
   3076 GEN_TWOVEC_TEST(movi_2s_0x3D_lsl24, "movi v22.2s,  #0x3D, LSL #24", 22, 23)
   3077 GEN_TWOVEC_TEST(mvni_4s_0x5A_lsl0,  "mvni v22.4s,  #0x5A, LSL #0",  22, 23)
   3078 GEN_TWOVEC_TEST(mvni_4s_0x6B_lsl8,  "mvni v22.4s,  #0x6B, LSL #8",  22, 23)
   3079 GEN_TWOVEC_TEST(mvni_4s_0x49_lsl16, "mvni v22.4s,  #0x49, LSL #16", 22, 23)
   3080 GEN_TWOVEC_TEST(mvni_4s_0x3D_lsl24, "mvni v22.4s,  #0x3D, LSL #24", 22, 23)
   3081 GEN_TWOVEC_TEST(mvni_2s_0x5A_lsl0,  "mvni v22.2s,  #0x5A, LSL #0",  22, 23)
   3082 GEN_TWOVEC_TEST(mvni_2s_0x6B_lsl8,  "mvni v22.2s,  #0x6B, LSL #8",  22, 23)
   3083 GEN_TWOVEC_TEST(mvni_2s_0x49_lsl16, "mvni v22.2s,  #0x49, LSL #16", 22, 23)
   3084 GEN_TWOVEC_TEST(mvni_2s_0x3D_lsl24, "mvni v22.2s,  #0x3D, LSL #24", 22, 23)
   3085 
   /* MOVI / MVNI with 32-bit lanes and the MSL ("shifting ones") modifier,
      shift amounts 8 and 16.  As with the LSL forms, only v22 is named by
      the instruction; register 23 is unused by it. */
   3086 /* overkill -- don't need two vecs, only one */
   3087 GEN_TWOVEC_TEST(movi_4s_0x6B_msl8,  "movi v22.4s,  #0x6B, MSL #8", 22, 23)
   3088 GEN_TWOVEC_TEST(movi_4s_0x94_msl16, "movi v22.4s,  #0x94, MSL #16", 22, 23)
   3089 GEN_TWOVEC_TEST(movi_2s_0x7A_msl8,  "movi v22.2s,  #0x7A, MSL #8", 22, 23)
   3090 GEN_TWOVEC_TEST(movi_2s_0xA5_msl16, "movi v22.2s,  #0xA5, MSL #16", 22, 23)
   3091 GEN_TWOVEC_TEST(mvni_4s_0x6B_msl8,  "mvni v22.4s,  #0x6B, MSL #8", 22, 23)
   3092 GEN_TWOVEC_TEST(mvni_4s_0x94_msl16, "mvni v22.4s,  #0x94, MSL #16", 22, 23)
   3093 GEN_TWOVEC_TEST(mvni_2s_0x7A_msl8,  "mvni v22.2s,  #0x7A, MSL #8", 22, 23)
   3094 GEN_TWOVEC_TEST(mvni_2s_0xA5_msl16, "mvni v22.2s,  #0xA5, MSL #16", 22, 23)
   3095 
   /* MOVI with a 64-bit byte-mask immediate (each byte is 0x00 or 0xFF).
      The hex value in the test name is the 8-bit mask with one bit per byte,
      most significant byte first: 0xA5 <-> FF00FF0000FF00FF,
      0xB4 <-> FF00FFFF00FF0000. */
   3096 GEN_TWOVEC_TEST(movi_d_0xA5,  "movi d22,    #0xFF00FF0000FF00FF", 22, 23)
   3097 GEN_TWOVEC_TEST(movi_2d_0xB4, "movi v22.2d, #0xFF00FFFF00FF0000", 22, 23)
   3098 
   /* NOT: bitwise complement, byte arrangements only. */
   3099 GEN_UNARY_TEST(not, 16b, 16b)
   3100 GEN_UNARY_TEST(not, 8b,  8b)
   3101 
   /* PMUL: polynomial multiply on byte lanes. */
   3102 GEN_BINARY_TEST(pmul, 16b, 16b, 16b)
   3103 GEN_BINARY_TEST(pmul, 8b, 8b, 8b)
   3104 
   /* PMULL / PMULL2: polynomial multiply long, byte sources to halfword
      results.  The two 1q (64x64 -> 128-bit) variants below are disabled;
      NOTE(review): presumably because they need an optional ISA extension
      -- confirm before re-enabling. */
   3105 GEN_BINARY_TEST(pmull,  8h, 8b,  8b)
   3106 GEN_BINARY_TEST(pmull2, 8h, 16b, 16b)
   3107 //GEN_BINARY_TEST(pmull,  1q, 1d,  1d)
   3108 //GEN_BINARY_TEST(pmull,  1q, 2d,  2d)
   3109 
   /* RBIT (reverse bits within each byte) and REV16 / REV32 / REV64
      (reverse elements within 16-, 32- and 64-bit containers).  Each REVn
      is instantiated for every element size narrower than its container. */
   3110 GEN_UNARY_TEST(rbit, 16b, 16b)
   3111 GEN_UNARY_TEST(rbit, 8b, 8b)
   3112 GEN_UNARY_TEST(rev16, 16b, 16b)
   3113 GEN_UNARY_TEST(rev16, 8b, 8b)
   3114 GEN_UNARY_TEST(rev32, 16b, 16b)
   3115 GEN_UNARY_TEST(rev32, 8b, 8b)
   3116 GEN_UNARY_TEST(rev32, 8h, 8h)
   3117 GEN_UNARY_TEST(rev32, 4h, 4h)
   3118 GEN_UNARY_TEST(rev64, 16b, 16b)
   3119 GEN_UNARY_TEST(rev64, 8b, 8b)
   3120 GEN_UNARY_TEST(rev64, 8h, 8h)
   3121 GEN_UNARY_TEST(rev64, 4h, 4h)
   3122 GEN_UNARY_TEST(rev64, 4s, 4s)
   3123 GEN_UNARY_TEST(rev64, 2s, 2s)
   3124 
   /* SABA / UABA: signed / unsigned absolute difference and accumulate,
      for 32-, 16- and 8-bit lanes. */
   3125 GEN_BINARY_TEST(saba, 4s, 4s, 4s)
   3126 GEN_BINARY_TEST(saba, 2s, 2s, 2s)
   3127 GEN_BINARY_TEST(saba, 8h, 8h, 8h)
   3128 GEN_BINARY_TEST(saba, 4h, 4h, 4h)
   3129 GEN_BINARY_TEST(saba, 16b, 16b, 16b)
   3130 GEN_BINARY_TEST(saba, 8b, 8b, 8b)
   3131 GEN_BINARY_TEST(uaba, 4s, 4s, 4s)
   3132 GEN_BINARY_TEST(uaba, 2s, 2s, 2s)
   3133 GEN_BINARY_TEST(uaba, 8h, 8h, 8h)
   3134 GEN_BINARY_TEST(uaba, 4h, 4h, 4h)
   3135 GEN_BINARY_TEST(uaba, 16b, 16b, 16b)
   3136 GEN_BINARY_TEST(uaba, 8b, 8b, 8b)
   3137 
   /* SABAL / UABAL (and the "2" upper-half forms): signed / unsigned
      absolute difference and accumulate long.  Sources v11, v29 have lanes
      half the width of the destination v2. */
   3138 GEN_THREEVEC_TEST(sabal_2d_2s_2s,  "sabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3139 GEN_THREEVEC_TEST(sabal2_2d_4s_4s, "sabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3140 GEN_THREEVEC_TEST(sabal_4s_4h_4h,  "sabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3141 GEN_THREEVEC_TEST(sabal2_4s_8h_8h, "sabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3142 GEN_THREEVEC_TEST(sabal_8h_8b_8b,  "sabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3143 GEN_THREEVEC_TEST(sabal2_8h_16b_16b,
   3144                                    "sabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3145 GEN_THREEVEC_TEST(uabal_2d_2s_2s,  "uabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3146 GEN_THREEVEC_TEST(uabal2_2d_4s_4s, "uabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3147 GEN_THREEVEC_TEST(uabal_4s_4h_4h,  "uabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3148 GEN_THREEVEC_TEST(uabal2_4s_8h_8h, "uabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3149 GEN_THREEVEC_TEST(uabal_8h_8b_8b,  "uabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3150 GEN_THREEVEC_TEST(uabal2_8h_16b_16b,
   3151                                    "uabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3152 
   /* SABD / UABD: signed / unsigned absolute difference, for 32-, 16- and
      8-bit lanes.  Destination v2, sources v11 and v29. */
   3153 GEN_THREEVEC_TEST(sabd_4s_4s_4s,    "sabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
   3154 GEN_THREEVEC_TEST(sabd_2s_2s_2s,    "sabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
   3155 GEN_THREEVEC_TEST(sabd_8h_8h_8h,    "sabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
   3156 GEN_THREEVEC_TEST(sabd_4h_4h_4h,    "sabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
   3157 GEN_THREEVEC_TEST(sabd_16b_16b_16b, "sabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3158 GEN_THREEVEC_TEST(sabd_8b_8b_8b,    "sabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
   3159 GEN_THREEVEC_TEST(uabd_4s_4s_4s,    "uabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
   3160 GEN_THREEVEC_TEST(uabd_2s_2s_2s,    "uabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
   3161 GEN_THREEVEC_TEST(uabd_8h_8h_8h,    "uabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
   3162 GEN_THREEVEC_TEST(uabd_4h_4h_4h,    "uabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
   3163 GEN_THREEVEC_TEST(uabd_16b_16b_16b, "uabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3164 GEN_THREEVEC_TEST(uabd_8b_8b_8b,    "uabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
   3165 
   /* SABDL / UABDL (and the "2" upper-half forms): signed / unsigned
      absolute difference long.  Result lanes in v2 are twice the width of
      the source lanes in v11 and v29. */
   3166 GEN_THREEVEC_TEST(sabdl_2d_2s_2s,  "sabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3167 GEN_THREEVEC_TEST(sabdl2_2d_4s_4s, "sabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3168 GEN_THREEVEC_TEST(sabdl_4s_4h_4h,  "sabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3169 GEN_THREEVEC_TEST(sabdl2_4s_8h_8h, "sabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3170 GEN_THREEVEC_TEST(sabdl_8h_8b_8b,  "sabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3171 GEN_THREEVEC_TEST(sabdl2_8h_16b_16b,
   3172                                    "sabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3173 GEN_THREEVEC_TEST(uabdl_2d_2s_2s,  "uabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3174 GEN_THREEVEC_TEST(uabdl2_2d_4s_4s, "uabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3175 GEN_THREEVEC_TEST(uabdl_4s_4h_4h,  "uabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3176 GEN_THREEVEC_TEST(uabdl2_4s_8h_8h, "uabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3177 GEN_THREEVEC_TEST(uabdl_8h_8b_8b,  "uabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3178 GEN_THREEVEC_TEST(uabdl2_8h_16b_16b,
   3179                                    "uabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3180 
   /* SADALP / UADALP: signed / unsigned add and accumulate long pairwise.
      Adjacent lanes of v19 are summed into double-width lanes and
      accumulated into v3, so v3 is both an input and the output. */
   3181 GEN_TWOVEC_TEST(sadalp_4h_8b,  "sadalp v3.4h, v19.8b",  3, 19)
   3182 GEN_TWOVEC_TEST(sadalp_8h_16b, "sadalp v3.8h, v19.16b", 3, 19)
   3183 GEN_TWOVEC_TEST(sadalp_2s_4h,  "sadalp v3.2s, v19.4h",  3, 19)
   3184 GEN_TWOVEC_TEST(sadalp_4s_8h,  "sadalp v3.4s, v19.8h",  3, 19)
   3185 GEN_TWOVEC_TEST(sadalp_1d_2s,  "sadalp v3.1d, v19.2s",  3, 19)
   3186 GEN_TWOVEC_TEST(sadalp_2d_4s,  "sadalp v3.2d, v19.4s",  3, 19)
   3187 GEN_TWOVEC_TEST(uadalp_4h_8b,  "uadalp v3.4h, v19.8b",  3, 19)
   3188 GEN_TWOVEC_TEST(uadalp_8h_16b, "uadalp v3.8h, v19.16b", 3, 19)
   3189 GEN_TWOVEC_TEST(uadalp_2s_4h,  "uadalp v3.2s, v19.4h",  3, 19)
   3190 GEN_TWOVEC_TEST(uadalp_4s_8h,  "uadalp v3.4s, v19.8h",  3, 19)
   3191 GEN_TWOVEC_TEST(uadalp_1d_2s,  "uadalp v3.1d, v19.2s",  3, 19)
   3192 GEN_TWOVEC_TEST(uadalp_2d_4s,  "uadalp v3.2d, v19.4s",  3, 19)
   3193 
   /* SADDL / UADDL / SSUBL / USUBL (and the "2" upper-half forms):
      signed / unsigned add and subtract long.  Both sources (v11, v29) are
      narrow; the result in v2 has lanes twice as wide. */
   3194 GEN_THREEVEC_TEST(saddl_2d_2s_2s,  "saddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3195 GEN_THREEVEC_TEST(saddl2_2d_4s_4s, "saddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3196 GEN_THREEVEC_TEST(saddl_4s_4h_4h,  "saddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3197 GEN_THREEVEC_TEST(saddl2_4s_8h_8h, "saddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3198 GEN_THREEVEC_TEST(saddl_8h_8b_8b,  "saddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3199 GEN_THREEVEC_TEST(saddl2_8h_16b_16b,
   3200                                    "saddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3201 GEN_THREEVEC_TEST(uaddl_2d_2s_2s,  "uaddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3202 GEN_THREEVEC_TEST(uaddl2_2d_4s_4s, "uaddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3203 GEN_THREEVEC_TEST(uaddl_4s_4h_4h,  "uaddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3204 GEN_THREEVEC_TEST(uaddl2_4s_8h_8h, "uaddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3205 GEN_THREEVEC_TEST(uaddl_8h_8b_8b,  "uaddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3206 GEN_THREEVEC_TEST(uaddl2_8h_16b_16b,
   3207                                    "uaddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3208 GEN_THREEVEC_TEST(ssubl_2d_2s_2s,  "ssubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3209 GEN_THREEVEC_TEST(ssubl2_2d_4s_4s, "ssubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3210 GEN_THREEVEC_TEST(ssubl_4s_4h_4h,  "ssubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3211 GEN_THREEVEC_TEST(ssubl2_4s_8h_8h, "ssubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3212 GEN_THREEVEC_TEST(ssubl_8h_8b_8b,  "ssubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3213 GEN_THREEVEC_TEST(ssubl2_8h_16b_16b,
   3214                                    "ssubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3215 GEN_THREEVEC_TEST(usubl_2d_2s_2s,  "usubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3216 GEN_THREEVEC_TEST(usubl2_2d_4s_4s, "usubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3217 GEN_THREEVEC_TEST(usubl_4s_4h_4h,  "usubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3218 GEN_THREEVEC_TEST(usubl2_4s_8h_8h, "usubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3219 GEN_THREEVEC_TEST(usubl_8h_8b_8b,  "usubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3220 GEN_THREEVEC_TEST(usubl2_8h_16b_16b,
   3221                                    "usubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3222 
   /* SADDLP / UADDLP: signed / unsigned add long pairwise.  Adjacent lanes
      of v19 are summed into double-width lanes of v3 (no accumulation). */
   3223 GEN_TWOVEC_TEST(saddlp_4h_8b,  "saddlp v3.4h, v19.8b",  3, 19)
   3224 GEN_TWOVEC_TEST(saddlp_8h_16b, "saddlp v3.8h, v19.16b", 3, 19)
   3225 GEN_TWOVEC_TEST(saddlp_2s_4h,  "saddlp v3.2s, v19.4h",  3, 19)
   3226 GEN_TWOVEC_TEST(saddlp_4s_8h,  "saddlp v3.4s, v19.8h",  3, 19)
   3227 GEN_TWOVEC_TEST(saddlp_1d_2s,  "saddlp v3.1d, v19.2s",  3, 19)
   3228 GEN_TWOVEC_TEST(saddlp_2d_4s,  "saddlp v3.2d, v19.4s",  3, 19)
   3229 GEN_TWOVEC_TEST(uaddlp_4h_8b,  "uaddlp v3.4h, v19.8b",  3, 19)
   3230 GEN_TWOVEC_TEST(uaddlp_8h_16b, "uaddlp v3.8h, v19.16b", 3, 19)
   3231 GEN_TWOVEC_TEST(uaddlp_2s_4h,  "uaddlp v3.2s, v19.4h",  3, 19)
   3232 GEN_TWOVEC_TEST(uaddlp_4s_8h,  "uaddlp v3.4s, v19.8h",  3, 19)
   3233 GEN_TWOVEC_TEST(uaddlp_1d_2s,  "uaddlp v3.1d, v19.2s",  3, 19)
   3234 GEN_TWOVEC_TEST(uaddlp_2d_4s,  "uaddlp v3.2d, v19.4s",  3, 19)
   3235 
   /* SADDLV / UADDLV: signed / unsigned add long across vector.  All lanes
      of v19 are summed into a single scalar in register 3 whose size is
      twice the source lane width. */
   3236 GEN_TWOVEC_TEST(saddlv_h_16b, "saddlv h3, v19.16b",  3, 19)
   3237 GEN_TWOVEC_TEST(saddlv_h_8b,  "saddlv h3, v19.8b",   3, 19)
   3238 GEN_TWOVEC_TEST(saddlv_s_8h,  "saddlv s3, v19.8h",   3, 19)
   3239 GEN_TWOVEC_TEST(saddlv_s_4h,  "saddlv s3, v19.4h",   3, 19)
   3240 GEN_TWOVEC_TEST(saddlv_d_4s,  "saddlv d3, v19.4s",   3, 19)
   3241 GEN_TWOVEC_TEST(uaddlv_h_16b, "uaddlv h3, v19.16b",  3, 19)
   3242 GEN_TWOVEC_TEST(uaddlv_h_8b,  "uaddlv h3, v19.8b",   3, 19)
   3243 GEN_TWOVEC_TEST(uaddlv_s_8h,  "uaddlv s3, v19.8h",   3, 19)
   3244 GEN_TWOVEC_TEST(uaddlv_s_4h,  "uaddlv s3, v19.4h",   3, 19)
   3245 GEN_TWOVEC_TEST(uaddlv_d_4s,  "uaddlv d3, v19.4s",   3, 19)
   3246 
   /* SADDW / UADDW / SSUBW / USUBW (and the "2" upper-half forms):
      signed / unsigned add and subtract wide.  The first source (v13) already
      has wide lanes; only the second source (v31) is narrow.  Result in v5. */
   3247 GEN_THREEVEC_TEST(saddw2_8h_8h_16b, "saddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3248 GEN_THREEVEC_TEST(saddw_8h_8h_8b,   "saddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3249 GEN_THREEVEC_TEST(saddw2_4s_4s_8h,  "saddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3250 GEN_THREEVEC_TEST(saddw_4s_4s_4h,   "saddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3251 GEN_THREEVEC_TEST(saddw2_2d_2d_4s,  "saddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3252 GEN_THREEVEC_TEST(saddw_2d_2d_2s,   "saddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3253 GEN_THREEVEC_TEST(uaddw2_8h_8h_16b, "uaddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3254 GEN_THREEVEC_TEST(uaddw_8h_8h_8b,   "uaddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3255 GEN_THREEVEC_TEST(uaddw2_4s_4s_8h,  "uaddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3256 GEN_THREEVEC_TEST(uaddw_4s_4s_4h,   "uaddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3257 GEN_THREEVEC_TEST(uaddw2_2d_2d_4s,  "uaddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3258 GEN_THREEVEC_TEST(uaddw_2d_2d_2s,   "uaddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3259 GEN_THREEVEC_TEST(ssubw2_8h_8h_16b, "ssubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3260 GEN_THREEVEC_TEST(ssubw_8h_8h_8b,   "ssubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3261 GEN_THREEVEC_TEST(ssubw2_4s_4s_8h,  "ssubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3262 GEN_THREEVEC_TEST(ssubw_4s_4s_4h,   "ssubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3263 GEN_THREEVEC_TEST(ssubw2_2d_2d_4s,  "ssubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3264 GEN_THREEVEC_TEST(ssubw_2d_2d_2s,   "ssubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3265 GEN_THREEVEC_TEST(usubw2_8h_8h_16b, "usubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3266 GEN_THREEVEC_TEST(usubw_8h_8h_8b,   "usubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3267 GEN_THREEVEC_TEST(usubw2_4s_4s_8h,  "usubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3268 GEN_THREEVEC_TEST(usubw_4s_4s_4h,   "usubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3269 GEN_THREEVEC_TEST(usubw2_2d_2d_4s,  "usubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3270 GEN_THREEVEC_TEST(usubw_2d_2d_2s,   "usubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3271 /* shadd/uhadd/shsub/uhsub for the 4s, 2s, 8h, 4h, 16b and 8b arrangements, on v2, v11, v29. */
   3272 GEN_THREEVEC_TEST(shadd_4s_4s_4s,   "shadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3273 GEN_THREEVEC_TEST(shadd_2s_2s_2s,   "shadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3274 GEN_THREEVEC_TEST(shadd_8h_8h_8h,   "shadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3275 GEN_THREEVEC_TEST(shadd_4h_4h_4h,   "shadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3276 GEN_THREEVEC_TEST(shadd_16b_16b_16b,"shadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3277 GEN_THREEVEC_TEST(shadd_8b_8b_8b,   "shadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3278 GEN_THREEVEC_TEST(uhadd_4s_4s_4s,   "uhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3279 GEN_THREEVEC_TEST(uhadd_2s_2s_2s,   "uhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3280 GEN_THREEVEC_TEST(uhadd_8h_8h_8h,   "uhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3281 GEN_THREEVEC_TEST(uhadd_4h_4h_4h,   "uhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3282 GEN_THREEVEC_TEST(uhadd_16b_16b_16b,"uhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3283 GEN_THREEVEC_TEST(uhadd_8b_8b_8b,   "uhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3284 GEN_THREEVEC_TEST(shsub_4s_4s_4s,   "shsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3285 GEN_THREEVEC_TEST(shsub_2s_2s_2s,   "shsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3286 GEN_THREEVEC_TEST(shsub_8h_8h_8h,   "shsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3287 GEN_THREEVEC_TEST(shsub_4h_4h_4h,   "shsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3288 GEN_THREEVEC_TEST(shsub_16b_16b_16b,"shsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3289 GEN_THREEVEC_TEST(shsub_8b_8b_8b,   "shsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3290 GEN_THREEVEC_TEST(uhsub_4s_4s_4s,   "uhsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3291 GEN_THREEVEC_TEST(uhsub_2s_2s_2s,   "uhsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3292 GEN_THREEVEC_TEST(uhsub_8h_8h_8h,   "uhsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3293 GEN_THREEVEC_TEST(uhsub_4h_4h_4h,   "uhsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3294 GEN_THREEVEC_TEST(uhsub_16b_16b_16b,"uhsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3295 GEN_THREEVEC_TEST(uhsub_8b_8b_8b,   "uhsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3296 /* shll/shll2 with the immediates #8, #16 and #32, on v3 and v24. */
   3297 GEN_TWOVEC_TEST(shll_8h_8b_8,   "shll  v3.8h, v24.8b,  #8", 3, 24)
   3298 GEN_TWOVEC_TEST(shll2_8h_16b_8, "shll2 v3.8h, v24.16b, #8", 3, 24)
   3299 GEN_TWOVEC_TEST(shll_4s_4h_16,  "shll  v3.4s, v24.4h, #16", 3, 24)
   3300 GEN_TWOVEC_TEST(shll2_4s_8h_16, "shll2 v3.4s, v24.8h, #16", 3, 24)
   3301 GEN_TWOVEC_TEST(shll_2d_2s_32,  "shll  v3.2d, v24.2s, #32", 3, 24)
   3302 GEN_TWOVEC_TEST(shll2_2d_4s_32, "shll2 v3.2d, v24.4s, #32", 3, 24)
   3303 /* shrn/shrn2 and rshrn/rshrn2 on v4 and v29, each with shift #1 and one larger shift (#8, #16 or #32). */
   3304 GEN_TWOVEC_TEST(shrn_2s_2d_1,   "shrn  v4.2s,  v29.2d, #1",  4, 29)
   3305 GEN_TWOVEC_TEST(shrn_2s_2d_32,  "shrn  v4.2s,  v29.2d, #32", 4, 29)
   3306 GEN_TWOVEC_TEST(shrn2_4s_2d_1,  "shrn2 v4.4s,  v29.2d, #1",  4, 29)
   3307 GEN_TWOVEC_TEST(shrn2_4s_2d_32, "shrn2 v4.4s,  v29.2d, #32", 4, 29)
   3308 GEN_TWOVEC_TEST(shrn_4h_4s_1,   "shrn  v4.4h,  v29.4s, #1",  4, 29)
   3309 GEN_TWOVEC_TEST(shrn_4h_4s_16,  "shrn  v4.4h,  v29.4s, #16", 4, 29)
   3310 GEN_TWOVEC_TEST(shrn2_8h_4s_1,  "shrn2 v4.8h,  v29.4s, #1",  4, 29)
   3311 GEN_TWOVEC_TEST(shrn2_8h_4s_16, "shrn2 v4.8h,  v29.4s, #16", 4, 29)
   3312 GEN_TWOVEC_TEST(shrn_8b_8h_1,   "shrn  v4.8b,  v29.8h, #1",  4, 29)
   3313 GEN_TWOVEC_TEST(shrn_8b_8h_8,   "shrn  v4.8b,  v29.8h, #8",  4, 29)
   3314 GEN_TWOVEC_TEST(shrn2_16b_8h_1, "shrn2 v4.16b, v29.8h, #1",  4, 29)
   3315 GEN_TWOVEC_TEST(shrn2_16b_8h_8, "shrn2 v4.16b, v29.8h, #8",  4, 29)
   3316 GEN_TWOVEC_TEST(rshrn_2s_2d_1,   "rshrn  v4.2s,  v29.2d, #1",  4, 29)
   3317 GEN_TWOVEC_TEST(rshrn_2s_2d_32,  "rshrn  v4.2s,  v29.2d, #32", 4, 29)
   3318 GEN_TWOVEC_TEST(rshrn2_4s_2d_1,  "rshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3319 GEN_TWOVEC_TEST(rshrn2_4s_2d_32, "rshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3320 GEN_TWOVEC_TEST(rshrn_4h_4s_1,   "rshrn  v4.4h,  v29.4s, #1",  4, 29)
   3321 GEN_TWOVEC_TEST(rshrn_4h_4s_16,  "rshrn  v4.4h,  v29.4s, #16", 4, 29)
   3322 GEN_TWOVEC_TEST(rshrn2_8h_4s_1,  "rshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3323 GEN_TWOVEC_TEST(rshrn2_8h_4s_16, "rshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3324 GEN_TWOVEC_TEST(rshrn_8b_8h_1,   "rshrn  v4.8b,  v29.8h, #1",  4, 29)
   3325 GEN_TWOVEC_TEST(rshrn_8b_8h_8,   "rshrn  v4.8b,  v29.8h, #8",  4, 29)
   3326 GEN_TWOVEC_TEST(rshrn2_16b_8h_1, "rshrn2 v4.16b, v29.8h, #1",  4, 29)
   3327 GEN_TWOVEC_TEST(rshrn2_16b_8h_8, "rshrn2 v4.16b, v29.8h, #8",  4, 29)
   3328 /* Scalar sli (#0, #32, #63) and sri (#1, #33, #64) on d5 and d28. */
   3329 GEN_TWOVEC_TEST(sli_d_d_0,  "sli d5, d28, #0",  5, 28)
   3330 GEN_TWOVEC_TEST(sli_d_d_32, "sli d5, d28, #32", 5, 28)
   3331 GEN_TWOVEC_TEST(sli_d_d_63, "sli d5, d28, #63", 5, 28)
   3332 GEN_TWOVEC_TEST(sri_d_d_1,  "sri d5, d28, #1",  5, 28)
   3333 GEN_TWOVEC_TEST(sri_d_d_33, "sri d5, d28, #33", 5, 28)
   3334 GEN_TWOVEC_TEST(sri_d_d_64, "sri d5, d28, #64", 5, 28)
   3335 /* Vector sli and sri on v6 and v27: three immediates per arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b). */
   3336 GEN_TWOVEC_TEST(sli_2d_2d_0,   "sli v6.2d,  v27.2d, #0",  6, 27)
   3337 GEN_TWOVEC_TEST(sli_2d_2d_32,  "sli v6.2d,  v27.2d, #32", 6, 27)
   3338 GEN_TWOVEC_TEST(sli_2d_2d_63,  "sli v6.2d,  v27.2d, #63", 6, 27)
   3339 GEN_TWOVEC_TEST(sli_4s_4s_0,   "sli v6.4s,  v27.4s, #0",  6, 27)
   3340 GEN_TWOVEC_TEST(sli_4s_4s_16,  "sli v6.4s,  v27.4s, #16", 6, 27)
   3341 GEN_TWOVEC_TEST(sli_4s_4s_31,  "sli v6.4s,  v27.4s, #31", 6, 27)
   3342 GEN_TWOVEC_TEST(sli_2s_2s_0,   "sli v6.2s,  v27.2s, #0",  6, 27)
   3343 GEN_TWOVEC_TEST(sli_2s_2s_16,  "sli v6.2s,  v27.2s, #16", 6, 27)
   3344 GEN_TWOVEC_TEST(sli_2s_2s_31,  "sli v6.2s,  v27.2s, #31", 6, 27)
   3345 GEN_TWOVEC_TEST(sli_8h_8h_0,   "sli v6.8h,  v27.8h, #0",  6, 27)
   3346 GEN_TWOVEC_TEST(sli_8h_8h_8,   "sli v6.8h,  v27.8h, #8",  6, 27)
   3347 GEN_TWOVEC_TEST(sli_8h_8h_15,  "sli v6.8h,  v27.8h, #15", 6, 27)
   3348 GEN_TWOVEC_TEST(sli_4h_4h_0,   "sli v6.4h,  v27.4h, #0",  6, 27)
   3349 GEN_TWOVEC_TEST(sli_4h_4h_8,   "sli v6.4h,  v27.4h, #8",  6, 27)
   3350 GEN_TWOVEC_TEST(sli_4h_4h_15,  "sli v6.4h,  v27.4h, #15", 6, 27)
   3351 GEN_TWOVEC_TEST(sli_16b_16b_0, "sli v6.16b, v27.16b, #0", 6, 27)
   3352 GEN_TWOVEC_TEST(sli_16b_16b_3, "sli v6.16b, v27.16b, #3", 6, 27)
   3353 GEN_TWOVEC_TEST(sli_16b_16b_7, "sli v6.16b, v27.16b, #7", 6, 27)
   3354 GEN_TWOVEC_TEST(sli_8b_8b_0,   "sli v6.8b,  v27.8b, #0",  6, 27)
   3355 GEN_TWOVEC_TEST(sli_8b_8b_3,   "sli v6.8b,  v27.8b, #3",  6, 27)
   3356 GEN_TWOVEC_TEST(sli_8b_8b_7,   "sli v6.8b,  v27.8b, #7",  6, 27)
   3357 GEN_TWOVEC_TEST(sri_2d_2d_1,   "sri v6.2d,  v27.2d,  #1",  6, 27)
   3358 GEN_TWOVEC_TEST(sri_2d_2d_33,  "sri v6.2d,  v27.2d,  #33", 6, 27)
   3359 GEN_TWOVEC_TEST(sri_2d_2d_64,  "sri v6.2d,  v27.2d,  #64", 6, 27)
   3360 GEN_TWOVEC_TEST(sri_4s_4s_1,   "sri v6.4s,  v27.4s,  #1",  6, 27)
   3361 GEN_TWOVEC_TEST(sri_4s_4s_17,  "sri v6.4s,  v27.4s,  #17", 6, 27)
   3362 GEN_TWOVEC_TEST(sri_4s_4s_32,  "sri v6.4s,  v27.4s,  #32", 6, 27)
   3363 GEN_TWOVEC_TEST(sri_2s_2s_1,   "sri v6.2s,  v27.2s,  #1",  6, 27)
   3364 GEN_TWOVEC_TEST(sri_2s_2s_17,  "sri v6.2s,  v27.2s,  #17", 6, 27)
   3365 GEN_TWOVEC_TEST(sri_2s_2s_32,  "sri v6.2s,  v27.2s,  #32", 6, 27)
   3366 GEN_TWOVEC_TEST(sri_8h_8h_1,   "sri v6.8h,  v27.8h,  #1",  6, 27)
   3367 GEN_TWOVEC_TEST(sri_8h_8h_8,   "sri v6.8h,  v27.8h,  #8",  6, 27)
   3368 GEN_TWOVEC_TEST(sri_8h_8h_16,  "sri v6.8h,  v27.8h,  #16", 6, 27)
   3369 GEN_TWOVEC_TEST(sri_4h_4h_1,   "sri v6.4h,  v27.4h,  #1",  6, 27)
   3370 GEN_TWOVEC_TEST(sri_4h_4h_8,   "sri v6.4h,  v27.4h,  #8",  6, 27)
   3371 GEN_TWOVEC_TEST(sri_4h_4h_16,  "sri v6.4h,  v27.4h,  #16", 6, 27)
   3372 GEN_TWOVEC_TEST(sri_16b_16b_1, "sri v6.16b, v27.16b, #1", 6, 27)
   3373 GEN_TWOVEC_TEST(sri_16b_16b_4, "sri v6.16b, v27.16b, #4", 6, 27)
   3374 GEN_TWOVEC_TEST(sri_16b_16b_8, "sri v6.16b, v27.16b, #8", 6, 27)
   3375 GEN_TWOVEC_TEST(sri_8b_8b_1,   "sri v6.8b,  v27.8b,  #1",  6, 27)
   3376 GEN_TWOVEC_TEST(sri_8b_8b_4,   "sri v6.8b,  v27.8b,  #4",  6, 27)
   3377 GEN_TWOVEC_TEST(sri_8b_8b_8,   "sri v6.8b,  v27.8b,  #8",  6, 27)
   3378 /* smax/umax/smin/umin via GEN_BINARY_TEST; each test uses one arrangement for all three operands. */
   3379 GEN_BINARY_TEST(smax, 4s, 4s, 4s)
   3380 GEN_BINARY_TEST(smax, 2s, 2s, 2s)
   3381 GEN_BINARY_TEST(smax, 8h, 8h, 8h)
   3382 GEN_BINARY_TEST(smax, 4h, 4h, 4h)
   3383 GEN_BINARY_TEST(smax, 16b, 16b, 16b)
   3384 GEN_BINARY_TEST(smax, 8b, 8b, 8b)
   3385 GEN_BINARY_TEST(umax, 4s, 4s, 4s)
   3386 GEN_BINARY_TEST(umax, 2s, 2s, 2s)
   3387 GEN_BINARY_TEST(umax, 8h, 8h, 8h)
   3388 GEN_BINARY_TEST(umax, 4h, 4h, 4h)
   3389 GEN_BINARY_TEST(umax, 16b, 16b, 16b)
   3390 GEN_BINARY_TEST(umax, 8b, 8b, 8b)
   3391 GEN_BINARY_TEST(smin, 4s, 4s, 4s)
   3392 GEN_BINARY_TEST(smin, 2s, 2s, 2s)
   3393 GEN_BINARY_TEST(smin, 8h, 8h, 8h)
   3394 GEN_BINARY_TEST(smin, 4h, 4h, 4h)
   3395 GEN_BINARY_TEST(smin, 16b, 16b, 16b)
   3396 GEN_BINARY_TEST(smin, 8b, 8b, 8b)
   3397 GEN_BINARY_TEST(umin, 4s, 4s, 4s)
   3398 GEN_BINARY_TEST(umin, 2s, 2s, 2s)
   3399 GEN_BINARY_TEST(umin, 8h, 8h, 8h)
   3400 GEN_BINARY_TEST(umin, 4h, 4h, 4h)
   3401 GEN_BINARY_TEST(umin, 16b, 16b, 16b)
   3402 GEN_BINARY_TEST(umin, 8b, 8b, 8b)
   3403 /* smaxp/umaxp/sminp/uminp via GEN_BINARY_TEST for 4s, 2s, 8h, 4h, 16b and 8b. */
   3404 GEN_BINARY_TEST(smaxp, 4s, 4s, 4s)
   3405 GEN_BINARY_TEST(smaxp, 2s, 2s, 2s)
   3406 GEN_BINARY_TEST(smaxp, 8h, 8h, 8h)
   3407 GEN_BINARY_TEST(smaxp, 4h, 4h, 4h)
   3408 GEN_BINARY_TEST(smaxp, 16b, 16b, 16b)
   3409 GEN_BINARY_TEST(smaxp, 8b, 8b, 8b)
   3410 GEN_BINARY_TEST(umaxp, 4s, 4s, 4s)
   3411 GEN_BINARY_TEST(umaxp, 2s, 2s, 2s)
   3412 GEN_BINARY_TEST(umaxp, 8h, 8h, 8h)
   3413 GEN_BINARY_TEST(umaxp, 4h, 4h, 4h)
   3414 GEN_BINARY_TEST(umaxp, 16b, 16b, 16b)
   3415 GEN_BINARY_TEST(umaxp, 8b, 8b, 8b)
   3416 GEN_BINARY_TEST(sminp, 4s, 4s, 4s)
   3417 GEN_BINARY_TEST(sminp, 2s, 2s, 2s)
   3418 GEN_BINARY_TEST(sminp, 8h, 8h, 8h)
   3419 GEN_BINARY_TEST(sminp, 4h, 4h, 4h)
   3420 GEN_BINARY_TEST(sminp, 16b, 16b, 16b)
   3421 GEN_BINARY_TEST(sminp, 8b, 8b, 8b)
   3422 GEN_BINARY_TEST(uminp, 4s, 4s, 4s)
   3423 GEN_BINARY_TEST(uminp, 2s, 2s, 2s)
   3424 GEN_BINARY_TEST(uminp, 8h, 8h, 8h)
   3425 GEN_BINARY_TEST(uminp, 4h, 4h, 4h)
   3426 GEN_BINARY_TEST(uminp, 16b, 16b, 16b)
   3427 GEN_BINARY_TEST(uminp, 8b, 8b, 8b)
   3428 
   3429 // test_SMAXV is a handwritten function
   3430 // test_UMAXV is a handwritten function
   3431 // test_SMINV is a handwritten function
   3432 // test_UMINV is a handwritten function
   3433 /* smlal/umlal/smlsl/umlsl/smull/umull and "2" forms with an indexed last operand; the _sN/_hN suffix names the lane of v3 used. NOTE(review): the _h4 tests now really use h[4]; regenerate any stored reference output. */
   3434 GEN_THREEVEC_TEST(smlal_2d_2s_s0,  "smlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3435 GEN_THREEVEC_TEST(smlal_2d_2s_s3,  "smlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3436 GEN_THREEVEC_TEST(smlal2_2d_4s_s1, "smlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3437 GEN_THREEVEC_TEST(smlal2_2d_4s_s2, "smlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3438 GEN_THREEVEC_TEST(smlal_4s_4h_h0,  "smlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3439 GEN_THREEVEC_TEST(smlal_4s_4h_h7,  "smlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3440 GEN_THREEVEC_TEST(smlal2_4s_8h_h1, "smlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3441 GEN_THREEVEC_TEST(smlal2_4s_8h_h4, "smlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3442 GEN_THREEVEC_TEST(umlal_2d_2s_s0,  "umlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3443 GEN_THREEVEC_TEST(umlal_2d_2s_s3,  "umlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3444 GEN_THREEVEC_TEST(umlal2_2d_4s_s1, "umlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3445 GEN_THREEVEC_TEST(umlal2_2d_4s_s2, "umlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3446 GEN_THREEVEC_TEST(umlal_4s_4h_h0,  "umlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3447 GEN_THREEVEC_TEST(umlal_4s_4h_h7,  "umlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3448 GEN_THREEVEC_TEST(umlal2_4s_8h_h1, "umlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3449 GEN_THREEVEC_TEST(umlal2_4s_8h_h4, "umlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3450 GEN_THREEVEC_TEST(smlsl_2d_2s_s0,  "smlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3451 GEN_THREEVEC_TEST(smlsl_2d_2s_s3,  "smlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3452 GEN_THREEVEC_TEST(smlsl2_2d_4s_s1, "smlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3453 GEN_THREEVEC_TEST(smlsl2_2d_4s_s2, "smlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3454 GEN_THREEVEC_TEST(smlsl_4s_4h_h0,  "smlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3455 GEN_THREEVEC_TEST(smlsl_4s_4h_h7,  "smlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3456 GEN_THREEVEC_TEST(smlsl2_4s_8h_h1, "smlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3457 GEN_THREEVEC_TEST(smlsl2_4s_8h_h4, "smlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3458 GEN_THREEVEC_TEST(umlsl_2d_2s_s0,  "umlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3459 GEN_THREEVEC_TEST(umlsl_2d_2s_s3,  "umlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3460 GEN_THREEVEC_TEST(umlsl2_2d_4s_s1, "umlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3461 GEN_THREEVEC_TEST(umlsl2_2d_4s_s2, "umlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3462 GEN_THREEVEC_TEST(umlsl_4s_4h_h0,  "umlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3463 GEN_THREEVEC_TEST(umlsl_4s_4h_h7,  "umlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3464 GEN_THREEVEC_TEST(umlsl2_4s_8h_h1, "umlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3465 GEN_THREEVEC_TEST(umlsl2_4s_8h_h4, "umlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3466 GEN_THREEVEC_TEST(smull_2d_2s_s0,  "smull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3467 GEN_THREEVEC_TEST(smull_2d_2s_s3,  "smull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3468 GEN_THREEVEC_TEST(smull2_2d_4s_s1, "smull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3469 GEN_THREEVEC_TEST(smull2_2d_4s_s2, "smull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3470 GEN_THREEVEC_TEST(smull_4s_4h_h0,  "smull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3471 GEN_THREEVEC_TEST(smull_4s_4h_h7,  "smull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3472 GEN_THREEVEC_TEST(smull2_4s_8h_h1, "smull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3473 GEN_THREEVEC_TEST(smull2_4s_8h_h4, "smull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3474 GEN_THREEVEC_TEST(umull_2d_2s_s0,  "umull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3475 GEN_THREEVEC_TEST(umull_2d_2s_s3,  "umull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3476 GEN_THREEVEC_TEST(umull2_2d_4s_s1, "umull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3477 GEN_THREEVEC_TEST(umull2_2d_4s_s2, "umull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3478 GEN_THREEVEC_TEST(umull_4s_4h_h0,  "umull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3479 GEN_THREEVEC_TEST(umull_4s_4h_h7,  "umull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3480 GEN_THREEVEC_TEST(umull2_4s_8h_h1, "umull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3481 GEN_THREEVEC_TEST(umull2_4s_8h_h4, "umull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3482 /* smlal/umlal/smlsl/umlsl/smull/umull and "2" forms with full vector operands v2, v11, v29. */
   3483 GEN_THREEVEC_TEST(smlal_2d_2s_2s,  "smlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3484 GEN_THREEVEC_TEST(smlal2_2d_4s_4s, "smlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3485 GEN_THREEVEC_TEST(smlal_4s_4h_4h,  "smlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3486 GEN_THREEVEC_TEST(smlal2_4s_8h_8h, "smlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3487 GEN_THREEVEC_TEST(smlal_8h_8b_8b,  "smlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3488 GEN_THREEVEC_TEST(smlal2_8h_16b_16b,
   3489                                    "smlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3490 GEN_THREEVEC_TEST(umlal_2d_2s_2s,  "umlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3491 GEN_THREEVEC_TEST(umlal2_2d_4s_4s, "umlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3492 GEN_THREEVEC_TEST(umlal_4s_4h_4h,  "umlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3493 GEN_THREEVEC_TEST(umlal2_4s_8h_8h, "umlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3494 GEN_THREEVEC_TEST(umlal_8h_8b_8b,  "umlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3495 GEN_THREEVEC_TEST(umlal2_8h_16b_16b,
   3496                                    "umlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3497 GEN_THREEVEC_TEST(smlsl_2d_2s_2s,  "smlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3498 GEN_THREEVEC_TEST(smlsl2_2d_4s_4s, "smlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3499 GEN_THREEVEC_TEST(smlsl_4s_4h_4h,  "smlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3500 GEN_THREEVEC_TEST(smlsl2_4s_8h_8h, "smlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3501 GEN_THREEVEC_TEST(smlsl_8h_8b_8b,  "smlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3502 GEN_THREEVEC_TEST(smlsl2_8h_16b_16b,
   3503                                    "smlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3504 GEN_THREEVEC_TEST(umlsl_2d_2s_2s,  "umlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3505 GEN_THREEVEC_TEST(umlsl2_2d_4s_4s, "umlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3506 GEN_THREEVEC_TEST(umlsl_4s_4h_4h,  "umlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3507 GEN_THREEVEC_TEST(umlsl2_4s_8h_8h, "umlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3508 GEN_THREEVEC_TEST(umlsl_8h_8b_8b,  "umlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3509 GEN_THREEVEC_TEST(umlsl2_8h_16b_16b,
   3510                                    "umlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3511 GEN_THREEVEC_TEST(smull_2d_2s_2s,  "smull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3512 GEN_THREEVEC_TEST(smull2_2d_4s_4s, "smull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3513 GEN_THREEVEC_TEST(smull_4s_4h_4h,  "smull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3514 GEN_THREEVEC_TEST(smull2_4s_8h_8h, "smull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3515 GEN_THREEVEC_TEST(smull_8h_8b_8b,  "smull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3516 GEN_THREEVEC_TEST(smull2_8h_16b_16b,
   3517                                    "smull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3518 GEN_THREEVEC_TEST(umull_2d_2s_2s,  "umull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3519 GEN_THREEVEC_TEST(umull2_2d_4s_4s, "umull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3520 GEN_THREEVEC_TEST(umull_4s_4h_4h,  "umull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3521 GEN_THREEVEC_TEST(umull2_4s_8h_8h, "umull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3522 GEN_THREEVEC_TEST(umull_8h_8b_8b,  "umull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3523 GEN_THREEVEC_TEST(umull2_8h_16b_16b,
   3524                                    "umull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3525 /* umov and smov from lanes of v10 into x9/w9, using the lowest and highest lane index of each element size. */
   3526 GEN_ONEINT_ONEVEC_TEST(umov_x_d0,  "umov x9, v10.d[0]", 9, 10)
   3527 GEN_ONEINT_ONEVEC_TEST(umov_x_d1,  "umov x9, v10.d[1]", 9, 10)
   3528 GEN_ONEINT_ONEVEC_TEST(umov_w_s0,  "umov w9, v10.s[0]", 9, 10)
   3529 GEN_ONEINT_ONEVEC_TEST(umov_w_s3,  "umov w9, v10.s[3]", 9, 10)
   3530 GEN_ONEINT_ONEVEC_TEST(umov_w_h0,  "umov w9, v10.h[0]", 9, 10)
   3531 GEN_ONEINT_ONEVEC_TEST(umov_w_h7,  "umov w9, v10.h[7]", 9, 10)
   3532 GEN_ONEINT_ONEVEC_TEST(umov_w_b0,  "umov w9, v10.b[0]", 9, 10)
   3533 GEN_ONEINT_ONEVEC_TEST(umov_w_b15, "umov w9, v10.b[15]", 9, 10)
   3534 GEN_ONEINT_ONEVEC_TEST(smov_x_s0,  "smov x9, v10.s[0]", 9, 10)
   3535 GEN_ONEINT_ONEVEC_TEST(smov_x_s3,  "smov x9, v10.s[3]", 9, 10)
   3536 GEN_ONEINT_ONEVEC_TEST(smov_x_h0,  "smov x9, v10.h[0]", 9, 10)
   3537 GEN_ONEINT_ONEVEC_TEST(smov_x_h7,  "smov x9, v10.h[7]", 9, 10)
   3538 GEN_ONEINT_ONEVEC_TEST(smov_w_h0,  "smov w9, v10.h[0]", 9, 10)
   3539 GEN_ONEINT_ONEVEC_TEST(smov_w_h7,  "smov w9, v10.h[7]", 9, 10)
   3540 GEN_ONEINT_ONEVEC_TEST(smov_x_b0,  "smov x9, v10.b[0]", 9, 10)
   3541 GEN_ONEINT_ONEVEC_TEST(smov_x_b15, "smov x9, v10.b[15]", 9, 10)
   3542 GEN_ONEINT_ONEVEC_TEST(smov_w_b0,  "smov w9, v10.b[0]", 9, 10)
   3543 GEN_ONEINT_ONEVEC_TEST(smov_w_b15, "smov w9, v10.b[15]", 9, 10)
   3544 /* Scalar sqabs and sqneg for d, s, h and b registers (numbers 7 and 30). */
   3545 GEN_TWOVEC_TEST(sqabs_d_d, "sqabs d7, d30", 7, 30)
   3546 GEN_TWOVEC_TEST(sqabs_s_s, "sqabs s7, s30", 7, 30)
   3547 GEN_TWOVEC_TEST(sqabs_h_h, "sqabs h7, h30", 7, 30)
   3548 GEN_TWOVEC_TEST(sqabs_b_b, "sqabs b7, b30", 7, 30)
   3549 GEN_TWOVEC_TEST(sqneg_d_d, "sqneg d7, d30", 7, 30)
   3550 GEN_TWOVEC_TEST(sqneg_s_s, "sqneg s7, s30", 7, 30)
   3551 GEN_TWOVEC_TEST(sqneg_h_h, "sqneg h7, h30", 7, 30)
   3552 GEN_TWOVEC_TEST(sqneg_b_b, "sqneg b7, b30", 7, 30)
   3553 /* Vector sqabs and sqneg via GEN_UNARY_TEST for 2d, 4s, 2s, 8h, 4h, 16b and 8b. */
   3554 GEN_UNARY_TEST(sqabs, 2d, 2d)
   3555 GEN_UNARY_TEST(sqabs, 4s, 4s)
   3556 GEN_UNARY_TEST(sqabs, 2s, 2s)
   3557 GEN_UNARY_TEST(sqabs, 8h, 8h)
   3558 GEN_UNARY_TEST(sqabs, 4h, 4h)
   3559 GEN_UNARY_TEST(sqabs, 16b, 16b)
   3560 GEN_UNARY_TEST(sqabs, 8b, 8b)
   3561 GEN_UNARY_TEST(sqneg, 2d, 2d)
   3562 GEN_UNARY_TEST(sqneg, 4s, 4s)
   3563 GEN_UNARY_TEST(sqneg, 2s, 2s)
   3564 GEN_UNARY_TEST(sqneg, 8h, 8h)
   3565 GEN_UNARY_TEST(sqneg, 4h, 4h)
   3566 GEN_UNARY_TEST(sqneg, 16b, 16b)
   3567 GEN_UNARY_TEST(sqneg, 8b, 8b)
   3568 /* Scalar sqadd/uqadd/sqsub/uqsub for d, s, h and b registers (numbers 1, 2 and 4). */
   3569 GEN_THREEVEC_TEST(sqadd_d_d_d, "sqadd d1, d2, d4", 1, 2, 4)
   3570 GEN_THREEVEC_TEST(sqadd_s_s_s, "sqadd s1, s2, s4", 1, 2, 4)
   3571 GEN_THREEVEC_TEST(sqadd_h_h_h, "sqadd h1, h2, h4", 1, 2, 4)
   3572 GEN_THREEVEC_TEST(sqadd_b_b_b, "sqadd b1, b2, b4", 1, 2, 4)
   3573 GEN_THREEVEC_TEST(uqadd_d_d_d, "uqadd d1, d2, d4", 1, 2, 4)
   3574 GEN_THREEVEC_TEST(uqadd_s_s_s, "uqadd s1, s2, s4", 1, 2, 4)
   3575 GEN_THREEVEC_TEST(uqadd_h_h_h, "uqadd h1, h2, h4", 1, 2, 4)
   3576 GEN_THREEVEC_TEST(uqadd_b_b_b, "uqadd b1, b2, b4", 1, 2, 4)
   3577 GEN_THREEVEC_TEST(sqsub_d_d_d, "sqsub d1, d2, d4", 1, 2, 4)
   3578 GEN_THREEVEC_TEST(sqsub_s_s_s, "sqsub s1, s2, s4", 1, 2, 4)
   3579 GEN_THREEVEC_TEST(sqsub_h_h_h, "sqsub h1, h2, h4", 1, 2, 4)
   3580 GEN_THREEVEC_TEST(sqsub_b_b_b, "sqsub b1, b2, b4", 1, 2, 4)
   3581 GEN_THREEVEC_TEST(uqsub_d_d_d, "uqsub d1, d2, d4", 1, 2, 4)
   3582 GEN_THREEVEC_TEST(uqsub_s_s_s, "uqsub s1, s2, s4", 1, 2, 4)
   3583 GEN_THREEVEC_TEST(uqsub_h_h_h, "uqsub h1, h2, h4", 1, 2, 4)
   3584 GEN_THREEVEC_TEST(uqsub_b_b_b, "uqsub b1, b2, b4", 1, 2, 4)
   3585 /* Vector sqadd/uqadd/sqsub/uqsub on v1, v2, v4 for 2d, 4s, 2s, 8h, 4h, 16b and 8b. */
   3586 GEN_THREEVEC_TEST(sqadd_2d_2d_2d,    "sqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3587 GEN_THREEVEC_TEST(sqadd_4s_4s_4s,    "sqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3588 GEN_THREEVEC_TEST(sqadd_2s_2s_2s,    "sqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3589 GEN_THREEVEC_TEST(sqadd_8h_8h_8h,    "sqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3590 GEN_THREEVEC_TEST(sqadd_4h_4h_4h,    "sqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3591 GEN_THREEVEC_TEST(sqadd_16b_16b_16b, "sqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3592 GEN_THREEVEC_TEST(sqadd_8b_8b_8b,    "sqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3593 GEN_THREEVEC_TEST(uqadd_2d_2d_2d,    "uqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3594 GEN_THREEVEC_TEST(uqadd_4s_4s_4s,    "uqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3595 GEN_THREEVEC_TEST(uqadd_2s_2s_2s,    "uqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3596 GEN_THREEVEC_TEST(uqadd_8h_8h_8h,    "uqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3597 GEN_THREEVEC_TEST(uqadd_4h_4h_4h,    "uqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3598 GEN_THREEVEC_TEST(uqadd_16b_16b_16b, "uqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3599 GEN_THREEVEC_TEST(uqadd_8b_8b_8b,    "uqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3600 GEN_THREEVEC_TEST(sqsub_2d_2d_2d,    "sqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3601 GEN_THREEVEC_TEST(sqsub_4s_4s_4s,    "sqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3602 GEN_THREEVEC_TEST(sqsub_2s_2s_2s,    "sqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3603 GEN_THREEVEC_TEST(sqsub_8h_8h_8h,    "sqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3604 GEN_THREEVEC_TEST(sqsub_4h_4h_4h,    "sqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3605 GEN_THREEVEC_TEST(sqsub_16b_16b_16b, "sqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3606 GEN_THREEVEC_TEST(sqsub_8b_8b_8b,    "sqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3607 GEN_THREEVEC_TEST(uqsub_2d_2d_2d,    "uqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3608 GEN_THREEVEC_TEST(uqsub_4s_4s_4s,    "uqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3609 GEN_THREEVEC_TEST(uqsub_2s_2s_2s,    "uqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3610 GEN_THREEVEC_TEST(uqsub_8h_8h_8h,    "uqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3611 GEN_THREEVEC_TEST(uqsub_4h_4h_4h,    "uqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3612 GEN_THREEVEC_TEST(uqsub_16b_16b_16b, "uqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3613 GEN_THREEVEC_TEST(uqsub_8b_8b_8b,    "uqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3614 /* Scalar sqdmlal/sqdmlsl/sqdmull with an indexed last operand: v29.s[0], v29.s[3], v13.h[1], v13.h[5]. */
   3615 GEN_THREEVEC_TEST(sqdmlal_d_s_s0, "sqdmlal d31, s30, v29.s[0]", 31,30,29)
   3616 GEN_THREEVEC_TEST(sqdmlal_d_s_s3, "sqdmlal d31, s30, v29.s[3]", 31,30,29)
   3617 GEN_THREEVEC_TEST(sqdmlal_s_h_h1, "sqdmlal s31, h30, v13.h[1]", 31,30,13)
   3618 GEN_THREEVEC_TEST(sqdmlal_s_h_h5, "sqdmlal s31, h30, v13.h[5]", 31,30,13)
   3619 GEN_THREEVEC_TEST(sqdmlsl_d_s_s0, "sqdmlsl d31, s30, v29.s[0]", 31,30,29)
   3620 GEN_THREEVEC_TEST(sqdmlsl_d_s_s3, "sqdmlsl d31, s30, v29.s[3]", 31,30,29)
   3621 GEN_THREEVEC_TEST(sqdmlsl_s_h_h1, "sqdmlsl s31, h30, v13.h[1]", 31,30,13)
   3622 GEN_THREEVEC_TEST(sqdmlsl_s_h_h5, "sqdmlsl s31, h30, v13.h[5]", 31,30,13)
   3623 GEN_THREEVEC_TEST(sqdmull_d_s_s0, "sqdmull d31, s30, v29.s[0]", 31,30,29)
   3624 GEN_THREEVEC_TEST(sqdmull_d_s_s3, "sqdmull d31, s30, v29.s[3]", 31,30,29)
   3625 GEN_THREEVEC_TEST(sqdmull_s_h_h1, "sqdmull s31, h30, v13.h[1]", 31,30,13)
   3626 GEN_THREEVEC_TEST(sqdmull_s_h_h5, "sqdmull s31, h30, v13.h[5]", 31,30,13)
   3627 /* Vector sqdmlal/sqdmlsl/sqdmull and "2" forms with an indexed last operand; the _sN/_hN suffix names the lane of v3 used. NOTE(review): the _h4 tests now really use h[4]; regenerate any stored reference output. */
   3628 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s0, "sqdmlal  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3629 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s3, "sqdmlal  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3630 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s1,"sqdmlal2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3631 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s2,"sqdmlal2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3632 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h0, "sqdmlal  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3633 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h7, "sqdmlal  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3634 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h1,"sqdmlal2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3635 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h4,"sqdmlal2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3636 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s0, "sqdmlsl  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3637 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s3, "sqdmlsl  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3638 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s1,"sqdmlsl2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3639 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s2,"sqdmlsl2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3640 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h0, "sqdmlsl  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3641 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h7, "sqdmlsl  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3642 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h1,"sqdmlsl2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3643 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h4,"sqdmlsl2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3644 GEN_THREEVEC_TEST(sqdmull_2d_2s_s0, "sqdmull  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3645 GEN_THREEVEC_TEST(sqdmull_2d_2s_s3, "sqdmull  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3646 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s1,"sqdmull2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3647 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s2,"sqdmull2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3648 GEN_THREEVEC_TEST(sqdmull_4s_4h_h0, "sqdmull  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3649 GEN_THREEVEC_TEST(sqdmull_4s_4h_h7, "sqdmull  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3650 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h1,"sqdmull2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3651 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h4,"sqdmull2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3652 /* Scalar sqdmlal/sqdmlsl/sqdmull with plain scalar operands (register numbers 0, 8 and 16). */
   3653 GEN_THREEVEC_TEST(sqdmlal_d_s_s, "sqdmlal d0, s8, s16", 0, 8, 16)
   3654 GEN_THREEVEC_TEST(sqdmlal_s_h_h, "sqdmlal s0, h8, h16", 0, 8, 16)
   3655 GEN_THREEVEC_TEST(sqdmlsl_d_s_s, "sqdmlsl d0, s8, s16", 0, 8, 16)
   3656 GEN_THREEVEC_TEST(sqdmlsl_s_h_h, "sqdmlsl s0, h8, h16", 0, 8, 16)
   3657 GEN_THREEVEC_TEST(sqdmull_d_s_s, "sqdmull d0, s8, s16", 0, 8, 16)
   3658 GEN_THREEVEC_TEST(sqdmull_s_h_h, "sqdmull s0, h8, h16", 0, 8, 16)
   3659 /* Vector sqdmlal/sqdmlsl/sqdmull and "2" forms with full vector operands v2, v11, v29. */
   3660 GEN_THREEVEC_TEST(sqdmlal_2d_2s_2s,  "sqdmlal  v2.2d, v11.2s, v29.2s", 2,11,29)
   3661 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_4s, "sqdmlal2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3662 GEN_THREEVEC_TEST(sqdmlal_4s_4h_4h,  "sqdmlal  v2.4s, v11.4h, v29.4h", 2,11,29)
   3663 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_8h, "sqdmlal2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3664 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_2s,  "sqdmlsl  v2.2d, v11.2s, v29.2s", 2,11,29)
   3665 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_4s, "sqdmlsl2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3666 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_4h,  "sqdmlsl  v2.4s, v11.4h, v29.4h", 2,11,29)
   3667 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_8h, "sqdmlsl2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3668 GEN_THREEVEC_TEST(sqdmull_2d_2s_2s,  "sqdmull  v2.2d, v11.2s, v29.2s", 2,11,29)
   3669 GEN_THREEVEC_TEST(sqdmull2_2d_4s_4s, "sqdmull2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3670 GEN_THREEVEC_TEST(sqdmull_4s_4h_4h,  "sqdmull  v2.4s, v11.4h, v29.4h", 2,11,29)
   3671 GEN_THREEVEC_TEST(sqdmull2_4s_8h_8h, "sqdmull2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3672 /* Scalar sqdmulh/sqrdmulh with an indexed last operand (v2.s[1], v2.s[3], v2.h[2], v2.h[7]). */
   3673 GEN_THREEVEC_TEST(sqdmulh_s_s_s1, "sqdmulh s0, s1, v2.s[1]", 0,1,2)
   3674 GEN_THREEVEC_TEST(sqdmulh_s_s_s3, "sqdmulh s0, s1, v2.s[3]", 0,1,2)
   3675 GEN_THREEVEC_TEST(sqdmulh_h_h_h2, "sqdmulh h0, h1, v2.h[2]", 0,1,2)
   3676 GEN_THREEVEC_TEST(sqdmulh_h_h_h7, "sqdmulh h0, h1, v2.h[7]", 0,1,2)
   3677 GEN_THREEVEC_TEST(sqrdmulh_s_s_s1, "sqrdmulh s0, s1, v2.s[1]", 0,1,2)
   3678 GEN_THREEVEC_TEST(sqrdmulh_s_s_s3, "sqrdmulh s0, s1, v2.s[3]", 0,1,2)
   3679 GEN_THREEVEC_TEST(sqrdmulh_h_h_h2, "sqrdmulh h0, h1, v2.h[2]", 0,1,2)
   3680 GEN_THREEVEC_TEST(sqrdmulh_h_h_h7, "sqrdmulh h0, h1, v2.h[7]", 0,1,2)
   3681 /* Vector sqdmulh/sqrdmulh with an indexed last operand from v2, for 4s, 2s, 8h and 4h. */
   3682 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s1, "sqdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
   3683 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s3, "sqdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
   3684 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s1, "sqdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
   3685 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s3, "sqdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
   3686 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h2, "sqdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
   3687 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h7, "sqdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
   3688 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h2, "sqdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
   3689 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h7, "sqdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
   3690 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s1, "sqrdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
   3691 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s3, "sqrdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
   3692 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s1, "sqrdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
   3693 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s3, "sqrdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
   3694 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h2, "sqrdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
   3695 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h7, "sqrdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
   3696 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h2, "sqrdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
   3697 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h7, "sqrdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
   3698 /* Scalar sqdmulh/sqrdmulh on s and h registers (numbers 1, 2 and 4). */
   3699 GEN_THREEVEC_TEST(sqdmulh_s_s_s,  "sqdmulh  s1, s2, s4", 1, 2, 4)
   3700 GEN_THREEVEC_TEST(sqdmulh_h_h_h,  "sqdmulh  h1, h2, h4", 1, 2, 4)
   3701 GEN_THREEVEC_TEST(sqrdmulh_s_s_s, "sqrdmulh s1, s2, s4", 1, 2, 4)
   3702 GEN_THREEVEC_TEST(sqrdmulh_h_h_h, "sqrdmulh h1, h2, h4", 1, 2, 4)
   3703 /* Vector sqdmulh/sqrdmulh on v1, v2, v4 for 4s, 2s, 8h and 4h. */
   3704 GEN_THREEVEC_TEST(sqdmulh_4s_4s_4s, "sqdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3705 GEN_THREEVEC_TEST(sqdmulh_2s_2s_2s, "sqdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3706 GEN_THREEVEC_TEST(sqdmulh_8h_8h_8h, "sqdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3707 GEN_THREEVEC_TEST(sqdmulh_4h_4h_4h, "sqdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3708 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_4s, "sqrdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3709 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_2s, "sqrdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3710 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_8h, "sqrdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3711 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_4h, "sqrdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3712 /* Scalar three-register sqshl/uqshl/sqrshl/uqrshl for d, s, h and b (register numbers 1, 2 and 4). */
   3713 GEN_THREEVEC_TEST(sqshl_d_d_d, "sqshl d1, d2, d4", 1, 2, 4)
   3714 GEN_THREEVEC_TEST(sqshl_s_s_s, "sqshl s1, s2, s4", 1, 2, 4)
   3715 GEN_THREEVEC_TEST(sqshl_h_h_h, "sqshl h1, h2, h4", 1, 2, 4)
   3716 GEN_THREEVEC_TEST(sqshl_b_b_b, "sqshl b1, b2, b4", 1, 2, 4)
   3717 GEN_THREEVEC_TEST(uqshl_d_d_d, "uqshl d1, d2, d4", 1, 2, 4)
   3718 GEN_THREEVEC_TEST(uqshl_s_s_s, "uqshl s1, s2, s4", 1, 2, 4)
   3719 GEN_THREEVEC_TEST(uqshl_h_h_h, "uqshl h1, h2, h4", 1, 2, 4)
   3720 GEN_THREEVEC_TEST(uqshl_b_b_b, "uqshl b1, b2, b4", 1, 2, 4)
   3721 GEN_THREEVEC_TEST(sqrshl_d_d_d, "sqrshl d1, d2, d4", 1, 2, 4)
   3722 GEN_THREEVEC_TEST(sqrshl_s_s_s, "sqrshl s1, s2, s4", 1, 2, 4)
   3723 GEN_THREEVEC_TEST(sqrshl_h_h_h, "sqrshl h1, h2, h4", 1, 2, 4)
   3724 GEN_THREEVEC_TEST(sqrshl_b_b_b, "sqrshl b1, b2, b4", 1, 2, 4)
   3725 GEN_THREEVEC_TEST(uqrshl_d_d_d, "uqrshl d1, d2, d4", 1, 2, 4)
   3726 GEN_THREEVEC_TEST(uqrshl_s_s_s, "uqrshl s1, s2, s4", 1, 2, 4)
   3727 GEN_THREEVEC_TEST(uqrshl_h_h_h, "uqrshl h1, h2, h4", 1, 2, 4)
   3728 GEN_THREEVEC_TEST(uqrshl_b_b_b, "uqrshl b1, b2, b4", 1, 2, 4)
   3729 
   /* Vector saturating shift left by register: SQSHL, UQSHL, SQRSHL and
      UQRSHL.  Each mnemonic is exercised on all seven arrangements
      (2d, 4s, 2s, 8h, 4h, 16b, 8b) with registers v1 (destination),
      v2 and v4. */
   3730 GEN_THREEVEC_TEST(sqshl_2d_2d_2d,    "sqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3731 GEN_THREEVEC_TEST(sqshl_4s_4s_4s,    "sqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3732 GEN_THREEVEC_TEST(sqshl_2s_2s_2s,    "sqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3733 GEN_THREEVEC_TEST(sqshl_8h_8h_8h,    "sqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3734 GEN_THREEVEC_TEST(sqshl_4h_4h_4h,    "sqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3735 GEN_THREEVEC_TEST(sqshl_16b_16b_16b, "sqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3736 GEN_THREEVEC_TEST(sqshl_8b_8b_8b,    "sqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3737 GEN_THREEVEC_TEST(uqshl_2d_2d_2d,    "uqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3738 GEN_THREEVEC_TEST(uqshl_4s_4s_4s,    "uqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3739 GEN_THREEVEC_TEST(uqshl_2s_2s_2s,    "uqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3740 GEN_THREEVEC_TEST(uqshl_8h_8h_8h,    "uqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3741 GEN_THREEVEC_TEST(uqshl_4h_4h_4h,    "uqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3742 GEN_THREEVEC_TEST(uqshl_16b_16b_16b, "uqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3743 GEN_THREEVEC_TEST(uqshl_8b_8b_8b,    "uqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3744 GEN_THREEVEC_TEST(sqrshl_2d_2d_2d,    "sqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3745 GEN_THREEVEC_TEST(sqrshl_4s_4s_4s,    "sqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3746 GEN_THREEVEC_TEST(sqrshl_2s_2s_2s,    "sqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3747 GEN_THREEVEC_TEST(sqrshl_8h_8h_8h,    "sqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3748 GEN_THREEVEC_TEST(sqrshl_4h_4h_4h,    "sqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3749 GEN_THREEVEC_TEST(sqrshl_16b_16b_16b, "sqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3750 GEN_THREEVEC_TEST(sqrshl_8b_8b_8b,    "sqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3751 GEN_THREEVEC_TEST(uqrshl_2d_2d_2d,    "uqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3752 GEN_THREEVEC_TEST(uqrshl_4s_4s_4s,    "uqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3753 GEN_THREEVEC_TEST(uqrshl_2s_2s_2s,    "uqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3754 GEN_THREEVEC_TEST(uqrshl_8h_8h_8h,    "uqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3755 GEN_THREEVEC_TEST(uqrshl_4h_4h_4h,    "uqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3756 GEN_THREEVEC_TEST(uqrshl_16b_16b_16b, "uqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3757 GEN_THREEVEC_TEST(uqrshl_8b_8b_8b,    "uqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3758 
   /* Scalar saturating shift right narrow by immediate: SQRSHRN,
      UQRSHRN, SQSHRN, UQSHRN, SQRSHRUN and SQSHRUN.  Each mnemonic is
      tested for the d->s, s->h and h->b narrowings with three shift
      amounts: 1, a mid-range value, and the destination width in bits
      (32, 16 or 8).  Destination is register 2, source is register 5,
      matching the trailing "2, 5" arguments.
      NOTE(review): GEN_TWOVEC_TEST is defined outside this chunk;
      presumably it emits one test function per invocation -- confirm
      against the macro definition. */
   3759 GEN_TWOVEC_TEST(sqrshrn_s_d_1,  "sqrshrn s2, d5, #1",  2, 5)
   3760 GEN_TWOVEC_TEST(sqrshrn_s_d_17, "sqrshrn s2, d5, #17", 2, 5)
   3761 GEN_TWOVEC_TEST(sqrshrn_s_d_32, "sqrshrn s2, d5, #32", 2, 5)
   3762 GEN_TWOVEC_TEST(sqrshrn_h_s_1,  "sqrshrn h2, s5, #1",  2, 5)
   3763 GEN_TWOVEC_TEST(sqrshrn_h_s_9,  "sqrshrn h2, s5, #9",  2, 5)
   3764 GEN_TWOVEC_TEST(sqrshrn_h_s_16, "sqrshrn h2, s5, #16", 2, 5)
   3765 GEN_TWOVEC_TEST(sqrshrn_b_h_1,  "sqrshrn b2, h5, #1",  2, 5)
   3766 GEN_TWOVEC_TEST(sqrshrn_b_h_4,  "sqrshrn b2, h5, #4",  2, 5)
   3767 GEN_TWOVEC_TEST(sqrshrn_b_h_8,  "sqrshrn b2, h5, #8",  2, 5)
   3768 GEN_TWOVEC_TEST(uqrshrn_s_d_1,  "uqrshrn s2, d5, #1",  2, 5)
   3769 GEN_TWOVEC_TEST(uqrshrn_s_d_17, "uqrshrn s2, d5, #17", 2, 5)
   3770 GEN_TWOVEC_TEST(uqrshrn_s_d_32, "uqrshrn s2, d5, #32", 2, 5)
   3771 GEN_TWOVEC_TEST(uqrshrn_h_s_1,  "uqrshrn h2, s5, #1",  2, 5)
   3772 GEN_TWOVEC_TEST(uqrshrn_h_s_9,  "uqrshrn h2, s5, #9",  2, 5)
   3773 GEN_TWOVEC_TEST(uqrshrn_h_s_16, "uqrshrn h2, s5, #16", 2, 5)
   3774 GEN_TWOVEC_TEST(uqrshrn_b_h_1,  "uqrshrn b2, h5, #1",  2, 5)
   3775 GEN_TWOVEC_TEST(uqrshrn_b_h_4,  "uqrshrn b2, h5, #4",  2, 5)
   3776 GEN_TWOVEC_TEST(uqrshrn_b_h_8,  "uqrshrn b2, h5, #8",  2, 5)
   3777 GEN_TWOVEC_TEST(sqshrn_s_d_1,  "sqshrn s2, d5, #1",  2, 5)
   3778 GEN_TWOVEC_TEST(sqshrn_s_d_17, "sqshrn s2, d5, #17", 2, 5)
   3779 GEN_TWOVEC_TEST(sqshrn_s_d_32, "sqshrn s2, d5, #32", 2, 5)
   3780 GEN_TWOVEC_TEST(sqshrn_h_s_1,  "sqshrn h2, s5, #1",  2, 5)
   3781 GEN_TWOVEC_TEST(sqshrn_h_s_9,  "sqshrn h2, s5, #9",  2, 5)
   3782 GEN_TWOVEC_TEST(sqshrn_h_s_16, "sqshrn h2, s5, #16", 2, 5)
   3783 GEN_TWOVEC_TEST(sqshrn_b_h_1,  "sqshrn b2, h5, #1",  2, 5)
   3784 GEN_TWOVEC_TEST(sqshrn_b_h_4,  "sqshrn b2, h5, #4",  2, 5)
   3785 GEN_TWOVEC_TEST(sqshrn_b_h_8,  "sqshrn b2, h5, #8",  2, 5)
   3786 GEN_TWOVEC_TEST(uqshrn_s_d_1,  "uqshrn s2, d5, #1",  2, 5)
   3787 GEN_TWOVEC_TEST(uqshrn_s_d_17, "uqshrn s2, d5, #17", 2, 5)
   3788 GEN_TWOVEC_TEST(uqshrn_s_d_32, "uqshrn s2, d5, #32", 2, 5)
   3789 GEN_TWOVEC_TEST(uqshrn_h_s_1,  "uqshrn h2, s5, #1",  2, 5)
   3790 GEN_TWOVEC_TEST(uqshrn_h_s_9,  "uqshrn h2, s5, #9",  2, 5)
   3791 GEN_TWOVEC_TEST(uqshrn_h_s_16, "uqshrn h2, s5, #16", 2, 5)
   3792 GEN_TWOVEC_TEST(uqshrn_b_h_1,  "uqshrn b2, h5, #1",  2, 5)
   3793 GEN_TWOVEC_TEST(uqshrn_b_h_4,  "uqshrn b2, h5, #4",  2, 5)
   3794 GEN_TWOVEC_TEST(uqshrn_b_h_8,  "uqshrn b2, h5, #8",  2, 5)
   3795 GEN_TWOVEC_TEST(sqrshrun_s_d_1,  "sqrshrun s2, d5, #1",  2, 5)
   3796 GEN_TWOVEC_TEST(sqrshrun_s_d_17, "sqrshrun s2, d5, #17", 2, 5)
   3797 GEN_TWOVEC_TEST(sqrshrun_s_d_32, "sqrshrun s2, d5, #32", 2, 5)
   3798 GEN_TWOVEC_TEST(sqrshrun_h_s_1,  "sqrshrun h2, s5, #1",  2, 5)
   3799 GEN_TWOVEC_TEST(sqrshrun_h_s_9,  "sqrshrun h2, s5, #9",  2, 5)
   3800 GEN_TWOVEC_TEST(sqrshrun_h_s_16, "sqrshrun h2, s5, #16", 2, 5)
   3801 GEN_TWOVEC_TEST(sqrshrun_b_h_1,  "sqrshrun b2, h5, #1",  2, 5)
   3802 GEN_TWOVEC_TEST(sqrshrun_b_h_4,  "sqrshrun b2, h5, #4",  2, 5)
   3803 GEN_TWOVEC_TEST(sqrshrun_b_h_8,  "sqrshrun b2, h5, #8",  2, 5)
   3804 GEN_TWOVEC_TEST(sqshrun_s_d_1,  "sqshrun s2, d5, #1",  2, 5)
   3805 GEN_TWOVEC_TEST(sqshrun_s_d_17, "sqshrun s2, d5, #17", 2, 5)
   3806 GEN_TWOVEC_TEST(sqshrun_s_d_32, "sqshrun s2, d5, #32", 2, 5)
   3807 GEN_TWOVEC_TEST(sqshrun_h_s_1,  "sqshrun h2, s5, #1",  2, 5)
   3808 GEN_TWOVEC_TEST(sqshrun_h_s_9,  "sqshrun h2, s5, #9",  2, 5)
   3809 GEN_TWOVEC_TEST(sqshrun_h_s_16, "sqshrun h2, s5, #16", 2, 5)
   3810 GEN_TWOVEC_TEST(sqshrun_b_h_1,  "sqshrun b2, h5, #1",  2, 5)
   3811 GEN_TWOVEC_TEST(sqshrun_b_h_4,  "sqshrun b2, h5, #4",  2, 5)
   3812 GEN_TWOVEC_TEST(sqshrun_b_h_8,  "sqshrun b2, h5, #8",  2, 5)
   3813 
   /* Vector saturating shift right narrow by immediate: SQRSHRN,
      UQRSHRN, SQSHRN, UQSHRN, SQRSHRUN and SQSHRUN, each together with
      its "2" form (e.g. sqrshrn2).  For every mnemonic the 2d->2s/4s,
      4s->4h/8h and 8h->8b/16b narrowings are tested with three shift
      amounts: 1, a mid-range value, and the destination lane width in
      bits (32, 16 or 8).  Destination is v4, source is v29, matching
      the trailing "4, 29" arguments. */
   3814 GEN_TWOVEC_TEST(sqrshrn_2s_2d_1,   "sqrshrn  v4.2s,  v29.2d, #1",  4, 29)
   3815 GEN_TWOVEC_TEST(sqrshrn_2s_2d_17,  "sqrshrn  v4.2s,  v29.2d, #17", 4, 29)
   3816 GEN_TWOVEC_TEST(sqrshrn_2s_2d_32,  "sqrshrn  v4.2s,  v29.2d, #32", 4, 29)
   3817 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_1,  "sqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3818 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_17, "sqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3819 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_32, "sqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3820 GEN_TWOVEC_TEST(sqrshrn_4h_4s_1,   "sqrshrn  v4.4h,  v29.4s, #1",  4, 29)
   3821 GEN_TWOVEC_TEST(sqrshrn_4h_4s_9,   "sqrshrn  v4.4h,  v29.4s, #9",  4, 29)
   3822 GEN_TWOVEC_TEST(sqrshrn_4h_4s_16,  "sqrshrn  v4.4h,  v29.4s, #16", 4, 29)
   3823 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_1,  "sqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3824 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_9,  "sqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3825 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_16, "sqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3826 GEN_TWOVEC_TEST(sqrshrn_8b_8h_1,   "sqrshrn  v4.8b,  v29.8h, #1",  4, 29)
   3827 GEN_TWOVEC_TEST(sqrshrn_8b_8h_4,   "sqrshrn  v4.8b,  v29.8h, #4",  4, 29)
   3828 GEN_TWOVEC_TEST(sqrshrn_8b_8h_8,   "sqrshrn  v4.8b,  v29.8h, #8",  4, 29)
   3829 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_1, "sqrshrn2 v4.16b, v29.8h, #1",  4, 29)
   3830 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_4, "sqrshrn2 v4.16b, v29.8h, #4",  4, 29)
   3831 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_8, "sqrshrn2 v4.16b, v29.8h, #8",  4, 29)
   3832 GEN_TWOVEC_TEST(uqrshrn_2s_2d_1,   "uqrshrn  v4.2s,  v29.2d, #1",  4, 29)
   3833 GEN_TWOVEC_TEST(uqrshrn_2s_2d_17,  "uqrshrn  v4.2s,  v29.2d, #17", 4, 29)
   3834 GEN_TWOVEC_TEST(uqrshrn_2s_2d_32,  "uqrshrn  v4.2s,  v29.2d, #32", 4, 29)
   3835 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_1,  "uqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3836 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_17, "uqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3837 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_32, "uqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3838 GEN_TWOVEC_TEST(uqrshrn_4h_4s_1,   "uqrshrn  v4.4h,  v29.4s, #1",  4, 29)
   3839 GEN_TWOVEC_TEST(uqrshrn_4h_4s_9,   "uqrshrn  v4.4h,  v29.4s, #9",  4, 29)
   3840 GEN_TWOVEC_TEST(uqrshrn_4h_4s_16,  "uqrshrn  v4.4h,  v29.4s, #16", 4, 29)
   3841 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_1,  "uqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3842 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_9,  "uqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3843 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_16, "uqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3844 GEN_TWOVEC_TEST(uqrshrn_8b_8h_1,   "uqrshrn  v4.8b,  v29.8h, #1",  4, 29)
   3845 GEN_TWOVEC_TEST(uqrshrn_8b_8h_4,   "uqrshrn  v4.8b,  v29.8h, #4",  4, 29)
   3846 GEN_TWOVEC_TEST(uqrshrn_8b_8h_8,   "uqrshrn  v4.8b,  v29.8h, #8",  4, 29)
   3847 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_1, "uqrshrn2 v4.16b, v29.8h, #1",  4, 29)
   3848 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_4, "uqrshrn2 v4.16b, v29.8h, #4",  4, 29)
   3849 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_8, "uqrshrn2 v4.16b, v29.8h, #8",  4, 29)
   3850 GEN_TWOVEC_TEST(sqshrn_2s_2d_1,   "sqshrn  v4.2s,  v29.2d, #1",  4, 29)
   3851 GEN_TWOVEC_TEST(sqshrn_2s_2d_17,  "sqshrn  v4.2s,  v29.2d, #17", 4, 29)
   3852 GEN_TWOVEC_TEST(sqshrn_2s_2d_32,  "sqshrn  v4.2s,  v29.2d, #32", 4, 29)
   3853 GEN_TWOVEC_TEST(sqshrn2_4s_2d_1,  "sqshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3854 GEN_TWOVEC_TEST(sqshrn2_4s_2d_17, "sqshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3855 GEN_TWOVEC_TEST(sqshrn2_4s_2d_32, "sqshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3856 GEN_TWOVEC_TEST(sqshrn_4h_4s_1,   "sqshrn  v4.4h,  v29.4s, #1",  4, 29)
   3857 GEN_TWOVEC_TEST(sqshrn_4h_4s_9,   "sqshrn  v4.4h,  v29.4s, #9",  4, 29)
   3858 GEN_TWOVEC_TEST(sqshrn_4h_4s_16,  "sqshrn  v4.4h,  v29.4s, #16", 4, 29)
   3859 GEN_TWOVEC_TEST(sqshrn2_8h_4s_1,  "sqshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3860 GEN_TWOVEC_TEST(sqshrn2_8h_4s_9,  "sqshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3861 GEN_TWOVEC_TEST(sqshrn2_8h_4s_16, "sqshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3862 GEN_TWOVEC_TEST(sqshrn_8b_8h_1,   "sqshrn  v4.8b,  v29.8h, #1",  4, 29)
   3863 GEN_TWOVEC_TEST(sqshrn_8b_8h_4,   "sqshrn  v4.8b,  v29.8h, #4",  4, 29)
   3864 GEN_TWOVEC_TEST(sqshrn_8b_8h_8,   "sqshrn  v4.8b,  v29.8h, #8",  4, 29)
   3865 GEN_TWOVEC_TEST(sqshrn2_16b_8h_1, "sqshrn2 v4.16b, v29.8h, #1",  4, 29)
   3866 GEN_TWOVEC_TEST(sqshrn2_16b_8h_4, "sqshrn2 v4.16b, v29.8h, #4",  4, 29)
   3867 GEN_TWOVEC_TEST(sqshrn2_16b_8h_8, "sqshrn2 v4.16b, v29.8h, #8",  4, 29)
   3868 GEN_TWOVEC_TEST(uqshrn_2s_2d_1,   "uqshrn  v4.2s,  v29.2d, #1",  4, 29)
   3869 GEN_TWOVEC_TEST(uqshrn_2s_2d_17,  "uqshrn  v4.2s,  v29.2d, #17", 4, 29)
   3870 GEN_TWOVEC_TEST(uqshrn_2s_2d_32,  "uqshrn  v4.2s,  v29.2d, #32", 4, 29)
   3871 GEN_TWOVEC_TEST(uqshrn2_4s_2d_1,  "uqshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3872 GEN_TWOVEC_TEST(uqshrn2_4s_2d_17, "uqshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3873 GEN_TWOVEC_TEST(uqshrn2_4s_2d_32, "uqshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3874 GEN_TWOVEC_TEST(uqshrn_4h_4s_1,   "uqshrn  v4.4h,  v29.4s, #1",  4, 29)
   3875 GEN_TWOVEC_TEST(uqshrn_4h_4s_9,   "uqshrn  v4.4h,  v29.4s, #9",  4, 29)
   3876 GEN_TWOVEC_TEST(uqshrn_4h_4s_16,  "uqshrn  v4.4h,  v29.4s, #16", 4, 29)
   3877 GEN_TWOVEC_TEST(uqshrn2_8h_4s_1,  "uqshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3878 GEN_TWOVEC_TEST(uqshrn2_8h_4s_9,  "uqshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3879 GEN_TWOVEC_TEST(uqshrn2_8h_4s_16, "uqshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3880 GEN_TWOVEC_TEST(uqshrn_8b_8h_1,   "uqshrn  v4.8b,  v29.8h, #1",  4, 29)
   3881 GEN_TWOVEC_TEST(uqshrn_8b_8h_4,   "uqshrn  v4.8b,  v29.8h, #4",  4, 29)
   3882 GEN_TWOVEC_TEST(uqshrn_8b_8h_8,   "uqshrn  v4.8b,  v29.8h, #8",  4, 29)
   3883 GEN_TWOVEC_TEST(uqshrn2_16b_8h_1, "uqshrn2 v4.16b, v29.8h, #1",  4, 29)
   3884 GEN_TWOVEC_TEST(uqshrn2_16b_8h_4, "uqshrn2 v4.16b, v29.8h, #4",  4, 29)
   3885 GEN_TWOVEC_TEST(uqshrn2_16b_8h_8, "uqshrn2 v4.16b, v29.8h, #8",  4, 29)
   3886 GEN_TWOVEC_TEST(sqrshrun_2s_2d_1,   "sqrshrun  v4.2s,  v29.2d, #1",  4, 29)
   3887 GEN_TWOVEC_TEST(sqrshrun_2s_2d_17,  "sqrshrun  v4.2s,  v29.2d, #17", 4, 29)
   3888 GEN_TWOVEC_TEST(sqrshrun_2s_2d_32,  "sqrshrun  v4.2s,  v29.2d, #32", 4, 29)
   3889 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_1,  "sqrshrun2 v4.4s,  v29.2d, #1",  4, 29)
   3890 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_17, "sqrshrun2 v4.4s,  v29.2d, #17", 4, 29)
   3891 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_32, "sqrshrun2 v4.4s,  v29.2d, #32", 4, 29)
   3892 GEN_TWOVEC_TEST(sqrshrun_4h_4s_1,   "sqrshrun  v4.4h,  v29.4s, #1",  4, 29)
   3893 GEN_TWOVEC_TEST(sqrshrun_4h_4s_9,   "sqrshrun  v4.4h,  v29.4s, #9",  4, 29)
   3894 GEN_TWOVEC_TEST(sqrshrun_4h_4s_16,  "sqrshrun  v4.4h,  v29.4s, #16", 4, 29)
   3895 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_1,  "sqrshrun2 v4.8h,  v29.4s, #1",  4, 29)
   3896 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_9,  "sqrshrun2 v4.8h,  v29.4s, #9",  4, 29)
   3897 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_16, "sqrshrun2 v4.8h,  v29.4s, #16", 4, 29)
   3898 GEN_TWOVEC_TEST(sqrshrun_8b_8h_1,   "sqrshrun  v4.8b,  v29.8h, #1",  4, 29)
   3899 GEN_TWOVEC_TEST(sqrshrun_8b_8h_4,   "sqrshrun  v4.8b,  v29.8h, #4",  4, 29)
   3900 GEN_TWOVEC_TEST(sqrshrun_8b_8h_8,   "sqrshrun  v4.8b,  v29.8h, #8",  4, 29)
   3901 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_1, "sqrshrun2 v4.16b, v29.8h, #1",  4, 29)
   3902 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_4, "sqrshrun2 v4.16b, v29.8h, #4",  4, 29)
   3903 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_8, "sqrshrun2 v4.16b, v29.8h, #8",  4, 29)
   3904 GEN_TWOVEC_TEST(sqshrun_2s_2d_1,   "sqshrun  v4.2s,  v29.2d, #1",  4, 29)
   3905 GEN_TWOVEC_TEST(sqshrun_2s_2d_17,  "sqshrun  v4.2s,  v29.2d, #17", 4, 29)
   3906 GEN_TWOVEC_TEST(sqshrun_2s_2d_32,  "sqshrun  v4.2s,  v29.2d, #32", 4, 29)
   3907 GEN_TWOVEC_TEST(sqshrun2_4s_2d_1,  "sqshrun2 v4.4s,  v29.2d, #1",  4, 29)
   3908 GEN_TWOVEC_TEST(sqshrun2_4s_2d_17, "sqshrun2 v4.4s,  v29.2d, #17", 4, 29)
   3909 GEN_TWOVEC_TEST(sqshrun2_4s_2d_32, "sqshrun2 v4.4s,  v29.2d, #32", 4, 29)
   3910 GEN_TWOVEC_TEST(sqshrun_4h_4s_1,   "sqshrun  v4.4h,  v29.4s, #1",  4, 29)
   3911 GEN_TWOVEC_TEST(sqshrun_4h_4s_9,   "sqshrun  v4.4h,  v29.4s, #9",  4, 29)
   3912 GEN_TWOVEC_TEST(sqshrun_4h_4s_16,  "sqshrun  v4.4h,  v29.4s, #16", 4, 29)
   3913 GEN_TWOVEC_TEST(sqshrun2_8h_4s_1,  "sqshrun2 v4.8h,  v29.4s, #1",  4, 29)
   3914 GEN_TWOVEC_TEST(sqshrun2_8h_4s_9,  "sqshrun2 v4.8h,  v29.4s, #9",  4, 29)
   3915 GEN_TWOVEC_TEST(sqshrun2_8h_4s_16, "sqshrun2 v4.8h,  v29.4s, #16", 4, 29)
   3916 GEN_TWOVEC_TEST(sqshrun_8b_8h_1,   "sqshrun  v4.8b,  v29.8h, #1",  4, 29)
   3917 GEN_TWOVEC_TEST(sqshrun_8b_8h_4,   "sqshrun  v4.8b,  v29.8h, #4",  4, 29)
   3918 GEN_TWOVEC_TEST(sqshrun_8b_8h_8,   "sqshrun  v4.8b,  v29.8h, #8",  4, 29)
   3919 GEN_TWOVEC_TEST(sqshrun2_16b_8h_1, "sqshrun2 v4.16b, v29.8h, #1",  4, 29)
   3920 GEN_TWOVEC_TEST(sqshrun2_16b_8h_4, "sqshrun2 v4.16b, v29.8h, #4",  4, 29)
   3921 GEN_TWOVEC_TEST(sqshrun2_16b_8h_8, "sqshrun2 v4.16b, v29.8h, #8",  4, 29)
   3922 
   /* Scalar saturating shift left by immediate: SQSHL, UQSHL and
      SQSHLU on D, S, H and B registers.  Shift amounts run from 0 up to
      (register width - 1): 0/32/63 for D, 0/16/31 for S, 0/8/15 for H.
      The B forms get denser coverage (0,1,4,6,7 for sqshl/uqshl and
      every value 0..7 for sqshlu).  Destination is register 5, source
      is register 28. */
   3923 GEN_TWOVEC_TEST(sqshl_d_d_0,  "sqshl d5, d28, #0",  5, 28)
   3924 GEN_TWOVEC_TEST(sqshl_d_d_32, "sqshl d5, d28, #32", 5, 28)
   3925 GEN_TWOVEC_TEST(sqshl_d_d_63, "sqshl d5, d28, #63", 5, 28)
   3926 GEN_TWOVEC_TEST(sqshl_s_s_0,  "sqshl s5, s28, #0",  5, 28)
   3927 GEN_TWOVEC_TEST(sqshl_s_s_16, "sqshl s5, s28, #16", 5, 28)
   3928 GEN_TWOVEC_TEST(sqshl_s_s_31, "sqshl s5, s28, #31", 5, 28)
   3929 GEN_TWOVEC_TEST(sqshl_h_h_0,  "sqshl h5, h28, #0",  5, 28)
   3930 GEN_TWOVEC_TEST(sqshl_h_h_8,  "sqshl h5, h28, #8",  5, 28)
   3931 GEN_TWOVEC_TEST(sqshl_h_h_15, "sqshl h5, h28, #15", 5, 28)
   3932 GEN_TWOVEC_TEST(sqshl_b_b_0,  "sqshl b5, b28, #0",  5, 28)
   3933 GEN_TWOVEC_TEST(sqshl_b_b_1,  "sqshl b5, b28, #1",  5, 28)
   3934 GEN_TWOVEC_TEST(sqshl_b_b_4,  "sqshl b5, b28, #4",  5, 28)
   3935 GEN_TWOVEC_TEST(sqshl_b_b_6,  "sqshl b5, b28, #6",  5, 28)
   3936 GEN_TWOVEC_TEST(sqshl_b_b_7,  "sqshl b5, b28, #7",  5, 28)
   3937 GEN_TWOVEC_TEST(uqshl_d_d_0,  "uqshl d5, d28, #0",  5, 28)
   3938 GEN_TWOVEC_TEST(uqshl_d_d_32, "uqshl d5, d28, #32", 5, 28)
   3939 GEN_TWOVEC_TEST(uqshl_d_d_63, "uqshl d5, d28, #63", 5, 28)
   3940 GEN_TWOVEC_TEST(uqshl_s_s_0,  "uqshl s5, s28, #0",  5, 28)
   3941 GEN_TWOVEC_TEST(uqshl_s_s_16, "uqshl s5, s28, #16", 5, 28)
   3942 GEN_TWOVEC_TEST(uqshl_s_s_31, "uqshl s5, s28, #31", 5, 28)
   3943 GEN_TWOVEC_TEST(uqshl_h_h_0,  "uqshl h5, h28, #0",  5, 28)
   3944 GEN_TWOVEC_TEST(uqshl_h_h_8,  "uqshl h5, h28, #8",  5, 28)
   3945 GEN_TWOVEC_TEST(uqshl_h_h_15, "uqshl h5, h28, #15", 5, 28)
   3946 GEN_TWOVEC_TEST(uqshl_b_b_0,  "uqshl b5, b28, #0",  5, 28)
   3947 GEN_TWOVEC_TEST(uqshl_b_b_1,  "uqshl b5, b28, #1",  5, 28)
   3948 GEN_TWOVEC_TEST(uqshl_b_b_4,  "uqshl b5, b28, #4",  5, 28)
   3949 GEN_TWOVEC_TEST(uqshl_b_b_6,  "uqshl b5, b28, #6",  5, 28)
   3950 GEN_TWOVEC_TEST(uqshl_b_b_7,  "uqshl b5, b28, #7",  5, 28)
   3951 GEN_TWOVEC_TEST(sqshlu_d_d_0,  "sqshlu d5, d28, #0",  5, 28)
   3952 GEN_TWOVEC_TEST(sqshlu_d_d_32, "sqshlu d5, d28, #32", 5, 28)
   3953 GEN_TWOVEC_TEST(sqshlu_d_d_63, "sqshlu d5, d28, #63", 5, 28)
   3954 GEN_TWOVEC_TEST(sqshlu_s_s_0,  "sqshlu s5, s28, #0",  5, 28)
   3955 GEN_TWOVEC_TEST(sqshlu_s_s_16, "sqshlu s5, s28, #16", 5, 28)
   3956 GEN_TWOVEC_TEST(sqshlu_s_s_31, "sqshlu s5, s28, #31", 5, 28)
   3957 GEN_TWOVEC_TEST(sqshlu_h_h_0,  "sqshlu h5, h28, #0",  5, 28)
   3958 GEN_TWOVEC_TEST(sqshlu_h_h_8,  "sqshlu h5, h28, #8",  5, 28)
   3959 GEN_TWOVEC_TEST(sqshlu_h_h_15, "sqshlu h5, h28, #15", 5, 28)
   3960 GEN_TWOVEC_TEST(sqshlu_b_b_0,  "sqshlu b5, b28, #0",  5, 28)
   3961 GEN_TWOVEC_TEST(sqshlu_b_b_1,  "sqshlu b5, b28, #1",  5, 28)
   3962 GEN_TWOVEC_TEST(sqshlu_b_b_2,  "sqshlu b5, b28, #2",  5, 28)
   3963 GEN_TWOVEC_TEST(sqshlu_b_b_3,  "sqshlu b5, b28, #3",  5, 28)
   3964 GEN_TWOVEC_TEST(sqshlu_b_b_4,  "sqshlu b5, b28, #4",  5, 28)
   3965 GEN_TWOVEC_TEST(sqshlu_b_b_5,  "sqshlu b5, b28, #5",  5, 28)
   3966 GEN_TWOVEC_TEST(sqshlu_b_b_6,  "sqshlu b5, b28, #6",  5, 28)
   3967 GEN_TWOVEC_TEST(sqshlu_b_b_7,  "sqshlu b5, b28, #7",  5, 28)
   3968 
   /* Vector saturating shift left by immediate: SQSHL, UQSHL and
      SQSHLU on all seven arrangements (2d, 4s, 2s, 8h, 4h, 16b, 8b).
      Each arrangement is tested with shift 0, a mid-range shift, and
      (lane width - 1).  Destination is v6, source is v27, matching the
      trailing "6, 27" arguments. */
   3969 GEN_TWOVEC_TEST(sqshl_2d_2d_0,   "sqshl v6.2d,  v27.2d, #0",  6, 27)
   3970 GEN_TWOVEC_TEST(sqshl_2d_2d_32,  "sqshl v6.2d,  v27.2d, #32", 6, 27)
   3971 GEN_TWOVEC_TEST(sqshl_2d_2d_63,  "sqshl v6.2d,  v27.2d, #63", 6, 27)
   3972 GEN_TWOVEC_TEST(sqshl_4s_4s_0,   "sqshl v6.4s,  v27.4s, #0",  6, 27)
   3973 GEN_TWOVEC_TEST(sqshl_4s_4s_16,  "sqshl v6.4s,  v27.4s, #16", 6, 27)
   3974 GEN_TWOVEC_TEST(sqshl_4s_4s_31,  "sqshl v6.4s,  v27.4s, #31", 6, 27)
   3975 GEN_TWOVEC_TEST(sqshl_2s_2s_0,   "sqshl v6.2s,  v27.2s, #0",  6, 27)
   3976 GEN_TWOVEC_TEST(sqshl_2s_2s_16,  "sqshl v6.2s,  v27.2s, #16", 6, 27)
   3977 GEN_TWOVEC_TEST(sqshl_2s_2s_31,  "sqshl v6.2s,  v27.2s, #31", 6, 27)
   3978 GEN_TWOVEC_TEST(sqshl_8h_8h_0,   "sqshl v6.8h,  v27.8h, #0",  6, 27)
   3979 GEN_TWOVEC_TEST(sqshl_8h_8h_8,   "sqshl v6.8h,  v27.8h, #8",  6, 27)
   3980 GEN_TWOVEC_TEST(sqshl_8h_8h_15,  "sqshl v6.8h,  v27.8h, #15", 6, 27)
   3981 GEN_TWOVEC_TEST(sqshl_4h_4h_0,   "sqshl v6.4h,  v27.4h, #0",  6, 27)
   3982 GEN_TWOVEC_TEST(sqshl_4h_4h_8,   "sqshl v6.4h,  v27.4h, #8",  6, 27)
   3983 GEN_TWOVEC_TEST(sqshl_4h_4h_15,  "sqshl v6.4h,  v27.4h, #15", 6, 27)
   3984 GEN_TWOVEC_TEST(sqshl_16b_16b_0, "sqshl v6.16b, v27.16b, #0", 6, 27)
   3985 GEN_TWOVEC_TEST(sqshl_16b_16b_3, "sqshl v6.16b, v27.16b, #3", 6, 27)
   3986 GEN_TWOVEC_TEST(sqshl_16b_16b_7, "sqshl v6.16b, v27.16b, #7", 6, 27)
   3987 GEN_TWOVEC_TEST(sqshl_8b_8b_0,   "sqshl v6.8b,  v27.8b, #0",  6, 27)
   3988 GEN_TWOVEC_TEST(sqshl_8b_8b_3,   "sqshl v6.8b,  v27.8b, #3",  6, 27)
   3989 GEN_TWOVEC_TEST(sqshl_8b_8b_7,   "sqshl v6.8b,  v27.8b, #7",  6, 27)
   3990 GEN_TWOVEC_TEST(uqshl_2d_2d_0,   "uqshl v6.2d,  v27.2d, #0",  6, 27)
   3991 GEN_TWOVEC_TEST(uqshl_2d_2d_32,  "uqshl v6.2d,  v27.2d, #32", 6, 27)
   3992 GEN_TWOVEC_TEST(uqshl_2d_2d_63,  "uqshl v6.2d,  v27.2d, #63", 6, 27)
   3993 GEN_TWOVEC_TEST(uqshl_4s_4s_0,   "uqshl v6.4s,  v27.4s, #0",  6, 27)
   3994 GEN_TWOVEC_TEST(uqshl_4s_4s_16,  "uqshl v6.4s,  v27.4s, #16", 6, 27)
   3995 GEN_TWOVEC_TEST(uqshl_4s_4s_31,  "uqshl v6.4s,  v27.4s, #31", 6, 27)
   3996 GEN_TWOVEC_TEST(uqshl_2s_2s_0,   "uqshl v6.2s,  v27.2s, #0",  6, 27)
   3997 GEN_TWOVEC_TEST(uqshl_2s_2s_16,  "uqshl v6.2s,  v27.2s, #16", 6, 27)
   3998 GEN_TWOVEC_TEST(uqshl_2s_2s_31,  "uqshl v6.2s,  v27.2s, #31", 6, 27)
   3999 GEN_TWOVEC_TEST(uqshl_8h_8h_0,   "uqshl v6.8h,  v27.8h, #0",  6, 27)
   4000 GEN_TWOVEC_TEST(uqshl_8h_8h_8,   "uqshl v6.8h,  v27.8h, #8",  6, 27)
   4001 GEN_TWOVEC_TEST(uqshl_8h_8h_15,  "uqshl v6.8h,  v27.8h, #15", 6, 27)
   4002 GEN_TWOVEC_TEST(uqshl_4h_4h_0,   "uqshl v6.4h,  v27.4h, #0",  6, 27)
   4003 GEN_TWOVEC_TEST(uqshl_4h_4h_8,   "uqshl v6.4h,  v27.4h, #8",  6, 27)
   4004 GEN_TWOVEC_TEST(uqshl_4h_4h_15,  "uqshl v6.4h,  v27.4h, #15", 6, 27)
   4005 GEN_TWOVEC_TEST(uqshl_16b_16b_0, "uqshl v6.16b, v27.16b, #0", 6, 27)
   4006 GEN_TWOVEC_TEST(uqshl_16b_16b_3, "uqshl v6.16b, v27.16b, #3", 6, 27)
   4007 GEN_TWOVEC_TEST(uqshl_16b_16b_7, "uqshl v6.16b, v27.16b, #7", 6, 27)
   4008 GEN_TWOVEC_TEST(uqshl_8b_8b_0,   "uqshl v6.8b,  v27.8b, #0",  6, 27)
   4009 GEN_TWOVEC_TEST(uqshl_8b_8b_3,   "uqshl v6.8b,  v27.8b, #3",  6, 27)
   4010 GEN_TWOVEC_TEST(uqshl_8b_8b_7,   "uqshl v6.8b,  v27.8b, #7",  6, 27)
   4011 GEN_TWOVEC_TEST(sqshlu_2d_2d_0,   "sqshlu v6.2d,  v27.2d, #0",  6, 27)
   4012 GEN_TWOVEC_TEST(sqshlu_2d_2d_32,  "sqshlu v6.2d,  v27.2d, #32", 6, 27)
   4013 GEN_TWOVEC_TEST(sqshlu_2d_2d_63,  "sqshlu v6.2d,  v27.2d, #63", 6, 27)
   4014 GEN_TWOVEC_TEST(sqshlu_4s_4s_0,   "sqshlu v6.4s,  v27.4s, #0",  6, 27)
   4015 GEN_TWOVEC_TEST(sqshlu_4s_4s_16,  "sqshlu v6.4s,  v27.4s, #16", 6, 27)
   4016 GEN_TWOVEC_TEST(sqshlu_4s_4s_31,  "sqshlu v6.4s,  v27.4s, #31", 6, 27)
   4017 GEN_TWOVEC_TEST(sqshlu_2s_2s_0,   "sqshlu v6.2s,  v27.2s, #0",  6, 27)
   4018 GEN_TWOVEC_TEST(sqshlu_2s_2s_16,  "sqshlu v6.2s,  v27.2s, #16", 6, 27)
   4019 GEN_TWOVEC_TEST(sqshlu_2s_2s_31,  "sqshlu v6.2s,  v27.2s, #31", 6, 27)
   4020 GEN_TWOVEC_TEST(sqshlu_8h_8h_0,   "sqshlu v6.8h,  v27.8h, #0",  6, 27)
   4021 GEN_TWOVEC_TEST(sqshlu_8h_8h_8,   "sqshlu v6.8h,  v27.8h, #8",  6, 27)
   4022 GEN_TWOVEC_TEST(sqshlu_8h_8h_15,  "sqshlu v6.8h,  v27.8h, #15", 6, 27)
   4023 GEN_TWOVEC_TEST(sqshlu_4h_4h_0,   "sqshlu v6.4h,  v27.4h, #0",  6, 27)
   4024 GEN_TWOVEC_TEST(sqshlu_4h_4h_8,   "sqshlu v6.4h,  v27.4h, #8",  6, 27)
   4025 GEN_TWOVEC_TEST(sqshlu_4h_4h_15,  "sqshlu v6.4h,  v27.4h, #15", 6, 27)
   4026 GEN_TWOVEC_TEST(sqshlu_16b_16b_0, "sqshlu v6.16b, v27.16b, #0", 6, 27)
   4027 GEN_TWOVEC_TEST(sqshlu_16b_16b_3, "sqshlu v6.16b, v27.16b, #3", 6, 27)
   4028 GEN_TWOVEC_TEST(sqshlu_16b_16b_7, "sqshlu v6.16b, v27.16b, #7", 6, 27)
   4029 GEN_TWOVEC_TEST(sqshlu_8b_8b_0,   "sqshlu v6.8b,  v27.8b, #0",  6, 27)
   4030 GEN_TWOVEC_TEST(sqshlu_8b_8b_3,   "sqshlu v6.8b,  v27.8b, #3",  6, 27)
   4031 GEN_TWOVEC_TEST(sqshlu_8b_8b_7,   "sqshlu v6.8b,  v27.8b, #7",  6, 27)
   4032 
   /* Scalar saturating extract narrow: SQXTN, UQXTN and SQXTUN for the
      d->s, s->h and h->b narrowings.  Destination is register 31 and
      source is register 0, i.e. the two ends of the vector register
      file. */
   4033 GEN_TWOVEC_TEST(sqxtn_s_d,  "sqxtn s31,  d0", 31, 0)
   4034 GEN_TWOVEC_TEST(sqxtn_h_s,  "sqxtn h31,  s0", 31, 0)
   4035 GEN_TWOVEC_TEST(sqxtn_b_h,  "sqxtn b31,  h0", 31, 0)
   4036 GEN_TWOVEC_TEST(uqxtn_s_d,  "uqxtn s31,  d0", 31, 0)
   4037 GEN_TWOVEC_TEST(uqxtn_h_s,  "uqxtn h31,  s0", 31, 0)
   4038 GEN_TWOVEC_TEST(uqxtn_b_h,  "uqxtn b31,  h0", 31, 0)
   4039 GEN_TWOVEC_TEST(sqxtun_s_d, "sqxtun s31, d0", 31, 0)
   4040 GEN_TWOVEC_TEST(sqxtun_h_s, "sqxtun h31, s0", 31, 0)
   4041 GEN_TWOVEC_TEST(sqxtun_b_h, "sqxtun b31, h0", 31, 0)
   4042 
   /* Vector saturating extract narrow: SQXTN, UQXTN, SQXTUN and their
      "2" forms, for the 2d->2s/4s, 4s->4h/8h and 8h->8b/16b narrowings.
      NOTE(review): GEN_UNARY_TEST is defined outside this chunk; its
      arguments appear to be (mnemonic, destination arrangement, source
      arrangement), with the registers chosen by the macro -- confirm
      against the macro definition. */
   4043 GEN_UNARY_TEST(sqxtn,   2s, 2d)
   4044 GEN_UNARY_TEST(sqxtn2,  4s, 2d)
   4045 GEN_UNARY_TEST(sqxtn,   4h, 4s)
   4046 GEN_UNARY_TEST(sqxtn2,  8h, 4s)
   4047 GEN_UNARY_TEST(sqxtn,   8b, 8h)
   4048 GEN_UNARY_TEST(sqxtn2, 16b, 8h)
   4049 GEN_UNARY_TEST(uqxtn,   2s, 2d)
   4050 GEN_UNARY_TEST(uqxtn2,  4s, 2d)
   4051 GEN_UNARY_TEST(uqxtn,   4h, 4s)
   4052 GEN_UNARY_TEST(uqxtn2,  8h, 4s)
   4053 GEN_UNARY_TEST(uqxtn,   8b, 8h)
   4054 GEN_UNARY_TEST(uqxtn2, 16b, 8h)
   4055 GEN_UNARY_TEST(sqxtun,   2s, 2d)
   4056 GEN_UNARY_TEST(sqxtun2,  4s, 2d)
   4057 GEN_UNARY_TEST(sqxtun,   4h, 4s)
   4058 GEN_UNARY_TEST(sqxtun2,  8h, 4s)
   4059 GEN_UNARY_TEST(sqxtun,   8b, 8h)
   4060 GEN_UNARY_TEST(sqxtun2, 16b, 8h)
   4061 
   /* SRHADD / URHADD (signed / unsigned rounding halving add), vector
      forms only.  Arrangements covered: 4s, 2s, 8h, 4h, 16b and 8b
      (there is no 2d case in this list).  Registers are v2
      (destination), v11 and v29. */
   4062 GEN_THREEVEC_TEST(srhadd_4s_4s_4s,"srhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   4063 GEN_THREEVEC_TEST(srhadd_2s_2s_2s,"srhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   4064 GEN_THREEVEC_TEST(srhadd_8h_8h_8h,"srhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   4065 GEN_THREEVEC_TEST(srhadd_4h_4h_4h,"srhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   4066 GEN_THREEVEC_TEST(srhadd_16b_16b_16b,
   4067                                   "srhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   4068 GEN_THREEVEC_TEST(srhadd_8b_8b_8b,"srhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   4069 GEN_THREEVEC_TEST(urhadd_4s_4s_4s,"urhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   4070 GEN_THREEVEC_TEST(urhadd_2s_2s_2s,"urhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   4071 GEN_THREEVEC_TEST(urhadd_8h_8h_8h,"urhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   4072 GEN_THREEVEC_TEST(urhadd_4h_4h_4h,"urhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   4073 GEN_THREEVEC_TEST(urhadd_16b_16b_16b,
   4074                                   "urhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   4075 GEN_THREEVEC_TEST(urhadd_8b_8b_8b,"urhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   4076 
   /* Scalar SSHL / USHL (shift left by register).  Only the D-register
      form appears here, using d29 (destination), d28 and d27. */
   4077 GEN_THREEVEC_TEST(sshl_d_d_d, "sshl d29, d28, d27", 29, 28, 27)
   4078 GEN_THREEVEC_TEST(ushl_d_d_d, "ushl d29, d28, d27", 29, 28, 27)
   4079 
   /* Vector SSHL / USHL (shift left by register) on all seven
      arrangements (2d, 4s, 2s, 8h, 4h, 16b, 8b), using v29
      (destination), v28 and v27. */
   4080 GEN_THREEVEC_TEST(sshl_2d_2d_2d,    "sshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4081 GEN_THREEVEC_TEST(sshl_4s_4s_4s,    "sshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4082 GEN_THREEVEC_TEST(sshl_2s_2s_2s,    "sshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4083 GEN_THREEVEC_TEST(sshl_8h_8h_8h,    "sshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4084 GEN_THREEVEC_TEST(sshl_4h_4h_4h,    "sshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4085 GEN_THREEVEC_TEST(sshl_16b_16b_16b, "sshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4086 GEN_THREEVEC_TEST(sshl_8b_8b_8b,    "sshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4087 GEN_THREEVEC_TEST(ushl_2d_2d_2d,    "ushl v29.2d, v28.2d, v27.2d", 29,28,27)
   4088 GEN_THREEVEC_TEST(ushl_4s_4s_4s,    "ushl v29.4s, v28.4s, v27.4s", 29,28,27)
   4089 GEN_THREEVEC_TEST(ushl_2s_2s_2s,    "ushl v29.2s, v28.2s, v27.2s", 29,28,27)
   4090 GEN_THREEVEC_TEST(ushl_8h_8h_8h,    "ushl v29.8h, v28.8h, v27.8h", 29,28,27)
   4091 GEN_THREEVEC_TEST(ushl_4h_4h_4h,    "ushl v29.4h, v28.4h, v27.4h", 29,28,27)
   4092 GEN_THREEVEC_TEST(ushl_16b_16b_16b, "ushl v29.16b, v28.16b, v27.16b", 29,28,27)
   4093 GEN_THREEVEC_TEST(ushl_8b_8b_8b,    "ushl v29.8b, v28.8b, v27.8b", 29,28,27)
   4094 
   /* Scalar shift by immediate on D registers: SHL with shifts 0, 32
      and 63; SSHR and USHR with shifts 1, 32 and 64.  Note the
      different ranges: left shifts start at 0 and stop at 63, right
      shifts start at 1 and go up to 64.  Destination is d5, source is
      d28. */
   4095 GEN_TWOVEC_TEST(shl_d_d_0,  "shl d5, d28, #0",  5, 28)
   4096 GEN_TWOVEC_TEST(shl_d_d_32, "shl d5, d28, #32", 5, 28)
   4097 GEN_TWOVEC_TEST(shl_d_d_63, "shl d5, d28, #63", 5, 28)
   4098 GEN_TWOVEC_TEST(sshr_d_d_1,  "sshr d5, d28, #1",  5, 28)
   4099 GEN_TWOVEC_TEST(sshr_d_d_32, "sshr d5, d28, #32", 5, 28)
   4100 GEN_TWOVEC_TEST(sshr_d_d_64, "sshr d5, d28, #64", 5, 28)
   4101 GEN_TWOVEC_TEST(ushr_d_d_1,  "ushr d5, d28, #1",  5, 28)
   4102 GEN_TWOVEC_TEST(ushr_d_d_32, "ushr d5, d28, #32", 5, 28)
   4103 GEN_TWOVEC_TEST(ushr_d_d_64, "ushr d5, d28, #64", 5, 28)
   4104 
   /* Vector shift by immediate: SHL, SSHR and USHR on all seven
      arrangements.  SHL uses shifts from 0 to (lane width - 1); SSHR
      and USHR use shifts from 1 to the lane width.  The 2d/4s/2s/8h/4h
      arrangements get three shift amounts each (lowest, 13, highest);
      the byte arrangements get only the lowest and highest.
      NOTE(review): GEN_SHIFT_TEST is defined outside this chunk; its
      arguments appear to be (mnemonic, destination arrangement, source
      arrangement, shift amount), with the registers chosen by the
      macro -- confirm against the macro definition. */
   4105 GEN_SHIFT_TEST(shl,  2d, 2d, 0)
   4106 GEN_SHIFT_TEST(shl,  2d, 2d, 13)
   4107 GEN_SHIFT_TEST(shl,  2d, 2d, 63)
   4108 GEN_SHIFT_TEST(shl,  4s, 4s, 0)
   4109 GEN_SHIFT_TEST(shl,  4s, 4s, 13)
   4110 GEN_SHIFT_TEST(shl,  4s, 4s, 31)
   4111 GEN_SHIFT_TEST(shl,  2s, 2s, 0)
   4112 GEN_SHIFT_TEST(shl,  2s, 2s, 13)
   4113 GEN_SHIFT_TEST(shl,  2s, 2s, 31)
   4114 GEN_SHIFT_TEST(shl,  8h, 8h, 0)
   4115 GEN_SHIFT_TEST(shl,  8h, 8h, 13)
   4116 GEN_SHIFT_TEST(shl,  8h, 8h, 15)
   4117 GEN_SHIFT_TEST(shl,  4h, 4h, 0)
   4118 GEN_SHIFT_TEST(shl,  4h, 4h, 13)
   4119 GEN_SHIFT_TEST(shl,  4h, 4h, 15)
   4120 GEN_SHIFT_TEST(shl,  16b, 16b, 0)
   4121 GEN_SHIFT_TEST(shl,  16b, 16b, 7)
   4122 GEN_SHIFT_TEST(shl,  8b, 8b, 0)
   4123 GEN_SHIFT_TEST(shl,  8b, 8b, 7)
   4124 GEN_SHIFT_TEST(sshr, 2d, 2d, 1)
   4125 GEN_SHIFT_TEST(sshr, 2d, 2d, 13)
   4126 GEN_SHIFT_TEST(sshr, 2d, 2d, 64)
   4127 GEN_SHIFT_TEST(sshr, 4s, 4s, 1)
   4128 GEN_SHIFT_TEST(sshr, 4s, 4s, 13)
   4129 GEN_SHIFT_TEST(sshr, 4s, 4s, 32)
   4130 GEN_SHIFT_TEST(sshr, 2s, 2s, 1)
   4131 GEN_SHIFT_TEST(sshr, 2s, 2s, 13)
   4132 GEN_SHIFT_TEST(sshr, 2s, 2s, 32)
   4133 GEN_SHIFT_TEST(sshr, 8h, 8h, 1)
   4134 GEN_SHIFT_TEST(sshr, 8h, 8h, 13)
   4135 GEN_SHIFT_TEST(sshr, 8h, 8h, 16)
   4136 GEN_SHIFT_TEST(sshr, 4h, 4h, 1)
   4137 GEN_SHIFT_TEST(sshr, 4h, 4h, 13)
   4138 GEN_SHIFT_TEST(sshr, 4h, 4h, 16)
   4139 GEN_SHIFT_TEST(sshr, 16b, 16b, 1)
   4140 GEN_SHIFT_TEST(sshr, 16b, 16b, 8)
   4141 GEN_SHIFT_TEST(sshr, 8b, 8b, 1)
   4142 GEN_SHIFT_TEST(sshr, 8b, 8b, 8)
   4143 GEN_SHIFT_TEST(ushr, 2d, 2d, 1)
   4144 GEN_SHIFT_TEST(ushr, 2d, 2d, 13)
   4145 GEN_SHIFT_TEST(ushr, 2d, 2d, 64)
   4146 GEN_SHIFT_TEST(ushr, 4s, 4s, 1)
   4147 GEN_SHIFT_TEST(ushr, 4s, 4s, 13)
   4148 GEN_SHIFT_TEST(ushr, 4s, 4s, 32)
   4149 GEN_SHIFT_TEST(ushr, 2s, 2s, 1)
   4150 GEN_SHIFT_TEST(ushr, 2s, 2s, 13)
   4151 GEN_SHIFT_TEST(ushr, 2s, 2s, 32)
   4152 GEN_SHIFT_TEST(ushr, 8h, 8h, 1)
   4153 GEN_SHIFT_TEST(ushr, 8h, 8h, 13)
   4154 GEN_SHIFT_TEST(ushr, 8h, 8h, 16)
   4155 GEN_SHIFT_TEST(ushr, 4h, 4h, 1)
   4156 GEN_SHIFT_TEST(ushr, 4h, 4h, 13)
   4157 GEN_SHIFT_TEST(ushr, 4h, 4h, 16)
   4158 GEN_SHIFT_TEST(ushr, 16b, 16b, 1)
   4159 GEN_SHIFT_TEST(ushr, 16b, 16b, 8)
   4160 GEN_SHIFT_TEST(ushr, 8b, 8b, 1)
   4161 GEN_SHIFT_TEST(ushr, 8b, 8b, 8)
   4162 
   /* Scalar SSRA / USRA (signed / unsigned shift right and accumulate)
      on D registers with shifts 1, 32 and 64.  Destination is d5,
      source is d28. */
   4163 GEN_TWOVEC_TEST(ssra_d_d_1,  "ssra d5, d28, #1",  5, 28)
   4164 GEN_TWOVEC_TEST(ssra_d_d_32, "ssra d5, d28, #32", 5, 28)
   4165 GEN_TWOVEC_TEST(ssra_d_d_64, "ssra d5, d28, #64", 5, 28)
   4166 GEN_TWOVEC_TEST(usra_d_d_1,  "usra d5, d28, #1",  5, 28)
   4167 GEN_TWOVEC_TEST(usra_d_d_32, "usra d5, d28, #32", 5, 28)
   4168 GEN_TWOVEC_TEST(usra_d_d_64, "usra d5, d28, #64", 5, 28)
   4169 
   // Vector SSRA/USRA, destination v6 and source v27, for each of the seven
   // arrangements (2d, 4s, 2s, 8h, 4h, 16b, 8b).  Every arrangement is tested
   // with three immediates: 1, an intermediate value, and the lane width in
   // bits (64, 32, 16 or 8).
   4170 GEN_TWOVEC_TEST(ssra_2d_2d_1,   "ssra v6.2d,  v27.2d, #1",  6, 27)
   4171 GEN_TWOVEC_TEST(ssra_2d_2d_32,  "ssra v6.2d,  v27.2d, #32", 6, 27)
   4172 GEN_TWOVEC_TEST(ssra_2d_2d_64,  "ssra v6.2d,  v27.2d, #64", 6, 27)
   4173 GEN_TWOVEC_TEST(ssra_4s_4s_1,   "ssra v6.4s,  v27.4s, #1",  6, 27)
   4174 GEN_TWOVEC_TEST(ssra_4s_4s_16,  "ssra v6.4s,  v27.4s, #16", 6, 27)
   4175 GEN_TWOVEC_TEST(ssra_4s_4s_32,  "ssra v6.4s,  v27.4s, #32", 6, 27)
   4176 GEN_TWOVEC_TEST(ssra_2s_2s_1,   "ssra v6.2s,  v27.2s, #1",  6, 27)
   4177 GEN_TWOVEC_TEST(ssra_2s_2s_16,  "ssra v6.2s,  v27.2s, #16", 6, 27)
   4178 GEN_TWOVEC_TEST(ssra_2s_2s_32,  "ssra v6.2s,  v27.2s, #32", 6, 27)
   4179 GEN_TWOVEC_TEST(ssra_8h_8h_1,   "ssra v6.8h,  v27.8h, #1",  6, 27)
   4180 GEN_TWOVEC_TEST(ssra_8h_8h_8,   "ssra v6.8h,  v27.8h, #8",  6, 27)
   4181 GEN_TWOVEC_TEST(ssra_8h_8h_16,  "ssra v6.8h,  v27.8h, #16", 6, 27)
   4182 GEN_TWOVEC_TEST(ssra_4h_4h_1,   "ssra v6.4h,  v27.4h, #1",  6, 27)
   4183 GEN_TWOVEC_TEST(ssra_4h_4h_8,   "ssra v6.4h,  v27.4h, #8",  6, 27)
   4184 GEN_TWOVEC_TEST(ssra_4h_4h_16,  "ssra v6.4h,  v27.4h, #16", 6, 27)
   4185 GEN_TWOVEC_TEST(ssra_16b_16b_1, "ssra v6.16b, v27.16b, #1", 6, 27)
   4186 GEN_TWOVEC_TEST(ssra_16b_16b_3, "ssra v6.16b, v27.16b, #3", 6, 27)
   4187 GEN_TWOVEC_TEST(ssra_16b_16b_8, "ssra v6.16b, v27.16b, #8", 6, 27)
   4188 GEN_TWOVEC_TEST(ssra_8b_8b_1,   "ssra v6.8b,  v27.8b, #1",  6, 27)
   4189 GEN_TWOVEC_TEST(ssra_8b_8b_3,   "ssra v6.8b,  v27.8b, #3",  6, 27)
   4190 GEN_TWOVEC_TEST(ssra_8b_8b_8,   "ssra v6.8b,  v27.8b, #8",  6, 27)
   4191 GEN_TWOVEC_TEST(usra_2d_2d_1,   "usra v6.2d,  v27.2d, #1",  6, 27)
   4192 GEN_TWOVEC_TEST(usra_2d_2d_32,  "usra v6.2d,  v27.2d, #32", 6, 27)
   4193 GEN_TWOVEC_TEST(usra_2d_2d_64,  "usra v6.2d,  v27.2d, #64", 6, 27)
   4194 GEN_TWOVEC_TEST(usra_4s_4s_1,   "usra v6.4s,  v27.4s, #1",  6, 27)
   4195 GEN_TWOVEC_TEST(usra_4s_4s_16,  "usra v6.4s,  v27.4s, #16", 6, 27)
   4196 GEN_TWOVEC_TEST(usra_4s_4s_32,  "usra v6.4s,  v27.4s, #32", 6, 27)
   4197 GEN_TWOVEC_TEST(usra_2s_2s_1,   "usra v6.2s,  v27.2s, #1",  6, 27)
   4198 GEN_TWOVEC_TEST(usra_2s_2s_16,  "usra v6.2s,  v27.2s, #16", 6, 27)
   4199 GEN_TWOVEC_TEST(usra_2s_2s_32,  "usra v6.2s,  v27.2s, #32", 6, 27)
   4200 GEN_TWOVEC_TEST(usra_8h_8h_1,   "usra v6.8h,  v27.8h, #1",  6, 27)
   4201 GEN_TWOVEC_TEST(usra_8h_8h_8,   "usra v6.8h,  v27.8h, #8",  6, 27)
   4202 GEN_TWOVEC_TEST(usra_8h_8h_16,  "usra v6.8h,  v27.8h, #16", 6, 27)
   4203 GEN_TWOVEC_TEST(usra_4h_4h_1,   "usra v6.4h,  v27.4h, #1",  6, 27)
   4204 GEN_TWOVEC_TEST(usra_4h_4h_8,   "usra v6.4h,  v27.4h, #8",  6, 27)
   4205 GEN_TWOVEC_TEST(usra_4h_4h_16,  "usra v6.4h,  v27.4h, #16", 6, 27)
   4206 GEN_TWOVEC_TEST(usra_16b_16b_1, "usra v6.16b, v27.16b, #1", 6, 27)
   4207 GEN_TWOVEC_TEST(usra_16b_16b_3, "usra v6.16b, v27.16b, #3", 6, 27)
   4208 GEN_TWOVEC_TEST(usra_16b_16b_8, "usra v6.16b, v27.16b, #8", 6, 27)
   4209 GEN_TWOVEC_TEST(usra_8b_8b_1,   "usra v6.8b,  v27.8b, #1",  6, 27)
   4210 GEN_TWOVEC_TEST(usra_8b_8b_3,   "usra v6.8b,  v27.8b, #3",  6, 27)
   4211 GEN_TWOVEC_TEST(usra_8b_8b_8,   "usra v6.8b,  v27.8b, #8",  6, 27)
   4212 
   // Scalar SRSHL/URSHL (signed/unsigned rounding shift left, by register)
   // on D registers: destination d29, operands d28 and d27.
   4213 GEN_THREEVEC_TEST(srshl_d_d_d, "srshl d29, d28, d27", 29, 28, 27)
   4214 GEN_THREEVEC_TEST(urshl_d_d_d, "urshl d29, d28, d27", 29, 28, 27)
   4215 
   // Vector SRSHL/URSHL, destination v29 and operands v28/v27, one test per
   // arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b) for each mnemonic.
   4216 GEN_THREEVEC_TEST(srshl_2d_2d_2d,   "srshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4217 GEN_THREEVEC_TEST(srshl_4s_4s_4s,   "srshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4218 GEN_THREEVEC_TEST(srshl_2s_2s_2s,   "srshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4219 GEN_THREEVEC_TEST(srshl_8h_8h_8h,   "srshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4220 GEN_THREEVEC_TEST(srshl_4h_4h_4h,   "srshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4221 GEN_THREEVEC_TEST(srshl_16b_16b_16b,"srshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4222 GEN_THREEVEC_TEST(srshl_8b_8b_8b,   "srshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4223 GEN_THREEVEC_TEST(urshl_2d_2d_2d,   "urshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4224 GEN_THREEVEC_TEST(urshl_4s_4s_4s,   "urshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4225 GEN_THREEVEC_TEST(urshl_2s_2s_2s,   "urshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4226 GEN_THREEVEC_TEST(urshl_8h_8h_8h,   "urshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4227 GEN_THREEVEC_TEST(urshl_4h_4h_4h,   "urshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4228 GEN_THREEVEC_TEST(urshl_16b_16b_16b,"urshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4229 GEN_THREEVEC_TEST(urshl_8b_8b_8b,   "urshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4230 
   // Scalar SRSHR/URSHR (signed/unsigned rounding shift right, immediate) on
   // D registers: destination d5, source d28, shift immediates 1, 32 and 64.
   4231 GEN_TWOVEC_TEST(srshr_d_d_1,  "srshr d5, d28, #1",  5, 28)
   4232 GEN_TWOVEC_TEST(srshr_d_d_32, "srshr d5, d28, #32", 5, 28)
   4233 GEN_TWOVEC_TEST(srshr_d_d_64, "srshr d5, d28, #64", 5, 28)
   4234 GEN_TWOVEC_TEST(urshr_d_d_1,  "urshr d5, d28, #1",  5, 28)
   4235 GEN_TWOVEC_TEST(urshr_d_d_32, "urshr d5, d28, #32", 5, 28)
   4236 GEN_TWOVEC_TEST(urshr_d_d_64, "urshr d5, d28, #64", 5, 28)
   4237 
   // Vector SRSHR/URSHR, destination v6 and source v27, for each of the seven
   // arrangements.  Every arrangement is tested with three immediates: 1, an
   // intermediate value, and the lane width in bits.
   4238 GEN_TWOVEC_TEST(srshr_2d_2d_1,   "srshr v6.2d,  v27.2d, #1",  6, 27)
   4239 GEN_TWOVEC_TEST(srshr_2d_2d_32,  "srshr v6.2d,  v27.2d, #32", 6, 27)
   4240 GEN_TWOVEC_TEST(srshr_2d_2d_64,  "srshr v6.2d,  v27.2d, #64", 6, 27)
   4241 GEN_TWOVEC_TEST(srshr_4s_4s_1,   "srshr v6.4s,  v27.4s, #1",  6, 27)
   4242 GEN_TWOVEC_TEST(srshr_4s_4s_16,  "srshr v6.4s,  v27.4s, #16", 6, 27)
   4243 GEN_TWOVEC_TEST(srshr_4s_4s_32,  "srshr v6.4s,  v27.4s, #32", 6, 27)
   4244 GEN_TWOVEC_TEST(srshr_2s_2s_1,   "srshr v6.2s,  v27.2s, #1",  6, 27)
   4245 GEN_TWOVEC_TEST(srshr_2s_2s_16,  "srshr v6.2s,  v27.2s, #16", 6, 27)
   4246 GEN_TWOVEC_TEST(srshr_2s_2s_32,  "srshr v6.2s,  v27.2s, #32", 6, 27)
   4247 GEN_TWOVEC_TEST(srshr_8h_8h_1,   "srshr v6.8h,  v27.8h, #1",  6, 27)
   4248 GEN_TWOVEC_TEST(srshr_8h_8h_8,   "srshr v6.8h,  v27.8h, #8",  6, 27)
   4249 GEN_TWOVEC_TEST(srshr_8h_8h_16,  "srshr v6.8h,  v27.8h, #16", 6, 27)
   4250 GEN_TWOVEC_TEST(srshr_4h_4h_1,   "srshr v6.4h,  v27.4h, #1",  6, 27)
   4251 GEN_TWOVEC_TEST(srshr_4h_4h_8,   "srshr v6.4h,  v27.4h, #8",  6, 27)
   4252 GEN_TWOVEC_TEST(srshr_4h_4h_16,  "srshr v6.4h,  v27.4h, #16", 6, 27)
   4253 GEN_TWOVEC_TEST(srshr_16b_16b_1, "srshr v6.16b, v27.16b, #1", 6, 27)
   4254 GEN_TWOVEC_TEST(srshr_16b_16b_3, "srshr v6.16b, v27.16b, #3", 6, 27)
   4255 GEN_TWOVEC_TEST(srshr_16b_16b_8, "srshr v6.16b, v27.16b, #8", 6, 27)
   4256 GEN_TWOVEC_TEST(srshr_8b_8b_1,   "srshr v6.8b,  v27.8b, #1",  6, 27)
   4257 GEN_TWOVEC_TEST(srshr_8b_8b_3,   "srshr v6.8b,  v27.8b, #3",  6, 27)
   4258 GEN_TWOVEC_TEST(srshr_8b_8b_8,   "srshr v6.8b,  v27.8b, #8",  6, 27)
   4259 GEN_TWOVEC_TEST(urshr_2d_2d_1,   "urshr v6.2d,  v27.2d, #1",  6, 27)
   4260 GEN_TWOVEC_TEST(urshr_2d_2d_32,  "urshr v6.2d,  v27.2d, #32", 6, 27)
   4261 GEN_TWOVEC_TEST(urshr_2d_2d_64,  "urshr v6.2d,  v27.2d, #64", 6, 27)
   4262 GEN_TWOVEC_TEST(urshr_4s_4s_1,   "urshr v6.4s,  v27.4s, #1",  6, 27)
   4263 GEN_TWOVEC_TEST(urshr_4s_4s_16,  "urshr v6.4s,  v27.4s, #16", 6, 27)
   4264 GEN_TWOVEC_TEST(urshr_4s_4s_32,  "urshr v6.4s,  v27.4s, #32", 6, 27)
   4265 GEN_TWOVEC_TEST(urshr_2s_2s_1,   "urshr v6.2s,  v27.2s, #1",  6, 27)
   4266 GEN_TWOVEC_TEST(urshr_2s_2s_16,  "urshr v6.2s,  v27.2s, #16", 6, 27)
   4267 GEN_TWOVEC_TEST(urshr_2s_2s_32,  "urshr v6.2s,  v27.2s, #32", 6, 27)
   4268 GEN_TWOVEC_TEST(urshr_8h_8h_1,   "urshr v6.8h,  v27.8h, #1",  6, 27)
   4269 GEN_TWOVEC_TEST(urshr_8h_8h_8,   "urshr v6.8h,  v27.8h, #8",  6, 27)
   4270 GEN_TWOVEC_TEST(urshr_8h_8h_16,  "urshr v6.8h,  v27.8h, #16", 6, 27)
   4271 GEN_TWOVEC_TEST(urshr_4h_4h_1,   "urshr v6.4h,  v27.4h, #1",  6, 27)
   4272 GEN_TWOVEC_TEST(urshr_4h_4h_8,   "urshr v6.4h,  v27.4h, #8",  6, 27)
   4273 GEN_TWOVEC_TEST(urshr_4h_4h_16,  "urshr v6.4h,  v27.4h, #16", 6, 27)
   4274 GEN_TWOVEC_TEST(urshr_16b_16b_1, "urshr v6.16b, v27.16b, #1", 6, 27)
   4275 GEN_TWOVEC_TEST(urshr_16b_16b_3, "urshr v6.16b, v27.16b, #3", 6, 27)
   4276 GEN_TWOVEC_TEST(urshr_16b_16b_8, "urshr v6.16b, v27.16b, #8", 6, 27)
   4277 GEN_TWOVEC_TEST(urshr_8b_8b_1,   "urshr v6.8b,  v27.8b, #1",  6, 27)
   4278 GEN_TWOVEC_TEST(urshr_8b_8b_3,   "urshr v6.8b,  v27.8b, #3",  6, 27)
   4279 GEN_TWOVEC_TEST(urshr_8b_8b_8,   "urshr v6.8b,  v27.8b, #8",  6, 27)
   4280 
   // Scalar SRSRA/URSRA (signed/unsigned rounding shift right and accumulate)
   // on D registers: destination d5, source d28, shift immediates 1, 32, 64.
   4281 GEN_TWOVEC_TEST(srsra_d_d_1,  "srsra d5, d28, #1",  5, 28)
   4282 GEN_TWOVEC_TEST(srsra_d_d_32, "srsra d5, d28, #32", 5, 28)
   4283 GEN_TWOVEC_TEST(srsra_d_d_64, "srsra d5, d28, #64", 5, 28)
   4284 GEN_TWOVEC_TEST(ursra_d_d_1,  "ursra d5, d28, #1",  5, 28)
   4285 GEN_TWOVEC_TEST(ursra_d_d_32, "ursra d5, d28, #32", 5, 28)
   4286 GEN_TWOVEC_TEST(ursra_d_d_64, "ursra d5, d28, #64", 5, 28)
   4287 
   // Vector SRSRA/URSRA, destination v6 and source v27, for each of the seven
   // arrangements.  Every arrangement is tested with three immediates: 1, an
   // intermediate value, and the lane width in bits.
   4288 GEN_TWOVEC_TEST(srsra_2d_2d_1,   "srsra v6.2d,  v27.2d, #1",  6, 27)
   4289 GEN_TWOVEC_TEST(srsra_2d_2d_32,  "srsra v6.2d,  v27.2d, #32", 6, 27)
   4290 GEN_TWOVEC_TEST(srsra_2d_2d_64,  "srsra v6.2d,  v27.2d, #64", 6, 27)
   4291 GEN_TWOVEC_TEST(srsra_4s_4s_1,   "srsra v6.4s,  v27.4s, #1",  6, 27)
   4292 GEN_TWOVEC_TEST(srsra_4s_4s_16,  "srsra v6.4s,  v27.4s, #16", 6, 27)
   4293 GEN_TWOVEC_TEST(srsra_4s_4s_32,  "srsra v6.4s,  v27.4s, #32", 6, 27)
   4294 GEN_TWOVEC_TEST(srsra_2s_2s_1,   "srsra v6.2s,  v27.2s, #1",  6, 27)
   4295 GEN_TWOVEC_TEST(srsra_2s_2s_16,  "srsra v6.2s,  v27.2s, #16", 6, 27)
   4296 GEN_TWOVEC_TEST(srsra_2s_2s_32,  "srsra v6.2s,  v27.2s, #32", 6, 27)
   4297 GEN_TWOVEC_TEST(srsra_8h_8h_1,   "srsra v6.8h,  v27.8h, #1",  6, 27)
   4298 GEN_TWOVEC_TEST(srsra_8h_8h_8,   "srsra v6.8h,  v27.8h, #8",  6, 27)
   4299 GEN_TWOVEC_TEST(srsra_8h_8h_16,  "srsra v6.8h,  v27.8h, #16", 6, 27)
   4300 GEN_TWOVEC_TEST(srsra_4h_4h_1,   "srsra v6.4h,  v27.4h, #1",  6, 27)
   4301 GEN_TWOVEC_TEST(srsra_4h_4h_8,   "srsra v6.4h,  v27.4h, #8",  6, 27)
   4302 GEN_TWOVEC_TEST(srsra_4h_4h_16,  "srsra v6.4h,  v27.4h, #16", 6, 27)
   4303 GEN_TWOVEC_TEST(srsra_16b_16b_1, "srsra v6.16b, v27.16b, #1", 6, 27)
   4304 GEN_TWOVEC_TEST(srsra_16b_16b_3, "srsra v6.16b, v27.16b, #3", 6, 27)
   4305 GEN_TWOVEC_TEST(srsra_16b_16b_8, "srsra v6.16b, v27.16b, #8", 6, 27)
   4306 GEN_TWOVEC_TEST(srsra_8b_8b_1,   "srsra v6.8b,  v27.8b, #1",  6, 27)
   4307 GEN_TWOVEC_TEST(srsra_8b_8b_3,   "srsra v6.8b,  v27.8b, #3",  6, 27)
   4308 GEN_TWOVEC_TEST(srsra_8b_8b_8,   "srsra v6.8b,  v27.8b, #8",  6, 27)
   4309 GEN_TWOVEC_TEST(ursra_2d_2d_1,   "ursra v6.2d,  v27.2d, #1",  6, 27)
   4310 GEN_TWOVEC_TEST(ursra_2d_2d_32,  "ursra v6.2d,  v27.2d, #32", 6, 27)
   4311 GEN_TWOVEC_TEST(ursra_2d_2d_64,  "ursra v6.2d,  v27.2d, #64", 6, 27)
   4312 GEN_TWOVEC_TEST(ursra_4s_4s_1,   "ursra v6.4s,  v27.4s, #1",  6, 27)
   4313 GEN_TWOVEC_TEST(ursra_4s_4s_16,  "ursra v6.4s,  v27.4s, #16", 6, 27)
   4314 GEN_TWOVEC_TEST(ursra_4s_4s_32,  "ursra v6.4s,  v27.4s, #32", 6, 27)
   4315 GEN_TWOVEC_TEST(ursra_2s_2s_1,   "ursra v6.2s,  v27.2s, #1",  6, 27)
   4316 GEN_TWOVEC_TEST(ursra_2s_2s_16,  "ursra v6.2s,  v27.2s, #16", 6, 27)
   4317 GEN_TWOVEC_TEST(ursra_2s_2s_32,  "ursra v6.2s,  v27.2s, #32", 6, 27)
   4318 GEN_TWOVEC_TEST(ursra_8h_8h_1,   "ursra v6.8h,  v27.8h, #1",  6, 27)
   4319 GEN_TWOVEC_TEST(ursra_8h_8h_8,   "ursra v6.8h,  v27.8h, #8",  6, 27)
   4320 GEN_TWOVEC_TEST(ursra_8h_8h_16,  "ursra v6.8h,  v27.8h, #16", 6, 27)
   4321 GEN_TWOVEC_TEST(ursra_4h_4h_1,   "ursra v6.4h,  v27.4h, #1",  6, 27)
   4322 GEN_TWOVEC_TEST(ursra_4h_4h_8,   "ursra v6.4h,  v27.4h, #8",  6, 27)
   4323 GEN_TWOVEC_TEST(ursra_4h_4h_16,  "ursra v6.4h,  v27.4h, #16", 6, 27)
   4324 GEN_TWOVEC_TEST(ursra_16b_16b_1, "ursra v6.16b, v27.16b, #1", 6, 27)
   4325 GEN_TWOVEC_TEST(ursra_16b_16b_3, "ursra v6.16b, v27.16b, #3", 6, 27)
   4326 GEN_TWOVEC_TEST(ursra_16b_16b_8, "ursra v6.16b, v27.16b, #8", 6, 27)
   4327 GEN_TWOVEC_TEST(ursra_8b_8b_1,   "ursra v6.8b,  v27.8b, #1",  6, 27)
   4328 GEN_TWOVEC_TEST(ursra_8b_8b_3,   "ursra v6.8b,  v27.8b, #3",  6, 27)
   4329 GEN_TWOVEC_TEST(ursra_8b_8b_8,   "ursra v6.8b,  v27.8b, #8",  6, 27)
   4330 
   // SSHLL/USHLL (signed/unsigned shift left long) and their "2" variants.
   // Each widening pair is tested with shift amounts 0, an intermediate
   // value, and one less than the width of the narrower lane (31, 15 or 7).
   // NOTE(review): the arguments are presumably mnemonic, destination
   // arrangement, source arrangement, shift amount -- confirm against the
   // GEN_SHIFT_TEST definition.
   4331 GEN_SHIFT_TEST(sshll,  2d, 2s,  0)
   4332 GEN_SHIFT_TEST(sshll,  2d, 2s,  15)
   4333 GEN_SHIFT_TEST(sshll,  2d, 2s,  31)
   4334 GEN_SHIFT_TEST(sshll2, 2d, 4s,  0)
   4335 GEN_SHIFT_TEST(sshll2, 2d, 4s,  15)
   4336 GEN_SHIFT_TEST(sshll2, 2d, 4s,  31)
   4337 GEN_SHIFT_TEST(sshll,  4s, 4h,  0)
   4338 GEN_SHIFT_TEST(sshll,  4s, 4h,  7)
   4339 GEN_SHIFT_TEST(sshll,  4s, 4h,  15)
   4340 GEN_SHIFT_TEST(sshll2, 4s, 8h,  0)
   4341 GEN_SHIFT_TEST(sshll2, 4s, 8h,  7)
   4342 GEN_SHIFT_TEST(sshll2, 4s, 8h,  15)
   4343 GEN_SHIFT_TEST(sshll,  8h, 8b,  0)
   4344 GEN_SHIFT_TEST(sshll,  8h, 8b,  3)
   4345 GEN_SHIFT_TEST(sshll,  8h, 8b,  7)
   4346 GEN_SHIFT_TEST(sshll2, 8h, 16b, 0)
   4347 GEN_SHIFT_TEST(sshll2, 8h, 16b, 3)
   4348 GEN_SHIFT_TEST(sshll2, 8h, 16b, 7)
   4349 GEN_SHIFT_TEST(ushll,  2d, 2s, 0)
   4350 GEN_SHIFT_TEST(ushll,  2d, 2s, 15)
   4351 GEN_SHIFT_TEST(ushll,  2d, 2s, 31)
   4352 GEN_SHIFT_TEST(ushll2, 2d, 4s, 0)
   4353 GEN_SHIFT_TEST(ushll2, 2d, 4s, 15)
   4354 GEN_SHIFT_TEST(ushll2, 2d, 4s, 31)
   4355 GEN_SHIFT_TEST(ushll,  4s, 4h,  0)
   4356 GEN_SHIFT_TEST(ushll,  4s, 4h,  7)
   4357 GEN_SHIFT_TEST(ushll,  4s, 4h,  15)
   4358 GEN_SHIFT_TEST(ushll2, 4s, 8h,  0)
   4359 GEN_SHIFT_TEST(ushll2, 4s, 8h,  7)
   4360 GEN_SHIFT_TEST(ushll2, 4s, 8h,  15)
   4361 GEN_SHIFT_TEST(ushll,  8h, 8b,  0)
   4362 GEN_SHIFT_TEST(ushll,  8h, 8b,  3)
   4363 GEN_SHIFT_TEST(ushll,  8h, 8b,  7)
   4364 GEN_SHIFT_TEST(ushll2, 8h, 16b, 0)
   4365 GEN_SHIFT_TEST(ushll2, 8h, 16b, 3)
   4366 GEN_SHIFT_TEST(ushll2, 8h, 16b, 7)
   4367 
   // Scalar SUQADD (signed saturating accumulate of unsigned value) and
   // USQADD (unsigned saturating accumulate of signed value), one test per
   // scalar size (d, s, h, b), always on register numbers 22 and 23.
   4368 GEN_TWOVEC_TEST(suqadd_d_d,  "suqadd d22, d23",   22, 23)
   4369 GEN_TWOVEC_TEST(suqadd_s_s,  "suqadd s22, s23",   22, 23)
   4370 GEN_TWOVEC_TEST(suqadd_h_h,  "suqadd h22, h23",   22, 23)
   4371 GEN_TWOVEC_TEST(suqadd_b_b,  "suqadd b22, b23",   22, 23)
   4372 GEN_TWOVEC_TEST(usqadd_d_d,  "usqadd d22, d23",   22, 23)
   4373 GEN_TWOVEC_TEST(usqadd_s_s,  "usqadd s22, s23",   22, 23)
   4374 GEN_TWOVEC_TEST(usqadd_h_h,  "usqadd h22, h23",   22, 23)
   4375 GEN_TWOVEC_TEST(usqadd_b_b,  "usqadd b22, b23",   22, 23)
   4376 
   // Vector SUQADD/USQADD, destination v6 and source v27, one test per
   // arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b) for each mnemonic.
   4377 GEN_TWOVEC_TEST(suqadd_2d_2d,   "suqadd v6.2d,  v27.2d",  6, 27)
   4378 GEN_TWOVEC_TEST(suqadd_4s_4s,   "suqadd v6.4s,  v27.4s",  6, 27)
   4379 GEN_TWOVEC_TEST(suqadd_2s_2s,   "suqadd v6.2s,  v27.2s",  6, 27)
   4380 GEN_TWOVEC_TEST(suqadd_8h_8h,   "suqadd v6.8h,  v27.8h",  6, 27)
   4381 GEN_TWOVEC_TEST(suqadd_4h_4h,   "suqadd v6.4h,  v27.4h",  6, 27)
   4382 GEN_TWOVEC_TEST(suqadd_16b_16b, "suqadd v6.16b, v27.16b", 6, 27)
   4383 GEN_TWOVEC_TEST(suqadd_8b_8b,   "suqadd v6.8b,  v27.8b",  6, 27)
   4384 GEN_TWOVEC_TEST(usqadd_2d_2d,   "usqadd v6.2d,  v27.2d",  6, 27)
   4385 GEN_TWOVEC_TEST(usqadd_4s_4s,   "usqadd v6.4s,  v27.4s",  6, 27)
   4386 GEN_TWOVEC_TEST(usqadd_2s_2s,   "usqadd v6.2s,  v27.2s",  6, 27)
   4387 GEN_TWOVEC_TEST(usqadd_8h_8h,   "usqadd v6.8h,  v27.8h",  6, 27)
   4388 GEN_TWOVEC_TEST(usqadd_4h_4h,   "usqadd v6.4h,  v27.4h",  6, 27)
   4389 GEN_TWOVEC_TEST(usqadd_16b_16b, "usqadd v6.16b, v27.16b", 6, 27)
   4390 GEN_TWOVEC_TEST(usqadd_8b_8b,   "usqadd v6.8b,  v27.8b",  6, 27)
   4391 
   // TBL (table lookup) with one to four table registers.  In every test the
   // result goes to v21 and the indices come from v23.  The macro only names
   // three registers (21, 15, 23), so the additional table registers v16..v18
   // are filled by eor instructions placed in front of the tbl in the same
   // asm string.
   4392 // Uses v15 as the first table entry
   4393 GEN_THREEVEC_TEST(
   4394    tbl_16b_1reg, "tbl v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
   4395 // and v15 ^ v21 as the second table entry
   4396 GEN_THREEVEC_TEST(
   4397    tbl_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4398                  "tbl v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
   4399 // and v15 ^ v23 as the third table entry
   4400 GEN_THREEVEC_TEST(
   4401    tbl_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4402                  "eor v17.16b, v15.16b, v23.16b ; "
   4403                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
   4404                  21, 15, 23)
   4405 // and v21 ^ v23 as the fourth table entry
   4406 GEN_THREEVEC_TEST(
   4407    tbl_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4408                  "eor v17.16b, v15.16b, v23.16b ; "
   4409                  "eor v18.16b, v21.16b, v23.16b ; "
   4410                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
   4411                  21, 15, 23)
   4412 // Same register scheme for tbl .8b, tbx .16b, tbx.8b
   4413 GEN_THREEVEC_TEST(
   4414    tbl_8b_1reg, "tbl v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
   4415 GEN_THREEVEC_TEST(
   4416    tbl_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4417                 "tbl v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
   4418 GEN_THREEVEC_TEST(
   4419    tbl_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4420                 "eor v17.16b, v15.16b, v23.16b ; "
   4421                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
   4422                 21, 15, 23)
   4423 GEN_THREEVEC_TEST(
   4424    tbl_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4425                 "eor v17.16b, v15.16b, v23.16b ; "
   4426                 "eor v18.16b, v21.16b, v23.16b ; "
   4427                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
   4428                 21, 15, 23)
   4429 
   // TBX (table lookup extension) with one to four table registers, .16b
   // form first.  Result in v21, indices in v23, first table register v15;
   // the further table registers v16..v18 are produced by the eor
   // instructions at the start of each asm string.
   4430 GEN_THREEVEC_TEST(
   4431    tbx_16b_1reg, "tbx v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
   4432 GEN_THREEVEC_TEST(
   4433    tbx_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4434                  "tbx v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
   4435 GEN_THREEVEC_TEST(
   4436    tbx_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4437                  "eor v17.16b, v15.16b, v23.16b ; "
   4438                  "tbx v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
   4439                  21, 15, 23)
   4440 GEN_THREEVEC_TEST(
   4441    tbx_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4442                  "eor v17.16b, v15.16b, v23.16b ; "
   4443                  "eor v18.16b, v21.16b, v23.16b ; "
   4444                  "tbx v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
   4445                  21, 15, 23)
   4446 // Same register scheme for tbx .8b
   4447 GEN_THREEVEC_TEST(
   4448    tbx_8b_1reg, "tbx v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
   4449 GEN_THREEVEC_TEST(
   4450    tbx_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4451                 "tbx v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
   4452 GEN_THREEVEC_TEST(
   4453    tbx_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4454                 "eor v17.16b, v15.16b, v23.16b ; "
   4455                 "tbx v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
   4456                 21, 15, 23)
   4457 GEN_THREEVEC_TEST(
   4458    tbx_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4459                 "eor v17.16b, v15.16b, v23.16b ; "
   4460                 "eor v18.16b, v21.16b, v23.16b ; "
   4461                 "tbx v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
   4462                 21, 15, 23)
   4463 
   // TRN1/TRN2 (transpose), destination v1 and operands v2/v4, one test per
   // arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b) for each mnemonic.
   4464 GEN_THREEVEC_TEST(trn1_2d_2d_2d,    "trn1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4465 GEN_THREEVEC_TEST(trn1_4s_4s_4s,    "trn1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4466 GEN_THREEVEC_TEST(trn1_2s_2s_2s,    "trn1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4467 GEN_THREEVEC_TEST(trn1_8h_8h_8h,    "trn1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4468 GEN_THREEVEC_TEST(trn1_4h_4h_4h,    "trn1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4469 GEN_THREEVEC_TEST(trn1_16b_16b_16b, "trn1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4470 GEN_THREEVEC_TEST(trn1_8b_8b_8b,    "trn1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4471 GEN_THREEVEC_TEST(trn2_2d_2d_2d,    "trn2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4472 GEN_THREEVEC_TEST(trn2_4s_4s_4s,    "trn2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4473 GEN_THREEVEC_TEST(trn2_2s_2s_2s,    "trn2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4474 GEN_THREEVEC_TEST(trn2_8h_8h_8h,    "trn2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4475 GEN_THREEVEC_TEST(trn2_4h_4h_4h,    "trn2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4476 GEN_THREEVEC_TEST(trn2_16b_16b_16b, "trn2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4477 GEN_THREEVEC_TEST(trn2_8b_8b_8b,    "trn2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4478 
   // URECPE (unsigned reciprocal estimate) and URSQRTE (unsigned reciprocal
   // square root estimate), destination v6 and source v27.  Only the 4s and
   // 2s arrangements are instantiated here.
   4479 GEN_TWOVEC_TEST(urecpe_4s_4s,   "urecpe v6.4s,  v27.4s",  6, 27)
   4480 GEN_TWOVEC_TEST(urecpe_2s_2s,   "urecpe v6.2s,  v27.2s",  6, 27)
   4481 GEN_TWOVEC_TEST(ursqrte_4s_4s,   "ursqrte v6.4s,  v27.4s",  6, 27)
   4482 GEN_TWOVEC_TEST(ursqrte_2s_2s,   "ursqrte v6.2s,  v27.2s",  6, 27)
   4483 
   // UZP1/UZP2 (unzip) and ZIP1/ZIP2 (zip), destination v1 and operands
   // v2/v4, one test per arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b) for each
   // of the four mnemonics.
   4484 GEN_THREEVEC_TEST(uzp1_2d_2d_2d,    "uzp1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4485 GEN_THREEVEC_TEST(uzp1_4s_4s_4s,    "uzp1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4486 GEN_THREEVEC_TEST(uzp1_2s_2s_2s,    "uzp1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4487 GEN_THREEVEC_TEST(uzp1_8h_8h_8h,    "uzp1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4488 GEN_THREEVEC_TEST(uzp1_4h_4h_4h,    "uzp1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4489 GEN_THREEVEC_TEST(uzp1_16b_16b_16b, "uzp1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4490 GEN_THREEVEC_TEST(uzp1_8b_8b_8b,    "uzp1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4491 GEN_THREEVEC_TEST(uzp2_2d_2d_2d,    "uzp2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4492 GEN_THREEVEC_TEST(uzp2_4s_4s_4s,    "uzp2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4493 GEN_THREEVEC_TEST(uzp2_2s_2s_2s,    "uzp2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4494 GEN_THREEVEC_TEST(uzp2_8h_8h_8h,    "uzp2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4495 GEN_THREEVEC_TEST(uzp2_4h_4h_4h,    "uzp2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4496 GEN_THREEVEC_TEST(uzp2_16b_16b_16b, "uzp2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4497 GEN_THREEVEC_TEST(uzp2_8b_8b_8b,    "uzp2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4498 GEN_THREEVEC_TEST(zip1_2d_2d_2d,    "zip1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4499 GEN_THREEVEC_TEST(zip1_4s_4s_4s,    "zip1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4500 GEN_THREEVEC_TEST(zip1_2s_2s_2s,    "zip1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4501 GEN_THREEVEC_TEST(zip1_8h_8h_8h,    "zip1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4502 GEN_THREEVEC_TEST(zip1_4h_4h_4h,    "zip1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4503 GEN_THREEVEC_TEST(zip1_16b_16b_16b, "zip1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4504 GEN_THREEVEC_TEST(zip1_8b_8b_8b,    "zip1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4505 GEN_THREEVEC_TEST(zip2_2d_2d_2d,    "zip2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4506 GEN_THREEVEC_TEST(zip2_4s_4s_4s,    "zip2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4507 GEN_THREEVEC_TEST(zip2_2s_2s_2s,    "zip2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4508 GEN_THREEVEC_TEST(zip2_8h_8h_8h,    "zip2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4509 GEN_THREEVEC_TEST(zip2_4h_4h_4h,    "zip2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4510 GEN_THREEVEC_TEST(zip2_16b_16b_16b, "zip2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4511 GEN_THREEVEC_TEST(zip2_8b_8b_8b,    "zip2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4512 
   // XTN/XTN2 (extract narrow) for the three narrowing steps: 2d -> 2s/4s,
   // 4s -> 4h/8h and 8h -> 8b/16b.
   // NOTE(review): the arguments are presumably mnemonic, destination
   // arrangement, source arrangement -- confirm against GEN_UNARY_TEST.
   4513 GEN_UNARY_TEST(xtn,  2s, 2d)
   4514 GEN_UNARY_TEST(xtn2, 4s, 2d)
   4515 GEN_UNARY_TEST(xtn,  4h, 4s)
   4516 GEN_UNARY_TEST(xtn2, 8h, 4s)
   4517 GEN_UNARY_TEST(xtn,  8b, 8h)
   4518 GEN_UNARY_TEST(xtn2, 16b, 8h)
   4519 
   4520 // ======================== MEM ========================
   4521 
   4522 // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
   4523 
   4524 // ======================== CRYPTO ========================
   4525 
   4526 // These tests are believed to be correct but are disabled because
   4527 // GNU assembler (GNU Binutils) 2.24.0.20140311 Linaro 2014.03
   4528 // cannot be persuaded to accept those instructions (AFAICT).
   4529 
   // AES instructions (aesd, aese, aesimc, aesmc), all on the 16b
   // arrangement with v6 as the first operand and v27 as the second.
   // NOTE(review): these generators are instantiated unconditionally here;
   // if the tests are meant to be disabled, that presumably happens where
   // they are called -- confirm.
   4530 GEN_TWOVEC_TEST(aesd_16b_16b,    "aesd v6.16b,  v27.16b",  6, 27)
   4531 GEN_TWOVEC_TEST(aese_16b_16b,    "aese v6.16b,  v27.16b",  6, 27)
   4532 GEN_TWOVEC_TEST(aesimc_16b_16b,  "aesimc v6.16b,  v27.16b",  6, 27)
   4533 GEN_TWOVEC_TEST(aesmc_16b_16b,   "aesmc v6.16b,  v27.16b",  6, 27)
   4534 
   // SHA1 instructions.  The three-operand forms (sha1c, sha1m, sha1p,
   // sha1su0) use registers 29, 28, 27; the two-operand forms (sha1h,
   // sha1su1) use registers 6 and 27.
   4535 GEN_THREEVEC_TEST(sha1c_q_s_4s,     "sha1c q29, s28, v27.4s", 29,28,27)
   4536 GEN_TWOVEC_TEST(sha1h_s_s,          "sha1h s6,  s27",  6, 27)
   4537 GEN_THREEVEC_TEST(sha1m_q_s_4s,     "sha1m q29, s28, v27.4s", 29,28,27)
   4538 GEN_THREEVEC_TEST(sha1p_q_s_4s,     "sha1p q29, s28, v27.4s", 29,28,27)
   4539 GEN_THREEVEC_TEST(sha1su0_4s_4s_4s, "sha1su0 v29.4s, v28.4s, v27.4s", 29,28,27)
   4540 GEN_TWOVEC_TEST(sha1su1_4s_4s,      "sha1su1 v6.4s,  v27.4s",  6, 27)
   4541 
   // SHA256 instructions.  The three-operand forms (sha256h2, sha256h,
   // sha256su1) use registers 29, 28, 27; sha256su0 uses registers 6 and 27.
   4542 GEN_THREEVEC_TEST(sha256h2_q_q_4s,  "sha256h2 q29, q28, v27.4s", 29,28,27)
   4543 GEN_THREEVEC_TEST(sha256h_q_q_4s,   "sha256h q29, q28, v27.4s", 29,28,27)
   4544 GEN_TWOVEC_TEST(sha256su0_4s_4s,    "sha256su0 v6.4s,  v27.4s",  6, 27)
   4545 GEN_THREEVEC_TEST(sha256su1_4s_4s_4s, "sha256su1 v29.4s, v28.4s, v27.4s",
   4546                                       29,28,27)
   4547 
   4548 
   4549 /* ---------------------------------------------------------------- */
   4550 /* -- main()                                                     -- */
   4551 /* ---------------------------------------------------------------- */
   4552 
   4553 int main ( void )
   4554 {
   4555    assert(sizeof(V128) == 16);
   4556 
   4557    // ======================== FP ========================
   4558 
   4559    // fabs      d,s
   4560    // fabs      2d,4s,2s
   4561    if (1) test_fabs_d_d(TyDF);
   4562    if (1) test_fabs_s_s(TySF);
   4563    if (1) test_fabs_2d_2d(TyDF);
   4564    if (1) test_fabs_4s_4s(TySF);
   4565    if (1) test_fabs_2s_2s(TyDF);
   4566 
   4567    // fneg      d,s
   4568    // fneg      2d,4s,2s
   4569    if (1) test_fneg_d_d(TyDF);
   4570    if (1) test_fneg_s_s(TySF);
   4571    if (1) test_fneg_2d_2d(TySF);
   4572    if (1) test_fneg_4s_4s(TyDF);
   4573    if (1) test_fneg_2s_2s(TySF);
   4574 
   4575    // fsqrt     d,s
   4576    // fsqrt     2d,4s,2s
   4577    if (1) test_fsqrt_d_d(TyDF);
   4578    if (1) test_fsqrt_s_s(TySF);
   4579    if (1) test_fsqrt_2d_2d(TySF);
   4580    if (1) test_fsqrt_4s_4s(TyDF);
   4581    if (1) test_fsqrt_2s_2s(TySF);
   4582 
   4583    // fadd      d,s
   4584    // fsub      d,s
   4585    if (1) test_fadd_d_d_d(TyDF);
   4586    if (1) test_fadd_s_s_s(TySF);
   4587    if (1) test_fsub_d_d_d(TyDF);
   4588    if (1) test_fsub_s_s_s(TySF);
   4589 
   4590    // fadd      2d,4s,2s
   4591    // fsub      2d,4s,2s
   4592    if (1) test_fadd_2d_2d_2d(TyDF);
   4593    if (1) test_fadd_4s_4s_4s(TySF);
   4594    if (1) test_fadd_2s_2s_2s(TySF);
   4595    if (1) test_fsub_2d_2d_2d(TyDF);
   4596    if (1) test_fsub_4s_4s_4s(TySF);
   4597    if (1) test_fsub_2s_2s_2s(TySF);
   4598 
   4599    // fabd      d,s
   4600    // fabd      2d,4s,2s
   4601    if (1) test_fabd_d_d_d(TyDF);
   4602    if (1) test_fabd_s_s_s(TySF);
   4603    if (1) test_fabd_2d_2d_2d(TyDF);
   4604    if (1) test_fabd_4s_4s_4s(TySF);
   4605    if (1) test_fabd_2s_2s_2s(TySF);
   4606 
   4607    // faddp     d,s (floating add pair)
   4608    // faddp     2d,4s,2s
   4609    if (1) test_faddp_d_2d(TyDF);
   4610    if (1) test_faddp_s_2s(TySF);
   4611    if (1) test_faddp_2d_2d_2d(TySF);
   4612    if (1) test_faddp_4s_4s_4s(TyDF);
   4613    if (1) test_faddp_2s_2s_2s(TySF);
   4614 
   4615    // fccmp     d,s (floating point conditional quiet compare)
   4616    // fccmpe    d,s (floating point conditional signaling compare)
   4617    if (1) DO50( test_FCCMP_D_D_0xF_EQ() );
   4618    if (1) DO50( test_FCCMP_D_D_0xF_NE() );
   4619    if (1) DO50( test_FCCMP_D_D_0x0_EQ() );
   4620    if (1) DO50( test_FCCMP_D_D_0x0_NE() );
   4621    if (1) DO50( test_FCCMP_S_S_0xF_EQ() );
   4622    if (1) DO50( test_FCCMP_S_S_0xF_NE() );
   4623    if (1) DO50( test_FCCMP_S_S_0x0_EQ() );
   4624    if (1) DO50( test_FCCMP_S_S_0x0_NE() );
   4625    if (1) DO50( test_FCCMPE_D_D_0xF_EQ() );
   4626    if (1) DO50( test_FCCMPE_D_D_0xF_NE() );
   4627    if (1) DO50( test_FCCMPE_D_D_0x0_EQ() );
   4628    if (1) DO50( test_FCCMPE_D_D_0x0_NE() );
   4629    if (1) DO50( test_FCCMPE_S_S_0xF_EQ() );
   4630    if (1) DO50( test_FCCMPE_S_S_0xF_NE() );
   4631    if (1) DO50( test_FCCMPE_S_S_0x0_EQ() );
   4632    if (1) DO50( test_FCCMPE_S_S_0x0_NE() );
   4633 
   4634    // fcmeq     d,s
   4635    // fcmge     d,s
   4636    // fcmgt     d,s
   4637    // facgt     d,s  (floating abs compare GT)
   4638    // facge     d,s  (floating abs compare GE)
   4639    if (1) DO50( test_FCMEQ_D_D_D() );
   4640    if (1) DO50( test_FCMEQ_S_S_S() );
   4641    if (1) DO50( test_FCMGE_D_D_D() );
   4642    if (1) DO50( test_FCMGE_S_S_S() );
   4643    if (1) DO50( test_FCMGT_D_D_D() );
   4644    if (1) DO50( test_FCMGT_S_S_S() );
   4645    if (1) DO50( test_FACGT_D_D_D() );
   4646    if (1) DO50( test_FACGT_S_S_S() );
   4647    if (1) DO50( test_FACGE_D_D_D() );
   4648    if (1) DO50( test_FACGE_S_S_S() );
   4649 
   4650    // fcmeq     2d,4s,2s
   4651    // fcmge     2d,4s,2s
   4652    // fcmgt     2d,4s,2s
   4653    // facge     2d,4s,2s
   4654    // facgt     2d,4s,2s
   4655    if (1) test_fcmeq_2d_2d_2d(TyDF);
   4656    if (1) test_fcmeq_4s_4s_4s(TySF);
   4657    if (1) test_fcmeq_2s_2s_2s(TySF);
   4658    if (1) test_fcmge_2d_2d_2d(TyDF);
   4659    if (1) test_fcmge_4s_4s_4s(TySF);
   4660    if (1) test_fcmge_2s_2s_2s(TySF);
   4661    if (1) test_fcmgt_2d_2d_2d(TyDF);
   4662    if (1) test_fcmgt_4s_4s_4s(TySF);
   4663    if (1) test_fcmgt_2s_2s_2s(TySF);
   4664    if (1) test_facge_2d_2d_2d(TyDF);
   4665    if (1) test_facge_4s_4s_4s(TySF);
   4666    if (1) test_facge_2s_2s_2s(TySF);
   4667    if (1) test_facgt_2d_2d_2d(TyDF);
   4668    if (1) test_facgt_4s_4s_4s(TySF);
   4669    if (1) test_facgt_2s_2s_2s(TySF);
   4670 
   4671    // fcmeq_z   d,s
   4672    // fcmge_z   d,s
   4673    // fcmgt_z   d,s
   4674    // fcmle_z   d,s
   4675    // fcmlt_z   d,s
   4676    if (1) DO50( test_FCMEQ_D_D_Z() );
   4677    if (1) DO50( test_FCMEQ_S_S_Z() );
   4678    if (1) DO50( test_FCMGE_D_D_Z() );
   4679    if (1) DO50( test_FCMGE_S_S_Z() );
   4680    if (1) DO50( test_FCMGT_D_D_Z() );
   4681    if (1) DO50( test_FCMGT_S_S_Z() );
   4682    if (1) DO50( test_FCMLE_D_D_Z() );
   4683    if (1) DO50( test_FCMLE_S_S_Z() );
   4684    if (1) DO50( test_FCMLT_D_D_Z() );
   4685    if (1) DO50( test_FCMLT_S_S_Z() );
   4686 
   4687    // fcmeq_z   2d,4s,2s
   4688    // fcmge_z   2d,4s,2s
   4689    // fcmgt_z   2d,4s,2s
   4690    // fcmle_z   2d,4s,2s
   4691    // fcmlt_z   2d,4s,2s
   4692    if (1) test_fcmeq_z_2d_2d(TyDF);
   4693    if (1) test_fcmeq_z_4s_4s(TySF);
   4694    if (1) test_fcmeq_z_2s_2s(TySF);
   4695    if (1) test_fcmge_z_2d_2d(TyDF);
   4696    if (1) test_fcmge_z_4s_4s(TySF);
   4697    if (1) test_fcmge_z_2s_2s(TySF);
   4698    if (1) test_fcmgt_z_2d_2d(TyDF);
   4699    if (1) test_fcmgt_z_4s_4s(TySF);
   4700    if (1) test_fcmgt_z_2s_2s(TySF);
   4701    if (1) test_fcmle_z_2d_2d(TyDF);
   4702    if (1) test_fcmle_z_4s_4s(TySF);
   4703    if (1) test_fcmle_z_2s_2s(TySF);
   4704    if (1) test_fcmlt_z_2d_2d(TyDF);
   4705    if (1) test_fcmlt_z_4s_4s(TySF);
   4706    if (1) test_fcmlt_z_2s_2s(TySF);
   4707 
   4708    // fcmp_z    d,s
   4709    // fcmpe_z   d,s
   4710    // fcmp      d,s (floating point quiet, set flags)
   4711    // fcmpe     d,s (floating point signaling, set flags)
   4712    if (1) DO50( test_FCMP_D_Z() );
   4713    if (1) DO50( test_FCMP_S_Z() );
   4714    if (1) DO50( test_FCMPE_D_Z() );
   4715    if (1) DO50( test_FCMPE_S_Z() );
   4716    if (1) DO50( test_FCMP_D_D() );
   4717    if (1) DO50( test_FCMP_S_S() );
   4718    if (1) DO50( test_FCMPE_D_D() );
   4719    if (1) DO50( test_FCMPE_S_S() );
   4720 
   4721    // fcsel     d,s (fp cond select)
   4722    if (1) DO50( test_FCSEL_D_D_D_EQ() );
   4723    if (1) DO50( test_FCSEL_D_D_D_NE() );
   4724    if (1) DO50( test_FCSEL_S_S_S_EQ() );
   4725    if (1) DO50( test_FCSEL_S_S_S_NE() );
   4726 
   4727    // fdiv      d,s
   4728    // fdiv      2d,4s,2s
   4729    if (1) test_fdiv_d_d_d(TyDF);
   4730    if (1) test_fdiv_s_s_s(TySF);
   4731    if (1) test_fdiv_2d_2d_2d(TyDF);
   4732    if (1) test_fdiv_4s_4s_4s(TySF);
   4733    if (1) test_fdiv_2s_2s_2s(TySF);
   4734 
   4735    // fmadd     d,s
   4736    // fnmadd    d,s
   4737    // fmsub     d,s
   4738    // fnmsub    d,s
   4739    if (1) test_fmadd_d_d_d_d(TyDF);
   4740    if (1) test_fmadd_s_s_s_s(TySF);
   4741    if (1) test_fnmadd_d_d_d_d(TyDF);
   4742    if (1) test_fnmadd_s_s_s_s(TySF);
   4743    if (1) test_fmsub_d_d_d_d(TyDF);
   4744    if (1) test_fmsub_s_s_s_s(TySF);
   4745    if (1) test_fnmsub_d_d_d_d(TyDF);
   4746    if (1) test_fnmsub_s_s_s_s(TySF);
   4747 
   4748    // fnmul     d,s
   4749    if (1) test_fnmul_d_d_d(TyDF);
   4750    if (1) test_fnmul_s_s_s(TySF);
   4751 
   4752    // fmax      d,s
   4753    // fmin      d,s
   4754    // fmaxnm    d,s ("max number")
   4755    // fminnm    d,s
   4756    if (1) test_fmax_d_d_d(TyDF);
   4757    if (1) test_fmax_s_s_s(TySF);
   4758    if (1) test_fmin_d_d_d(TyDF);
   4759    if (1) test_fmin_s_s_s(TySF);
   4760    if (1) test_fmaxnm_d_d_d(TyDF);
   4761    if (1) test_fmaxnm_s_s_s(TySF);
   4762    if (1) test_fminnm_d_d_d(TyDF);
   4763    if (1) test_fminnm_s_s_s(TySF);
   4764 
   4765    // fmax      2d,4s,2s
   4766    // fmin      2d,4s,2s
   4767    // fmaxnm    2d,4s,2s
   4768    // fminnm    2d,4s,2s
   4769    if (1) test_fmax_2d_2d_2d(TyDF);
   4770    if (1) test_fmax_4s_4s_4s(TySF);
   4771    if (1) test_fmax_2s_2s_2s(TySF);
   4772    if (1) test_fmin_2d_2d_2d(TyDF);
   4773    if (1) test_fmin_4s_4s_4s(TySF);
   4774    if (1) test_fmin_2s_2s_2s(TySF);
   4775    if (1) test_fmaxnm_2d_2d_2d(TyDF);
   4776    if (1) test_fmaxnm_4s_4s_4s(TySF);
   4777    if (1) test_fmaxnm_2s_2s_2s(TySF);
   4778    if (1) test_fminnm_2d_2d_2d(TyDF);
   4779    if (1) test_fminnm_4s_4s_4s(TySF);
   4780    if (1) test_fminnm_2s_2s_2s(TySF);
   4781 
   4782    // fmaxnmp   d_2d,s_2s ("max number pairwise")
   4783    // fminnmp   d_2d,s_2s
   4784    if (1) test_fmaxnmp_d_2d(TyDF);
   4785    if (1) test_fmaxnmp_s_2s(TySF);
   4786    if (1) test_fminnmp_d_2d(TyDF);
   4787    if (1) test_fminnmp_s_2s(TySF);
   4788 
   4789    // fmaxnmp   2d,4s,2s
   4790    // fminnmp   2d,4s,2s
   4791    if (1) test_fmaxnmp_2d_2d_2d(TyDF);
   4792    if (1) test_fmaxnmp_4s_4s_4s(TySF);
   4793    if (1) test_fmaxnmp_2s_2s_2s(TySF);
   4794    if (1) test_fminnmp_2d_2d_2d(TyDF);
   4795    if (1) test_fminnmp_4s_4s_4s(TySF);
   4796    if (1) test_fminnmp_2s_2s_2s(TySF);
   4797 
   4798    // fmaxnmv   s_4s (maxnum across vector)
   4799    // fminnmv   s_4s
   4800    if (1) test_fmaxnmv_s_4s(TySF);
   4801    if (1) test_fminnmv_s_4s(TySF);
   4802 
   4803    // fmaxp     d_2d,s_2s (max of a pair)
   4804    // fminp     d_2d,s_2s (min of a pair)
   4805    if (1) test_fmaxp_d_2d(TyDF);
   4806    if (1) test_fmaxp_s_2s(TySF);
   4807    if (1) test_fminp_d_2d(TyDF);
   4808    if (1) test_fminp_s_2s(TySF);
   4809 
   4810    // fmaxp     2d,4s,2s  (max pairwise)
   4811    // fminp     2d,4s,2s
   4812    if (1) test_fmaxp_2d_2d_2d(TyDF);
   4813    if (1) test_fmaxp_4s_4s_4s(TySF);
   4814    if (1) test_fmaxp_2s_2s_2s(TySF);
   4815    if (1) test_fminp_2d_2d_2d(TyDF);
   4816    if (1) test_fminp_4s_4s_4s(TySF);
   4817    if (1) test_fminp_2s_2s_2s(TySF);
   4818 
   4819    // fmaxv     s_4s (max across vector)
   4820    // fminv     s_4s
   4821    if (1) test_fmaxv_s_4s(TySF);
   4822    if (1) test_fminv_s_4s(TySF);
   4823 
   4824    // fmla      2d,4s,2s
   4825    // fmls      2d,4s,2s
   4826    if (1) test_fmla_2d_2d_2d(TyDF);
   4827    if (1) test_fmla_4s_4s_4s(TySF);
   4828    if (1) test_fmla_2s_2s_2s(TySF);
   4829    if (1) test_fmls_2d_2d_2d(TyDF);
   4830    if (1) test_fmls_4s_4s_4s(TySF);
   4831    if (1) test_fmls_2s_2s_2s(TySF);
   4832 
   4833    // fmla      d_d_d[],s_s_s[] (by element)
   4834    // fmls      d_d_d[],s_s_s[] (by element)
   4835    if (1) test_fmla_d_d_d0(TyDF);
   4836    if (1) test_fmla_d_d_d1(TyDF);
   4837    if (1) test_fmla_s_s_s0(TySF);
   4838    if (1) test_fmla_s_s_s3(TySF);
   4839    if (1) test_fmls_d_d_d0(TyDF);
   4840    if (1) test_fmls_d_d_d1(TyDF);
   4841    if (1) test_fmls_s_s_s0(TySF);
   4842    if (1) test_fmls_s_s_s3(TySF);
   4843 
   4844    // fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4845    // fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4846    if (1) test_fmla_2d_2d_d0(TyDF);
   4847    if (1) test_fmla_2d_2d_d1(TyDF);
   4848    if (1) test_fmla_4s_4s_s0(TySF);
   4849    if (1) test_fmla_4s_4s_s3(TySF);
   4850    if (1) test_fmla_2s_2s_s0(TySF);
   4851    if (1) test_fmla_2s_2s_s3(TySF);
   4852    if (1) test_fmls_2d_2d_d0(TyDF);
   4853    if (1) test_fmls_2d_2d_d1(TyDF);
   4854    if (1) test_fmls_4s_4s_s0(TySF);
   4855    if (1) test_fmls_4s_4s_s3(TySF);
   4856    if (1) test_fmls_2s_2s_s0(TySF);
   4857    if (1) test_fmls_2s_2s_s3(TySF);
   4858 
   4859    // fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   4860    if (1) test_fmov_2d_imm_01(TyD);
   4861    if (1) test_fmov_2d_imm_02(TyD);
   4862    if (1) test_fmov_2d_imm_03(TyD);
   4863    if (1) test_fmov_4s_imm_01(TyS);
   4864    if (1) test_fmov_4s_imm_02(TyS);
   4865    if (1) test_fmov_4s_imm_03(TyS);
   4866    if (1) test_fmov_2s_imm_01(TyS);
   4867    if (1) test_fmov_2s_imm_02(TyS);
   4868    if (1) test_fmov_2s_imm_03(TyS);
   4869 
   4870    // fmov      d_d,s_s
   4871    if (1) test_fmov_d_d(TyDF);
   4872    if (1) test_fmov_s_s(TySF);
   4873 
   4874    // fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   4875    if (1) test_fmov_s_w(TyS);
   4876    if (1) test_fmov_d_x(TyD);
   4877    if (1) test_fmov_d1_x(TyD);
   4878    if (1) test_fmov_w_s(TyS);
   4879    if (1) test_fmov_x_d(TyD);
   4880    if (1) test_fmov_x_d1(TyD);
   4881 
   4882    // fmov      d,s #imm
   4883    if (1) test_fmov_d_imm_01(TyNONE);
   4884    if (1) test_fmov_d_imm_02(TyNONE);
   4885    if (1) test_fmov_d_imm_03(TyNONE);
   4886    if (1) test_fmov_s_imm_01(TyNONE);
   4887    if (1) test_fmov_s_imm_02(TyNONE);
   4888    if (1) test_fmov_s_imm_03(TyNONE);
   4889 
   4890    // fmul      d_d_d[],s_s_s[]
   4891    if (1) test_fmul_d_d_d0(TyDF);
   4892    if (1) test_fmul_d_d_d1(TyDF);
   4893    if (1) test_fmul_s_s_s0(TySF);
   4894    if (1) test_fmul_s_s_s3(TySF);
   4895 
   4896    // fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4897    if (1) test_fmul_2d_2d_d0(TyDF);
   4898    if (1) test_fmul_2d_2d_d1(TyDF);
   4899    if (1) test_fmul_4s_4s_s0(TySF);
   4900    if (1) test_fmul_4s_4s_s3(TySF);
   4901    if (1) test_fmul_2s_2s_s0(TySF);
   4902    if (1) test_fmul_2s_2s_s3(TySF);
   4903 
   4904    // fmul      d,s
   4905    // fmul      2d,4s,2s
   4906    if (1) test_fmul_d_d_d(TyDF);
   4907    if (1) test_fmul_s_s_s(TySF);
   4908    if (1) test_fmul_2d_2d_2d(TyDF);
   4909    if (1) test_fmul_4s_4s_4s(TySF);
   4910    if (1) test_fmul_2s_2s_2s(TySF);
   4911 
   4912    // fmulx     d_d_d[],s_s_s[]
   4913    // fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4914    if (1) test_fmulx_d_d_d0(TyDF);
   4915    if (1) test_fmulx_d_d_d1(TyDF);
   4916    if (1) test_fmulx_s_s_s0(TySF);
   4917    if (1) test_fmulx_s_s_s3(TySF);
   4918    if (1) test_fmulx_2d_2d_d0(TyDF);
   4919    if (1) test_fmulx_2d_2d_d1(TyDF);
   4920    if (1) test_fmulx_4s_4s_s0(TySF);
   4921    if (1) test_fmulx_4s_4s_s3(TySF);
   4922    if (1) test_fmulx_2s_2s_s0(TySF);
   4923    if (1) test_fmulx_2s_2s_s3(TySF);
   4924 
   4925    // fmulx     d,s
   4926    // fmulx     2d,4s,2s
   4927    if (1) test_fmulx_d_d_d(TyDF);
   4928    if (1) test_fmulx_s_s_s(TySF);
   4929    if (1) test_fmulx_2d_2d_2d(TyDF);
   4930    if (1) test_fmulx_4s_4s_4s(TySF);
   4931    if (1) test_fmulx_2s_2s_2s(TySF);
   4932 
   4933    // frecpe    d,s (recip estimate)
   4934    // frecpe    2d,4s,2s
   4935    if (1) test_frecpe_d_d(TyDF);
   4936    if (1) test_frecpe_s_s(TySF);
   4937    if (1) test_frecpe_2d_2d(TyDF);
   4938    if (1) test_frecpe_4s_4s(TySF);
   4939    if (1) test_frecpe_2s_2s(TySF);
   4940 
   4941    // frecps    d,s (recip step)
   4942    // frecps    2d,4s,2s
   4943    if (1) test_frecps_d_d_d(TyDF);
   4944    if (1) test_frecps_s_s_s(TySF);
   4945    if (1) test_frecps_2d_2d_2d(TyDF);
   4946    if (1) test_frecps_4s_4s_4s(TySF);
   4947    if (1) test_frecps_2s_2s_2s(TySF);
   4948 
   4949    // frecpx    d,s (recip exponent)
   4950    if (1) test_frecpx_d_d(TyDF);
   4951    if (1) test_frecpx_s_s(TySF);
   4952 
   4953    // frinta    d,s
   4954    // frinti    d,s
   4955    // frintm    d,s
   4956    // frintn    d,s
   4957    // frintp    d,s
   4958    // frintx    d,s
   4959    // frintz    d,s
   4960    if (1) test_frinta_d_d(TyDF);
   4961    if (1) test_frinta_s_s(TySF);
   4962    if (1) test_frinti_d_d(TyDF);
   4963    if (1) test_frinti_s_s(TySF);
   4964    if (1) test_frintm_d_d(TyDF);
   4965    if (1) test_frintm_s_s(TySF);
   4966    if (1) test_frintn_d_d(TyDF);
   4967    if (1) test_frintn_s_s(TySF);
   4968    if (1) test_frintp_d_d(TyDF);
   4969    if (1) test_frintp_s_s(TySF);
   4970    if (1) test_frintx_d_d(TyDF);
   4971    if (1) test_frintx_s_s(TySF);
   4972    if (1) test_frintz_d_d(TyDF);
   4973    if (1) test_frintz_s_s(TySF);
   4974 
   4975    // frinta    2d,4s,2s (round to integral, nearest away)
   4976    // frinti    2d,4s,2s (round to integral, per FPCR)
   4977    // frintm    2d,4s,2s (round to integral, minus inf)
   4978    // frintn    2d,4s,2s (round to integral, nearest, to even)
   4979    // frintp    2d,4s,2s (round to integral, plus inf)
   4980    // frintx    2d,4s,2s (round to integral exact, per FPCR)
   4981    // frintz    2d,4s,2s (round to integral, zero)
   4982    if (1) test_frinta_2d_2d(TyDF);
   4983    if (1) test_frinta_4s_4s(TySF);
   4984    if (1) test_frinta_2s_2s(TySF);
   4985    if (1) test_frinti_2d_2d(TyDF);
   4986    if (1) test_frinti_4s_4s(TySF);
   4987    if (1) test_frinti_2s_2s(TySF);
   4988    if (1) test_frintm_2d_2d(TyDF);
   4989    if (1) test_frintm_4s_4s(TySF);
   4990    if (1) test_frintm_2s_2s(TySF);
   4991    if (1) test_frintn_2d_2d(TyDF);
   4992    if (1) test_frintn_4s_4s(TySF);
   4993    if (1) test_frintn_2s_2s(TySF);
   4994    if (1) test_frintp_2d_2d(TyDF);
   4995    if (1) test_frintp_4s_4s(TySF);
   4996    if (1) test_frintp_2s_2s(TySF);
   4997    if (1) test_frintx_2d_2d(TyDF);
   4998    if (1) test_frintx_4s_4s(TySF);
   4999    if (1) test_frintx_2s_2s(TySF);
   5000    if (1) test_frintz_2d_2d(TyDF);
   5001    if (1) test_frintz_4s_4s(TySF);
   5002    if (1) test_frintz_2s_2s(TySF);
   5003 
   5004    // frsqrte   d,s (est)
   5005    // frsqrte   2d,4s,2s
   5006    if (1) test_frsqrte_d_d(TyDF);
   5007    if (1) test_frsqrte_s_s(TySF);
   5008    if (1) test_frsqrte_2d_2d(TyDF);
   5009    if (1) test_frsqrte_4s_4s(TySF);
   5010    if (1) test_frsqrte_2s_2s(TySF);
   5011 
   5012    // frsqrts   d,s (step)
   5013    // frsqrts   2d,4s,2s
   5014    if (1) test_frsqrts_d_d_d(TyDF);
   5015    if (1) test_frsqrts_s_s_s(TySF);
   5016    if (1) test_frsqrts_2d_2d_2d(TyDF);
   5017    if (1) test_frsqrts_4s_4s_4s(TySF);
   5018    if (1) test_frsqrts_2s_2s_2s(TySF);
   5019 
   5020    // ======================== CONV ========================
   5021 
   5022    // fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   5023    if (1) test_fcvt_s_h(TyHF);
   5024    if (1) test_fcvt_d_h(TyHF);
   5025    if (1) test_fcvt_h_s(TySF);
   5026    if (1) test_fcvt_d_s(TySF);
   5027    if (1) test_fcvt_h_d(TyDF);
   5028    if (1) test_fcvt_s_d(TyDF);
   5029 
   5030    // fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   5031    if (1) test_fcvtl_4s_4h(TyHF);
   5032    if (1) test_fcvtl_4s_8h(TyHF);
   5033    if (1) test_fcvtl_2d_2s(TySF);
   5034    if (1) test_fcvtl_2d_4s(TySF);
   5035 
   5036    // fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   5037    if (1) test_fcvtn_4h_4s(TySF);
   5038    if (1) test_fcvtn_8h_4s(TySF);
   5039    if (1) test_fcvtn_2s_2d(TyDF);
   5040    if (1) test_fcvtn_4s_2d(TyDF);
   5041 
   5042    // fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
   5043    // fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
   5044    // fcvtas    2d,4s,2s
   5045    // fcvtau    2d,4s,2s
   5046    // fcvtas    w_s,x_s,w_d,x_d
   5047    // fcvtau    w_s,x_s,w_d,x_d
   5048    if (1) test_fcvtas_d_d(TyDF);
   5049    if (1) test_fcvtau_d_d(TyDF);
   5050    if (1) test_fcvtas_s_s(TySF);
   5051    if (1) test_fcvtau_s_s(TySF);
   5052    if (1) test_fcvtas_2d_2d(TyDF);
   5053    if (1) test_fcvtau_2d_2d(TyDF);
   5054    if (1) test_fcvtas_4s_4s(TySF);
   5055    if (1) test_fcvtau_4s_4s(TySF);
   5056    if (1) test_fcvtas_2s_2s(TySF);
   5057    if (1) test_fcvtau_2s_2s(TySF);
   5058    if (1) test_fcvtas_w_s(TySF);
   5059    if (1) test_fcvtau_w_s(TySF);
   5060    if (1) test_fcvtas_x_s(TySF);
   5061    if (1) test_fcvtau_x_s(TySF);
   5062    if (1) test_fcvtas_w_d(TyDF);
   5063    if (1) test_fcvtau_w_d(TyDF);
   5064    if (1) test_fcvtas_x_d(TyDF);
   5065    if (1) test_fcvtau_x_d(TyDF);
   5066 
   5067    // fcvtms    d,s  (fcvt to signed int,   minus inf)
   5068    // fcvtmu    d,s  (fcvt to unsigned int, minus inf)
   5069    // fcvtms    2d,4s,2s
   5070    // fcvtmu    2d,4s,2s
   5071    // fcvtms    w_s,x_s,w_d,x_d
   5072    // fcvtmu    w_s,x_s,w_d,x_d
   5073    if (1) test_fcvtms_d_d(TyDF);
   5074    if (1) test_fcvtmu_d_d(TyDF);
   5075    if (1) test_fcvtms_s_s(TySF);
   5076    if (1) test_fcvtmu_s_s(TySF);
   5077    if (1) test_fcvtms_2d_2d(TyDF);
   5078    if (1) test_fcvtmu_2d_2d(TyDF);
   5079    if (1) test_fcvtms_4s_4s(TySF);
   5080    if (1) test_fcvtmu_4s_4s(TySF);
   5081    if (1) test_fcvtms_2s_2s(TySF);
   5082    if (1) test_fcvtmu_2s_2s(TySF);
   5083    if (1) test_fcvtms_w_s(TySF);
   5084    if (1) test_fcvtmu_w_s(TySF);
   5085    if (1) test_fcvtms_x_s(TySF);
   5086    if (1) test_fcvtmu_x_s(TySF);
   5087    if (1) test_fcvtms_w_d(TyDF);
   5088    if (1) test_fcvtmu_w_d(TyDF);
   5089    if (1) test_fcvtms_x_d(TyDF);
   5090    if (1) test_fcvtmu_x_d(TyDF);
   5091 
   5092    // fcvtns    d,s  (fcvt to signed int,   nearest)
   5093    // fcvtnu    d,s  (fcvt to unsigned int, nearest)
   5094    // fcvtns    2d,4s,2s
   5095    // fcvtnu    2d,4s,2s
   5096    // fcvtns    w_s,x_s,w_d,x_d
   5097    // fcvtnu    w_s,x_s,w_d,x_d
   5098    if (1) test_fcvtns_d_d(TyDF);
   5099    if (1) test_fcvtnu_d_d(TyDF);
   5100    if (1) test_fcvtns_s_s(TySF);
   5101    if (1) test_fcvtnu_s_s(TySF);
   5102    if (1) test_fcvtns_2d_2d(TyDF);
   5103    if (1) test_fcvtnu_2d_2d(TyDF);
   5104    if (1) test_fcvtns_4s_4s(TySF);
   5105    if (1) test_fcvtnu_4s_4s(TySF);
   5106    if (1) test_fcvtns_2s_2s(TySF);
   5107    if (1) test_fcvtnu_2s_2s(TySF);
   5108    if (1) test_fcvtns_w_s(TySF);
   5109    if (1) test_fcvtnu_w_s(TySF);
   5110    if (1) test_fcvtns_x_s(TySF);
   5111    if (1) test_fcvtnu_x_s(TySF);
   5112    if (1) test_fcvtns_w_d(TyDF);
   5113    if (1) test_fcvtnu_w_d(TyDF);
   5114    if (1) test_fcvtns_x_d(TyDF);
   5115    if (1) test_fcvtnu_x_d(TyDF);
   5116 
   5117    // fcvtps    d,s  (fcvt to signed int,   plus inf)
   5118    // fcvtpu    d,s  (fcvt to unsigned int, plus inf)
   5119    // fcvtps    2d,4s,2s
   5120    // fcvtpu    2d,4s,2s
   5121    // fcvtps    w_s,x_s,w_d,x_d
   5122    // fcvtpu    w_s,x_s,w_d,x_d
   5123    if (1) test_fcvtps_d_d(TyDF);
   5124    if (1) test_fcvtpu_d_d(TyDF);
   5125    if (1) test_fcvtps_s_s(TySF);
   5126    if (1) test_fcvtpu_s_s(TySF);
   5127    if (1) test_fcvtps_2d_2d(TyDF);
   5128    if (1) test_fcvtpu_2d_2d(TyDF);
   5129    if (1) test_fcvtps_4s_4s(TySF);
   5130    if (1) test_fcvtpu_4s_4s(TySF);
   5131    if (1) test_fcvtps_2s_2s(TySF);
   5132    if (1) test_fcvtpu_2s_2s(TySF);
   5133    if (1) test_fcvtps_w_s(TySF);
   5134    if (1) test_fcvtpu_w_s(TySF);
   5135    if (1) test_fcvtps_x_s(TySF);
   5136    if (1) test_fcvtpu_x_s(TySF);
   5137    if (1) test_fcvtps_w_d(TyDF);
   5138    if (1) test_fcvtpu_w_d(TyDF);
   5139    if (1) test_fcvtps_x_d(TyDF);
   5140    if (1) test_fcvtpu_x_d(TyDF);
   5141 
   5142    // fcvtzs    d,s (fcvt to signed integer,   to zero)
   5143    // fcvtzu    d,s (fcvt to unsigned integer, to zero)
   5144    // fcvtzs    2d,4s,2s
   5145    // fcvtzu    2d,4s,2s
   5146    // fcvtzs    w_s,x_s,w_d,x_d
   5147    // fcvtzu    w_s,x_s,w_d,x_d
   5148    if (1) test_fcvtzs_d_d(TyDF);
   5149    if (1) test_fcvtzu_d_d(TyDF);
   5150    if (1) test_fcvtzs_s_s(TySF);
   5151    if (1) test_fcvtzu_s_s(TySF);
   5152    if (1) test_fcvtzs_2d_2d(TyDF);
   5153    if (1) test_fcvtzu_2d_2d(TyDF);
   5154    if (1) test_fcvtzs_4s_4s(TySF);
   5155    if (1) test_fcvtzu_4s_4s(TySF);
   5156    if (1) test_fcvtzs_2s_2s(TySF);
   5157    if (1) test_fcvtzu_2s_2s(TySF);
   5158    if (1) test_fcvtzs_w_s(TySF);
   5159    if (1) test_fcvtzu_w_s(TySF);
   5160    if (1) test_fcvtzs_x_s(TySF);
   5161    if (1) test_fcvtzu_x_s(TySF);
   5162    if (1) test_fcvtzs_w_d(TyDF);
   5163    if (1) test_fcvtzu_w_d(TyDF);
   5164    if (1) test_fcvtzs_x_d(TyDF);
   5165    if (1) test_fcvtzu_x_d(TyDF);
   5166 
   5167    // fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5168    // fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5169    // fcvtzs    2d,4s,2s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5170    // fcvtzu    2d,4s,2s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5171    // fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5172    // fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5173    if (1) test_fcvtzs_d_d_fbits1(TyDF);
   5174    if (1) test_fcvtzs_d_d_fbits32(TyDF);
   5175    if (1) test_fcvtzs_d_d_fbits64(TyDF);
   5176    if (1) test_fcvtzu_d_d_fbits1(TyDF);
   5177    if (1) test_fcvtzu_d_d_fbits32(TyDF);
   5178    if (1) test_fcvtzu_d_d_fbits64(TyDF);
   5179    if (1) test_fcvtzs_s_s_fbits1(TySF);
   5180    if (1) test_fcvtzs_s_s_fbits16(TySF);
   5181    if (1) test_fcvtzs_s_s_fbits32(TySF);
   5182    if (1) test_fcvtzu_s_s_fbits1(TySF);
   5183    if (1) test_fcvtzu_s_s_fbits16(TySF);
   5184    if (1) test_fcvtzu_s_s_fbits32(TySF);
   5185    if (1) test_fcvtzs_2d_2d_fbits1(TyDF);
   5186    if (1) test_fcvtzs_2d_2d_fbits32(TyDF);
   5187    if (1) test_fcvtzs_2d_2d_fbits64(TyDF);
   5188    if (1) test_fcvtzu_2d_2d_fbits1(TyDF);
   5189    if (1) test_fcvtzu_2d_2d_fbits32(TyDF);
   5190    if (1) test_fcvtzu_2d_2d_fbits64(TyDF);
   5191    if (1) test_fcvtzs_4s_4s_fbits1(TySF);
   5192    if (1) test_fcvtzs_4s_4s_fbits16(TySF);
   5193    if (1) test_fcvtzs_4s_4s_fbits32(TySF);
   5194    if (1) test_fcvtzu_4s_4s_fbits1(TySF);
   5195    if (1) test_fcvtzu_4s_4s_fbits16(TySF);
   5196    if (1) test_fcvtzu_4s_4s_fbits32(TySF);
   5197    if (1) test_fcvtzs_2s_2s_fbits1(TySF);
   5198    if (1) test_fcvtzs_2s_2s_fbits16(TySF);
   5199    if (1) test_fcvtzs_2s_2s_fbits32(TySF);
   5200    if (1) test_fcvtzu_2s_2s_fbits1(TySF);
   5201    if (1) test_fcvtzu_2s_2s_fbits16(TySF);
   5202    if (1) test_fcvtzu_2s_2s_fbits32(TySF);
   5203    if (1) test_fcvtzs_w_s_fbits1(TySF);
   5204    if (1) test_fcvtzs_w_s_fbits16(TySF);
   5205    if (1) test_fcvtzs_w_s_fbits32(TySF);
   5206    if (1) test_fcvtzu_w_s_fbits1(TySF);
   5207    if (1) test_fcvtzu_w_s_fbits16(TySF);
   5208    if (1) test_fcvtzu_w_s_fbits32(TySF);
   5209    if (1) test_fcvtzs_x_s_fbits1(TySF);
   5210    if (1) test_fcvtzs_x_s_fbits32(TySF);
   5211    if (1) test_fcvtzs_x_s_fbits64(TySF);
   5212    if (1) test_fcvtzu_x_s_fbits1(TySF);
   5213    if (1) test_fcvtzu_x_s_fbits32(TySF);
   5214    if (1) test_fcvtzu_x_s_fbits64(TySF);
   5215    if (1) test_fcvtzs_w_d_fbits1(TyDF);
   5216    if (1) test_fcvtzs_w_d_fbits16(TyDF);
   5217    if (1) test_fcvtzs_w_d_fbits32(TyDF);
   5218    if (1) test_fcvtzu_w_d_fbits1(TyDF);
   5219    if (1) test_fcvtzu_w_d_fbits16(TyDF);
   5220    if (1) test_fcvtzu_w_d_fbits32(TyDF);
   5221    if (1) test_fcvtzs_x_d_fbits1(TyDF);
   5222    if (1) test_fcvtzs_x_d_fbits32(TyDF);
   5223    if (1) test_fcvtzs_x_d_fbits64(TyDF);
   5224    if (1) test_fcvtzu_x_d_fbits1(TyDF);
   5225    if (1) test_fcvtzu_x_d_fbits32(TyDF);
   5226    if (1) test_fcvtzu_x_d_fbits64(TyDF);
   5227 
   5228    // fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
   5229    // fcvtxn    2s_2d,4s_2d
   5230    if (1) test_fcvtxn_s_d(TyDF);
   5231    if (1) test_fcvtxn_2s_2d(TyDF);
   5232    if (1) test_fcvtxn_4s_2d(TyDF);
   5233 
   5234    // scvtf     d,s        _#fbits
   5235    // ucvtf     d,s        _#fbits
   5236    // scvtf     2d,4s,2s   _#fbits
   5237    // ucvtf     2d,4s,2s   _#fbits
   5238    if (1) test_scvtf_d_d_fbits1(TyD);
   5239    if (1) test_scvtf_d_d_fbits32(TyD);
   5240    if (1) test_scvtf_d_d_fbits64(TyD);
   5241    if (1) test_ucvtf_d_d_fbits1(TyD);
   5242    if (1) test_ucvtf_d_d_fbits32(TyD);
   5243    if (1) test_ucvtf_d_d_fbits64(TyD);
   5244    if (1) test_scvtf_s_s_fbits1(TyS);
   5245    if (1) test_scvtf_s_s_fbits16(TyS);
   5246    if (1) test_scvtf_s_s_fbits32(TyS);
   5247    if (1) test_ucvtf_s_s_fbits1(TyS);
   5248    if (1) test_ucvtf_s_s_fbits16(TyS);
   5249    if (1) test_ucvtf_s_s_fbits32(TyS);
   5250    if (1) test_scvtf_2d_2d_fbits1(TyD);
   5251    if (1) test_scvtf_2d_2d_fbits32(TyD);
   5252    if (1) test_scvtf_2d_2d_fbits64(TyD);
   5253    if (1) test_ucvtf_2d_2d_fbits1(TyD);
   5254    if (1) test_ucvtf_2d_2d_fbits32(TyD);
   5255    if (1) test_ucvtf_2d_2d_fbits64(TyD);
   5256    if (1) test_scvtf_4s_4s_fbits1(TyS);
   5257    if (1) test_scvtf_4s_4s_fbits16(TyS);
   5258    if (1) test_scvtf_4s_4s_fbits32(TyS);
   5259    if (1) test_ucvtf_4s_4s_fbits1(TyS);
   5260    if (1) test_ucvtf_4s_4s_fbits16(TyS);
   5261    if (1) test_ucvtf_4s_4s_fbits32(TyS);
   5262    if (1) test_scvtf_2s_2s_fbits1(TyS);
   5263    if (1) test_scvtf_2s_2s_fbits16(TyS);
   5264    if (1) test_scvtf_2s_2s_fbits32(TyS);
   5265    if (1) test_ucvtf_2s_2s_fbits1(TyS);
   5266    if (1) test_ucvtf_2s_2s_fbits16(TyS);
   5267    if (1) test_ucvtf_2s_2s_fbits32(TyS);
   5268 
   5269    // scvtf     d,s
   5270    // ucvtf     d,s
   5271    // scvtf     2d,4s,2s
   5272    // ucvtf     2d,4s,2s
   5273    if (1) test_scvtf_d_d(TyD);
   5274    if (1) test_ucvtf_d_d(TyD);
   5275    if (1) test_scvtf_s_s(TyS);
   5276    if (1) test_ucvtf_s_s(TyS);
   5277    if (1) test_scvtf_2d_2d(TyD);
   5278    if (1) test_ucvtf_2d_2d(TyD);
   5279    if (1) test_scvtf_4s_4s(TyS);
   5280    if (1) test_ucvtf_4s_4s(TyS);
   5281    if (1) test_scvtf_2s_2s(TyS);
   5282    if (1) test_ucvtf_2s_2s(TyS);
   5283 
   5284    // scvtf     s_w, d_w, s_x, d_x,   _#fbits
   5285    // ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   5286    if (1) test_scvtf_s_w_fbits1(TyS);
   5287    if (1) test_scvtf_s_w_fbits16(TyS);
   5288    if (1) test_scvtf_s_w_fbits32(TyS);
   5289    if (1) test_scvtf_d_w_fbits1(TyS);
   5290    if (1) test_scvtf_d_w_fbits16(TyS);
   5291    if (1) test_scvtf_d_w_fbits32(TyS);
   5292    if (1) test_scvtf_s_x_fbits1(TyD);
   5293    if (1) test_scvtf_s_x_fbits32(TyD);
   5294    if (1) test_scvtf_s_x_fbits64(TyD);
   5295    if (1) test_scvtf_d_x_fbits1(TyD);
   5296    if (1) test_scvtf_d_x_fbits32(TyD);
   5297    if (1) test_scvtf_d_x_fbits64(TyD);
   5298    if (1) test_ucvtf_s_w_fbits1(TyS);
   5299    if (1) test_ucvtf_s_w_fbits16(TyS);
   5300    if (1) test_ucvtf_s_w_fbits32(TyS);
   5301    if (1) test_ucvtf_d_w_fbits1(TyS);
   5302    if (1) test_ucvtf_d_w_fbits16(TyS);
   5303    if (1) test_ucvtf_d_w_fbits32(TyS);
   5304    if (1) test_ucvtf_s_x_fbits1(TyD);
   5305    if (1) test_ucvtf_s_x_fbits32(TyD);
   5306    if (1) test_ucvtf_s_x_fbits64(TyD);
   5307    if (1) test_ucvtf_d_x_fbits1(TyD);
   5308    if (1) test_ucvtf_d_x_fbits32(TyD);
   5309    if (1) test_ucvtf_d_x_fbits64(TyD);
   5310 
   5311    // scvtf     s_w, d_w, s_x, d_x
   5312    // ucvtf     s_w, d_w, s_x, d_x
   5313    if (1) test_scvtf_s_w(TyS);
   5314    if (1) test_scvtf_d_w(TyS);
   5315    if (1) test_scvtf_s_x(TyD);
   5316    if (1) test_scvtf_d_x(TyD);
   5317    if (1) test_ucvtf_s_w(TyS);
   5318    if (1) test_ucvtf_d_w(TyS);
   5319    if (1) test_ucvtf_s_x(TyD);
   5320    if (1) test_ucvtf_d_x(TyD);
   5321 
   5322    // ======================== INT ========================
   5323 
   5324    // abs       d
   5325    // neg       d
   5326    if (1) test_abs_d_d(TyD);
   5327    if (1) test_neg_d_d(TyD);
   5328 
   5329    // abs       2d,4s,2s,8h,4h,16b,8b
   5330    // neg       2d,4s,2s,8h,4h,16b,8b
   5331    if (1) test_abs_2d_2d(TyD);
   5332    if (1) test_abs_4s_4s(TyS);
   5333    if (1) test_abs_2s_2s(TyS);
   5334    if (1) test_abs_8h_8h(TyH);
   5335    if (1) test_abs_4h_4h(TyH);
   5336    if (1) test_abs_16b_16b(TyB);
   5337    if (1) test_abs_8b_8b(TyB);
   5338    if (1) test_neg_2d_2d(TyD);
   5339    if (1) test_neg_4s_4s(TyS);
   5340    if (1) test_neg_2s_2s(TyS);
   5341    if (1) test_neg_8h_8h(TyH);
   5342    if (1) test_neg_4h_4h(TyH);
   5343    if (1) test_neg_16b_16b(TyB);
   5344    if (1) test_neg_8b_8b(TyB);
   5345 
   5346    // add       d
   5347    // sub       d
   5348    if (1) test_add_d_d_d(TyD);
   5349    if (1) test_sub_d_d_d(TyD);
   5350 
   5351    // add       2d,4s,2s,8h,4h,16b,8b
   5352    // sub       2d,4s,2s,8h,4h,16b,8b
   5353    if (1) test_add_2d_2d_2d(TyD);
   5354    if (1) test_add_4s_4s_4s(TyS);
   5355    if (1) test_add_2s_2s_2s(TyS);
   5356    if (1) test_add_8h_8h_8h(TyH);
   5357    if (1) test_add_4h_4h_4h(TyH);
   5358    if (1) test_add_16b_16b_16b(TyB);
   5359    if (1) test_add_8b_8b_8b(TyB);
   5360    if (1) test_sub_2d_2d_2d(TyD);
   5361    if (1) test_sub_4s_4s_4s(TyS);
   5362    if (1) test_sub_2s_2s_2s(TyS);
   5363    if (1) test_sub_8h_8h_8h(TyH);
   5364    if (1) test_sub_4h_4h_4h(TyH);
   5365    if (1) test_sub_16b_16b_16b(TyB);
   5366    if (1) test_sub_8b_8b_8b(TyB);
   5367 
   5368    // addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5369    // subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5370    // raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5371    // rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5372    if (1) test_addhn_2s_2d_2d(TyD);
   5373    if (1) test_addhn2_4s_2d_2d(TyD);
   5374    if (1) test_addhn_4h_4s_4s(TyS);
   5375    if (1) test_addhn2_8h_4s_4s(TyS);
   5376    if (1) test_addhn_8b_8h_8h(TyH);
   5377    if (1) test_addhn2_16b_8h_8h(TyH);
   5378    if (1) test_subhn_2s_2d_2d(TyD);
   5379    if (1) test_subhn2_4s_2d_2d(TyD);
   5380    if (1) test_subhn_4h_4s_4s(TyS);
   5381    if (1) test_subhn2_8h_4s_4s(TyS);
   5382    if (1) test_subhn_8b_8h_8h(TyH);
   5383    if (1) test_subhn2_16b_8h_8h(TyH);
   5384    if (1) test_raddhn_2s_2d_2d(TyD);
   5385    if (1) test_raddhn2_4s_2d_2d(TyD);
   5386    if (1) test_raddhn_4h_4s_4s(TyS);
   5387    if (1) test_raddhn2_8h_4s_4s(TyS);
   5388    if (1) test_raddhn_8b_8h_8h(TyH);
   5389    if (1) test_raddhn2_16b_8h_8h(TyH);
   5390    if (1) test_rsubhn_2s_2d_2d(TyD);
   5391    if (1) test_rsubhn2_4s_2d_2d(TyD);
   5392    if (1) test_rsubhn_4h_4s_4s(TyS);
   5393    if (1) test_rsubhn2_8h_4s_4s(TyS);
   5394    if (1) test_rsubhn_8b_8h_8h(TyH);
   5395    if (1) test_rsubhn2_16b_8h_8h(TyH);
   5396 
   5397    // addp     d (add pairs, across)
   5398    if (1) test_addp_d_2d(TyD);
   5399 
   5400    // addp     2d,4s,2s,8h,4h,16b,8b
   5401    if (1) test_addp_2d_2d_2d(TyD);
   5402    if (1) test_addp_4s_4s_4s(TyS);
   5403    if (1) test_addp_2s_2s_2s(TyS);
   5404    if (1) test_addp_8h_8h_8h(TyH);
   5405    if (1) test_addp_4h_4h_4h(TyH);
   5406    if (1) test_addp_16b_16b_16b(TyB);
   5407    if (1) test_addp_8b_8b_8b(TyB);
   5408 
   5409    // addv     4s,8h,4h,16b,8b (reduce across vector)
   5410    if (1) test_addv_s_4s(TyS);
   5411    if (1) test_addv_h_8h(TyH);
   5412    if (1) test_addv_h_4h(TyH);
   5413    if (1) test_addv_b_16b(TyB);
   5414    if (1) test_addv_b_8b(TyB);
   5415 
   5416    // and      16b,8b
   5417    // bic      16b,8b
   5418    // orn      16b,8b
   5419    // orr      16b,8b
   5420    if (1) test_and_16b_16b_16b(TyB);
   5421    if (1) test_and_8b_8b_8b(TyB);
   5422    if (1) test_bic_16b_16b_16b(TyB);
   5423    if (1) test_bic_8b_8b_8b(TyB);
   5424    if (1) test_orr_16b_16b_16b(TyB);
   5425    if (1) test_orr_8b_8b_8b(TyB);
   5426    if (1) test_orn_16b_16b_16b(TyB);
   5427    if (1) test_orn_8b_8b_8b(TyB);
   5428 
   5429    // orr      8h,4h   #imm8, LSL #0 or 8
   5430    // orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
   5431    // bic      8h,4h   #imm8, LSL #0 or 8
   5432    // bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
   5433    // movi and mvni are very similar, a superset of these.
   5434    // Cases are below.
   5435    if (1) test_orr_8h_0x5A_lsl0(TyH);
   5436    if (1) test_orr_8h_0xA5_lsl8(TyH);
   5437    if (1) test_orr_4h_0x5A_lsl0(TyH);
   5438    if (1) test_orr_4h_0xA5_lsl8(TyH);
   5439    if (1) test_orr_4s_0x5A_lsl0(TyS);
   5440    if (1) test_orr_4s_0x6B_lsl8(TyS);
   5441    if (1) test_orr_4s_0x49_lsl16(TyS);
   5442    if (1) test_orr_4s_0x3D_lsl24(TyS);
   5443    if (1) test_orr_2s_0x5A_lsl0(TyS);
   5444    if (1) test_orr_2s_0x6B_lsl8(TyS);
   5445    if (1) test_orr_2s_0x49_lsl16(TyS);
   5446    if (1) test_orr_2s_0x3D_lsl24(TyS);
   5447    if (1) test_bic_8h_0x5A_lsl0(TyH);
   5448    if (1) test_bic_8h_0xA5_lsl8(TyH);
   5449    if (1) test_bic_4h_0x5A_lsl0(TyH);
   5450    if (1) test_bic_4h_0xA5_lsl8(TyH);
   5451    if (1) test_bic_4s_0x5A_lsl0(TyS);
   5452    if (1) test_bic_4s_0x6B_lsl8(TyS);
   5453    if (1) test_bic_4s_0x49_lsl16(TyS);
   5454    if (1) test_bic_4s_0x3D_lsl24(TyS);
   5455    if (1) test_bic_2s_0x5A_lsl0(TyS);
   5456    if (1) test_bic_2s_0x6B_lsl8(TyS);
   5457    if (1) test_bic_2s_0x49_lsl16(TyS);
   5458    if (1) test_bic_2s_0x3D_lsl24(TyS);
   5459 
   5460    // bif      16b,8b (vector) (bit insert if false)
   5461    // bit      16b,8b (vector) (bit insert if true)
   5462    // bsl      16b,8b (vector) (bit select)
   5463    // eor      16b,8b (vector)
   5464    if (1) test_bif_16b_16b_16b(TyB);
   5465    if (1) test_bif_8b_8b_8b(TyB);
   5466    if (1) test_bit_16b_16b_16b(TyB);
   5467    if (1) test_bit_8b_8b_8b(TyB);
   5468    if (1) test_bsl_16b_16b_16b(TyB);
   5469    if (1) test_bsl_8b_8b_8b(TyB);
   5470    if (1) test_eor_16b_16b_16b(TyB);
   5471    if (1) test_eor_8b_8b_8b(TyB);
   5472 
   5473    // cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   5474    // clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   5475    if (1) test_cls_4s_4s(TyS);
   5476    if (1) test_cls_2s_2s(TyS);
   5477    if (1) test_cls_8h_8h(TyH);
   5478    if (1) test_cls_4h_4h(TyH);
   5479    if (1) test_cls_16b_16b(TyB);
   5480    if (1) test_cls_8b_8b(TyB);
   5481    if (1) test_clz_4s_4s(TyS);
   5482    if (1) test_clz_2s_2s(TyS);
   5483    if (1) test_clz_8h_8h(TyH);
   5484    if (1) test_clz_4h_4h(TyH);
   5485    if (1) test_clz_16b_16b(TyB);
   5486    if (1) test_clz_8b_8b(TyB);
   5487 
   5488    // cmeq     d
   5489    // cmge     d
   5490    // cmgt     d
   5491    // cmhi     d
   5492    // cmhs     d
   5493    // cmtst    d
   5494    if (1) test_cmeq_d_d_d(TyD);
   5495    if (1) test_cmge_d_d_d(TyD);
   5496    if (1) test_cmgt_d_d_d(TyD);
   5497    if (1) test_cmhi_d_d_d(TyD);
   5498    if (1) test_cmhs_d_d_d(TyD);
   5499    if (1) test_cmtst_d_d_d(TyD);
   5500 
   5501    // cmeq     2d,4s,2s,8h,4h,16b,8b
   5502    // cmge     2d,4s,2s,8h,4h,16b,8b
   5503    // cmgt     2d,4s,2s,8h,4h,16b,8b
   5504    // cmhi     2d,4s,2s,8h,4h,16b,8b
   5505    // cmhs     2d,4s,2s,8h,4h,16b,8b
   5506    // cmtst    2d,4s,2s,8h,4h,16b,8b
   5507    if (1) test_cmeq_2d_2d_2d(TyD);
   5508    if (1) test_cmeq_4s_4s_4s(TyS);
   5509    if (1) test_cmeq_2s_2s_2s(TyS);
   5510    if (1) test_cmeq_8h_8h_8h(TyH);
   5511    if (1) test_cmeq_4h_4h_4h(TyH);
   5512    if (1) test_cmeq_16b_16b_16b(TyB);
   5513    if (1) test_cmeq_8b_8b_8b(TyB);
   5514    if (1) test_cmge_2d_2d_2d(TyD);
   5515    if (1) test_cmge_4s_4s_4s(TyS);
   5516    if (1) test_cmge_2s_2s_2s(TyS);
   5517    if (1) test_cmge_8h_8h_8h(TyH);
   5518    if (1) test_cmge_4h_4h_4h(TyH);
   5519    if (1) test_cmge_16b_16b_16b(TyB);
   5520    if (1) test_cmge_8b_8b_8b(TyB);
   5521    if (1) test_cmgt_2d_2d_2d(TyD);
   5522    if (1) test_cmgt_4s_4s_4s(TyS);
   5523    if (1) test_cmgt_2s_2s_2s(TyS);
   5524    if (1) test_cmgt_8h_8h_8h(TyH);
   5525    if (1) test_cmgt_4h_4h_4h(TyH);
   5526    if (1) test_cmgt_16b_16b_16b(TyB);
   5527    if (1) test_cmgt_8b_8b_8b(TyB);
   5528    if (1) test_cmhi_2d_2d_2d(TyD);
   5529    if (1) test_cmhi_4s_4s_4s(TyS);
   5530    if (1) test_cmhi_2s_2s_2s(TyS);
   5531    if (1) test_cmhi_8h_8h_8h(TyH);
   5532    if (1) test_cmhi_4h_4h_4h(TyH);
   5533    if (1) test_cmhi_16b_16b_16b(TyB);
   5534    if (1) test_cmhi_8b_8b_8b(TyB);
   5535    if (1) test_cmhs_2d_2d_2d(TyD);
   5536    if (1) test_cmhs_4s_4s_4s(TyS);
   5537    if (1) test_cmhs_2s_2s_2s(TyS);
   5538    if (1) test_cmhs_8h_8h_8h(TyH);
   5539    if (1) test_cmhs_4h_4h_4h(TyH);
   5540    if (1) test_cmhs_16b_16b_16b(TyB);
   5541    if (1) test_cmhs_8b_8b_8b(TyB);
   5542    if (1) test_cmtst_2d_2d_2d(TyD);
   5543    if (1) test_cmtst_4s_4s_4s(TyS);
   5544    if (1) test_cmtst_2s_2s_2s(TyS);
   5545    if (1) test_cmtst_8h_8h_8h(TyH);
   5546    if (1) test_cmtst_4h_4h_4h(TyH);
   5547    if (1) test_cmtst_16b_16b_16b(TyB);
   5548    if (1) test_cmtst_8b_8b_8b(TyB);
   5549 
   5550    // cmeq_z   d
   5551    // cmge_z   d
   5552    // cmgt_z   d
   5553    // cmle_z   d
   5554    // cmlt_z   d
   5555    if (1) test_cmeq_zero_d_d(TyD);
   5556    if (1) test_cmge_zero_d_d(TyD);
   5557    if (1) test_cmgt_zero_d_d(TyD);
   5558    if (1) test_cmle_zero_d_d(TyD);
   5559    if (1) test_cmlt_zero_d_d(TyD);
   5560 
   5561    // cmeq_z   2d,4s,2s,8h,4h,16b,8b
   5562    // cmge_z   2d,4s,2s,8h,4h,16b,8b
   5563    // cmgt_z   2d,4s,2s,8h,4h,16b,8b
   5564    // cmle_z   2d,4s,2s,8h,4h,16b,8b
   5565    // cmlt_z   2d,4s,2s,8h,4h,16b,8b
   5566    if (1) test_cmeq_zero_2d_2d(TyD);
   5567    if (1) test_cmeq_zero_4s_4s(TyS);
   5568    if (1) test_cmeq_zero_2s_2s(TyS);
   5569    if (1) test_cmeq_zero_8h_8h(TyH);
   5570    if (1) test_cmeq_zero_4h_4h(TyH);
   5571    if (1) test_cmeq_zero_16b_16b(TyB);
   5572    if (1) test_cmeq_zero_8b_8b(TyB);
   5573    if (1) test_cmge_zero_2d_2d(TyD);
   5574    if (1) test_cmge_zero_4s_4s(TyS);
   5575    if (1) test_cmge_zero_2s_2s(TyS);
   5576    if (1) test_cmge_zero_8h_8h(TyH);
   5577    if (1) test_cmge_zero_4h_4h(TyH);
   5578    if (1) test_cmge_zero_16b_16b(TyB);
   5579    if (1) test_cmge_zero_8b_8b(TyB);
   5580    if (1) test_cmgt_zero_2d_2d(TyD);
   5581    if (1) test_cmgt_zero_4s_4s(TyS);
   5582    if (1) test_cmgt_zero_2s_2s(TyS);
   5583    if (1) test_cmgt_zero_8h_8h(TyH);
   5584    if (1) test_cmgt_zero_4h_4h(TyH);
   5585    if (1) test_cmgt_zero_16b_16b(TyB);
   5586    if (1) test_cmgt_zero_8b_8b(TyB);
   5587    if (1) test_cmle_zero_2d_2d(TyD);
   5588    if (1) test_cmle_zero_4s_4s(TyS);
   5589    if (1) test_cmle_zero_2s_2s(TyS);
   5590    if (1) test_cmle_zero_8h_8h(TyH);
   5591    if (1) test_cmle_zero_4h_4h(TyH);
   5592    if (1) test_cmle_zero_16b_16b(TyB);
   5593    if (1) test_cmle_zero_8b_8b(TyB);
   5594    if (1) test_cmlt_zero_2d_2d(TyD);
   5595    if (1) test_cmlt_zero_4s_4s(TyS);
   5596    if (1) test_cmlt_zero_2s_2s(TyS);
   5597    if (1) test_cmlt_zero_8h_8h(TyH);
   5598    if (1) test_cmlt_zero_4h_4h(TyH);
   5599    if (1) test_cmlt_zero_16b_16b(TyB);
   5600    if (1) test_cmlt_zero_8b_8b(TyB);
   5601 
   5602    // cnt      16b,8b (population count per byte)
   5603    if (1) test_cnt_16b_16b(TyB);
   5604    if (1) test_cnt_8b_8b(TyB);
   5605 
   5606    // dup      d,s,h,b (vec elem to scalar)
   5607    if (1) test_dup_d_d0(TyD);
   5608    if (1) test_dup_d_d1(TyD);
   5609    if (1) test_dup_s_s0(TyS);
   5610    if (1) test_dup_s_s3(TyS);
   5611    if (1) test_dup_h_h0(TyH);
   5612    if (1) test_dup_h_h6(TyH);
   5613    if (1) test_dup_b_b0(TyB);
   5614    if (1) test_dup_b_b13(TyB);
   5615 
   5616    // dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   5617    if (1) test_dup_2d_d0(TyD);
   5618    if (1) test_dup_2d_d1(TyD);
   5619    if (1) test_dup_4s_s0(TyS);
   5620    if (1) test_dup_4s_s3(TyS);
   5621    if (1) test_dup_2s_s0(TyS);
   5622    if (1) test_dup_2s_s2(TyS);
   5623    if (1) test_dup_8h_h0(TyH);
   5624    if (1) test_dup_8h_h6(TyH);
   5625    if (1) test_dup_4h_h1(TyH);
   5626    if (1) test_dup_4h_h5(TyH);
   5627    if (1) test_dup_16b_b2(TyB);
   5628    if (1) test_dup_16b_b12(TyB);
   5629    if (1) test_dup_8b_b3(TyB);
   5630    if (1) test_dup_8b_b13(TyB);
   5631 
   5632    // dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   5633    if (1) test_dup_2d_x(TyD);
   5634    if (1) test_dup_4s_w(TyS);
   5635    if (1) test_dup_2s_w(TyS);
   5636    if (1) test_dup_8h_w(TyH);
   5637    if (1) test_dup_4h_w(TyH);
   5638    if (1) test_dup_16b_w(TyB);
   5639    if (1) test_dup_8b_w(TyB);
   5640 
   5641    // ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   5642    if (1) test_ext_16b_16b_16b_0x0(TyB);
   5643    if (1) test_ext_16b_16b_16b_0x1(TyB);
   5644    if (1) test_ext_16b_16b_16b_0x2(TyB);
   5645    if (1) test_ext_16b_16b_16b_0x3(TyB);
   5646    if (1) test_ext_16b_16b_16b_0x4(TyB);
   5647    if (1) test_ext_16b_16b_16b_0x5(TyB);
   5648    if (1) test_ext_16b_16b_16b_0x6(TyB);
   5649    if (1) test_ext_16b_16b_16b_0x7(TyB);
   5650    if (1) test_ext_16b_16b_16b_0x8(TyB);
   5651    if (1) test_ext_16b_16b_16b_0x9(TyB);
   5652    if (1) test_ext_16b_16b_16b_0xA(TyB);
   5653    if (1) test_ext_16b_16b_16b_0xB(TyB);
   5654    if (1) test_ext_16b_16b_16b_0xC(TyB);
   5655    if (1) test_ext_16b_16b_16b_0xD(TyB);
   5656    if (1) test_ext_16b_16b_16b_0xE(TyB);
   5657    if (1) test_ext_16b_16b_16b_0xF(TyB);
   5658    if (1) test_ext_8b_8b_8b_0x0(TyB);
   5659    if (1) test_ext_8b_8b_8b_0x1(TyB);
   5660    if (1) test_ext_8b_8b_8b_0x2(TyB);
   5661    if (1) test_ext_8b_8b_8b_0x3(TyB);
   5662    if (1) test_ext_8b_8b_8b_0x4(TyB);
   5663    if (1) test_ext_8b_8b_8b_0x5(TyB);
   5664    if (1) test_ext_8b_8b_8b_0x6(TyB);
   5665    if (1) test_ext_8b_8b_8b_0x7(TyB);
   5666 
   5667    // ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   5668    if (1) test_ins_d0_d0(TyD);
   5669    if (1) test_ins_d0_d1(TyD);
   5670    if (1) test_ins_d1_d0(TyD);
   5671    if (1) test_ins_d1_d1(TyD);
   5672    if (1) test_ins_s0_s2(TyS);
   5673    if (1) test_ins_s3_s0(TyS);
   5674    if (1) test_ins_s2_s1(TyS);
   5675    if (1) test_ins_s1_s3(TyS);
   5676    if (1) test_ins_h0_h6(TyH);
   5677    if (1) test_ins_h7_h0(TyH);
   5678    if (1) test_ins_h6_h1(TyH);
   5679    if (1) test_ins_h1_h7(TyH);
   5680    if (1) test_ins_b0_b14(TyB);
   5681    if (1) test_ins_b15_b8(TyB);
   5682    if (1) test_ins_b13_b9(TyB);
   5683    if (1) test_ins_b5_b12(TyB);
   5684 
   5685    // ins      d[]_x, s[]_w, h[]_w, b[]_w
   5686    if (1) test_INS_general();
   5687 
   5688    // mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5689    // mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5690    // mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5691    if (1) test_mla_4s_4s_s0(TyS);
   5692    if (1) test_mla_4s_4s_s3(TyS);
   5693    if (1) test_mla_2s_2s_s0(TyS);
   5694    if (1) test_mla_2s_2s_s3(TyS);
   5695    if (1) test_mla_8h_8h_h1(TyH);
   5696    if (1) test_mla_8h_8h_h5(TyH);
   5697    if (1) test_mla_4h_4h_h2(TyH);
   5698    if (1) test_mla_4h_4h_h7(TyH);
   5699    if (1) test_mls_4s_4s_s0(TyS);
   5700    if (1) test_mls_4s_4s_s3(TyS);
   5701    if (1) test_mls_2s_2s_s0(TyS);
   5702    if (1) test_mls_2s_2s_s3(TyS);
   5703    if (1) test_mls_8h_8h_h1(TyH);
   5704    if (1) test_mls_8h_8h_h5(TyH);
   5705    if (1) test_mls_4h_4h_h2(TyH);
   5706    if (1) test_mls_4h_4h_h7(TyH);
   5707    if (1) test_mul_4s_4s_s0(TyS);
   5708    if (1) test_mul_4s_4s_s3(TyS);
   5709    if (1) test_mul_2s_2s_s0(TyS);
   5710    if (1) test_mul_2s_2s_s3(TyS);
   5711    if (1) test_mul_8h_8h_h1(TyH);
   5712    if (1) test_mul_8h_8h_h5(TyH);
   5713    if (1) test_mul_4h_4h_h2(TyH);
   5714    if (1) test_mul_4h_4h_h7(TyH);
   5715 
   5716    // mla   4s,2s,8h,4h,16b,8b
   5717    // mls   4s,2s,8h,4h,16b,8b
   5718    // mul   4s,2s,8h,4h,16b,8b
   5719    if (1) test_mla_4s_4s_4s(TyS);
   5720    if (1) test_mla_2s_2s_2s(TyS);
   5721    if (1) test_mla_8h_8h_8h(TyH);
   5722    if (1) test_mla_4h_4h_4h(TyH);
   5723    if (1) test_mla_16b_16b_16b(TyB);
   5724    if (1) test_mla_8b_8b_8b(TyB);
   5725    if (1) test_mls_4s_4s_4s(TyS);
   5726    if (1) test_mls_2s_2s_2s(TyS);
   5727    if (1) test_mls_8h_8h_8h(TyH);
   5728    if (1) test_mls_4h_4h_4h(TyH);
   5729    if (1) test_mls_16b_16b_16b(TyB);
   5730    if (1) test_mls_8b_8b_8b(TyB);
   5731    if (1) test_mul_4s_4s_4s(TyS);
   5732    if (1) test_mul_2s_2s_2s(TyS);
   5733    if (1) test_mul_8h_8h_8h(TyH);
   5734    if (1) test_mul_4h_4h_4h(TyH);
   5735    if (1) test_mul_16b_16b_16b(TyB);
   5736    if (1) test_mul_8b_8b_8b(TyB);
   5737 
   5738    // Some of these movi and mvni cases are similar to orr and bic
   5739    // cases with immediates.  Maybe they should be moved together.
   5740    // movi  16b,8b   #imm8, LSL #0
   5741    if (1) test_movi_16b_0x9C_lsl0(TyB);
   5742    if (1) test_movi_8b_0x8B_lsl0(TyB);
   5743 
   5744    // movi  8h,4h    #imm8, LSL #0 or 8
   5745    // mvni  8h,4h    #imm8, LSL #0 or 8
   5746    if (1) test_movi_8h_0x5A_lsl0(TyH);
   5747    if (1) test_movi_8h_0xA5_lsl8(TyH);
   5748    if (1) test_movi_4h_0x5A_lsl0(TyH);
   5749    if (1) test_movi_4h_0xA5_lsl8(TyH);
   5750    if (1) test_mvni_8h_0x5A_lsl0(TyH);
   5751    if (1) test_mvni_8h_0xA5_lsl8(TyH);
   5752    if (1) test_mvni_4h_0x5A_lsl0(TyH);
   5753    if (1) test_mvni_4h_0xA5_lsl8(TyH);
   5754 
   5755    // movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   5756    // mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   5757    if (1) test_movi_4s_0x5A_lsl0(TyS);
   5758    if (1) test_movi_4s_0x6B_lsl8(TyS);
   5759    if (1) test_movi_4s_0x49_lsl16(TyS);
   5760    if (1) test_movi_4s_0x3D_lsl24(TyS);
   5761    if (1) test_movi_2s_0x5A_lsl0(TyS);
   5762    if (1) test_movi_2s_0x6B_lsl8(TyS);
   5763    if (1) test_movi_2s_0x49_lsl16(TyS);
   5764    if (1) test_movi_2s_0x3D_lsl24(TyS);
   5765    if (1) test_mvni_4s_0x5A_lsl0(TyS);
   5766    if (1) test_mvni_4s_0x6B_lsl8(TyS);
   5767    if (1) test_mvni_4s_0x49_lsl16(TyS);
   5768    if (1) test_mvni_4s_0x3D_lsl24(TyS);
   5769    if (1) test_mvni_2s_0x5A_lsl0(TyS);
   5770    if (1) test_mvni_2s_0x6B_lsl8(TyS);
   5771    if (1) test_mvni_2s_0x49_lsl16(TyS);
   5772    if (1) test_mvni_2s_0x3D_lsl24(TyS);
   5773 
   5774    // movi  4s,2s    #imm8, MSL #8 or 16
   5775    // mvni  4s,2s    #imm8, MSL #8 or 16
   5776    if (1) test_movi_4s_0x6B_msl8(TyS);
   5777    if (1) test_movi_4s_0x94_msl16(TyS);
   5778    if (1) test_movi_2s_0x7A_msl8(TyS);
   5779    if (1) test_movi_2s_0xA5_msl16(TyS);
   5780    if (1) test_mvni_4s_0x6B_msl8(TyS);
   5781    if (1) test_mvni_4s_0x94_msl16(TyS);
   5782    if (1) test_mvni_2s_0x7A_msl8(TyS);
   5783    if (1) test_mvni_2s_0xA5_msl16(TyS);
   5784 
   5785    // movi  d,       #imm64
   5786    // movi  2d,      #imm64
   5787    if (1) test_movi_d_0xA5(TyD);
   5788    if (1) test_movi_2d_0xB4(TyD);
   5789 
   5790    // not   16b,8b
   5791    if (1) test_not_16b_16b(TyB);
   5792    if (1) test_not_8b_8b(TyB);
   5793 
   5794    // pmul  16b,8b
   5795    if (1) test_pmul_16b_16b_16b(TyB);
   5796    if (1) test_pmul_8b_8b_8b(TyB);
   5797 
   5798    // pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
   5799    if (1) test_pmull_8h_8b_8b(TyB);
   5800    if (1) test_pmull2_8h_16b_16b(TyB);
   5801    //if (0) test_pmull_1q_1d_1d(TyD);
   5802    //if (0) test_pmull_1q_2d_2d(TyD);
   5803 
   5804    // rbit    16b,8b
   5805    // rev16   16b,8b
   5806    // rev32   16b,8b,8h,4h
   5807    // rev64   16b,8b,8h,4h,4s,2s
   5808    if (1) test_rbit_16b_16b(TyB);
   5809    if (1) test_rbit_8b_8b(TyB);
   5810    if (1) test_rev16_16b_16b(TyB);
   5811    if (1) test_rev16_8b_8b(TyB);
   5812    if (1) test_rev32_16b_16b(TyB);
   5813    if (1) test_rev32_8b_8b(TyB);
   5814    if (1) test_rev32_8h_8h(TyH);
   5815    if (1) test_rev32_4h_4h(TyH);
   5816    if (1) test_rev64_16b_16b(TyB);
   5817    if (1) test_rev64_8b_8b(TyB);
   5818    if (1) test_rev64_8h_8h(TyH);
   5819    if (1) test_rev64_4h_4h(TyH);
   5820    if (1) test_rev64_4s_4s(TyS);
   5821    if (1) test_rev64_2s_2s(TyS);
   5822 
   5823    // saba      16b,8b,8h,4h,4s,2s
   5824    // uaba      16b,8b,8h,4h,4s,2s
   5825    if (1) test_saba_4s_4s_4s(TyS);
   5826    if (1) test_saba_2s_2s_2s(TyS);
   5827    if (1) test_saba_8h_8h_8h(TyH);
   5828    if (1) test_saba_4h_4h_4h(TyH);
   5829    if (1) test_saba_16b_16b_16b(TyB);
   5830    if (1) test_saba_8b_8b_8b(TyB);
   5831    if (1) test_uaba_4s_4s_4s(TyS);
   5832    if (1) test_uaba_2s_2s_2s(TyS);
   5833    if (1) test_uaba_8h_8h_8h(TyH);
   5834    if (1) test_uaba_4h_4h_4h(TyH);
   5835    if (1) test_uaba_16b_16b_16b(TyB);
   5836    if (1) test_uaba_8b_8b_8b(TyB);
   5837 
   5838    // sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5839    // uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5840    if (1) test_sabal_2d_2s_2s(TyS);
   5841    if (1) test_sabal2_2d_4s_4s(TyS);
   5842    if (1) test_sabal_4s_4h_4h(TyH);
   5843    if (1) test_sabal2_4s_8h_8h(TyH);
   5844    if (1) test_sabal_8h_8b_8b(TyB);
   5845    if (1) test_sabal2_8h_16b_16b(TyB);
   5846    if (1) test_uabal_2d_2s_2s(TyS);
   5847    if (1) test_uabal2_2d_4s_4s(TyS);
   5848    if (1) test_uabal_4s_4h_4h(TyH);
   5849    if (1) test_uabal2_4s_8h_8h(TyH);
   5850    if (1) test_uabal_8h_8b_8b(TyB);
   5851    if (1) test_uabal2_8h_16b_16b(TyB);
   5852 
   5853    // sabd      16b,8b,8h,4h,4s,2s
   5854    // uabd      16b,8b,8h,4h,4s,2s
   5855    if (1) test_sabd_4s_4s_4s(TyS);
   5856    if (1) test_sabd_2s_2s_2s(TyS);
   5857    if (1) test_sabd_8h_8h_8h(TyH);
   5858    if (1) test_sabd_4h_4h_4h(TyH);
   5859    if (1) test_sabd_16b_16b_16b(TyB);
   5860    if (1) test_sabd_8b_8b_8b(TyB);
   5861    if (1) test_uabd_4s_4s_4s(TyS);
   5862    if (1) test_uabd_2s_2s_2s(TyS);
   5863    if (1) test_uabd_8h_8h_8h(TyH);
   5864    if (1) test_uabd_4h_4h_4h(TyH);
   5865    if (1) test_uabd_16b_16b_16b(TyB);
   5866    if (1) test_uabd_8b_8b_8b(TyB);
   5867 
   5868    // sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5869    // uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5870    if (1) test_sabdl_2d_2s_2s(TyS);
   5871    if (1) test_sabdl2_2d_4s_4s(TyS);
   5872    if (1) test_sabdl_4s_4h_4h(TyH);
   5873    if (1) test_sabdl2_4s_8h_8h(TyH);
   5874    if (1) test_sabdl_8h_8b_8b(TyB);
   5875    if (1) test_sabdl2_8h_16b_16b(TyB);
   5876    if (1) test_uabdl_2d_2s_2s(TyS);
   5877    if (1) test_uabdl2_2d_4s_4s(TyS);
   5878    if (1) test_uabdl_4s_4h_4h(TyH);
   5879    if (1) test_uabdl2_4s_8h_8h(TyH);
   5880    if (1) test_uabdl_8h_8b_8b(TyB);
   5881    if (1) test_uabdl2_8h_16b_16b(TyB);
   5882 
   5883    // sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5884    // uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5885    if (1) test_sadalp_1d_2s(TyS);
   5886    if (1) test_sadalp_2d_4s(TyS);
   5887    if (1) test_sadalp_2s_4h(TyH);
   5888    if (1) test_sadalp_4s_8h(TyH);
   5889    if (1) test_sadalp_4h_8b(TyB);
   5890    if (1) test_sadalp_8h_16b(TyB);
   5891    if (1) test_uadalp_1d_2s(TyS);
   5892    if (1) test_uadalp_2d_4s(TyS);
   5893    if (1) test_uadalp_2s_4h(TyH);
   5894    if (1) test_uadalp_4s_8h(TyH);
   5895    if (1) test_uadalp_4h_8b(TyB);
   5896    if (1) test_uadalp_8h_16b(TyB);
   5897 
   5898    // saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5899    // uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5900    // ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5901    // usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5902    if (1) test_saddl_2d_2s_2s(TyS);
   5903    if (1) test_saddl2_2d_4s_4s(TyS);
   5904    if (1) test_saddl_4s_4h_4h(TyH);
   5905    if (1) test_saddl2_4s_8h_8h(TyH);
   5906    if (1) test_saddl_8h_8b_8b(TyB);
   5907    if (1) test_saddl2_8h_16b_16b(TyB);
   5908    if (1) test_uaddl_2d_2s_2s(TyS);
   5909    if (1) test_uaddl2_2d_4s_4s(TyS);
   5910    if (1) test_uaddl_4s_4h_4h(TyH);
   5911    if (1) test_uaddl2_4s_8h_8h(TyH);
   5912    if (1) test_uaddl_8h_8b_8b(TyB);
   5913    if (1) test_uaddl2_8h_16b_16b(TyB);
   5914    if (1) test_ssubl_2d_2s_2s(TyS);
   5915    if (1) test_ssubl2_2d_4s_4s(TyS);
   5916    if (1) test_ssubl_4s_4h_4h(TyH);
   5917    if (1) test_ssubl2_4s_8h_8h(TyH);
   5918    if (1) test_ssubl_8h_8b_8b(TyB);
   5919    if (1) test_ssubl2_8h_16b_16b(TyB);
   5920    if (1) test_usubl_2d_2s_2s(TyS);
   5921    if (1) test_usubl2_2d_4s_4s(TyS);
   5922    if (1) test_usubl_4s_4h_4h(TyH);
   5923    if (1) test_usubl2_4s_8h_8h(TyH);
   5924    if (1) test_usubl_8h_8b_8b(TyB);
   5925    if (1) test_usubl2_8h_16b_16b(TyB);
   5926 
   5927    // saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5928    // uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5929    if (1) test_saddlp_1d_2s(TyS);
   5930    if (1) test_saddlp_2d_4s(TyS);
   5931    if (1) test_saddlp_2s_4h(TyH);
   5932    if (1) test_saddlp_4s_8h(TyH);
   5933    if (1) test_saddlp_4h_8b(TyB);
   5934    if (1) test_saddlp_8h_16b(TyB);
   5935    if (1) test_uaddlp_1d_2s(TyS);
   5936    if (1) test_uaddlp_2d_4s(TyS);
   5937    if (1) test_uaddlp_2s_4h(TyH);
   5938    if (1) test_uaddlp_4s_8h(TyH);
   5939    if (1) test_uaddlp_4h_8b(TyB);
   5940    if (1) test_uaddlp_8h_16b(TyB);
   5941 
   5942    // saddlv    h_16b/8b, s_8h/4h, d_4s
   5943    // uaddlv    h_16b/8b, s_8h/4h, d_4s
   5944    if (1) test_saddlv_h_16b(TyB);
   5945    if (1) test_saddlv_h_8b(TyB);
   5946    if (1) test_saddlv_s_8h(TyH);
   5947    if (1) test_saddlv_s_4h(TyH);
   5948    if (1) test_saddlv_d_4s(TyH);
   5949    if (1) test_uaddlv_h_16b(TyB);
   5950    if (1) test_uaddlv_h_8b(TyB);
   5951    if (1) test_uaddlv_s_8h(TyH);
   5952    if (1) test_uaddlv_s_4h(TyH);
   5953    if (1) test_uaddlv_d_4s(TyH);
   5954 
   5955    // saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5956    // uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5957    // ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5958    // usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5959    if (1) test_saddw2_8h_8h_16b(TyB);
   5960    if (1) test_saddw_8h_8h_8b(TyB);
   5961    if (1) test_saddw2_4s_4s_8h(TyH);
   5962    if (1) test_saddw_4s_4s_4h(TyH);
   5963    if (1) test_saddw2_2d_2d_4s(TyS);
   5964    if (1) test_saddw_2d_2d_2s(TyS);
   5965    if (1) test_uaddw2_8h_8h_16b(TyB);
   5966    if (1) test_uaddw_8h_8h_8b(TyB);
   5967    if (1) test_uaddw2_4s_4s_8h(TyH);
   5968    if (1) test_uaddw_4s_4s_4h(TyH);
   5969    if (1) test_uaddw2_2d_2d_4s(TyS);
   5970    if (1) test_uaddw_2d_2d_2s(TyS);
   5971    if (1) test_ssubw2_8h_8h_16b(TyB);
   5972    if (1) test_ssubw_8h_8h_8b(TyB);
   5973    if (1) test_ssubw2_4s_4s_8h(TyH);
   5974    if (1) test_ssubw_4s_4s_4h(TyH);
   5975    if (1) test_ssubw2_2d_2d_4s(TyS);
   5976    if (1) test_ssubw_2d_2d_2s(TyS);
   5977    if (1) test_usubw2_8h_8h_16b(TyB);
   5978    if (1) test_usubw_8h_8h_8b(TyB);
   5979    if (1) test_usubw2_4s_4s_8h(TyH);
   5980    if (1) test_usubw_4s_4s_4h(TyH);
   5981    if (1) test_usubw2_2d_2d_4s(TyS);
   5982    if (1) test_usubw_2d_2d_2s(TyS);
   5983 
   5984    // shadd        16b,8b,8h,4h,4s,2s
   5985    // uhadd        16b,8b,8h,4h,4s,2s
   5986    // shsub        16b,8b,8h,4h,4s,2s
   5987    // uhsub        16b,8b,8h,4h,4s,2s
   5988    if (1) test_shadd_4s_4s_4s(TyS);
   5989    if (1) test_shadd_2s_2s_2s(TyS);
   5990    if (1) test_shadd_8h_8h_8h(TyH);
   5991    if (1) test_shadd_4h_4h_4h(TyH);
   5992    if (1) test_shadd_16b_16b_16b(TyB);
   5993    if (1) test_shadd_8b_8b_8b(TyB);
   5994    if (1) test_uhadd_4s_4s_4s(TyS);
   5995    if (1) test_uhadd_2s_2s_2s(TyS);
   5996    if (1) test_uhadd_8h_8h_8h(TyH);
   5997    if (1) test_uhadd_4h_4h_4h(TyH);
   5998    if (1) test_uhadd_16b_16b_16b(TyB);
   5999    if (1) test_uhadd_8b_8b_8b(TyB);
   6000    if (1) test_shsub_4s_4s_4s(TyS);
   6001    if (1) test_shsub_2s_2s_2s(TyS);
   6002    if (1) test_shsub_8h_8h_8h(TyH);
   6003    if (1) test_shsub_4h_4h_4h(TyH);
   6004    if (1) test_shsub_16b_16b_16b(TyB);
   6005    if (1) test_shsub_8b_8b_8b(TyB);
   6006    if (1) test_uhsub_4s_4s_4s(TyS);
   6007    if (1) test_uhsub_2s_2s_2s(TyS);
   6008    if (1) test_uhsub_8h_8h_8h(TyH);
   6009    if (1) test_uhsub_4h_4h_4h(TyH);
   6010    if (1) test_uhsub_16b_16b_16b(TyB);
   6011    if (1) test_uhsub_8b_8b_8b(TyB);
   6012 
   6013    // shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   6014    if (1) test_shll_8h_8b_8(TyB);
   6015    if (1) test_shll2_8h_16b_8(TyB);
   6016    if (1) test_shll_4s_4h_16(TyH);
   6017    if (1) test_shll2_4s_8h_16(TyH);
   6018    if (1) test_shll_2d_2s_32(TyS);
   6019    if (1) test_shll2_2d_4s_32(TyS);
   6020 
   6021    // shrn{2}      2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. dest elem_bits
   6022    // rshrn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. dest elem_bits
   6023    if (1) test_shrn_2s_2d_1(TyD);
   6024    if (1) test_shrn_2s_2d_32(TyD);
   6025    if (1) test_shrn2_4s_2d_1(TyD);
   6026    if (1) test_shrn2_4s_2d_32(TyD);
   6027    if (1) test_shrn_4h_4s_1(TyS);
   6028    if (1) test_shrn_4h_4s_16(TyS);
   6029    if (1) test_shrn2_8h_4s_1(TyS);
   6030    if (1) test_shrn2_8h_4s_16(TyS);
   6031    if (1) test_shrn_8b_8h_1(TyH);
   6032    if (1) test_shrn_8b_8h_8(TyH);
   6033    if (1) test_shrn2_16b_8h_1(TyH);
   6034    if (1) test_shrn2_16b_8h_8(TyH);
   6035    if (1) test_rshrn_2s_2d_1(TyD);
   6036    if (1) test_rshrn_2s_2d_32(TyD);
   6037    if (1) test_rshrn2_4s_2d_1(TyD);
   6038    if (1) test_rshrn2_4s_2d_32(TyD);
   6039    if (1) test_rshrn_4h_4s_1(TyS);
   6040    if (1) test_rshrn_4h_4s_16(TyS);
   6041    if (1) test_rshrn2_8h_4s_1(TyS);
   6042    if (1) test_rshrn2_8h_4s_16(TyS);
   6043    if (1) test_rshrn_8b_8h_1(TyH);
   6044    if (1) test_rshrn_8b_8h_8(TyH);
   6045    if (1) test_rshrn2_16b_8h_1(TyH);
   6046    if (1) test_rshrn2_16b_8h_8(TyH);
   6047 
   6048    // sli          d_#imm
   6049    // sri          d_#imm
   6050    if (1) test_sli_d_d_0(TyD);
   6051    if (1) test_sli_d_d_32(TyD);
   6052    if (1) test_sli_d_d_63(TyD);
   6053    if (1) test_sri_d_d_1(TyD);
   6054    if (1) test_sri_d_d_33(TyD);
   6055    if (1) test_sri_d_d_64(TyD);
   6056 
   6057    // sli          2d,4s,2s,8h,4h,16b,8b  _#imm
   6058    // sri          2d,4s,2s,8h,4h,16b,8b  _#imm
   6059    if (1) test_sli_2d_2d_0(TyD);
   6060    if (1) test_sli_2d_2d_32(TyD);
   6061    if (1) test_sli_2d_2d_63(TyD);
   6062    if (1) test_sli_4s_4s_0(TyS);
   6063    if (1) test_sli_4s_4s_16(TyS);
   6064    if (1) test_sli_4s_4s_31(TyS);
   6065    if (1) test_sli_2s_2s_0(TyS);
   6066    if (1) test_sli_2s_2s_16(TyS);
   6067    if (1) test_sli_2s_2s_31(TyS);
   6068    if (1) test_sli_8h_8h_0(TyH);
   6069    if (1) test_sli_8h_8h_8(TyH);
   6070    if (1) test_sli_8h_8h_15(TyH);
   6071    if (1) test_sli_4h_4h_0(TyH);
   6072    if (1) test_sli_4h_4h_8(TyH);
   6073    if (1) test_sli_4h_4h_15(TyH);
   6074    if (1) test_sli_16b_16b_0(TyB);
   6075    if (1) test_sli_16b_16b_3(TyB);
   6076    if (1) test_sli_16b_16b_7(TyB);
   6077    if (1) test_sli_8b_8b_0(TyB);
   6078    if (1) test_sli_8b_8b_3(TyB);
   6079    if (1) test_sli_8b_8b_7(TyB);
   6080    if (1) test_sri_2d_2d_1(TyD);
   6081    if (1) test_sri_2d_2d_33(TyD);
   6082    if (1) test_sri_2d_2d_64(TyD);
   6083    if (1) test_sri_4s_4s_1(TyS);
   6084    if (1) test_sri_4s_4s_17(TyS);
   6085    if (1) test_sri_4s_4s_32(TyS);
   6086    if (1) test_sri_2s_2s_1(TyS);
   6087    if (1) test_sri_2s_2s_17(TyS);
   6088    if (1) test_sri_2s_2s_32(TyS);
   6089    if (1) test_sri_8h_8h_1(TyH);
   6090    if (1) test_sri_8h_8h_8(TyH);
   6091    if (1) test_sri_8h_8h_16(TyH);
   6092    if (1) test_sri_4h_4h_1(TyH);
   6093    if (1) test_sri_4h_4h_8(TyH);
   6094    if (1) test_sri_4h_4h_16(TyH);
   6095    if (1) test_sri_16b_16b_1(TyB);
   6096    if (1) test_sri_16b_16b_4(TyB);
   6097    if (1) test_sri_16b_16b_8(TyB);
   6098    if (1) test_sri_8b_8b_1(TyB);
   6099    if (1) test_sri_8b_8b_4(TyB);
   6100    if (1) test_sri_8b_8b_8(TyB);
   6101 
   6102    // smax         4s,2s,8h,4h,16b,8b
   6103    // umax         4s,2s,8h,4h,16b,8b
   6104    // smin         4s,2s,8h,4h,16b,8b
   6105    // umin         4s,2s,8h,4h,16b,8b
   6106    if (1) test_smax_4s_4s_4s(TyS);
   6107    if (1) test_smax_2s_2s_2s(TyS);
   6108    if (1) test_smax_8h_8h_8h(TyH);
   6109    if (1) test_smax_4h_4h_4h(TyH);
   6110    if (1) test_smax_16b_16b_16b(TyB);
   6111    if (1) test_smax_8b_8b_8b(TyB);
   6112    if (1) test_umax_4s_4s_4s(TyS);
   6113    if (1) test_umax_2s_2s_2s(TyS);
   6114    if (1) test_umax_8h_8h_8h(TyH);
   6115    if (1) test_umax_4h_4h_4h(TyH);
   6116    if (1) test_umax_16b_16b_16b(TyB);
   6117    if (1) test_umax_8b_8b_8b(TyB);
   6118    if (1) test_smin_4s_4s_4s(TyS);
   6119    if (1) test_smin_2s_2s_2s(TyS);
   6120    if (1) test_smin_8h_8h_8h(TyH);
   6121    if (1) test_smin_4h_4h_4h(TyH);
   6122    if (1) test_smin_16b_16b_16b(TyB);
   6123    if (1) test_smin_8b_8b_8b(TyB);
   6124    if (1) test_umin_4s_4s_4s(TyS);
   6125    if (1) test_umin_2s_2s_2s(TyS);
   6126    if (1) test_umin_8h_8h_8h(TyH);
   6127    if (1) test_umin_4h_4h_4h(TyH);
   6128    if (1) test_umin_16b_16b_16b(TyB);
   6129    if (1) test_umin_8b_8b_8b(TyB);
   6130 
   6131    // smaxp        4s,2s,8h,4h,16b,8b
   6132    // umaxp        4s,2s,8h,4h,16b,8b
   6133    // sminp        4s,2s,8h,4h,16b,8b
   6134    // uminp        4s,2s,8h,4h,16b,8b
   6135    if (1) test_smaxp_4s_4s_4s(TyS);
   6136    if (1) test_smaxp_2s_2s_2s(TyS);
   6137    if (1) test_smaxp_8h_8h_8h(TyH);
   6138    if (1) test_smaxp_4h_4h_4h(TyH);
   6139    if (1) test_smaxp_16b_16b_16b(TyB);
   6140    if (1) test_smaxp_8b_8b_8b(TyB);
   6141    if (1) test_umaxp_4s_4s_4s(TyS);
   6142    if (1) test_umaxp_2s_2s_2s(TyS);
   6143    if (1) test_umaxp_8h_8h_8h(TyH);
   6144    if (1) test_umaxp_4h_4h_4h(TyH);
   6145    if (1) test_umaxp_16b_16b_16b(TyB);
   6146    if (1) test_umaxp_8b_8b_8b(TyB);
   6147    if (1) test_sminp_4s_4s_4s(TyS);
   6148    if (1) test_sminp_2s_2s_2s(TyS);
   6149    if (1) test_sminp_8h_8h_8h(TyH);
   6150    if (1) test_sminp_4h_4h_4h(TyH);
   6151    if (1) test_sminp_16b_16b_16b(TyB);
   6152    if (1) test_sminp_8b_8b_8b(TyB);
   6153    if (1) test_uminp_4s_4s_4s(TyS);
   6154    if (1) test_uminp_2s_2s_2s(TyS);
   6155    if (1) test_uminp_8h_8h_8h(TyH);
   6156    if (1) test_uminp_4h_4h_4h(TyH);
   6157    if (1) test_uminp_16b_16b_16b(TyB);
   6158    if (1) test_uminp_8b_8b_8b(TyB);
   6159 
   6160    // smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   6161    // umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   6162    // sminv        s_4s,h_8h,h_4h,b_16b,b_8b
   6163    // uminv        s_4s,h_8h,h_4h,b_16b,b_8b
   6164    if (1) test_SMAXV();
   6165    if (1) test_UMAXV();
   6166    if (1) test_SMINV();
   6167    if (1) test_UMINV();
   6168 
   6169    // smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6170    // umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6171    // smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6172    // umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6173    // smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6174    // umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6175    if (1) test_smlal_2d_2s_s0(TyS);
   6176    if (1) test_smlal_2d_2s_s3(TyS);
   6177    if (1) test_smlal2_2d_4s_s1(TyS);
   6178    if (1) test_smlal2_2d_4s_s2(TyS);
   6179    if (1) test_smlal_4s_4h_h0(TyH);
   6180    if (1) test_smlal_4s_4h_h7(TyH);
   6181    if (1) test_smlal2_4s_8h_h1(TyH);
   6182    if (1) test_smlal2_4s_8h_h4(TyH);
   6183    if (1) test_umlal_2d_2s_s0(TyS);
   6184    if (1) test_umlal_2d_2s_s3(TyS);
   6185    if (1) test_umlal2_2d_4s_s1(TyS);
   6186    if (1) test_umlal2_2d_4s_s2(TyS);
   6187    if (1) test_umlal_4s_4h_h0(TyH);
   6188    if (1) test_umlal_4s_4h_h7(TyH);
   6189    if (1) test_umlal2_4s_8h_h1(TyH);
   6190    if (1) test_umlal2_4s_8h_h4(TyH);
   6191    if (1) test_smlsl_2d_2s_s0(TyS);
   6192    if (1) test_smlsl_2d_2s_s3(TyS);
   6193    if (1) test_smlsl2_2d_4s_s1(TyS);
   6194    if (1) test_smlsl2_2d_4s_s2(TyS);
   6195    if (1) test_smlsl_4s_4h_h0(TyH);
   6196    if (1) test_smlsl_4s_4h_h7(TyH);
   6197    if (1) test_smlsl2_4s_8h_h1(TyH);
   6198    if (1) test_smlsl2_4s_8h_h4(TyH);
   6199    if (1) test_umlsl_2d_2s_s0(TyS);
   6200    if (1) test_umlsl_2d_2s_s3(TyS);
   6201    if (1) test_umlsl2_2d_4s_s1(TyS);
   6202    if (1) test_umlsl2_2d_4s_s2(TyS);
   6203    if (1) test_umlsl_4s_4h_h0(TyH);
   6204    if (1) test_umlsl_4s_4h_h7(TyH);
   6205    if (1) test_umlsl2_4s_8h_h1(TyH);
   6206    if (1) test_umlsl2_4s_8h_h4(TyH);
   6207    if (1) test_smull_2d_2s_s0(TyS);
   6208    if (1) test_smull_2d_2s_s3(TyS);
   6209    if (1) test_smull2_2d_4s_s1(TyS);
   6210    if (1) test_smull2_2d_4s_s2(TyS);
   6211    if (1) test_smull_4s_4h_h0(TyH);
   6212    if (1) test_smull_4s_4h_h7(TyH);
   6213    if (1) test_smull2_4s_8h_h1(TyH);
   6214    if (1) test_smull2_4s_8h_h4(TyH);
   6215    if (1) test_umull_2d_2s_s0(TyS);
   6216    if (1) test_umull_2d_2s_s3(TyS);
   6217    if (1) test_umull2_2d_4s_s1(TyS);
   6218    if (1) test_umull2_2d_4s_s2(TyS);
   6219    if (1) test_umull_4s_4h_h0(TyH);
   6220    if (1) test_umull_4s_4h_h7(TyH);
   6221    if (1) test_umull2_4s_8h_h1(TyH);
   6222    if (1) test_umull2_4s_8h_h4(TyH);
   6223 
   6224    // smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6225    // umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6226    // smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6227    // umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6228    // smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6229    // umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6230    if (1) test_smlal_2d_2s_2s(TyS);
   6231    if (1) test_smlal2_2d_4s_4s(TyS);
   6232    if (1) test_smlal_4s_4h_4h(TyH);
   6233    if (1) test_smlal2_4s_8h_8h(TyH);
   6234    if (1) test_smlal_8h_8b_8b(TyB);
   6235    if (1) test_smlal2_8h_16b_16b(TyB);
   6236    if (1) test_umlal_2d_2s_2s(TyS);
   6237    if (1) test_umlal2_2d_4s_4s(TyS);
   6238    if (1) test_umlal_4s_4h_4h(TyH);
   6239    if (1) test_umlal2_4s_8h_8h(TyH);
   6240    if (1) test_umlal_8h_8b_8b(TyB);
   6241    if (1) test_umlal2_8h_16b_16b(TyB);
   6242    if (1) test_smlsl_2d_2s_2s(TyS);
   6243    if (1) test_smlsl2_2d_4s_4s(TyS);
   6244    if (1) test_smlsl_4s_4h_4h(TyH);
   6245    if (1) test_smlsl2_4s_8h_8h(TyH);
   6246    if (1) test_smlsl_8h_8b_8b(TyB);
   6247    if (1) test_smlsl2_8h_16b_16b(TyB);
   6248    if (1) test_umlsl_2d_2s_2s(TyS);
   6249    if (1) test_umlsl2_2d_4s_4s(TyS);
   6250    if (1) test_umlsl_4s_4h_4h(TyH);
   6251    if (1) test_umlsl2_4s_8h_8h(TyH);
   6252    if (1) test_umlsl_8h_8b_8b(TyB);
   6253    if (1) test_umlsl2_8h_16b_16b(TyB);
   6254    if (1) test_smull_2d_2s_2s(TyS);
   6255    if (1) test_smull2_2d_4s_4s(TyS);
   6256    if (1) test_smull_4s_4h_4h(TyH);
   6257    if (1) test_smull2_4s_8h_8h(TyH);
   6258    if (1) test_smull_8h_8b_8b(TyB);
   6259    if (1) test_smull2_8h_16b_16b(TyB);
   6260    if (1) test_umull_2d_2s_2s(TyS);
   6261    if (1) test_umull2_2d_4s_4s(TyS);
   6262    if (1) test_umull_4s_4h_4h(TyH);
   6263    if (1) test_umull2_4s_8h_8h(TyH);
   6264    if (1) test_umull_8h_8b_8b(TyB);
   6265    if (1) test_umull2_8h_16b_16b(TyB);
   6266 
   6267    // smov         w_b[], w_h[], x_b[], x_h[], x_s[]
   6268    // umov         w_b[], w_h[],               w_s[], x_d[]
   6269    if (1) test_umov_x_d0(TyD);
   6270    if (1) test_umov_x_d1(TyD);
   6271    if (1) test_umov_w_s0(TyS);
   6272    if (1) test_umov_w_s3(TyS);
   6273    if (1) test_umov_w_h0(TyH);
   6274    if (1) test_umov_w_h7(TyH);
   6275    if (1) test_umov_w_b0(TyB);
   6276    if (1) test_umov_w_b15(TyB);
   6277    if (1) test_smov_x_s0(TyS);
   6278    if (1) test_smov_x_s3(TyS);
   6279    if (1) test_smov_x_h0(TyH);
   6280    if (1) test_smov_x_h7(TyH);
   6281    if (1) test_smov_w_h0(TyH);
   6282    if (1) test_smov_w_h7(TyH);
   6283    if (1) test_smov_x_b0(TyB);
   6284    if (1) test_smov_x_b15(TyB);
   6285    if (1) test_smov_w_b0(TyB);
   6286    if (1) test_smov_w_b15(TyB);
   6287 
   6288    // sqabs        d,s,h,b
   6289    // sqneg        d,s,h,b
   6290    if (1) test_sqabs_d_d(TyD);
   6291    if (1) test_sqabs_s_s(TyS);
   6292    if (1) test_sqabs_h_h(TyH);
   6293    if (1) test_sqabs_b_b(TyB);
   6294    if (1) test_sqneg_d_d(TyD);
   6295    if (1) test_sqneg_s_s(TyS);
   6296    if (1) test_sqneg_h_h(TyH);
   6297    if (1) test_sqneg_b_b(TyB);
   6298 
   6299    // sqabs        2d,4s,2s,8h,4h,16b,8b
   6300    // sqneg        2d,4s,2s,8h,4h,16b,8b
   6301    if (1) test_sqabs_2d_2d(TyD);
   6302    if (1) test_sqabs_4s_4s(TyS);
   6303    if (1) test_sqabs_2s_2s(TyS);
   6304    if (1) test_sqabs_8h_8h(TyH);
   6305    if (1) test_sqabs_4h_4h(TyH);
   6306    if (1) test_sqabs_16b_16b(TyB);
   6307    if (1) test_sqabs_8b_8b(TyB);
   6308    if (1) test_sqneg_2d_2d(TyD);
   6309    if (1) test_sqneg_4s_4s(TyS);
   6310    if (1) test_sqneg_2s_2s(TyS);
   6311    if (1) test_sqneg_8h_8h(TyH);
   6312    if (1) test_sqneg_4h_4h(TyH);
   6313    if (1) test_sqneg_16b_16b(TyB);
   6314    if (1) test_sqneg_8b_8b(TyB);
   6315 
   6316    // sqadd        d,s,h,b
   6317    // uqadd        d,s,h,b
   6318    // sqsub        d,s,h,b
   6319    // uqsub        d,s,h,b
   6320    if (1) test_sqadd_d_d_d(TyD);
   6321    if (1) test_sqadd_s_s_s(TyS);
   6322    if (1) test_sqadd_h_h_h(TyH);
   6323    if (1) test_sqadd_b_b_b(TyB);
   6324    if (1) test_uqadd_d_d_d(TyD);
   6325    if (1) test_uqadd_s_s_s(TyS);
   6326    if (1) test_uqadd_h_h_h(TyH);
   6327    if (1) test_uqadd_b_b_b(TyB);
   6328    if (1) test_sqsub_d_d_d(TyD);
   6329    if (1) test_sqsub_s_s_s(TyS);
   6330    if (1) test_sqsub_h_h_h(TyH);
   6331    if (1) test_sqsub_b_b_b(TyB);
   6332    if (1) test_uqsub_d_d_d(TyD);
   6333    if (1) test_uqsub_s_s_s(TyS);
   6334    if (1) test_uqsub_h_h_h(TyH);
   6335    if (1) test_uqsub_b_b_b(TyB);
   6336 
   6337    // sqadd        2d,4s,2s,8h,4h,16b,8b
   6338    // uqadd        2d,4s,2s,8h,4h,16b,8b
   6339    // sqsub        2d,4s,2s,8h,4h,16b,8b
   6340    // uqsub        2d,4s,2s,8h,4h,16b,8b
   6341    if (1) test_sqadd_2d_2d_2d(TyD);
   6342    if (1) test_sqadd_4s_4s_4s(TyS);
   6343    if (1) test_sqadd_2s_2s_2s(TyS);
   6344    if (1) test_sqadd_8h_8h_8h(TyH);
   6345    if (1) test_sqadd_4h_4h_4h(TyH);
   6346    if (1) test_sqadd_16b_16b_16b(TyB);
   6347    if (1) test_sqadd_8b_8b_8b(TyB);
   6348    if (1) test_uqadd_2d_2d_2d(TyD);
   6349    if (1) test_uqadd_4s_4s_4s(TyS);
   6350    if (1) test_uqadd_2s_2s_2s(TyS);
   6351    if (1) test_uqadd_8h_8h_8h(TyH);
   6352    if (1) test_uqadd_4h_4h_4h(TyH);
   6353    if (1) test_uqadd_16b_16b_16b(TyB);
   6354    if (1) test_uqadd_8b_8b_8b(TyB);
   6355    if (1) test_sqsub_2d_2d_2d(TyD);
   6356    if (1) test_sqsub_4s_4s_4s(TyS);
   6357    if (1) test_sqsub_2s_2s_2s(TyS);
   6358    if (1) test_sqsub_8h_8h_8h(TyH);
   6359    if (1) test_sqsub_4h_4h_4h(TyH);
   6360    if (1) test_sqsub_16b_16b_16b(TyB);
   6361    if (1) test_sqsub_8b_8b_8b(TyB);
   6362    if (1) test_uqsub_2d_2d_2d(TyD);
   6363    if (1) test_uqsub_4s_4s_4s(TyS);
   6364    if (1) test_uqsub_2s_2s_2s(TyS);
   6365    if (1) test_uqsub_8h_8h_8h(TyH);
   6366    if (1) test_uqsub_4h_4h_4h(TyH);
   6367    if (1) test_uqsub_16b_16b_16b(TyB);
   6368    if (1) test_uqsub_8b_8b_8b(TyB);
   6369 
   6370    // sqdmlal      d_s_s[], s_h_h[]
   6371    // sqdmlsl      d_s_s[], s_h_h[]
   6372    // sqdmull      d_s_s[], s_h_h[]
   6373    if (1) test_sqdmlal_d_s_s0(TyS);
   6374    if (1) test_sqdmlal_d_s_s3(TyS);
   6375    if (1) test_sqdmlal_s_h_h1(TyH);
   6376    if (1) test_sqdmlal_s_h_h5(TyH);
   6377    if (1) test_sqdmlsl_d_s_s0(TyS);
   6378    if (1) test_sqdmlsl_d_s_s3(TyS);
   6379    if (1) test_sqdmlsl_s_h_h1(TyH);
   6380    if (1) test_sqdmlsl_s_h_h5(TyH);
   6381    if (1) test_sqdmull_d_s_s0(TyS);
   6382    if (1) test_sqdmull_d_s_s3(TyS);
   6383    if (1) test_sqdmull_s_h_h1(TyH);
   6384    if (1) test_sqdmull_s_h_h5(TyH);
   6385 
   6386    // sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6387    // sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6388    // sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6389    if (1) test_sqdmlal_2d_2s_s0(TyS);
   6390    if (1) test_sqdmlal_2d_2s_s3(TyS);
   6391    if (1) test_sqdmlal2_2d_4s_s1(TyS);
   6392    if (1) test_sqdmlal2_2d_4s_s2(TyS);
   6393    if (1) test_sqdmlal_4s_4h_h0(TyH);
   6394    if (1) test_sqdmlal_4s_4h_h7(TyH);
   6395    if (1) test_sqdmlal2_4s_8h_h1(TyH);
   6396    if (1) test_sqdmlal2_4s_8h_h4(TyH);
   6397    if (1) test_sqdmlsl_2d_2s_s0(TyS);
   6398    if (1) test_sqdmlsl_2d_2s_s3(TyS);
   6399    if (1) test_sqdmlsl2_2d_4s_s1(TyS);
   6400    if (1) test_sqdmlsl2_2d_4s_s2(TyS);
   6401    if (1) test_sqdmlsl_4s_4h_h0(TyH);
   6402    if (1) test_sqdmlsl_4s_4h_h7(TyH);
   6403    if (1) test_sqdmlsl2_4s_8h_h1(TyH);
   6404    if (1) test_sqdmlsl2_4s_8h_h4(TyH);
   6405    if (1) test_sqdmull_2d_2s_s0(TyS);
   6406    if (1) test_sqdmull_2d_2s_s3(TyS);
   6407    if (1) test_sqdmull2_2d_4s_s1(TyS);
   6408    if (1) test_sqdmull2_2d_4s_s2(TyS);
   6409    if (1) test_sqdmull_4s_4h_h0(TyH);
   6410    if (1) test_sqdmull_4s_4h_h7(TyH);
   6411    if (1) test_sqdmull2_4s_8h_h1(TyH);
   6412    if (1) test_sqdmull2_4s_8h_h4(TyH);
   6413 
   6414    // sqdmlal      d_s_s, s_h_h
   6415    // sqdmlsl      d_s_s, s_h_h
   6416    // sqdmull      d_s_s, s_h_h
   6417    if (1) test_sqdmlal_d_s_s(TyS);
   6418    if (1) test_sqdmlal_s_h_h(TyH);
   6419    if (1) test_sqdmlsl_d_s_s(TyS);
   6420    if (1) test_sqdmlsl_s_h_h(TyH);
   6421    if (1) test_sqdmull_d_s_s(TyS);
   6422    if (1) test_sqdmull_s_h_h(TyH);
   6423 
   6424    // sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6425    // sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6426    // sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6427    if (1) test_sqdmlal_2d_2s_2s(TyS);
   6428    if (1) test_sqdmlal2_2d_4s_4s(TyS);
   6429    if (1) test_sqdmlal_4s_4h_4h(TyH);
   6430    if (1) test_sqdmlal2_4s_8h_8h(TyH);
   6431    if (1) test_sqdmlsl_2d_2s_2s(TyS);
   6432    if (1) test_sqdmlsl2_2d_4s_4s(TyS);
   6433    if (1) test_sqdmlsl_4s_4h_4h(TyH);
   6434    if (1) test_sqdmlsl2_4s_8h_8h(TyH);
   6435    if (1) test_sqdmull_2d_2s_2s(TyS);
   6436    if (1) test_sqdmull2_2d_4s_4s(TyS);
   6437    if (1) test_sqdmull_4s_4h_4h(TyH);
   6438    if (1) test_sqdmull2_4s_8h_8h(TyH);
   6439 
   6440    // sqdmulh      s_s_s[], h_h_h[]
   6441    // sqrdmulh     s_s_s[], h_h_h[]
   6442    if (1) test_sqdmulh_s_s_s1(TyS);
   6443    if (1) test_sqdmulh_s_s_s3(TyS);
   6444    if (1) test_sqdmulh_h_h_h2(TyH);
   6445    if (1) test_sqdmulh_h_h_h7(TyH);
   6446    if (1) test_sqrdmulh_s_s_s1(TyS);
   6447    if (1) test_sqrdmulh_s_s_s3(TyS);
   6448    if (1) test_sqrdmulh_h_h_h2(TyH);
   6449    if (1) test_sqrdmulh_h_h_h7(TyH);
   6450 
   6451    // sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   6452    // sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   6453    if (1) test_sqdmulh_4s_4s_s1(TyS);
   6454    if (1) test_sqdmulh_4s_4s_s3(TyS);
   6455    if (1) test_sqdmulh_2s_2s_s1(TyS);
   6456    if (1) test_sqdmulh_2s_2s_s3(TyS);
   6457    if (1) test_sqdmulh_8h_8h_h2(TyH);
   6458    if (1) test_sqdmulh_8h_8h_h7(TyH);
   6459    if (1) test_sqdmulh_4h_4h_h2(TyH);
   6460    if (1) test_sqdmulh_4h_4h_h7(TyH);
   6461    if (1) test_sqrdmulh_4s_4s_s1(TyS);
   6462    if (1) test_sqrdmulh_4s_4s_s3(TyS);
   6463    if (1) test_sqrdmulh_2s_2s_s1(TyS);
   6464    if (1) test_sqrdmulh_2s_2s_s3(TyS);
   6465    if (1) test_sqrdmulh_8h_8h_h2(TyH);
   6466    if (1) test_sqrdmulh_8h_8h_h7(TyH);
   6467    if (1) test_sqrdmulh_4h_4h_h2(TyH);
   6468    if (1) test_sqrdmulh_4h_4h_h7(TyH);
   6469 
   6470    // sqdmulh      h,s
   6471    // sqrdmulh     h,s
   6472    if (1) test_sqdmulh_s_s_s(TyS);
   6473    if (1) test_sqdmulh_h_h_h(TyH);
   6474    if (1) test_sqrdmulh_s_s_s(TyS);
   6475    if (1) test_sqrdmulh_h_h_h(TyH);
   6476 
   6477    // sqdmulh      4s,2s,8h,4h
   6478    // sqrdmulh     4s,2s,8h,4h
   6479    if (1) test_sqdmulh_4s_4s_4s(TyS);
   6480    if (1) test_sqdmulh_2s_2s_2s(TyS);
   6481    if (1) test_sqdmulh_8h_8h_8h(TyH);
   6482    if (1) test_sqdmulh_4h_4h_4h(TyH);
   6483    if (1) test_sqrdmulh_4s_4s_4s(TyS);
   6484    if (1) test_sqrdmulh_2s_2s_2s(TyS);
   6485    if (1) test_sqrdmulh_8h_8h_8h(TyH);
   6486    if (1) test_sqrdmulh_4h_4h_4h(TyH);
   6487 
   6488    // sqshl (reg)  d,s,h,b
   6489    // uqshl (reg)  d,s,h,b
   6490    // sqrshl (reg) d,s,h,b
   6491    // uqrshl (reg) d,s,h,b
   6492    if (1) test_sqshl_d_d_d(TyD);
   6493    if (1) test_sqshl_s_s_s(TyS);
   6494    if (1) test_sqshl_h_h_h(TyH);
   6495    if (1) test_sqshl_b_b_b(TyB);
   6496    if (1) test_uqshl_d_d_d(TyD);
   6497    if (1) test_uqshl_s_s_s(TyS);
   6498    if (1) test_uqshl_h_h_h(TyH);
   6499    if (1) test_uqshl_b_b_b(TyB);
   6500    if (1) test_sqrshl_d_d_d(TyD);
   6501    if (1) test_sqrshl_s_s_s(TyS);
   6502    if (1) test_sqrshl_h_h_h(TyH);
   6503    if (1) test_sqrshl_b_b_b(TyB);
   6504    if (1) test_uqrshl_d_d_d(TyD);
   6505    if (1) test_uqrshl_s_s_s(TyS);
   6506    if (1) test_uqrshl_h_h_h(TyH);
   6507    if (1) test_uqrshl_b_b_b(TyB);
   6508 
   6509    // sqshl (reg)  2d,4s,2s,8h,4h,16b,8b
   6510    // uqshl (reg)  2d,4s,2s,8h,4h,16b,8b
   6511    // sqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
   6512    // uqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
   6513    if (1) test_sqshl_2d_2d_2d(TyD);
   6514    if (1) test_sqshl_4s_4s_4s(TyS);
   6515    if (1) test_sqshl_2s_2s_2s(TyS);
   6516    if (1) test_sqshl_8h_8h_8h(TyH);
   6517    if (1) test_sqshl_4h_4h_4h(TyH);
   6518    if (1) test_sqshl_16b_16b_16b(TyB);
   6519    if (1) test_sqshl_8b_8b_8b(TyB);
   6520    if (1) test_uqshl_2d_2d_2d(TyD);
   6521    if (1) test_uqshl_4s_4s_4s(TyS);
   6522    if (1) test_uqshl_2s_2s_2s(TyS);
   6523    if (1) test_uqshl_8h_8h_8h(TyH);
   6524    if (1) test_uqshl_4h_4h_4h(TyH);
   6525    if (1) test_uqshl_16b_16b_16b(TyB);
   6526    if (1) test_uqshl_8b_8b_8b(TyB);
   6527    if (1) test_sqrshl_2d_2d_2d(TyD);
   6528    if (1) test_sqrshl_4s_4s_4s(TyS);
   6529    if (1) test_sqrshl_2s_2s_2s(TyS);
   6530    if (1) test_sqrshl_8h_8h_8h(TyH);
   6531    if (1) test_sqrshl_4h_4h_4h(TyH);
   6532    if (1) test_sqrshl_16b_16b_16b(TyB);
   6533    if (1) test_sqrshl_8b_8b_8b(TyB);
   6534    if (1) test_uqrshl_2d_2d_2d(TyD);
   6535    if (1) test_uqrshl_4s_4s_4s(TyS);
   6536    if (1) test_uqrshl_2s_2s_2s(TyS);
   6537    if (1) test_uqrshl_8h_8h_8h(TyH);
   6538    if (1) test_uqrshl_4h_4h_4h(TyH);
   6539    if (1) test_uqrshl_16b_16b_16b(TyB);
   6540    if (1) test_uqrshl_8b_8b_8b(TyB);
   6541 
   6542    // sqrshrn      s_d, h_s, b_h   #imm
   6543    // uqrshrn      s_d, h_s, b_h   #imm
   6544    // sqshrn       s_d, h_s, b_h   #imm
   6545    // uqshrn       s_d, h_s, b_h   #imm
   6546    // sqrshrun     s_d, h_s, b_h   #imm
   6547    // sqshrun      s_d, h_s, b_h   #imm
   6548    if (1) test_sqrshrn_s_d_1(TyD);
   6549    if (1) test_sqrshrn_s_d_17(TyD);
   6550    if (1) test_sqrshrn_s_d_32(TyD);
   6551    if (1) test_sqrshrn_h_s_1(TyS);
   6552    if (1) test_sqrshrn_h_s_9(TyS);
   6553    if (1) test_sqrshrn_h_s_16(TyS);
   6554    if (1) test_sqrshrn_b_h_1(TyH);
   6555    if (1) test_sqrshrn_b_h_4(TyH);
   6556    if (1) test_sqrshrn_b_h_8(TyH);
   6557    if (1) test_uqrshrn_s_d_1(TyD);
   6558    if (1) test_uqrshrn_s_d_17(TyD);
   6559    if (1) test_uqrshrn_s_d_32(TyD);
   6560    if (1) test_uqrshrn_h_s_1(TyS);
   6561    if (1) test_uqrshrn_h_s_9(TyS);
   6562    if (1) test_uqrshrn_h_s_16(TyS);
   6563    if (1) test_uqrshrn_b_h_1(TyH);
   6564    if (1) test_uqrshrn_b_h_4(TyH);
   6565    if (1) test_uqrshrn_b_h_8(TyH);
   6566    if (1) test_sqshrn_s_d_1(TyD);
   6567    if (1) test_sqshrn_s_d_17(TyD);
   6568    if (1) test_sqshrn_s_d_32(TyD);
   6569    if (1) test_sqshrn_h_s_1(TyS);
   6570    if (1) test_sqshrn_h_s_9(TyS);
   6571    if (1) test_sqshrn_h_s_16(TyS);
   6572    if (1) test_sqshrn_b_h_1(TyH);
   6573    if (1) test_sqshrn_b_h_4(TyH);
   6574    if (1) test_sqshrn_b_h_8(TyH);
   6575    if (1) test_uqshrn_s_d_1(TyD);
   6576    if (1) test_uqshrn_s_d_17(TyD);
   6577    if (1) test_uqshrn_s_d_32(TyD);
   6578    if (1) test_uqshrn_h_s_1(TyS);
   6579    if (1) test_uqshrn_h_s_9(TyS);
   6580    if (1) test_uqshrn_h_s_16(TyS);
   6581    if (1) test_uqshrn_b_h_1(TyH);
   6582    if (1) test_uqshrn_b_h_4(TyH);
   6583    if (1) test_uqshrn_b_h_8(TyH);
   6584    if (1) test_sqrshrun_s_d_1(TyD);
   6585    if (1) test_sqrshrun_s_d_17(TyD);
   6586    if (1) test_sqrshrun_s_d_32(TyD);
   6587    if (1) test_sqrshrun_h_s_1(TyS);
   6588    if (1) test_sqrshrun_h_s_9(TyS);
   6589    if (1) test_sqrshrun_h_s_16(TyS);
   6590    if (1) test_sqrshrun_b_h_1(TyH);
   6591    if (1) test_sqrshrun_b_h_4(TyH);
   6592    if (1) test_sqrshrun_b_h_8(TyH);
   6593    if (1) test_sqshrun_s_d_1(TyD);
   6594    if (1) test_sqshrun_s_d_17(TyD);
   6595    if (1) test_sqshrun_s_d_32(TyD);
   6596    if (1) test_sqshrun_h_s_1(TyS);
   6597    if (1) test_sqshrun_h_s_9(TyS);
   6598    if (1) test_sqshrun_h_s_16(TyS);
   6599    if (1) test_sqshrun_b_h_1(TyH);
   6600    if (1) test_sqshrun_b_h_4(TyH);
   6601    if (1) test_sqshrun_b_h_8(TyH);
   6602 
   6603    // sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6604    // uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6605    // sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6606    // uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6607    // sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6608    // sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6609    if (1) test_sqrshrn_2s_2d_1(TyD);
   6610    if (1) test_sqrshrn_2s_2d_17(TyD);
   6611    if (1) test_sqrshrn_2s_2d_32(TyD);
   6612    if (1) test_sqrshrn2_4s_2d_1(TyD);
   6613    if (1) test_sqrshrn2_4s_2d_17(TyD);
   6614    if (1) test_sqrshrn2_4s_2d_32(TyD);
   6615    if (1) test_sqrshrn_4h_4s_1(TyS);
   6616    if (1) test_sqrshrn_4h_4s_9(TyS);
   6617    if (1) test_sqrshrn_4h_4s_16(TyS);
   6618    if (1) test_sqrshrn2_8h_4s_1(TyS);
   6619    if (1) test_sqrshrn2_8h_4s_9(TyS);
   6620    if (1) test_sqrshrn2_8h_4s_16(TyS);
   6621    if (1) test_sqrshrn_8b_8h_1(TyH);
   6622    if (1) test_sqrshrn_8b_8h_4(TyH);
   6623    if (1) test_sqrshrn_8b_8h_8(TyH);
   6624    if (1) test_sqrshrn2_16b_8h_1(TyH);
   6625    if (1) test_sqrshrn2_16b_8h_4(TyH);
   6626    if (1) test_sqrshrn2_16b_8h_8(TyH);
   6627    if (1) test_uqrshrn_2s_2d_1(TyD);
   6628    if (1) test_uqrshrn_2s_2d_17(TyD);
   6629    if (1) test_uqrshrn_2s_2d_32(TyD);
   6630    if (1) test_uqrshrn2_4s_2d_1(TyD);
   6631    if (1) test_uqrshrn2_4s_2d_17(TyD);
   6632    if (1) test_uqrshrn2_4s_2d_32(TyD);
   6633    if (1) test_uqrshrn_4h_4s_1(TyS);
   6634    if (1) test_uqrshrn_4h_4s_9(TyS);
   6635    if (1) test_uqrshrn_4h_4s_16(TyS);
   6636    if (1) test_uqrshrn2_8h_4s_1(TyS);
   6637    if (1) test_uqrshrn2_8h_4s_9(TyS);
   6638    if (1) test_uqrshrn2_8h_4s_16(TyS);
   6639    if (1) test_uqrshrn_8b_8h_1(TyH);
   6640    if (1) test_uqrshrn_8b_8h_4(TyH);
   6641    if (1) test_uqrshrn_8b_8h_8(TyH);
   6642    if (1) test_uqrshrn2_16b_8h_1(TyH);
   6643    if (1) test_uqrshrn2_16b_8h_4(TyH);
   6644    if (1) test_uqrshrn2_16b_8h_8(TyH);
   6645    if (1) test_sqshrn_2s_2d_1(TyD);
   6646    if (1) test_sqshrn_2s_2d_17(TyD);
   6647    if (1) test_sqshrn_2s_2d_32(TyD);
   6648    if (1) test_sqshrn2_4s_2d_1(TyD);
   6649    if (1) test_sqshrn2_4s_2d_17(TyD);
   6650    if (1) test_sqshrn2_4s_2d_32(TyD);
   6651    if (1) test_sqshrn_4h_4s_1(TyS);
   6652    if (1) test_sqshrn_4h_4s_9(TyS);
   6653    if (1) test_sqshrn_4h_4s_16(TyS);
   6654    if (1) test_sqshrn2_8h_4s_1(TyS);
   6655    if (1) test_sqshrn2_8h_4s_9(TyS);
   6656    if (1) test_sqshrn2_8h_4s_16(TyS);
   6657    if (1) test_sqshrn_8b_8h_1(TyH);
   6658    if (1) test_sqshrn_8b_8h_4(TyH);
   6659    if (1) test_sqshrn_8b_8h_8(TyH);
   6660    if (1) test_sqshrn2_16b_8h_1(TyH);
   6661    if (1) test_sqshrn2_16b_8h_4(TyH);
   6662    if (1) test_sqshrn2_16b_8h_8(TyH);
   6663    if (1) test_uqshrn_2s_2d_1(TyD);
   6664    if (1) test_uqshrn_2s_2d_17(TyD);
   6665    if (1) test_uqshrn_2s_2d_32(TyD);
   6666    if (1) test_uqshrn2_4s_2d_1(TyD);
   6667    if (1) test_uqshrn2_4s_2d_17(TyD);
   6668    if (1) test_uqshrn2_4s_2d_32(TyD);
   6669    if (1) test_uqshrn_4h_4s_1(TyS);
   6670    if (1) test_uqshrn_4h_4s_9(TyS);
   6671    if (1) test_uqshrn_4h_4s_16(TyS);
   6672    if (1) test_uqshrn2_8h_4s_1(TyS);
   6673    if (1) test_uqshrn2_8h_4s_9(TyS);
   6674    if (1) test_uqshrn2_8h_4s_16(TyS);
   6675    if (1) test_uqshrn_8b_8h_1(TyH);
   6676    if (1) test_uqshrn_8b_8h_4(TyH);
   6677    if (1) test_uqshrn_8b_8h_8(TyH);
   6678    if (1) test_uqshrn2_16b_8h_1(TyH);
   6679    if (1) test_uqshrn2_16b_8h_4(TyH);
   6680    if (1) test_uqshrn2_16b_8h_8(TyH);
   6681    if (1) test_sqrshrun_2s_2d_1(TyD);
   6682    if (1) test_sqrshrun_2s_2d_17(TyD);
   6683    if (1) test_sqrshrun_2s_2d_32(TyD);
   6684    if (1) test_sqrshrun2_4s_2d_1(TyD);
   6685    if (1) test_sqrshrun2_4s_2d_17(TyD);
   6686    if (1) test_sqrshrun2_4s_2d_32(TyD);
   6687    if (1) test_sqrshrun_4h_4s_1(TyS);
   6688    if (1) test_sqrshrun_4h_4s_9(TyS);
   6689    if (1) test_sqrshrun_4h_4s_16(TyS);
   6690    if (1) test_sqrshrun2_8h_4s_1(TyS);
   6691    if (1) test_sqrshrun2_8h_4s_9(TyS);
   6692    if (1) test_sqrshrun2_8h_4s_16(TyS);
   6693    if (1) test_sqrshrun_8b_8h_1(TyH);
   6694    if (1) test_sqrshrun_8b_8h_4(TyH);
   6695    if (1) test_sqrshrun_8b_8h_8(TyH);
   6696    if (1) test_sqrshrun2_16b_8h_1(TyH);
   6697    if (1) test_sqrshrun2_16b_8h_4(TyH);
   6698    if (1) test_sqrshrun2_16b_8h_8(TyH);
   6699    if (1) test_sqshrun_2s_2d_1(TyD);
   6700    if (1) test_sqshrun_2s_2d_17(TyD);
   6701    if (1) test_sqshrun_2s_2d_32(TyD);
   6702    if (1) test_sqshrun2_4s_2d_1(TyD);
   6703    if (1) test_sqshrun2_4s_2d_17(TyD);
   6704    if (1) test_sqshrun2_4s_2d_32(TyD);
   6705    if (1) test_sqshrun_4h_4s_1(TyS);
   6706    if (1) test_sqshrun_4h_4s_9(TyS);
   6707    if (1) test_sqshrun_4h_4s_16(TyS);
   6708    if (1) test_sqshrun2_8h_4s_1(TyS);
   6709    if (1) test_sqshrun2_8h_4s_9(TyS);
   6710    if (1) test_sqshrun2_8h_4s_16(TyS);
   6711    if (1) test_sqshrun_8b_8h_1(TyH);
   6712    if (1) test_sqshrun_8b_8h_4(TyH);
   6713    if (1) test_sqshrun_8b_8h_8(TyH);
   6714    if (1) test_sqshrun2_16b_8h_1(TyH);
   6715    if (1) test_sqshrun2_16b_8h_4(TyH);
   6716    if (1) test_sqshrun2_16b_8h_8(TyH);
   6717 
   6718    // sqshl (imm)  d,s,h,b   _#imm
   6719    // uqshl (imm)  d,s,h,b   _#imm
   6720    // sqshlu (imm) d,s,h,b   _#imm
   6721    if (1) test_sqshl_d_d_0(TyD);
   6722    if (1) test_sqshl_d_d_32(TyD);
   6723    if (1) test_sqshl_d_d_63(TyD);
   6724    if (1) test_sqshl_s_s_0(TyS);
   6725    if (1) test_sqshl_s_s_16(TyS);
   6726    if (1) test_sqshl_s_s_31(TyS);
   6727    if (1) test_sqshl_h_h_0(TyH);
   6728    if (1) test_sqshl_h_h_8(TyH);
   6729    if (1) test_sqshl_h_h_15(TyH);
   6730    if (1) test_sqshl_b_b_0(TyB);
   6731    if (1) test_sqshl_b_b_1(TyB);
   6732    if (1) test_sqshl_b_b_4(TyB);
   6733    if (1) test_sqshl_b_b_6(TyB);
   6734    if (1) test_sqshl_b_b_7(TyB);
   6735    if (1) test_uqshl_d_d_0(TyD);
   6736    if (1) test_uqshl_d_d_32(TyD);
   6737    if (1) test_uqshl_d_d_63(TyD);
   6738    if (1) test_uqshl_s_s_0(TyS);
   6739    if (1) test_uqshl_s_s_16(TyS);
   6740    if (1) test_uqshl_s_s_31(TyS);
   6741    if (1) test_uqshl_h_h_0(TyH);
   6742    if (1) test_uqshl_h_h_8(TyH);
   6743    if (1) test_uqshl_h_h_15(TyH);
   6744    if (1) test_uqshl_b_b_0(TyB);
   6745    if (1) test_uqshl_b_b_1(TyB);
   6746    if (1) test_uqshl_b_b_4(TyB);
   6747    if (1) test_uqshl_b_b_6(TyB);
   6748    if (1) test_uqshl_b_b_7(TyB);
   6749    if (1) test_sqshlu_d_d_0(TyD);
   6750    if (1) test_sqshlu_d_d_32(TyD);
   6751    if (1) test_sqshlu_d_d_63(TyD);
   6752    if (1) test_sqshlu_s_s_0(TyS);
   6753    if (1) test_sqshlu_s_s_16(TyS);
   6754    if (1) test_sqshlu_s_s_31(TyS);
   6755    if (1) test_sqshlu_h_h_0(TyH);
   6756    if (1) test_sqshlu_h_h_8(TyH);
   6757    if (1) test_sqshlu_h_h_15(TyH);
   6758    if (1) test_sqshlu_b_b_0(TyB);
   6759    if (1) test_sqshlu_b_b_1(TyB);
   6760    if (1) test_sqshlu_b_b_2(TyB);
   6761    if (1) test_sqshlu_b_b_3(TyB);
   6762    if (1) test_sqshlu_b_b_4(TyB);
   6763    if (1) test_sqshlu_b_b_5(TyB);
   6764    if (1) test_sqshlu_b_b_6(TyB);
   6765    if (1) test_sqshlu_b_b_7(TyB);
   6766 
   6767    // sqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
   6768    // uqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
   6769    // sqshlu (imm) 2d,4s,2s,8h,4h,16b,8b   _#imm
   6770    if (1) test_sqshl_2d_2d_0(TyD);
   6771    if (1) test_sqshl_2d_2d_32(TyD);
   6772    if (1) test_sqshl_2d_2d_63(TyD);
   6773    if (1) test_sqshl_4s_4s_0(TyS);
   6774    if (1) test_sqshl_4s_4s_16(TyS);
   6775    if (1) test_sqshl_4s_4s_31(TyS);
   6776    if (1) test_sqshl_2s_2s_0(TyS);
   6777    if (1) test_sqshl_2s_2s_16(TyS);
   6778    if (1) test_sqshl_2s_2s_31(TyS);
   6779    if (1) test_sqshl_8h_8h_0(TyH);
   6780    if (1) test_sqshl_8h_8h_8(TyH);
   6781    if (1) test_sqshl_8h_8h_15(TyH);
   6782    if (1) test_sqshl_4h_4h_0(TyH);
   6783    if (1) test_sqshl_4h_4h_8(TyH);
   6784    if (1) test_sqshl_4h_4h_15(TyH);
   6785    if (1) test_sqshl_16b_16b_0(TyB);
   6786    if (1) test_sqshl_16b_16b_3(TyB);
   6787    if (1) test_sqshl_16b_16b_7(TyB);
   6788    if (1) test_sqshl_8b_8b_0(TyB);
   6789    if (1) test_sqshl_8b_8b_3(TyB);
   6790    if (1) test_sqshl_8b_8b_7(TyB);
   6791    if (1) test_uqshl_2d_2d_0(TyD);
   6792    if (1) test_uqshl_2d_2d_32(TyD);
   6793    if (1) test_uqshl_2d_2d_63(TyD);
   6794    if (1) test_uqshl_4s_4s_0(TyS);
   6795    if (1) test_uqshl_4s_4s_16(TyS);
   6796    if (1) test_uqshl_4s_4s_31(TyS);
   6797    if (1) test_uqshl_2s_2s_0(TyS);
   6798    if (1) test_uqshl_2s_2s_16(TyS);
   6799    if (1) test_uqshl_2s_2s_31(TyS);
   6800    if (1) test_uqshl_8h_8h_0(TyH);
   6801    if (1) test_uqshl_8h_8h_8(TyH);
   6802    if (1) test_uqshl_8h_8h_15(TyH);
   6803    if (1) test_uqshl_4h_4h_0(TyH);
   6804    if (1) test_uqshl_4h_4h_8(TyH);
   6805    if (1) test_uqshl_4h_4h_15(TyH);
   6806    if (1) test_uqshl_16b_16b_0(TyB);
   6807    if (1) test_uqshl_16b_16b_3(TyB);
   6808    if (1) test_uqshl_16b_16b_7(TyB);
   6809    if (1) test_uqshl_8b_8b_0(TyB);
   6810    if (1) test_uqshl_8b_8b_3(TyB);
   6811    if (1) test_uqshl_8b_8b_7(TyB);
   6812    if (1) test_sqshlu_2d_2d_0(TyD);
   6813    if (1) test_sqshlu_2d_2d_32(TyD);
   6814    if (1) test_sqshlu_2d_2d_63(TyD);
   6815    if (1) test_sqshlu_4s_4s_0(TyS);
   6816    if (1) test_sqshlu_4s_4s_16(TyS);
   6817    if (1) test_sqshlu_4s_4s_31(TyS);
   6818    if (1) test_sqshlu_2s_2s_0(TyS);
   6819    if (1) test_sqshlu_2s_2s_16(TyS);
   6820    if (1) test_sqshlu_2s_2s_31(TyS);
   6821    if (1) test_sqshlu_8h_8h_0(TyH);
   6822    if (1) test_sqshlu_8h_8h_8(TyH);
   6823    if (1) test_sqshlu_8h_8h_15(TyH);
   6824    if (1) test_sqshlu_4h_4h_0(TyH);
   6825    if (1) test_sqshlu_4h_4h_8(TyH);
   6826    if (1) test_sqshlu_4h_4h_15(TyH);
   6827    if (1) test_sqshlu_16b_16b_0(TyB);
   6828    if (1) test_sqshlu_16b_16b_3(TyB);
   6829    if (1) test_sqshlu_16b_16b_7(TyB);
   6830    if (1) test_sqshlu_8b_8b_0(TyB);
   6831    if (1) test_sqshlu_8b_8b_3(TyB);
   6832    if (1) test_sqshlu_8b_8b_7(TyB);
   6833 
   6834    // sqxtn        s_d,h_s,b_h
   6835    // uqxtn        s_d,h_s,b_h
   6836    // sqxtun       s_d,h_s,b_h
   6837    if (1) test_sqxtn_s_d(TyD);
   6838    if (1) test_sqxtn_h_s(TyS);
   6839    if (1) test_sqxtn_b_h(TyH);
   6840    if (1) test_uqxtn_s_d(TyD);
   6841    if (1) test_uqxtn_h_s(TyS);
   6842    if (1) test_uqxtn_b_h(TyH);
   6843    if (1) test_sqxtun_s_d(TyD);
   6844    if (1) test_sqxtun_h_s(TyS);
   6845    if (1) test_sqxtun_b_h(TyH);
   6846 
   6847    // sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6848    // uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6849    // sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6850    if (1) test_sqxtn_2s_2d(TyD);
   6851    if (1) test_sqxtn2_4s_2d(TyD);
   6852    if (1) test_sqxtn_4h_4s(TyS);
   6853    if (1) test_sqxtn2_8h_4s(TyS);
   6854    if (1) test_sqxtn_8b_8h(TyH);
   6855    if (1) test_sqxtn2_16b_8h(TyH);
   6856    if (1) test_uqxtn_2s_2d(TyD);
   6857    if (1) test_uqxtn2_4s_2d(TyD);
   6858    if (1) test_uqxtn_4h_4s(TyS);
   6859    if (1) test_uqxtn2_8h_4s(TyS);
   6860    if (1) test_uqxtn_8b_8h(TyH);
   6861    if (1) test_uqxtn2_16b_8h(TyH);
   6862    if (1) test_sqxtun_2s_2d(TyD);
   6863    if (1) test_sqxtun2_4s_2d(TyD);
   6864    if (1) test_sqxtun_4h_4s(TyS);
   6865    if (1) test_sqxtun2_8h_4s(TyS);
   6866    if (1) test_sqxtun_8b_8h(TyH);
   6867    if (1) test_sqxtun2_16b_8h(TyH);
   6868 
   6869    // srhadd       4s,2s,8h,4h,16b,8b
   6870    // urhadd       4s,2s,8h,4h,16b,8b
   6871    if (1) test_srhadd_4s_4s_4s(TyS);
   6872    if (1) test_srhadd_2s_2s_2s(TyS);
   6873    if (1) test_srhadd_8h_8h_8h(TyH);
   6874    if (1) test_srhadd_4h_4h_4h(TyH);
   6875    if (1) test_srhadd_16b_16b_16b(TyB);
   6876    if (1) test_srhadd_8b_8b_8b(TyB);
   6877    if (1) test_urhadd_4s_4s_4s(TyS);
   6878    if (1) test_urhadd_2s_2s_2s(TyS);
   6879    if (1) test_urhadd_8h_8h_8h(TyH);
   6880    if (1) test_urhadd_4h_4h_4h(TyH);
   6881    if (1) test_urhadd_16b_16b_16b(TyB);
   6882    if (1) test_urhadd_8b_8b_8b(TyB);
   6883 
   6884    // sshl (reg)   d
   6885    // ushl (reg)   d
   6886    if (1) test_sshl_d_d_d(TyD);
   6887    if (1) test_ushl_d_d_d(TyD);
   6888 
   6889    // sshl (reg)   2d,4s,2s,8h,4h,16b,8b
   6890    // ushl (reg)   2d,4s,2s,8h,4h,16b,8b
   6891    if (1) test_sshl_2d_2d_2d(TyD);
   6892    if (1) test_sshl_4s_4s_4s(TyS);
   6893    if (1) test_sshl_2s_2s_2s(TyS);
   6894    if (1) test_sshl_8h_8h_8h(TyH);
   6895    if (1) test_sshl_4h_4h_4h(TyH);
   6896    if (1) test_sshl_16b_16b_16b(TyB);
   6897    if (1) test_sshl_8b_8b_8b(TyB);
   6898    if (1) test_ushl_2d_2d_2d(TyD);
   6899    if (1) test_ushl_4s_4s_4s(TyS);
   6900    if (1) test_ushl_2s_2s_2s(TyS);
   6901    if (1) test_ushl_8h_8h_8h(TyH);
   6902    if (1) test_ushl_4h_4h_4h(TyH);
   6903    if (1) test_ushl_16b_16b_16b(TyB);
   6904    if (1) test_ushl_8b_8b_8b(TyB);
   6905 
   6906    // shl  (imm)   d
   6907    // sshr (imm)   d
   6908    // ushr (imm)   d
   6909    if (1) test_shl_d_d_0(TyD);
   6910    if (1) test_shl_d_d_32(TyD);
   6911    if (1) test_shl_d_d_63(TyD);
   6912    if (1) test_sshr_d_d_1(TyD);
   6913    if (1) test_sshr_d_d_32(TyD);
   6914    if (1) test_sshr_d_d_64(TyD);
   6915    if (1) test_ushr_d_d_1(TyD);
   6916    if (1) test_ushr_d_d_32(TyD);
   6917    if (1) test_ushr_d_d_64(TyD);
   6918 
   6919    // shl  (imm)   16b,8b,8h,4h,4s,2s,2d
   6920    // sshr (imm)   2d,4s,2s,8h,4h,16b,8b
   6921    // ushr (imm)   2d,4s,2s,8h,4h,16b,8b
   6922    if (1) test_shl_2d_2d_0(TyD);
   6923    if (1) test_shl_2d_2d_13(TyD);
   6924    if (1) test_shl_2d_2d_63(TyD);
   6925    if (1) test_shl_4s_4s_0(TyS);
   6926    if (1) test_shl_4s_4s_13(TyS);
   6927    if (1) test_shl_4s_4s_31(TyS);
   6928    if (1) test_shl_2s_2s_0(TyS);
   6929    if (1) test_shl_2s_2s_13(TyS);
   6930    if (1) test_shl_2s_2s_31(TyS);
   6931    if (1) test_shl_8h_8h_0(TyH);
   6932    if (1) test_shl_8h_8h_13(TyH);
   6933    if (1) test_shl_8h_8h_15(TyH);
   6934    if (1) test_shl_4h_4h_0(TyH);
   6935    if (1) test_shl_4h_4h_13(TyH);
   6936    if (1) test_shl_4h_4h_15(TyH);
   6937    if (1) test_shl_16b_16b_0(TyB);
   6938    if (1) test_shl_16b_16b_7(TyB);
   6939    if (1) test_shl_8b_8b_0(TyB);
   6940    if (1) test_shl_8b_8b_7(TyB);
   6941    if (1) test_sshr_2d_2d_1(TyD);
   6942    if (1) test_sshr_2d_2d_13(TyD);
   6943    if (1) test_sshr_2d_2d_64(TyD);
   6944    if (1) test_sshr_4s_4s_1(TyS);
   6945    if (1) test_sshr_4s_4s_13(TyS);
   6946    if (1) test_sshr_4s_4s_32(TyS);
   6947    if (1) test_sshr_2s_2s_1(TyS);
   6948    if (1) test_sshr_2s_2s_13(TyS);
   6949    if (1) test_sshr_2s_2s_32(TyS);
   6950    if (1) test_sshr_8h_8h_1(TyH);
   6951    if (1) test_sshr_8h_8h_13(TyH);
   6952    if (1) test_sshr_8h_8h_16(TyH);
   6953    if (1) test_sshr_4h_4h_1(TyH);
   6954    if (1) test_sshr_4h_4h_13(TyH);
   6955    if (1) test_sshr_4h_4h_16(TyH);
   6956    if (1) test_sshr_16b_16b_1(TyB);
   6957    if (1) test_sshr_16b_16b_8(TyB);
   6958    if (1) test_sshr_8b_8b_1(TyB);
   6959    if (1) test_sshr_8b_8b_8(TyB);
   6960    if (1) test_ushr_2d_2d_1(TyD);
   6961    if (1) test_ushr_2d_2d_13(TyD);
   6962    if (1) test_ushr_2d_2d_64(TyD);
   6963    if (1) test_ushr_4s_4s_1(TyS);
   6964    if (1) test_ushr_4s_4s_13(TyS);
   6965    if (1) test_ushr_4s_4s_32(TyS);
   6966    if (1) test_ushr_2s_2s_1(TyS);
   6967    if (1) test_ushr_2s_2s_13(TyS);
   6968    if (1) test_ushr_2s_2s_32(TyS);
   6969    if (1) test_ushr_8h_8h_1(TyH);
   6970    if (1) test_ushr_8h_8h_13(TyH);
   6971    if (1) test_ushr_8h_8h_16(TyH);
   6972    if (1) test_ushr_4h_4h_1(TyH);
   6973    if (1) test_ushr_4h_4h_13(TyH);
   6974    if (1) test_ushr_4h_4h_16(TyH);
   6975    if (1) test_ushr_16b_16b_1(TyB);
   6976    if (1) test_ushr_16b_16b_8(TyB);
   6977    if (1) test_ushr_8b_8b_1(TyB);
   6978    if (1) test_ushr_8b_8b_8(TyB);
   6979 
   6980    // ssra (imm)   d
   6981    // usra (imm)   d
   6982    if (1) test_ssra_d_d_1(TyD);
   6983    if (1) test_ssra_d_d_32(TyD);
   6984    if (1) test_ssra_d_d_64(TyD);
   6985    if (1) test_usra_d_d_1(TyD);
   6986    if (1) test_usra_d_d_32(TyD);
   6987    if (1) test_usra_d_d_64(TyD);
   6988 
   6989    // ssra (imm)   2d,4s,2s,8h,4h,16b,8b
   6990    // usra (imm)   2d,4s,2s,8h,4h,16b,8b
   6991    if (1) test_ssra_2d_2d_1(TyD);
   6992    if (1) test_ssra_2d_2d_32(TyD);
   6993    if (1) test_ssra_2d_2d_64(TyD);
   6994    if (1) test_ssra_4s_4s_1(TyS);
   6995    if (1) test_ssra_4s_4s_16(TyS);
   6996    if (1) test_ssra_4s_4s_32(TyS);
   6997    if (1) test_ssra_2s_2s_1(TyS);
   6998    if (1) test_ssra_2s_2s_16(TyS);
   6999    if (1) test_ssra_2s_2s_32(TyS);
   7000    if (1) test_ssra_8h_8h_1(TyH);
   7001    if (1) test_ssra_8h_8h_8(TyH);
   7002    if (1) test_ssra_8h_8h_16(TyH);
   7003    if (1) test_ssra_4h_4h_1(TyH);
   7004    if (1) test_ssra_4h_4h_8(TyH);
   7005    if (1) test_ssra_4h_4h_16(TyH);
   7006    if (1) test_ssra_16b_16b_1(TyB);
   7007    if (1) test_ssra_16b_16b_3(TyB);
   7008    if (1) test_ssra_16b_16b_8(TyB);
   7009    if (1) test_ssra_8b_8b_1(TyB);
   7010    if (1) test_ssra_8b_8b_3(TyB);
   7011    if (1) test_ssra_8b_8b_8(TyB);
   7012    if (1) test_usra_2d_2d_1(TyD);
   7013    if (1) test_usra_2d_2d_32(TyD);
   7014    if (1) test_usra_2d_2d_64(TyD);
   7015    if (1) test_usra_4s_4s_1(TyS);
   7016    if (1) test_usra_4s_4s_16(TyS);
   7017    if (1) test_usra_4s_4s_32(TyS);
   7018    if (1) test_usra_2s_2s_1(TyS);
   7019    if (1) test_usra_2s_2s_16(TyS);
   7020    if (1) test_usra_2s_2s_32(TyS);
   7021    if (1) test_usra_8h_8h_1(TyH);
   7022    if (1) test_usra_8h_8h_8(TyH);
   7023    if (1) test_usra_8h_8h_16(TyH);
   7024    if (1) test_usra_4h_4h_1(TyH);
   7025    if (1) test_usra_4h_4h_8(TyH);
   7026    if (1) test_usra_4h_4h_16(TyH);
   7027    if (1) test_usra_16b_16b_1(TyB);
   7028    if (1) test_usra_16b_16b_3(TyB);
   7029    if (1) test_usra_16b_16b_8(TyB);
   7030    if (1) test_usra_8b_8b_1(TyB);
   7031    if (1) test_usra_8b_8b_3(TyB);
   7032    if (1) test_usra_8b_8b_8(TyB);
   7033 
   7034    // srshl (reg)  d
   7035    // urshl (reg)  d
   7036    if (1) test_srshl_d_d_d(TyD);
   7037    if (1) test_urshl_d_d_d(TyD);
   7038 
   7039    // srshl (reg)  2d,4s,2s,8h,4h,16b,8b
   7040    // urshl (reg)  2d,4s,2s,8h,4h,16b,8b
   7041    if (1) test_srshl_2d_2d_2d(TyD);
   7042    if (1) test_srshl_4s_4s_4s(TyS);
   7043    if (1) test_srshl_2s_2s_2s(TyS);
   7044    if (1) test_srshl_8h_8h_8h(TyH);
   7045    if (1) test_srshl_4h_4h_4h(TyH);
   7046    if (1) test_srshl_16b_16b_16b(TyB);
   7047    if (1) test_srshl_8b_8b_8b(TyB);
   7048    if (1) test_urshl_2d_2d_2d(TyD);
   7049    if (1) test_urshl_4s_4s_4s(TyS);
   7050    if (1) test_urshl_2s_2s_2s(TyS);
   7051    if (1) test_urshl_8h_8h_8h(TyH);
   7052    if (1) test_urshl_4h_4h_4h(TyH);
   7053    if (1) test_urshl_16b_16b_16b(TyB);
   7054    if (1) test_urshl_8b_8b_8b(TyB);
   7055 
   7056    // srshr (imm)  d
   7057    // urshr (imm)  d
   7058    if (1) test_srshr_d_d_1(TyD);
   7059    if (1) test_srshr_d_d_32(TyD);
   7060    if (1) test_srshr_d_d_64(TyD);
   7061    if (1) test_urshr_d_d_1(TyD);
   7062    if (1) test_urshr_d_d_32(TyD);
   7063    if (1) test_urshr_d_d_64(TyD);
   7064 
   7065    // srshr (imm)  2d,4s,2s,8h,4h,16b,8b
   7066    // urshr (imm)  2d,4s,2s,8h,4h,16b,8b
   7067    if (1) test_srshr_2d_2d_1(TyD);
   7068    if (1) test_srshr_2d_2d_32(TyD);
   7069    if (1) test_srshr_2d_2d_64(TyD);
   7070    if (1) test_srshr_4s_4s_1(TyS);
   7071    if (1) test_srshr_4s_4s_16(TyS);
   7072    if (1) test_srshr_4s_4s_32(TyS);
   7073    if (1) test_srshr_2s_2s_1(TyS);
   7074    if (1) test_srshr_2s_2s_16(TyS);
   7075    if (1) test_srshr_2s_2s_32(TyS);
   7076    if (1) test_srshr_8h_8h_1(TyH);
   7077    if (1) test_srshr_8h_8h_8(TyH);
   7078    if (1) test_srshr_8h_8h_16(TyH);
   7079    if (1) test_srshr_4h_4h_1(TyH);
   7080    if (1) test_srshr_4h_4h_8(TyH);
   7081    if (1) test_srshr_4h_4h_16(TyH);
   7082    if (1) test_srshr_16b_16b_1(TyB);
   7083    if (1) test_srshr_16b_16b_3(TyB);
   7084    if (1) test_srshr_16b_16b_8(TyB);
   7085    if (1) test_srshr_8b_8b_1(TyB);
   7086    if (1) test_srshr_8b_8b_3(TyB);
   7087    if (1) test_srshr_8b_8b_8(TyB);
   7088    if (1) test_urshr_2d_2d_1(TyD);
   7089    if (1) test_urshr_2d_2d_32(TyD);
   7090    if (1) test_urshr_2d_2d_64(TyD);
   7091    if (1) test_urshr_4s_4s_1(TyS);
   7092    if (1) test_urshr_4s_4s_16(TyS);
   7093    if (1) test_urshr_4s_4s_32(TyS);
   7094    if (1) test_urshr_2s_2s_1(TyS);
   7095    if (1) test_urshr_2s_2s_16(TyS);
   7096    if (1) test_urshr_2s_2s_32(TyS);
   7097    if (1) test_urshr_8h_8h_1(TyH);
   7098    if (1) test_urshr_8h_8h_8(TyH);
   7099    if (1) test_urshr_8h_8h_16(TyH);
   7100    if (1) test_urshr_4h_4h_1(TyH);
   7101    if (1) test_urshr_4h_4h_8(TyH);
   7102    if (1) test_urshr_4h_4h_16(TyH);
   7103    if (1) test_urshr_16b_16b_1(TyB);
   7104    if (1) test_urshr_16b_16b_3(TyB);
   7105    if (1) test_urshr_16b_16b_8(TyB);
   7106    if (1) test_urshr_8b_8b_1(TyB);
   7107    if (1) test_urshr_8b_8b_3(TyB);
   7108    if (1) test_urshr_8b_8b_8(TyB);
   7109 
   7110    // srsra (imm)  d
   7111    // ursra (imm)  d
   7112    if (1) test_srsra_d_d_1(TyD);
   7113    if (1) test_srsra_d_d_32(TyD);
   7114    if (1) test_srsra_d_d_64(TyD);
   7115    if (1) test_ursra_d_d_1(TyD);
   7116    if (1) test_ursra_d_d_32(TyD);
   7117    if (1) test_ursra_d_d_64(TyD);
   7118 
   7119    // srsra (imm)  2d,4s,2s,8h,4h,16b,8b
   7120    // ursra (imm)  2d,4s,2s,8h,4h,16b,8b
   7121    if (1) test_srsra_2d_2d_1(TyD);
   7122    if (1) test_srsra_2d_2d_32(TyD);
   7123    if (1) test_srsra_2d_2d_64(TyD);
   7124    if (1) test_srsra_4s_4s_1(TyS);
   7125    if (1) test_srsra_4s_4s_16(TyS);
   7126    if (1) test_srsra_4s_4s_32(TyS);
   7127    if (1) test_srsra_2s_2s_1(TyS);
   7128    if (1) test_srsra_2s_2s_16(TyS);
   7129    if (1) test_srsra_2s_2s_32(TyS);
   7130    if (1) test_srsra_8h_8h_1(TyH);
   7131    if (1) test_srsra_8h_8h_8(TyH);
   7132    if (1) test_srsra_8h_8h_16(TyH);
   7133    if (1) test_srsra_4h_4h_1(TyH);
   7134    if (1) test_srsra_4h_4h_8(TyH);
   7135    if (1) test_srsra_4h_4h_16(TyH);
   7136    if (1) test_srsra_16b_16b_1(TyB);
   7137    if (1) test_srsra_16b_16b_3(TyB);
   7138    if (1) test_srsra_16b_16b_8(TyB);
   7139    if (1) test_srsra_8b_8b_1(TyB);
   7140    if (1) test_srsra_8b_8b_3(TyB);
   7141    if (1) test_srsra_8b_8b_8(TyB);
   7142    if (1) test_ursra_2d_2d_1(TyD);
   7143    if (1) test_ursra_2d_2d_32(TyD);
   7144    if (1) test_ursra_2d_2d_64(TyD);
   7145    if (1) test_ursra_4s_4s_1(TyS);
   7146    if (1) test_ursra_4s_4s_16(TyS);
   7147    if (1) test_ursra_4s_4s_32(TyS);
   7148    if (1) test_ursra_2s_2s_1(TyS);
   7149    if (1) test_ursra_2s_2s_16(TyS);
   7150    if (1) test_ursra_2s_2s_32(TyS);
   7151    if (1) test_ursra_8h_8h_1(TyH);
   7152    if (1) test_ursra_8h_8h_8(TyH);
   7153    if (1) test_ursra_8h_8h_16(TyH);
   7154    if (1) test_ursra_4h_4h_1(TyH);
   7155    if (1) test_ursra_4h_4h_8(TyH);
   7156    if (1) test_ursra_4h_4h_16(TyH);
   7157    if (1) test_ursra_16b_16b_1(TyB);
   7158    if (1) test_ursra_16b_16b_3(TyB);
   7159    if (1) test_ursra_16b_16b_8(TyB);
   7160    if (1) test_ursra_8b_8b_1(TyB);
   7161    if (1) test_ursra_8b_8b_3(TyB);
   7162    if (1) test_ursra_8b_8b_8(TyB);
   7163 
   7164    // sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7165    // ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7166    if (1) test_sshll_2d_2s_0(TyS);
   7167    if (1) test_sshll_2d_2s_15(TyS);
   7168    if (1) test_sshll_2d_2s_31(TyS);
   7169    if (1) test_sshll2_2d_4s_0(TyS);
   7170    if (1) test_sshll2_2d_4s_15(TyS);
   7171    if (1) test_sshll2_2d_4s_31(TyS);
   7172    if (1) test_sshll_4s_4h_0(TyH);
   7173    if (1) test_sshll_4s_4h_7(TyH);
   7174    if (1) test_sshll_4s_4h_15(TyH);
   7175    if (1) test_sshll2_4s_8h_0(TyH);
   7176    if (1) test_sshll2_4s_8h_7(TyH);
   7177    if (1) test_sshll2_4s_8h_15(TyH);
   7178    if (1) test_sshll_8h_8b_0(TyB);
   7179    if (1) test_sshll_8h_8b_3(TyB);
   7180    if (1) test_sshll_8h_8b_7(TyB);
   7181    if (1) test_sshll2_8h_16b_0(TyB);
   7182    if (1) test_sshll2_8h_16b_3(TyB);
   7183    if (1) test_sshll2_8h_16b_7(TyB);
   7184    if (1) test_ushll_2d_2s_0(TyS);
   7185    if (1) test_ushll_2d_2s_15(TyS);
   7186    if (1) test_ushll_2d_2s_31(TyS);
   7187    if (1) test_ushll2_2d_4s_0(TyS);
   7188    if (1) test_ushll2_2d_4s_15(TyS);
   7189    if (1) test_ushll2_2d_4s_31(TyS);
   7190    if (1) test_ushll_4s_4h_0(TyH);
   7191    if (1) test_ushll_4s_4h_7(TyH);
   7192    if (1) test_ushll_4s_4h_15(TyH);
   7193    if (1) test_ushll2_4s_8h_0(TyH);
   7194    if (1) test_ushll2_4s_8h_7(TyH);
   7195    if (1) test_ushll2_4s_8h_15(TyH);
   7196    if (1) test_ushll_8h_8b_0(TyB);
   7197    if (1) test_ushll_8h_8b_3(TyB);
   7198    if (1) test_ushll_8h_8b_7(TyB);
   7199    if (1) test_ushll2_8h_16b_0(TyB);
   7200    if (1) test_ushll2_8h_16b_3(TyB);
   7201    if (1) test_ushll2_8h_16b_7(TyB);
   7202 
   7203    // suqadd  d,s,h,b
   7204    // usqadd  d,s,h,b
   7205    if (1) test_suqadd_d_d(TyD);
   7206    if (1) test_suqadd_s_s(TyS);
   7207    if (1) test_suqadd_h_h(TyH);
   7208    if (1) test_suqadd_b_b(TyB);
   7209    if (1) test_usqadd_d_d(TyD);
   7210    if (1) test_usqadd_s_s(TyS);
   7211    if (1) test_usqadd_h_h(TyH);
   7212    if (1) test_usqadd_b_b(TyB);
   7213 
   7214    // suqadd  2d,4s,2s,8h,4h,16b,8b
   7215    // usqadd  2d,4s,2s,8h,4h,16b,8b
   7216    if (1) test_suqadd_2d_2d(TyD);
   7217    if (1) test_suqadd_4s_4s(TyS);
   7218    if (1) test_suqadd_2s_2s(TyS);
   7219    if (1) test_suqadd_8h_8h(TyH);
   7220    if (1) test_suqadd_4h_4h(TyH);
   7221    if (1) test_suqadd_16b_16b(TyB);
   7222    if (1) test_suqadd_8b_8b(TyB);
   7223    if (1) test_usqadd_2d_2d(TyD);
   7224    if (1) test_usqadd_4s_4s(TyS);
   7225    if (1) test_usqadd_2s_2s(TyS);
   7226    if (1) test_usqadd_8h_8h(TyH);
   7227    if (1) test_usqadd_4h_4h(TyH);
   7228    if (1) test_usqadd_16b_16b(TyB);
   7229    if (1) test_usqadd_8b_8b(TyB);
   7230 
   7231    // tbl     8b_{16b}_8b, 16b_{16b}_16b
   7232    // tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   7233    // tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   7234    // tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   7235    if (1) test_tbl_16b_1reg(TyB);
   7236    if (1) test_tbl_16b_2reg(TyB);
   7237    if (1) test_tbl_16b_3reg(TyB);
   7238    if (1) test_tbl_16b_4reg(TyB);
   7239    if (1) test_tbl_8b_1reg(TyB);
   7240    if (1) test_tbl_8b_2reg(TyB);
   7241    if (1) test_tbl_8b_3reg(TyB);
   7242    if (1) test_tbl_8b_4reg(TyB);
   7243 
   7244    // tbx     8b_{16b}_8b, 16b_{16b}_16b
   7245    // tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   7246    // tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   7247    // tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   7248    if (1) test_tbx_16b_1reg(TyB);
   7249    if (1) test_tbx_16b_2reg(TyB);
   7250    if (1) test_tbx_16b_3reg(TyB);
   7251    if (1) test_tbx_16b_4reg(TyB);
   7252    if (1) test_tbx_8b_1reg(TyB);
   7253    if (1) test_tbx_8b_2reg(TyB);
   7254    if (1) test_tbx_8b_3reg(TyB);
   7255    if (1) test_tbx_8b_4reg(TyB);
   7256 
   7257    // trn1    2d,4s,2s,8h,4h,16b,8b
   7258    // trn2    2d,4s,2s,8h,4h,16b,8b
   7259    if (1) test_trn1_2d_2d_2d(TyD);
   7260    if (1) test_trn1_4s_4s_4s(TyS);
   7261    if (1) test_trn1_2s_2s_2s(TyS);
   7262    if (1) test_trn1_8h_8h_8h(TyH);
   7263    if (1) test_trn1_4h_4h_4h(TyH);
   7264    if (1) test_trn1_16b_16b_16b(TyB);
   7265    if (1) test_trn1_8b_8b_8b(TyB);
   7266    if (1) test_trn2_2d_2d_2d(TyD);
   7267    if (1) test_trn2_4s_4s_4s(TyS);
   7268    if (1) test_trn2_2s_2s_2s(TyS);
   7269    if (1) test_trn2_8h_8h_8h(TyH);
   7270    if (1) test_trn2_4h_4h_4h(TyH);
   7271    if (1) test_trn2_16b_16b_16b(TyB);
   7272    if (1) test_trn2_8b_8b_8b(TyB);
   7273 
   7274    // urecpe      4s,2s
   7275    // ursqrte     4s,2s
   7276    if (1) test_urecpe_4s_4s(TyS);
   7277    if (1) test_urecpe_2s_2s(TyS);
   7278    if (1) test_ursqrte_4s_4s(TyS);
   7279    if (1) test_ursqrte_2s_2s(TyS);
   7280 
   7281    // uzp1      2d,4s,2s,8h,4h,16b,8b
   7282    // uzp2      2d,4s,2s,8h,4h,16b,8b
   7283    // zip1      2d,4s,2s,8h,4h,16b,8b
   7284    // zip2      2d,4s,2s,8h,4h,16b,8b
   7285    if (1) test_uzp1_2d_2d_2d(TyD);
   7286    if (1) test_uzp1_4s_4s_4s(TyS);
   7287    if (1) test_uzp1_2s_2s_2s(TyS);
   7288    if (1) test_uzp1_8h_8h_8h(TyH);
   7289    if (1) test_uzp1_4h_4h_4h(TyH);
   7290    if (1) test_uzp1_16b_16b_16b(TyB);
   7291    if (1) test_uzp1_8b_8b_8b(TyB);
   7292    if (1) test_uzp2_2d_2d_2d(TyD);
   7293    if (1) test_uzp2_4s_4s_4s(TyS);
   7294    if (1) test_uzp2_2s_2s_2s(TyS);
   7295    if (1) test_uzp2_8h_8h_8h(TyH);
   7296    if (1) test_uzp2_4h_4h_4h(TyH);
   7297    if (1) test_uzp2_16b_16b_16b(TyB);
   7298    if (1) test_uzp2_8b_8b_8b(TyB);
   7299    if (1) test_zip1_2d_2d_2d(TyD);
   7300    if (1) test_zip1_4s_4s_4s(TyS);
   7301    if (1) test_zip1_2s_2s_2s(TyS);
   7302    if (1) test_zip1_8h_8h_8h(TyH);
   7303    if (1) test_zip1_4h_4h_4h(TyH);
   7304    if (1) test_zip1_16b_16b_16b(TyB);
   7305    if (1) test_zip1_8b_8b_8b(TyB);
   7306    if (1) test_zip2_2d_2d_2d(TyD);
   7307    if (1) test_zip2_4s_4s_4s(TyS);
   7308    if (1) test_zip2_2s_2s_2s(TyS);
   7309    if (1) test_zip2_8h_8h_8h(TyH);
   7310    if (1) test_zip2_4h_4h_4h(TyH);
   7311    if (1) test_zip2_16b_16b_16b(TyB);
   7312    if (1) test_zip2_8b_8b_8b(TyB);
   7313 
   7314    // xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7315    if (1) test_xtn_2s_2d(TyD);
   7316    if (1) test_xtn2_4s_2d(TyD);
   7317    if (1) test_xtn_4h_4s(TyS);
   7318    if (1) test_xtn2_8h_4s(TyS);
   7319    if (1) test_xtn_8b_8h(TyH);
   7320    if (1) test_xtn2_16b_8h(TyH);
   7321 
   7322    // ======================== MEM ========================
   7323 
   7324    // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
   7325 
   7326    // ld1  (multiple 1-element structures to 1/2/3/4 regs)
   7327    // ld1  (single 1-element structure to one lane of 1 reg)
   7328    // ld1r (single 1-element structure and rep to all lanes of 1 reg)
   7329 
   7330    // ld2  (multiple 2-element structures to 2 regs)
   7331    // ld2  (single 2-element structure to one lane of 2 regs)
   7332    // ld2r (single 2-element structure and rep to all lanes of 2 regs)
   7333 
   7334    // ld3  (multiple 3-element structures to 3 regs)
   7335    // ld3  (single 3-element structure to one lane of 3 regs)
   7336    // ld3r (single 3-element structure and rep to all lanes of 3 regs)
   7337 
   7338    // ld4  (multiple 4-element structures to 4 regs)
   7339    // ld4  (single 4-element structure to one lane of 4 regs)
   7340    // ld4r (single 4-element structure and rep to all lanes of 4 regs)
   7341 
   7342    // ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   7343    //       addr = reg + simm7 * reg_size
   7344 
   7345    // ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   7346    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7347 
   7348    // ldr   q,d,s,h,b from addr
   7349    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7350 
   7351    // ldr   q,d,s from  pc+#imm19
   7352 
   7353    // ldr   q,d,s,h,b from addr
   7354    //       addr = [Xn|SP, R <extend> <shift>]
   7355 
   7356    // ldur  q,d,s,h,b from addr
   7357    //       addr = [Xn|SP,#imm] (unscaled offset)
   7358 
   7359    // st1 (multiple 1-element structures from 1/2/3/4 regs)
   7360    // st1 (single 1-element structure for 1 lane of 1 reg)
   7361 
   7362    // st2 (multiple 2-element structures from 2 regs)
   7363    // st2 (single 2-element structure from 1 lane of 2 regs)
   7364 
   7365    // st3 (multiple 3-element structures from 3 regs)
   7366    // st3 (single 3-element structure from 1 lane of 3 regs)
   7367 
   7368    // st4 (multiple 4-element structures from 4 regs)
   7369    // st4 (single 4-element structure from one lane of 4 regs)
   7370 
   7371    // stnp q_q_addr, d_d_addr, s_s_addr
   7372    //      addr = [Xn|SP, #imm]
   7373 
   7374    // stp  q_q_addr, d_d_addr, s_s_addr
   7375    //      addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   7376 
   7377    // str  q,d,s,h,b_addr
   7378    //      addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   7379 
   7380    // str   q,d,s,h,b_addr
   7381    //       addr = [Xn|SP, R <extend> <shift>]
   7382 
   7383    // stur  q,d,s,h,b_addr
   7384    //       addr = [Xn|SP,#imm] (unscaled offset)
   7385 
   7386    // ======================== CRYPTO ========================
   7387 
   7388    // aesd       16b (aes single round decryption)
   7389    // aese       16b (aes single round encryption)
   7390    // aesimc     16b (aes inverse mix columns)
   7391    // aesmc      16b (aes mix columns)
   7392    if (0) test_aesd_16b_16b(TyNONE);
   7393    if (0) test_aese_16b_16b(TyNONE);
   7394    if (0) test_aesimc_16b_16b(TyNONE);
   7395    if (0) test_aesmc_16b_16b(TyNONE);
   7396 
   7397    // sha1c      q_s_4s
   7398    // sha1h      s_s
   7399    // sha1m      q_s_4s
   7400    // sha1p      q_s_4s
   7401    // sha1su0    4s_4s_4s
   7402    // sha1su1    4s_4s
   7403    if (0) test_sha1c_q_s_4s(TyNONE);
   7404    if (0) test_sha1h_s_s(TyNONE);
   7405    if (0) test_sha1m_q_s_4s(TyNONE);
   7406    if (0) test_sha1p_q_s_4s(TyNONE);
   7407    if (0) test_sha1su0_4s_4s_4s(TyNONE);
   7408    if (0) test_sha1su1_4s_4s(TyNONE);
   7409 
   7410    // sha256h2   q_q_4s
   7411    // sha256h    q_q_4s
   7412    // sha256su0  4s_4s
   7413    // sha256su1  4s_4s_4s
   7414    if (0) test_sha256h2_q_q_4s(TyNONE);
   7415    if (0) test_sha256h_q_q_4s(TyNONE);
   7416    if (0) test_sha256su0_4s_4s(TyNONE);
   7417    if (0) test_sha256su1_4s_4s_4s(TyNONE);
   7418 
   7419    return 0;
   7420 }
   7421 
   7422 
   7423 /* ---------------------------------------------------------------- */
   7424 /* -- Alphabetical list of insns                                 -- */
   7425 /* ---------------------------------------------------------------- */
   7426 /*
   7427    abs      d
   7428    abs      2d,4s,2s,8h,4h,16b,8b
   7429    add      d
   7430    add      2d,4s,2s,8h,4h,16b,8b
   7431    addhn    2s.2d.2d, 4s.2d.2d, h_from_s and b_from_h (add and get high half)
   7432    addp     d (add pairs, across)
   7433    addp     2d,4s,2s,8h,4h,16b,8b
   7434    addv     4s,8h,4h,16b,8b (reduce across vector)
   7435    aesd     16b (aes single round decryption)
   7436    aese     16b (aes single round encryption)
   7437    aesimc   16b (aes inverse mix columns)
   7438    aesmc    16b (aes mix columns)
   7439    and      16b,8b
   7440 
   7441    bic      4s,2s,8h,4h (vector, imm)
   7442    also movi, mvni, orr
   7443 
   7444    bic      16b,8b (vector,reg) (bit clear)
   7445    bif      16b,8b (vector) (bit insert if false)
   7446    bit      16b,8b (vector) (bit insert if true)
   7447    bsl      16b,8b (vector) (bit select)
   7448 
   7449    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   7450    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   7451 
   7452    cmeq     d
   7453    cmeq     2d,4s,2s,8h,4h,16b,8b
   7454    cmeq_z   d
   7455    cmeq_z   2d,4s,2s,8h,4h,16b,8b
   7456 
   7457    cmge     d
   7458    cmge     2d,4s,2s,8h,4h,16b,8b
   7459    cmge_z   d
   7460    cmge_z   2d,4s,2s,8h,4h,16b,8b
   7461 
   7462    cmgt     d
   7463    cmgt     2d,4s,2s,8h,4h,16b,8b
   7464    cmgt_z   d
   7465    cmgt_z   2d,4s,2s,8h,4h,16b,8b
   7466 
   7467    cmhi     d
   7468    cmhi     2d,4s,2s,8h,4h,16b,8b
   7469 
   7470    cmhs     d
   7471    cmhs     2d,4s,2s,8h,4h,16b,8b
   7472 
   7473    cmle_z   d
   7474    cmle_z   2d,4s,2s,8h,4h,16b,8b
   7475 
   7476    cmlt_z   d
   7477    cmlt_z   2d,4s,2s,8h,4h,16b,8b
   7478 
   7479    cmtst    d
   7480    cmtst    2d,4s,2s,8h,4h,16b,8b
   7481 
   7482    cnt      16b,8b (population count per byte)
   7483 
   7484    dup      d,s,h,b (vec elem to scalar)
   7485    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   7486    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   7487 
   7488    eor      16b,8b (vector)
   7489    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   7490 
   7491    fabd     d,s
   7492    fabd     2d,4s,2s
   7493 
   7494    fabs     d,s
   7495    fabs     2d,4s,2s
   7496 
   7497    facge    s,d  (floating abs compare GE)
   7498    facge    2d,4s,2s
   7499 
   7500    facgt    s,d  (floating abs compare GT)
   7501    facgt    2d,4s,2s
   7502 
   7503    fadd     d,s
   7504    fadd     2d,4s,2s
   7505 
   7506    faddp    d,s (floating add pair)
   7507    faddp    2d,4s,2s
   7508 
   7509    fccmp    d,s (floating point conditional quiet compare)
   7510    fccmpe   d,s (floating point conditional signaling compare)
   7511 
   7512    fcmeq    d,s
   7513    fcmeq    2d,4s,2s
   7514    fcmeq_z  d,s
   7515    fcmeq_z  2d,4s,2s
   7516 
   7517    fcmge    d,s
   7518    fcmge    2d,4s,2s
   7519    fcmge_z  d,s
   7520    fcmge_z  2d,4s,2s
   7521 
   7522    fcmgt    d,s
   7523    fcmgt    2d,4s,2s
   7524    fcmgt_z  d,s
   7525    fcmgt_z  2d,4s,2s
   7526 
   7527    fcmle_z  d,s
   7528    fcmle_z  2d,4s,2s
   7529 
   7530    fcmlt_z  d,s
   7531    fcmlt_z  2d,4s,2s
   7532 
   7533    fcmp     d,s (floating point quiet, set flags)
   7534    fcmp_z   d,s
   7535    fcmpe    d,s (floating point signaling, set flags)
   7536    fcmpe_z  d,s
   7537 
   7538    fcsel    d,s (fp cond select)
   7539 
   7540    fcvt     s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   7541 
   7542    fcvtas   d,s  (fcvt to signed int, nearest, ties away)
   7543    fcvtas   2d,4s,2s
   7544    fcvtas   w_s,x_s,w_d,x_d
   7545 
   7546    fcvtau   d,s  (fcvt to unsigned int, nearest, ties away)
   7547    fcvtau   2d,4s,2s
   7548    fcvtau   w_s,x_s,w_d,x_d
   7549 
   7550    fcvtl{2} 4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   7551 
   7552    fcvtms   d,s  (fcvt to signed int, minus inf)
   7553    fcvtms   2d,4s,2s
   7554    fcvtms   w_s,x_s,w_d,x_d
   7555 
   7556    fcvtmu   d,s  (fcvt to unsigned int, minus inf)
   7557    fcvtmu   2d,4s,2s
   7558    fcvtmu   w_s,x_s,w_d,x_d
   7559 
   7560    fcvtn{2} 4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   7561 
   7562    fcvtns   d,s  (fcvt to signed int, nearest)
   7563    fcvtns   2d,4s,2s
   7564    fcvtns   w_s,x_s,w_d,x_d
   7565 
   7566    fcvtnu   d,s  (fcvt to unsigned int, nearest)
   7567    fcvtnu   2d,4s,2s
   7568    fcvtnu   w_s,x_s,w_d,x_d
   7569 
   7570    fcvtps   d,s  (fcvt to signed int, plus inf)
   7571    fcvtps   2d,4s,2s
   7572    fcvtps   w_s,x_s,w_d,x_d
   7573 
   7574    fcvtpu   d,s  (fcvt to unsigned int, plus inf)
   7575    fcvtpu   2d,4s,2s
   7576    fcvtpu   w_s,x_s,w_d,x_d
   7577 
   7578    fcvtxn   s_d (fcvt to lower prec narrow, rounding to odd)
   7579    fcvtxn   2s_2d,4s_2d
   7580 
   7581    fcvtzs   s,d (fcvt to signed fixedpt, to zero) (w/ #fbits)
   7582    fcvtzs   2d,4s,2s
   7583 
   7584    fcvtzs   s,d (fcvt to signed integer, to zero)
   7585    fcvtzs   2d,4s,2s
   7586 
   7587    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed fixedpt, to zero) (w/ #fbits)
   7588 
   7589    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed integer, to zero)
   7590 
   7591    fcvtzu   s,d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   7592    fcvtzu   2d,4s,2s
   7593 
   7594    fcvtzu   s,d (fcvt to unsigned integer, to zero)
   7595    fcvtzu   2d,4s,2s
   7596 
   7597    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   7598 
   7599    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned integer, to zero)
   7600 
   7601    fdiv     d,s
   7602    fdiv     2d,4s,2s
   7603 
   7604    fmadd    d,s
   7605    fnmadd   d,s
   7606    fnmsub   d,s
   7607    fnmul    d,s
   7608 
   7609    fmax     d,s
   7610    fmin     d,s
   7611 
   7612    fmax     2d,4s,2s
   7613    fmin     2d,4s,2s
   7614 
   7615    fmaxnm   d,s ("max number")
   7616    fminnm   d,s
   7617 
   7618    fmaxnm   2d,4s,2s
   7619    fminnm   2d,4s,2s
   7620 
   7621    fmaxnmp  d_2d,s_2s ("max number pairwise")
   7622    fminnmp  d_2d,s_2s
   7623 
   7624    fmaxnmp  2d,4s,2s
   7625    fminnmp  2d,4s,2s
   7626 
   7627    fmaxnmv  s_4s (maxnum across vector)
   7628    fminnmv  s_4s
   7629 
   7630    fmaxp    d_2d,s_2s (max of a pair)
   7631    fminp    d_2d,s_2s (min of a pair)
   7632 
   7633    fmaxp    2d,4s,2s  (max pairwise)
   7634    fminp    2d,4s,2s
   7635 
   7636    fmaxv    s_4s (max across vector)
   7637    fminv    s_4s
   7638 
   7639    fmla     d_d_d[],s_s_s[] (by element)
   7640    fmla     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7641 
   7642    fmla     2d,4s,2s
   7643 
   7644    fmls     d_d_d[],s_s_s[] (by element)
   7645    fmls     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7646 
   7647    fmls     2d,4s,2s
   7648 
   7649    fmov     2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   7650 
   7651    fmov     d_d,s_s
   7652 
   7653    fmov     s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   7654 
   7655    fmov     d,s #imm
   7656 
   7657    fmsub    d,s
   7658 
   7659    fmul     d_d_d[],s_s_s[]
   7660    fmul     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7661 
   7662    fmul     2d,4s,2s
   7663    fmul     d,s
   7664 
   7665    fmulx    d_d_d[],s_s_s[]
   7666    fmulx    2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7667 
   7668    fmulx    d,s
   7669    fmulx    2d,4s,2s
   7670 
   7671    fneg     d,s
   7672    fneg     2d,4s,2s
   7673 
   7674    frecpe   d,s (recip estimate)
   7675    frecpe   2d,4s,2s
   7676 
   7677    frecps   d,s (recip step)
   7678    frecps   2d,4s,2s
   7679 
   7680    frecpx   d,s (recip exponent)
   7681 
   7682    frinta   2d,4s,2s (round to integral, nearest away)
   7683    frinta   d,s
   7684 
   7685    frinti   2d,4s,2s (round to integral, per FPCR)
   7686    frinti   d,s
   7687 
   7688    frintm   2d,4s,2s (round to integral, minus inf)
   7689    frintm   d,s
   7690 
   7691    frintn   2d,4s,2s (round to integral, nearest, to even)
   7692    frintn   d,s
   7693 
   7694    frintp   2d,4s,2s (round to integral, plus inf)
   7695    frintp   d,s
   7696 
   7697    frintx   2d,4s,2s (round to integral exact, per FPCR)
   7698    frintx   d,s
   7699 
   7700    frintz   2d,4s,2s (round to integral, zero)
   7701    frintz   d,s
   7702 
   7703    frsqrte  d,s (est)
   7704    frsqrte  2d,4s,2s
   7705 
   7706    frsqrts  d,s (step)
   7707    frsqrts  2d,4s,2s
   7708 
   7709    fsqrt    d,s
   7710    fsqrt    2d,4s,2s
   7711 
   7712    fsub     d,s
   7713    fsub     2d,4s,2s
   7714 
   7715    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   7716 
   7717    ins      d[]_x, s[]_w, h[]_w, b[]_w
   7718 
   7719    ld1  (multiple 1-element structures to 1/2/3/4 regs)
   7720    ld1  (single 1-element structure to one lane of 1 reg)
   7721    ld1r (single 1-element structure and rep to all lanes of 1 reg)
   7722 
   7723    ld2  (multiple 2-element structures to 2 regs)
   7724    ld2  (single 2-element structure to one lane of 2 regs)
   7725    ld2r (single 2-element structure and rep to all lanes of 2 regs)
   7726 
   7727    ld3  (multiple 3-element structures to 3 regs)
   7728    ld3  (single 3-element structure to one lane of 3 regs)
   7729    ld3r (single 3-element structure and rep to all lanes of 3 regs)
   7730 
   7731    ld4  (multiple 4-element structures to 4 regs)
   7732    ld4  (single 4-element structure to one lane of 4 regs)
   7733    ld4r (single 4-element structure and rep to all lanes of 4 regs)
   7734 
   7735    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   7736          addr = reg + simm7 * reg_size
   7737 
   7738    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   7739          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7740 
   7741    ldr   q,d,s,h,b from addr
   7742          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7743 
   7744    ldr   q,d,s from  pc+#imm19
   7745 
   7746    ldr   q,d,s,h,b from addr
   7747          addr = [Xn|SP, R <extend> <shift>]
   7748 
   7749    ldur  q,d,s,h,b from addr
   7750          addr = [Xn|SP,#imm] (unscaled offset)
   7751 
   7752    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7753    mla   4s,2s,8h,4h,16b,8b
   7754 
   7755    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7756    mls   4s,2s,8h,4h,16b,8b
   7757 
   7758    movi  16b,8b   #imm8, LSL #0
   7759    movi  8h,4h    #imm8, LSL #0 or 8
   7760    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   7761    movi  4s,2s    #imm8, MSL #8 or 16
   7762    movi  d,       #imm64
   7763    movi  2d,      #imm64
   7764 
   7765    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7766    mul   4s,2s,8h,4h,16b,8b
   7767 
   7768    mvni  8h,4h    #imm8, LSL #0 or 8
   7769    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   7770    mvni  4s,2s    #imm8, MSL #8 or 16
   7771 
   7772    neg   d
   7773    neg   2d,4s,2s,8h,4h,16b,8b
   7774 
   7775    not   16b,8b
   7776 
   7777    orn   16b,8b
   7778 
   7779    orr   8h,4h   #imm8, LSL #0 or 8
   7780    orr   4s,2s   #imm8, LSL #0, 8, 16 or 24
   7781 
   7782    orr   16b,8b
   7783 
   7784    pmul  16b,8b
   7785 
   7786    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
   7787 
   7788    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   7789 
   7790    rbit    16b,8b
   7791    rev16   16b,8b
   7792    rev32   16b,8b,8h,4h
   7793    rev64   16b,8b,8h,4h,4s,2s
   7794 
   7795    rshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   7796 
   7797    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   7798 
   7799    saba      16b,8b,8h,4h,4s,2s
   7800    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7801 
   7802    sabd      16b,8b,8h,4h,4s,2s
   7803    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7804 
   7805    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   7806 
   7807    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7808 
   7809    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   7810 
   7811    saddlv    h_16b/8b, s_8h/4h, d_4s
   7812 
   7813    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   7814 
   7815    scvtf     d,s        _#fbits
   7816    scvtf     2d,4s,2s   _#fbits
   7817 
   7818    scvtf     d,s
   7819    scvtf     2d,4s,2s
   7820 
   7821    scvtf     s_w, d_w, s_x, d_x,   _#fbits
   7822    scvtf     s_w, d_w, s_x, d_x
   7823 
   7824    sha1c       q_s_4s
   7825    sha1h       s_s
   7826    sha1m       q_s_4s
   7827    sha1p       q_s_4s
   7828    sha1su0     4s_4s_4s
   7829    sha1su1     4s_4s
   7830    sha256h2    q_q_4s
   7831    sha256h     q_q_4s
   7832    sha256su0   4s_4s
   7833    sha256su1   4s_4s_4s
   7834 
   7835    shadd       16b,8b,8h,4h,4s,2s
   7836 
   7837    shl         d_#imm
   7838    shl         16b,8b,8h,4h,4s,2s,2d  _#imm
   7839 
   7840    shll{2}   8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   7841 
   7842    shrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   7843 
   7844    shsub       16b,8b,8h,4h,4s,2s
   7845 
   7846    sli         d_#imm
   7847    sli         2d,4s,2s,8h,4h,16b,8b  _#imm
   7848 
   7849    smax        4s,2s,8h,4h,16b,8b
   7850 
   7851    smaxp       4s,2s,8h,4h,16b,8b
   7852 
   7853    smaxv       s_4s,h_8h,h_4h,b_16b,b_8b
   7854 
   7855    smin        4s,2s,8h,4h,16b,8b
   7856 
   7857    sminp       4s,2s,8h,4h,16b,8b
   7858 
   7859    sminv       s_4s,h_8h,h_4h,b_16b,b_8b
   7860 
   7861    smlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7862    smlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7863 
   7864    smlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7865    smlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7866 
   7867    smov        w_b[], w_h[], x_b[], x_h[], x_s[]
   7868 
   7869    smull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7870    smull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7871 
   7872    sqabs       d,s,h,b
   7873    sqabs       2d,4s,2s,8h,4h,16b,8b
   7874 
   7875    sqadd       d,s,h,b
   7876    sqadd       2d,4s,2s,8h,4h,16b,8b
   7877 
   7878    sqdmlal     d_s_s[], s_h_h[]
   7879    sqdmlal{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7880 
   7881    sqdmlal     d_s_s, s_h_h
   7882    sqdmlal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7883 
   7884    sqdmlsl     d_s_s[], s_h_h[]
   7885    sqdmlsl{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7886 
   7887    sqdmlsl     d_s_s, s_h_h
   7888    sqdmlsl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7889 
   7890    sqdmulh     s_s_s[], h_h_h[]
   7891    sqdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   7892 
   7893    sqdmulh     h,s
   7894    sqdmulh     4s,2s,8h,4h
   7895 
   7896    sqdmull     d_s_s[], s_h_h[]
   7897    sqdmull{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7898 
   7899    sqdmull     d_s_s,s_h_h
   7900    sqdmull{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7901 
   7902    sqneg       d,s,h,b
   7903    sqneg       2d,4s,2s,8h,4h,16b,8b
   7904 
   7905    sqrdmulh    s_s_s[], h_h_h[]
   7906    sqrdmulh    4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   7907 
   7908    sqrdmulh    h,s
   7909    sqrdmulh    4s,2s,8h,4h
   7910 
   7911    sqrshl      d,s,h,b
   7912    sqrshl      2d,4s,2s,8h,4h,16b,8b
   7913 
   7914    sqrshrn     s_d, h_s, b_h   #imm
   7915    sqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7916 
   7917    sqrshrun     s_d, h_s, b_h   #imm
   7918    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7919 
   7920    sqshl        d,s,h,b   _#imm
   7921    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   7922 
   7923    sqshl        d,s,h,b
   7924    sqshl        2d,4s,2s,8h,4h,16b,8b
   7925 
   7926    sqshlu       d,s,h,b  _#imm
   7927    sqshlu       2d,4s,2s,8h,4h,16b,8b  _#imm
   7928 
   7929    sqshrn       s_d, h_s, b_h   #imm
   7930    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7931 
   7932    sqshrun      s_d, h_s, b_h   #imm
   7933    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7934 
   7935    sqsub       d,s,h,b
   7936    sqsub       2d,4s,2s,8h,4h,16b,8b
   7937 
   7938    sqxtn       s_d,h_s,b_h
   7939    sqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7940 
   7941    sqxtun      s_d,h_s,b_h
   7942    sqxtun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7943 
   7944    srhadd      4s,2s,8h,4h,16b,8b
   7945 
   7946    sri         d_#imm
   7947    sri         2d,4s,2s,8h,4h,16b,8b  _#imm
   7948 
   7949    srshl (reg) d
   7950    srshl       2d,4s,2s,8h,4h,16b,8b
   7951 
   7952    srshr (imm) d
   7953    srshr       2d,4s,2s,8h,4h,16b,8b
   7954 
   7955    srsra (imm) d
   7956    srsra       2d,4s,2s,8h,4h,16b,8b
   7957 
   7958    sshl (reg)  d
   7959    sshl        2d,4s,2s,8h,4h,16b,8b
   7960 
   7961    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7962 
   7963    sshr (imm)  d
   7964    sshr        2d,4s,2s,8h,4h,16b,8b
   7965 
   7966    ssra (imm)  d
   7967    ssra        2d,4s,2s,8h,4h,16b,8b
   7968 
   7969    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7970 
   7971    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   7972 
   7973    st1 (multiple 1-element structures from 1/2/3/4 regs)
   7974    st1 (single 1-element structure from 1 lane of 1 reg)
   7975 
   7976    st2 (multiple 2-element structures from 2 regs)
   7977    st2 (single 2-element structure from 1 lane of 2 regs)
   7978 
   7979    st3 (multiple 3-element structures from 3 regs)
   7980    st3 (single 3-element structure from 1 lane of 3 regs)
   7981 
   7982    st4 (multiple 4-element structures from 4 regs)
   7983    st4 (single 4-element structure from one lane of 4 regs)
   7984 
   7985    stnp q_q_addr, d_d_addr, s_s_addr
   7986         addr = [Xn|SP, #imm]
   7987 
   7988    stp  q_q_addr, d_d_addr, s_s_addr
   7989         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   7990 
   7991    str  q,d,s,h,b_addr
   7992         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   7993 
   7994    str   q,d,s,h,b_addr
   7995          addr = [Xn|SP, R <extend> <shift>]
   7996 
   7997    stur  q,d,s,h,b_addr
   7998          addr = [Xn|SP,#imm] (unscaled offset)
   7999 
   8000    sub   d
   8001    sub   2d,4s,2s,8h,4h,16b,8b
   8002 
   8003    subhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8004 
   8005    suqadd  d,s,h,b
   8006    suqadd  2d,4s,2s,8h,4h,16b,8b
   8007 
   8008    tbl     8b_{16b}_8b, 16b_{16b}_16b
   8009    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8010    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8011    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8012 
   8013    tbx     8b_{16b}_8b, 16b_{16b}_16b
   8014    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8015    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8016    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8017 
   8018    trn1    2d,4s,2s,8h,4h,16b,8b
   8019    trn2    2d,4s,2s,8h,4h,16b,8b
   8020 
   8021    uaba      16b,8b,8h,4h,4s,2s
   8022    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8023 
   8024    uabd      16b,8b,8h,4h,4s,2s
   8025    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8026 
   8027    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8028 
   8029    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8030 
   8031    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8032 
   8033    uaddlv    h_16b/8b, s_8h/4h, d_4s
   8034 
   8035    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8036 
   8037    ucvtf     d,s        _#fbits
   8038    ucvtf     2d,4s,2s   _#fbits
   8039 
   8040    ucvtf     d,s
   8041    ucvtf     2d,4s,2s
   8042 
   8043    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   8044    ucvtf     s_w, d_w, s_x, d_x
   8045 
   8046    uhadd       16b,8b,8h,4h,4s,2s
   8047 
   8048    uhsub       16b,8b,8h,4h,4s,2s
   8049 
   8050    umax        4s,2s,8h,4h,16b,8b
   8051 
   8052    umaxp       4s,2s,8h,4h,16b,8b
   8053 
   8054    umaxv       s_4s,h_8h,h_4h,b_16b,b_8b
   8055 
   8056    umin        4s,2s,8h,4h,16b,8b
   8057 
   8058    uminp       4s,2s,8h,4h,16b,8b
   8059 
   8060    uminv       s_4s,h_8h,h_4h,b_16b,b_8b
   8061 
   8062    umlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8063    umlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8064 
   8065    umlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8066    umlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8067 
   8068    umov        w_b[], w_h[], x_b[], x_h[], x_s[]
   8069 
   8070    umull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8071    umull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8072 
   8073    uqadd       d,s,h,b
   8074    uqadd       2d,4s,2s,8h,4h,16b,8b
   8075 
   8076    uqrshl      d,s,h,b
   8077    uqrshl      2d,4s,2s,8h,4h,16b,8b
   8078 
   8079    uqrshrn     s_d, h_s, b_h   #imm
   8080    uqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8081 
   8082    uqshl        d,s,h,b   _#imm
   8083    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8084 
   8085    uqshl        d,s,h,b
   8086    uqshl        2d,4s,2s,8h,4h,16b,8b
   8087 
   8088    uqshrn       s_d, h_s, b_h   #imm
   8089    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8090 
   8091    uqsub       d,s,h,b
   8092    uqsub       2d,4s,2s,8h,4h,16b,8b
   8093 
   8094    uqxtn       s_d,h_s,b_h
   8095    uqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8096 
   8097    urecpe      4s,2s
   8098 
   8099    urhadd      4s,2s,8h,4h,16b,8b
   8100 
   8101    urshl (reg) d
   8102    urshl       2d,4s,2s,8h,4h,16b,8b
   8103 
   8104    urshr (imm) d
   8105    urshr       2d,4s,2s,8h,4h,16b,8b
   8106 
   8107    ursqrte     4s,2s
   8108 
   8109    ursra (imm) d
   8110    ursra       2d,4s,2s,8h,4h,16b,8b
   8111 
   8112    ushl (reg)  d
   8113    ushl        2d,4s,2s,8h,4h,16b,8b
   8114 
   8115    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8116 
   8117    ushr (imm)  d
   8118    ushr        2d,4s,2s,8h,4h,16b,8b
   8119 
   8120    usqadd      d,s,h,b
   8121    usqadd      2d,4s,2s,8h,4h,16b,8b
   8122 
   8123    usra (imm)  d
   8124    usra        2d,4s,2s,8h,4h,16b,8b
   8125 
   8126    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8127 
   8128    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8129 
   8130    uzp1      2d,4s,2s,8h,4h,16b,8b
   8131    uzp2      2d,4s,2s,8h,4h,16b,8b
   8132 
   8133    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8134 
   8135    zip1      2d,4s,2s,8h,4h,16b,8b
   8136    zip2      2d,4s,2s,8h,4h,16b,8b
   8137 */
   8138 
   8139 
   8140 /* ---------------------------------------------------------------- */
   8141 /* -- List of insns, grouped somewhat by laneage configuration   -- */
   8142 /* ---------------------------------------------------------------- */
   8143 /*
   8144    ======================== FP ========================
   8145 
   8146    fabs      d,s
   8147    fabs      2d,4s,2s
   8148 
   8149    fneg      d,s
   8150    fneg      2d,4s,2s
   8151 
   8152    fsqrt     d,s
   8153    fsqrt     2d,4s,2s
   8154 
   8155    fadd      d,s
   8156    fsub      d,s
   8157 
   8158    fadd      2d,4s,2s
   8159    fsub      2d,4s,2s
   8160 
   8161    fabd      d,s
   8162    fabd      2d,4s,2s
   8163 
   8164    faddp     d,s (floating add pair)
   8165    faddp     2d,4s,2s
   8166 
   8167    fccmp     d,s (floating point conditional quiet compare)
   8168    fccmpe    d,s (floating point conditional signaling compare)
   8169 
   8170    fcmeq     d,s
   8171    fcmge     d,s
   8172    fcmgt     d,s
   8173    facgt     d,s  (floating abs compare GT)
   8174    facge     d,s  (floating abs compare GE)
   8175 
   8176    fcmeq     2d,4s,2s
   8177    fcmge     2d,4s,2s
   8178    fcmgt     2d,4s,2s
   8179    facge     2d,4s,2s
   8180    facgt     2d,4s,2s
   8181 
   8182    fcmeq_z   d,s
   8183    fcmge_z   d,s
   8184    fcmgt_z   d,s
   8185    fcmle_z   d,s
   8186    fcmlt_z   d,s
   8187 
   8188    fcmeq_z   2d,4s,2s
   8189    fcmge_z   2d,4s,2s
   8190    fcmgt_z   2d,4s,2s
   8191    fcmle_z   2d,4s,2s
   8192    fcmlt_z   2d,4s,2s
   8193 
   8194    fcmp_z    d,s
   8195    fcmpe_z   d,s
   8196    fcmp      d,s (floating point quiet, set flags)
   8197    fcmpe     d,s (floating point signaling, set flags)
   8198 
   8199    fcsel     d,s (fp cond select)
   8200 
   8201    fdiv      d,s
   8202    fdiv      2d,4s,2s
   8203 
   8204    fmadd     d,s
   8205    fnmadd    d,s
   8206    fmsub     d,s
   8207    fnmsub    d,s
   8208 
   8209    fnmul     d,s
   8210 
   8211    fmax      d,s
   8212    fmin      d,s
   8213    fmaxnm    d,s ("max number")
   8214    fminnm    d,s
   8215 
   8216    fmax      2d,4s,2s
   8217    fmin      2d,4s,2s
   8218    fmaxnm    2d,4s,2s
   8219    fminnm    2d,4s,2s
   8220 
   8221    fmaxnmp   d_2d,s_2s ("max number pairwise")
   8222    fminnmp   d_2d,s_2s
   8223 
   8224    fmaxnmp   2d,4s,2s
   8225    fminnmp   2d,4s,2s
   8226 
   8227    fmaxnmv   s_4s (maxnum across vector)
   8228    fminnmv   s_4s
   8229 
   8230    fmaxp     d_2d,s_2s (max of a pair)
   8231    fminp     d_2d,s_2s (min of a pair)
   8232 
   8233    fmaxp     2d,4s,2s  (max pairwise)
   8234    fminp     2d,4s,2s
   8235 
   8236    fmaxv     s_4s (max across vector)
   8237    fminv     s_4s
   8238 
   8239    fmla      2d,4s,2s
   8240    fmls      2d,4s,2s
   8241 
   8242    fmla      d_d_d[],s_s_s[] (by element)
   8243    fmls      d_d_d[],s_s_s[] (by element)
   8244 
   8245    fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8246    fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8247 
   8248    fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   8249 
   8250    fmov      d_d,s_s
   8251 
   8252    fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   8253 
   8254    fmov      d,s #imm
   8255 
   8256    fmul      d_d_d[],s_s_s[]
   8257    fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8258 
   8259    fmul      2d,4s,2s
   8260    fmul      d,s
   8261 
   8262    fmulx     d_d_d[],s_s_s[]
   8263    fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8264 
   8265    fmulx     d,s
   8266    fmulx     2d,4s,2s
   8267 
   8268    frecpe    d,s (recip estimate)
   8269    frecpe    2d,4s,2s
   8270 
   8271    frecps    d,s (recip step)
   8272    frecps    2d,4s,2s
   8273 
   8274    frecpx    d,s (recip exponent)
   8275 
   8276    frinta    d,s
   8277    frinti    d,s
   8278    frintm    d,s
   8279    frintn    d,s
   8280    frintp    d,s
   8281    frintx    d,s
   8282    frintz    d,s
   8283 
   8284    frinta    2d,4s,2s (round to integral, nearest away)
   8285    frinti    2d,4s,2s (round to integral, per FPCR)
   8286    frintm    2d,4s,2s (round to integral, minus inf)
   8287    frintn    2d,4s,2s (round to integral, nearest, to even)
   8288    frintp    2d,4s,2s (round to integral, plus inf)
   8289    frintx    2d,4s,2s (round to integral exact, per FPCR)
   8290    frintz    2d,4s,2s (round to integral, zero)
   8291 
   8292    frsqrte   d,s (est)
   8293    frsqrte   2d,4s,2s
   8294 
   8295    frsqrts   d,s (step)
   8296    frsqrts   2d,4s,2s
   8297 
   8298    ======================== CONV ========================
   8299 
   8300    fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   8301 
   8302    fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   8303 
   8304    fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   8305 
   8306    fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
   8307    fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
   8308    fcvtas    2d,4s,2s
   8309    fcvtau    2d,4s,2s
   8310    fcvtas    w_s,x_s,w_d,x_d
   8311    fcvtau    w_s,x_s,w_d,x_d
   8312 
   8313    fcvtms    d,s  (fcvt to signed int,   minus inf)
   8314    fcvtmu    d,s  (fcvt to unsigned int, minus inf)
   8315    fcvtms    2d,4s,2s
   8316    fcvtmu    2d,4s,2s
   8317    fcvtms    w_s,x_s,w_d,x_d
   8318    fcvtmu    w_s,x_s,w_d,x_d
   8319 
   8320    fcvtns    d,s  (fcvt to signed int,   nearest)
   8321    fcvtnu    d,s  (fcvt to unsigned int, nearest)
   8322    fcvtns    2d,4s,2s
   8323    fcvtnu    2d,4s,2s
   8324    fcvtns    w_s,x_s,w_d,x_d
   8325    fcvtnu    w_s,x_s,w_d,x_d
   8326 
   8327    fcvtps    d,s  (fcvt to signed int,   plus inf)
   8328    fcvtpu    d,s  (fcvt to unsigned int, plus inf)
   8329    fcvtps    2d,4s,2s
   8330    fcvtpu    2d,4s,2s
   8331    fcvtps    w_s,x_s,w_d,x_d
   8332    fcvtpu    w_s,x_s,w_d,x_d
   8333 
   8334    fcvtzs    d,s (fcvt to signed integer,   to zero)
   8335    fcvtzu    d,s (fcvt to unsigned integer, to zero)
   8336    fcvtzs    2d,4s,2s
   8337    fcvtzu    2d,4s,2s
   8338    fcvtzs    w_s,x_s,w_d,x_d
   8339    fcvtzu    w_s,x_s,w_d,x_d
   8340 
   8341    fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   8342    fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   8343    fcvtzs    2d,4s,2s
   8344    fcvtzu    2d,4s,2s
   8345    fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   8346    fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   8347 
   8348    fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
   8349    fcvtxn    2s_2d,4s_2d
   8350 
   8351    scvtf     d,s        _#fbits
   8352    ucvtf     d,s        _#fbits
   8353 
   8354    scvtf     2d,4s,2s   _#fbits
   8355    ucvtf     2d,4s,2s   _#fbits
   8356 
   8357    scvtf     d,s
   8358    ucvtf     d,s
   8359 
   8360    scvtf     2d,4s,2s
   8361    ucvtf     2d,4s,2s
   8362 
   8363    scvtf     s_w, d_w, s_x, d_x,   _#fbits
   8364    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   8365 
   8366    scvtf     s_w, d_w, s_x, d_x
   8367    ucvtf     s_w, d_w, s_x, d_x
   8368 
   8369    ======================== INT ========================
   8370 
   8371    abs       d
   8372    neg       d
   8373 
   8374    abs       2d,4s,2s,8h,4h,16b,8b
   8375    neg       2d,4s,2s,8h,4h,16b,8b
   8376 
   8377    add       d
   8378    sub       d
   8379 
   8380    add       2d,4s,2s,8h,4h,16b,8b
   8381    sub       2d,4s,2s,8h,4h,16b,8b
   8382 
   8383    addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8384    subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8385    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8386    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8387 
   8388    addp     d (add pairs, across)
   8389    addp     2d,4s,2s,8h,4h,16b,8b
   8390    addv     4s,8h,4h,16b,8b (reduce across vector)
   8391 
   8392    and      16b,8b
   8393 
   8394    orr      8h,4h   #imm8, LSL #0 or 8
   8395    orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
   8396    bic      8h,4h   #imm8, LSL #0 or 8
   8397    bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
   8398    also movi, mvni
   8399 
   8400    bic      16b,8b (vector,reg) (bit clear)
   8401    bif      16b,8b (vector) (bit insert if false)
   8402    bit      16b,8b (vector) (bit insert if true)
   8403    bsl      16b,8b (vector) (bit select)
   8404 
   8405    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   8406    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   8407 
   8408    cmeq     d
   8409    cmge     d
   8410    cmgt     d
   8411    cmhi     d
   8412    cmhs     d
   8413    cmtst    d
   8414 
   8415    cmeq     2d,4s,2s,8h,4h,16b,8b
   8416    cmge     2d,4s,2s,8h,4h,16b,8b
   8417    cmgt     2d,4s,2s,8h,4h,16b,8b
   8418    cmhi     2d,4s,2s,8h,4h,16b,8b
   8419    cmhs     2d,4s,2s,8h,4h,16b,8b
   8420    cmtst    2d,4s,2s,8h,4h,16b,8b
   8421 
   8422    cmeq_z   d
   8423    cmge_z   d
   8424    cmgt_z   d
   8425    cmle_z   d
   8426    cmlt_z   d
   8427 
   8428    cmeq_z   2d,4s,2s,8h,4h,16b,8b
   8429    cmge_z   2d,4s,2s,8h,4h,16b,8b
   8430    cmgt_z   2d,4s,2s,8h,4h,16b,8b
   8431    cmle_z   2d,4s,2s,8h,4h,16b,8b
   8432    cmlt_z   2d,4s,2s,8h,4h,16b,8b
   8433 
   8434    cnt      16b,8b (population count per byte)
   8435 
   8436    dup      d,s,h,b (vec elem to scalar)
   8437    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   8438    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   8439 
   8440    eor      16b,8b (vector)
   8441    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   8442 
   8443    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   8444 
   8445    ins      d[]_x, s[]_w, h[]_w, b[]_w
   8446 
   8447    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8448    mla   4s,2s,8h,4h,16b,8b
   8449 
   8450    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8451    mls   4s,2s,8h,4h,16b,8b
   8452 
   8453    movi  16b,8b   #imm8, LSL #0
   8454    movi  8h,4h    #imm8, LSL #0 or 8
   8455    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   8456    movi  4s,2s    #imm8, MSL #8 or 16
   8457    movi  d,       #imm64
   8458    movi  2d,      #imm64
   8459 
   8460    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8461    mul   4s,2s,8h,4h,16b,8b
   8462 
   8463    mvni  8h,4h    #imm8, LSL #0 or 8
   8464    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   8465    mvni  4s,2s    #imm8, MSL #8 or 16
   8466 
   8467    not   16b,8b
   8468 
   8469    orn   16b,8b
   8470    orr   16b,8b
   8471 
   8472    pmul  16b,8b
   8473 
   8474    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
   8475 
   8476    rbit    16b,8b
   8477    rev16   16b,8b
   8478    rev32   16b,8b,8h,4h
   8479    rev64   16b,8b,8h,4h,4s,2s
   8480 
   8481    saba      16b,8b,8h,4h,4s,2s
   8482    uaba      16b,8b,8h,4h,4s,2s
   8483 
   8484    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8485    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8486 
   8487    sabd      16b,8b,8h,4h,4s,2s
   8488    uabd      16b,8b,8h,4h,4s,2s
   8489 
   8490    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8491    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8492 
   8493    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8494    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8495 
   8496    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8497    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8498    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8499    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8500 
   8501    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8502    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8503 
   8504    saddlv    h_16b/8b, s_8h/4h, d_4s
   8505    uaddlv    h_16b/8b, s_8h/4h, d_4s
   8506 
   8507    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8508    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8509    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8510    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8511 
   8512    shadd        16b,8b,8h,4h,4s,2s
   8513    uhadd        16b,8b,8h,4h,4s,2s
   8514    shsub        16b,8b,8h,4h,4s,2s
   8515    uhsub        16b,8b,8h,4h,4s,2s
   8516 
   8517    shl          d_#imm
   8518    shl          16b,8b,8h,4h,4s,2s,2d  _#imm
   8519 
   8520    shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   8521 
   8522    shrn{2}      2s/4s_2d, 8h/4h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   8523    rshrn{2}     2s/4s_2d, 8h/4h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   8524 
   8525    sli          d_#imm
   8526    sri          d_#imm
   8527 
   8528    sli          2d,4s,2s,8h,4h,16b,8b  _#imm
   8529    sri          2d,4s,2s,8h,4h,16b,8b  _#imm
   8530 
   8531    smax         4s,2s,8h,4h,16b,8b
   8532    umax         4s,2s,8h,4h,16b,8b
   8533    smin         4s,2s,8h,4h,16b,8b
   8534    umin         4s,2s,8h,4h,16b,8b
   8535 
   8536    smaxp        4s,2s,8h,4h,16b,8b
   8537    umaxp        4s,2s,8h,4h,16b,8b
   8538    sminp        4s,2s,8h,4h,16b,8b
   8539    uminp        4s,2s,8h,4h,16b,8b
   8540 
   8541    smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   8542    umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   8543    sminv        s_4s,h_8h,h_4h,b_16b,b_8b
   8544    uminv        s_4s,h_8h,h_4h,b_16b,b_8b
   8545 
   8546    smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8547    umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8548    smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8549    umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8550    smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8551    umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8552 
   8553    smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8554    umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8555    smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8556    umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8557    smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8558    umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8559 
   8560    smov         w_b[], w_h[], x_b[], x_h[], x_s[]
   8561    umov         w_b[], w_h[], x_b[], x_h[], x_s[]
   8562 
   8563    sqabs        d,s,h,b
   8564    sqneg        d,s,h,b
   8565 
   8566    sqabs        2d,4s,2s,8h,4h,16b,8b
   8567    sqneg        2d,4s,2s,8h,4h,16b,8b
   8568 
   8569    sqadd        d,s,h,b
   8570    uqadd        d,s,h,b
   8571    sqsub        d,s,h,b
   8572    uqsub        d,s,h,b
   8573 
   8574    sqadd        2d,4s,2s,8h,4h,16b,8b
   8575    uqadd        2d,4s,2s,8h,4h,16b,8b
   8576    sqsub        2d,4s,2s,8h,4h,16b,8b
   8577    uqsub        2d,4s,2s,8h,4h,16b,8b
   8578 
   8579    sqdmlal      d_s_s[], s_h_h[]
   8580    sqdmlsl      d_s_s[], s_h_h[]
   8581    sqdmull      d_s_s[], s_h_h[]
   8582 
   8583    sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8584    sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8585    sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8586 
   8587    sqdmlal      d_s_s, s_h_h
   8588    sqdmlsl      d_s_s, s_h_h
   8589    sqdmull      d_s_s, s_h_h
   8590 
   8591    sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8592    sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8593    sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8594 
   8595    sqdmulh      s_s_s[], h_h_h[]
   8596    sqrdmulh     s_s_s[], h_h_h[]
   8597 
   8598    sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   8599    sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   8600 
   8601    sqdmulh      h,s
   8602    sqrdmulh     h,s
   8603 
   8604    sqdmulh      4s,2s,8h,4h
   8605    sqrdmulh     4s,2s,8h,4h
   8606 
   8607    sqshl        d,s,h,b
   8608    uqshl        d,s,h,b
   8609    sqrshl       d,s,h,b
   8610    uqrshl       d,s,h,b
   8611 
   8612    sqshl        2d,4s,2s,8h,4h,16b,8b
   8613    uqshl        2d,4s,2s,8h,4h,16b,8b
   8614    sqrshl       2d,4s,2s,8h,4h,16b,8b
   8615    uqrshl       2d,4s,2s,8h,4h,16b,8b
   8616 
   8617    sqrshrn      s_d, h_s, b_h   #imm
   8618    uqrshrn      s_d, h_s, b_h   #imm
   8619    sqshrn       s_d, h_s, b_h   #imm
   8620    uqshrn       s_d, h_s, b_h   #imm
   8621 
   8622    sqrshrun     s_d, h_s, b_h   #imm
   8623    sqshrun      s_d, h_s, b_h   #imm
   8624 
   8625    sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8626    uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8627    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8628    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8629 
   8630    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8631    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8632 
   8633    sqshl        d,s,h,b   _#imm
   8634    uqshl        d,s,h,b   _#imm
   8635    sqshlu       d,s,h,b   _#imm
   8636 
   8637    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8638    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8639    sqshlu       2d,4s,2s,8h,4h,16b,8b   _#imm
   8640 
   8641    sqxtn        s_d,h_s,b_h
   8642    uqxtn        s_d,h_s,b_h
   8643    sqxtun       s_d,h_s,b_h
   8644 
   8645    sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8646    uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8647    sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8648 
   8649    srhadd       4s,2s,8h,4h,16b,8b
   8650    urhadd       4s,2s,8h,4h,16b,8b
   8651 
   8652    sshl (reg)   d
   8653    ushl (reg)   d
   8654    sshr (imm)   d
   8655    ushr (imm)   d
   8656    ssra (imm)   d
   8657    usra (imm)   d
   8658 
   8659    srshl (reg)  d
   8660    urshl (reg)  d
   8661    srshr (imm)  d
   8662    urshr (imm)  d
   8663    srsra (imm)  d
   8664    ursra (imm)  d
   8665 
   8666    sshl         2d,4s,2s,8h,4h,16b,8b
   8667    ushl         2d,4s,2s,8h,4h,16b,8b
   8668    sshr         2d,4s,2s,8h,4h,16b,8b
   8669    ushr         2d,4s,2s,8h,4h,16b,8b
   8670    ssra         2d,4s,2s,8h,4h,16b,8b
   8671    usra         2d,4s,2s,8h,4h,16b,8b
   8672 
   8673    srshl        2d,4s,2s,8h,4h,16b,8b
   8674    urshl        2d,4s,2s,8h,4h,16b,8b
   8675    srshr        2d,4s,2s,8h,4h,16b,8b
   8676    urshr        2d,4s,2s,8h,4h,16b,8b
   8677    srsra        2d,4s,2s,8h,4h,16b,8b
   8678    ursra        2d,4s,2s,8h,4h,16b,8b
   8679 
   8680    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8681    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8682 
   8683    suqadd  d,s,h,b
   8684    suqadd  2d,4s,2s,8h,4h,16b,8b
   8685 
   8686    tbl     8b_{16b}_8b, 16b_{16b}_16b
   8687    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8688    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8689    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8690 
   8691    tbx     8b_{16b}_8b, 16b_{16b}_16b
   8692    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8693    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8694    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8695 
   8696    trn1    2d,4s,2s,8h,4h,16b,8b
   8697    trn2    2d,4s,2s,8h,4h,16b,8b
   8698 
   8699    urecpe      4s,2s
   8700 
   8701    ursqrte     4s,2s
   8702 
   8703    usqadd      d,s,h,b
   8704    usqadd      2d,4s,2s,8h,4h,16b,8b
   8705 
   8706    uzp1      2d,4s,2s,8h,4h,16b,8b
   8707    uzp2      2d,4s,2s,8h,4h,16b,8b
   8708 
   8709    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8710 
   8711    zip1      2d,4s,2s,8h,4h,16b,8b
   8712    zip2      2d,4s,2s,8h,4h,16b,8b
   8713 
   8714    ======================== MEM ========================
   8715 
   8716    ld1  (multiple 1-element structures to 1/2/3/4 regs)
   8717    ld1  (single 1-element structure to one lane of 1 reg)
   8718    ld1r (single 1-element structure and rep to all lanes of 1 reg)
   8719 
   8720    ld2  (multiple 2-element structures to 2 regs)
   8721    ld2  (single 2-element structure to one lane of 2 regs)
   8722    ld2r (single 2-element structure and rep to all lanes of 2 regs)
   8723 
   8724    ld3  (multiple 3-element structures to 3 regs)
   8725    ld3  (single 3-element structure to one lane of 3 regs)
   8726    ld3r (single 3-element structure and rep to all lanes of 3 regs)
   8727 
   8728    ld4  (multiple 4-element structures to 4 regs)
   8729    ld4  (single 4-element structure to one lane of 4 regs)
   8730    ld4r (single 4-element structure and rep to all lanes of 4 regs)
   8731 
   8732    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   8733          addr = reg + uimm7 * reg_size
   8734 
   8735    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   8736          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   8737 
   8738    ldr   q,d,s,h,b from addr
   8739          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   8740 
   8741    ldr   q,d,s from  pc+#imm19
   8742 
   8743    ldr   q,d,s,h,b from addr
   8744          addr = [Xn|SP, R <extend> <shift>]
   8745 
   8746    ldur  q,d,s,h,b from addr
   8747          addr = [Xn|SP,#imm] (unscaled offset)
   8748 
   8749    st1 (multiple 1-element structures from 1/2/3/4 regs)
   8750    st1 (single 1-element structure from 1 lane of 1 reg)
   8751 
   8752    st2 (multiple 2-element structures from 2 regs)
   8753    st2 (single 2-element structure from 1 lane of 2 regs)
   8754 
   8755    st3 (multiple 3-element structures from 3 regs)
   8756    st3 (single 3-element structure from 1 lane of 3 regs)
   8757 
   8758    st4 (multiple 4-element structures from 4 regs)
   8759    st4 (single 4-element structure from one lane of 4 regs)
   8760 
   8761    stnp q_q_addr, d_d_addr, s_s_addr
   8762         addr = [Xn|SP, #imm]
   8763 
   8764    stp  q_q_addr, d_d_addr, s_s_addr
   8765         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   8766 
   8767    str  q,d,s,h,b_addr
   8768         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   8769 
   8770    str   q,d,s,h,b_addr
   8771          addr = [Xn|SP, R <extend> <shift>]
   8772 
   8773    stur  q,d,s,h,b_addr
   8774          addr = [Xn|SP,#imm] (unscaled offset)
   8775 
   8776    ======================== CRYPTO ========================
   8777 
   8778    aesd       16b (aes single round decryption)
   8779    aese       16b (aes single round encryption)
   8780    aesimc     16b (aes inverse mix columns)
   8781    aesmc      16b (aes mix columns)
   8782 
   8783    sha1c      q_s_4s
   8784    sha1h      s_s
   8785    sha1m      q_s_4s
   8786    sha1p      q_s_4s
   8787    sha1su0    4s_4s_4s
   8788    sha1su1    4s_4s
   8789 
   8790    sha256h2   q_q_4s
   8791    sha256h    q_q_4s
   8792    sha256su0  4s_4s
   8793    sha256su1  4s_4s_4s
   8794 */
   8795