Home | History | Annotate | Download | only in arm64
      1 
      2 #include <stdio.h>
      3 #include <assert.h>
      4 #include <malloc.h>  // memalign
      5 #include <string.h>  // memset
      6 #include "tests/malloc.h"
      7 #include <math.h>    // isnormal
      8 
      9 typedef  unsigned char           UChar;
     10 typedef  unsigned short int      UShort;
     11 typedef  unsigned int            UInt;
     12 typedef  signed int              Int;
     13 typedef  unsigned char           UChar;
     14 typedef  unsigned long long int  ULong;
     15 typedef  signed long long int    Long;
     16 typedef  double                  Double;
     17 typedef  float                   Float;
     18 
     19 typedef  unsigned char           Bool;
     20 #define False ((Bool)0)
     21 #define True  ((Bool)1)
     22 
     23 
     24 #define ITERS 1
     25 
     26 typedef
     27   enum { TyHF=1234, TySF, TyDF, TyB, TyH, TyS, TyD, TyNONE }
     28   LaneTy;
     29 
     30 union _V128 {
     31    UChar  u8[16];
     32    UShort u16[8];
     33    UInt   u32[4];
     34    ULong  u64[2];
     35    Float  f32[4];
     36    Double f64[2];
     37 };
     38 typedef  union _V128   V128;
     39 
     40 static inline UChar randUChar ( void )
     41 {
     42    static UInt seed = 80021;
     43    seed = 1103515245 * seed + 12345;
     44    return (seed >> 17) & 0xFF;
     45 }
     46 
     47 static ULong randULong ( LaneTy ty )
     48 {
     49    Int i;
     50    ULong r = 0;
     51    for (i = 0; i < 8; i++) {
     52       r = (r << 8) | (ULong)(0xFF & randUChar());
     53    }
     54    return r;
     55 }
     56 
     57 /* Generates a random V128.  Ensures that that it contains normalised
     58    FP numbers when viewed as either F32x4 or F64x2, so that it is
     59    reasonable to use in FP test cases. */
     60 static void randV128 ( /*OUT*/V128* v, LaneTy ty )
     61 {
     62    static UInt nCalls = 0, nIters = 0;
     63    Int i;
     64    nCalls++;
     65    while (1) {
     66       nIters++;
     67       for (i = 0; i < 16; i++) {
     68          v->u8[i] = randUChar();
     69       }
     70       if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2])
     71           && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1]))
     72         break;
     73    }
     74    if (0 == (nCalls & 0xFF))
     75       printf("randV128: %u calls, %u iters\n", nCalls, nIters);
     76 }
     77 
     78 static void showV128 ( V128* v )
     79 {
     80    Int i;
     81    for (i = 15; i >= 0; i--)
     82       printf("%02x", (Int)v->u8[i]);
     83 }
     84 
     85 static void showBlock ( const char* msg, V128* block, Int nBlock )
     86 {
     87    Int i;
     88    printf("%s\n", msg);
     89    for (i = 0; i < nBlock; i++) {
     90       printf("  ");
     91       showV128(&block[i]);
     92       printf("\n");
     93    }
     94 }
     95 
     96 static ULong dup4x16 ( UInt x )
     97 {
     98    ULong r = x & 0xF;
     99    r |= (r << 4);
    100    r |= (r << 8);
    101    r |= (r << 16);
    102    r |= (r << 32);
    103    return r;
    104 }
    105 
// Generate a random double-precision number.  About 1 time in 2,
// instead return a special value (+/-0, +/-1, +/-Inf, +/-NaN, +/-denorm).
// This ensures that many of the groups of 4 calls here will
// return a special value.
    110 
    111 static Double special_values[10];
    112 static Bool   special_values_initted = False;
    113 
    114 static __attribute__((noinline))
    115 Double negate ( Double d ) { return -d; }
    116 static __attribute__((noinline))
    117 Double divf64 ( Double x, Double y ) { return x/y; }
    118 
    119 static __attribute__((noinline))
    120 Double plusZero  ( void ) { return 0.0; }
    121 static __attribute__((noinline))
    122 Double minusZero ( void ) { return negate(plusZero()); }
    123 
    124 static __attribute__((noinline))
    125 Double plusOne  ( void ) { return 1.0; }
    126 static __attribute__((noinline))
    127 Double minusOne ( void ) { return negate(plusOne()); }
    128 
    129 static __attribute__((noinline))
    130 Double plusInf   ( void ) { return 1.0 / 0.0; }
    131 static __attribute__((noinline))
    132 Double minusInf  ( void ) { return negate(plusInf()); }
    133 
    134 static __attribute__((noinline))
    135 Double plusNaN  ( void ) { return divf64(plusInf(),plusInf()); }
    136 static __attribute__((noinline))
    137 Double minusNaN ( void ) { return negate(plusNaN()); }
    138 
    139 static __attribute__((noinline))
    140 Double plusDenorm  ( void ) { return 1.23e-315 / 1e3; }
    141 static __attribute__((noinline))
    142 Double minusDenorm ( void ) { return negate(plusDenorm()); }
    143 
    144 
    145 static void ensure_special_values_initted ( void )
    146 {
    147    if (special_values_initted) return;
    148    special_values[0] = plusZero();
    149    special_values[1] = minusZero();
    150    special_values[2] = plusOne();
    151    special_values[3] = minusOne();
    152    special_values[4] = plusInf();
    153    special_values[5] = minusInf();
    154    special_values[6] = plusNaN();
    155    special_values[7] = minusNaN();
    156    special_values[8] = plusDenorm();
    157    special_values[9] = minusDenorm();
    158    special_values_initted = True;
    159    int i;
    160    printf("\n");
    161    for (i = 0; i < 10; i++) {
    162       printf("special value %d = %e\n", i, special_values[i]);
    163    }
    164    printf("\n");
    165 }
    166 
    167 static Double randDouble ( void )
    168 {
    169    ensure_special_values_initted();
    170    UChar c = randUChar();
    171    if (c >= 128) {
    172       // return a normal number most of the time.
    173       // 0 .. 2^63-1
    174       ULong u64 = randULong(TyDF);
    175       // -2^62 .. 2^62-1
    176       Long s64 = (Long)u64;
    177       // -2^55 .. 2^55-1
    178       s64 >>= (62-55);
    179       // and now as a float
    180       return (Double)s64;
    181    }
    182    c = randUChar() % 10;
    183    return special_values[c];
    184 }
    185 
    186 static Float randFloat ( void )
    187 {
    188    ensure_special_values_initted();
    189    UChar c = randUChar();
    190    if (c >= 128) {
    191       // return a normal number most of the time.
    192       // 0 .. 2^63-1
    193       ULong u64 = randULong(TyDF);
    194       // -2^62 .. 2^62-1
    195       Long s64 = (Long)u64;
    196       // -2^25 .. 2^25-1
    197       s64 >>= (62-25);
    198       // and now as a float
    199       return (Float)s64;
    200    }
    201    c = randUChar() % 10;
    202    return special_values[c];
    203 }
    204 
    205 void randBlock_Doubles ( V128* block, Int nBlock )
    206 {
    207    Int i;
    208    for (i = 0; i < nBlock; i++) {
    209       block[i].f64[0] = randDouble();
    210       block[i].f64[1] = randDouble();
    211    }
    212 }
    213 
    214 void randBlock_Floats ( V128* block, Int nBlock )
    215 {
    216    Int i;
    217    for (i = 0; i < nBlock; i++) {
    218       block[i].f32[0] = randFloat();
    219       block[i].f32[1] = randFloat();
    220       block[i].f32[2] = randFloat();
    221       block[i].f32[3] = randFloat();
    222    }
    223 }
    224 
    225 
    226 /* ---------------------------------------------------------------- */
    227 /* -- Parameterisable test macros                                -- */
    228 /* ---------------------------------------------------------------- */
    229 
    230 #define DO50(_action) \
    231    do { \
    232       Int _qq; for (_qq = 0; _qq < 50; _qq++) { _action ; } \
    233    } while (0)
    234 
    235 
    236 /* Note this also sets the destination register to a known value (0x55..55)
    237    since it can sometimes be an input to the instruction too. */
    238 #define GEN_UNARY_TEST(INSN,SUFFIXD,SUFFIXN) \
    239   __attribute__((noinline)) \
    240   static void test_##INSN##_##SUFFIXD##_##SUFFIXN ( LaneTy ty ) { \
    241      Int i; \
    242      for (i = 0; i < ITERS; i++) { \
    243         V128 block[2+1]; \
    244         memset(block, 0x55, sizeof(block)); \
    245         randV128(&block[0], ty); \
    246         randV128(&block[1], ty); \
    247         __asm__ __volatile__( \
    248            "mov   x30, #0 ; msr fpsr, x30 ; " \
    249            "ldr   q7, [%0, #0]   ; " \
    250            "ldr   q8, [%0, #16]   ; " \
    251            #INSN " v8." #SUFFIXD ", v7." #SUFFIXN " ; " \
    252            "str   q8, [%0, #16] ; " \
    253            "mrs   x30, fpsr ; str x30, [%0, #32] " \
    254            : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
    255         ); \
    256         printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN); \
    257         UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
    258         showV128(&block[0]); printf("  "); \
    259         showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
    260      } \
    261   }
    262 
    263 
    264 /* Note this also sets the destination register to a known value (0x55..55)
    265    since it can sometimes be an input to the instruction too. */
    266 #define GEN_BINARY_TEST(INSN,SUFFIXD,SUFFIXN,SUFFIXM)  \
    267   __attribute__((noinline)) \
    268   static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##SUFFIXM ( LaneTy ty ) { \
    269      Int i; \
    270      for (i = 0; i < ITERS; i++) { \
    271         V128 block[3+1]; \
    272         memset(block, 0x55, sizeof(block)); \
    273         randV128(&block[0], ty); \
    274         randV128(&block[1], ty); \
    275         randV128(&block[2], ty); \
    276         __asm__ __volatile__( \
    277            "mov   x30, #0 ; msr fpsr, x30 ; " \
    278            "ldr   q7, [%0, #0]   ; " \
    279            "ldr   q8, [%0, #16]   ; " \
    280            "ldr   q9, [%0, #32]   ; " \
    281            #INSN " v9." #SUFFIXD ", v7." #SUFFIXN ", v8." #SUFFIXM " ; " \
    282            "str   q9, [%0, #32] ; " \
    283            "mrs   x30, fpsr ; str x30, [%0, #48] " \
    284            : : "r"(&block[0]) : "memory", "v7", "v8", "v9", "x30" \
    285         ); \
    286         printf(#INSN   " v9." #SUFFIXD \
    287                ", v7." #SUFFIXN ", v8." #SUFFIXM "  ");   \
    288         UInt fpsr = 0xFFFFFF60 & block[3].u32[0]; \
    289         showV128(&block[0]); printf("  "); \
    290         showV128(&block[1]); printf("  "); \
    291         showV128(&block[2]); printf(" fpsr=%08x\n", fpsr); \
    292      } \
    293   }
    294 
    295 
    296 /* Note this also sets the destination register to a known value (0x55..55)
    297    since it can sometimes be an input to the instruction too. */
    298 #define GEN_SHIFT_TEST(INSN,SUFFIXD,SUFFIXN,AMOUNT) \
    299   __attribute__((noinline)) \
    300   static void test_##INSN##_##SUFFIXD##_##SUFFIXN##_##AMOUNT ( LaneTy ty ) { \
    301      Int i; \
    302      for (i = 0; i < ITERS; i++) { \
    303         V128 block[2+1]; \
    304         memset(block, 0x55, sizeof(block)); \
    305         randV128(&block[0], ty); \
    306         randV128(&block[1], ty); \
    307         __asm__ __volatile__( \
    308            "mov   x30, #0 ; msr fpsr, x30 ; " \
    309            "ldr   q7, [%0, #0]   ; " \
    310            "ldr   q8, [%0, #16]   ; " \
    311            #INSN " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT " ; " \
    312            "str   q8, [%0, #16] ; " \
    313            "mrs   x30, fpsr ; str x30, [%0, #32] " \
    314            : : "r"(&block[0]) : "memory", "v7", "v8", "x30" \
    315         ); \
    316         printf(#INSN   " v8." #SUFFIXD ", v7." #SUFFIXN ", #" #AMOUNT "  "); \
    317         UInt fpsr = 0xFFFFFF60 & block[2].u32[0]; \
    318         showV128(&block[0]); printf("  "); \
    319         showV128(&block[1]); printf(" fpsr=%08x\n", fpsr); \
    320      } \
    321   }
    322 
    323 
    324 /* Generate a test that involves one integer reg and one vector reg,
    325    with no bias as towards which is input or output. */
    326 #define GEN_ONEINT_ONEVEC_TEST(TESTNAME,INSN,INTREGNO,VECREGNO) \
    327   __attribute__((noinline)) \
    328   static void test_##TESTNAME ( LaneTy ty ) { \
    329      Int i; \
    330      assert(INTREGNO != 30); \
    331      for (i = 0; i < ITERS; i++) { \
    332         V128 block[4+1]; \
    333         memset(block, 0x55, sizeof(block)); \
    334         randV128(&block[0], ty); \
    335         randV128(&block[1], ty); \
    336         randV128(&block[2], ty); \
    337         randV128(&block[3], ty); \
    338         __asm__ __volatile__( \
    339            "mov   x30, #0 ; msr fpsr, x30 ; " \
    340            "ldr   q"#VECREGNO", [%0, #0]  ; " \
    341            "ldr   x"#INTREGNO", [%0, #16] ; " \
    342            INSN " ; " \
    343            "str   q"#VECREGNO", [%0, #32] ; " \
    344            "str   x"#INTREGNO", [%0, #48] ; " \
    345            "mrs   x30, fpsr ; str x30, [%0, #64] " \
    346            : : "r"(&block[0]) : "memory", "v"#VECREGNO, "x"#INTREGNO, "x30" \
    347         ); \
    348         printf(INSN   "   "); \
    349         UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
    350         showV128(&block[0]); printf("  "); \
    351         showV128(&block[1]); printf("  "); \
    352         showV128(&block[2]); printf("  "); \
    353         showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
    354      } \
    355   }
    356 
    357 
    358 /* Generate a test that involves two vector regs,
    359    with no bias as towards which is input or output.
    360    It's OK to use x10 as scratch.*/
    361 #define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
    362   __attribute__((noinline)) \
    363   static void test_##TESTNAME ( LaneTy ty ) { \
    364      Int i; \
    365      for (i = 0; i < ITERS; i++) { \
    366         V128 block[4+1]; \
    367         memset(block, 0x55, sizeof(block)); \
    368         randV128(&block[0], ty); \
    369         randV128(&block[1], ty); \
    370         randV128(&block[2], ty); \
    371         randV128(&block[3], ty); \
    372         __asm__ __volatile__( \
    373            "mov   x30, #0 ; msr fpsr, x30 ; " \
    374            "ldr   q"#VECREG1NO", [%0, #0]  ; " \
    375            "ldr   q"#VECREG2NO", [%0, #16] ; " \
    376            INSN " ; " \
    377            "str   q"#VECREG1NO", [%0, #32] ; " \
    378            "str   q"#VECREG2NO", [%0, #48] ; " \
    379            "mrs   x30, fpsr ; str x30, [%0, #64] " \
    380            : : "r"(&block[0]) \
    381              : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "x10", "x30" \
    382         ); \
    383         printf(INSN   "   "); \
    384         UInt fpsr = 0xFFFFFF60 & block[4].u32[0]; \
    385         showV128(&block[0]); printf("  "); \
    386         showV128(&block[1]); printf("  "); \
    387         showV128(&block[2]); printf("  "); \
    388         showV128(&block[3]); printf(" fpsr=%08x\n", fpsr); \
    389      } \
    390   }
    391 
    392 
    393 /* Generate a test that involves three vector regs,
    394    with no bias as towards which is input or output.  It's also OK
    395    to use v16, v17, v18 as scratch. */
    396 #define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO)  \
    397   __attribute__((noinline)) \
    398   static void test_##TESTNAME ( LaneTy ty ) { \
    399      Int i; \
    400      for (i = 0; i < ITERS; i++) { \
    401         V128 block[6+1]; \
    402         memset(block, 0x55, sizeof(block)); \
    403         randV128(&block[0], ty); \
    404         randV128(&block[1], ty); \
    405         randV128(&block[2], ty); \
    406         randV128(&block[3], ty); \
    407         randV128(&block[4], ty); \
    408         randV128(&block[5], ty); \
    409         __asm__ __volatile__( \
    410            "mov   x30, #0 ; msr fpsr, x30 ; " \
    411            "ldr   q"#VECREG1NO", [%0, #0]  ; " \
    412            "ldr   q"#VECREG2NO", [%0, #16] ; " \
    413            "ldr   q"#VECREG3NO", [%0, #32] ; " \
    414            INSN " ; " \
    415            "str   q"#VECREG1NO", [%0, #48] ; " \
    416            "str   q"#VECREG2NO", [%0, #64] ; " \
    417            "str   q"#VECREG3NO", [%0, #80] ; " \
    418            "mrs   x30, fpsr ; str x30, [%0, #96] " \
    419            : : "r"(&block[0]) \
    420            : "memory", "v"#VECREG1NO, "v"#VECREG2NO, "v"#VECREG3NO, \
    421              "v16", "v17", "v18", "x30" \
    422         ); \
    423         printf(INSN   "   "); \
    424         UInt fpsr = 0xFFFFFF60 & block[6].u32[0]; \
    425         showV128(&block[0]); printf("  "); \
    426         showV128(&block[1]); printf("  "); \
    427         showV128(&block[2]); printf("  "); \
    428         showV128(&block[3]); printf("  "); \
    429         showV128(&block[4]); printf("  "); \
    430         showV128(&block[5]); printf(" fpsr=%08x\n", fpsr); \
    431      } \
    432   }
    433 
    434 
    435 /* Generate a test that involves four vector regs,
    436    with no bias as towards which is input or output.  It's also OK
    437    to use v16, v17, v18 as scratch. */
    438 #define GEN_FOURVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO, \
    439                                        VECREG3NO,VECREG4NO)  \
    440   __attribute__((noinline)) \
    441   static void test_##TESTNAME ( LaneTy ty ) { \
    442      Int i; \
    443      for (i = 0; i < ITERS; i++) { \
    444         V128 block[8+1]; \
    445         memset(block, 0x55, sizeof(block)); \
    446         randV128(&block[0], ty); \
    447         randV128(&block[1], ty); \
    448         randV128(&block[2], ty); \
    449         randV128(&block[3], ty); \
    450         randV128(&block[4], ty); \
    451         randV128(&block[5], ty); \
    452         randV128(&block[6], ty); \
    453         randV128(&block[7], ty); \
    454         __asm__ __volatile__( \
    455            "mov   x30, #0 ; msr fpsr, x30 ; " \
    456            "ldr   q"#VECREG1NO", [%0, #0]  ; " \
    457            "ldr   q"#VECREG2NO", [%0, #16] ; " \
    458            "ldr   q"#VECREG3NO", [%0, #32] ; " \
    459            "ldr   q"#VECREG4NO", [%0, #48] ; " \
    460            INSN " ; " \
    461            "str   q"#VECREG1NO", [%0, #64] ; " \
    462            "str   q"#VECREG2NO", [%0, #80] ; " \
    463            "str   q"#VECREG3NO", [%0, #96] ; " \
    464            "str   q"#VECREG4NO", [%0, #112] ; " \
    465            "mrs   x30, fpsr ; str x30, [%0, #128] " \
    466            : : "r"(&block[0]) \
    467            : "memory", "v"#VECREG1NO, "v"#VECREG2NO, \
    468                        "v"#VECREG3NO, "v"#VECREG4NO, \
    469              "v16", "v17", "v18", "x30" \
    470         ); \
    471         printf(INSN   "   "); \
    472         UInt fpsr = 0xFFFFFF60 & block[8].u32[0]; \
    473         showV128(&block[0]); printf("  "); \
    474         showV128(&block[1]); printf("  "); \
    475         showV128(&block[2]); printf("  "); \
    476         showV128(&block[3]); printf("  "); \
    477         showV128(&block[4]); printf("  "); \
    478         showV128(&block[5]); printf("  "); \
    479         showV128(&block[6]); printf("  "); \
    480         showV128(&block[7]); printf(" fpsr=%08x\n", fpsr); \
    481      } \
    482   }
    483 
    484 
    485 /* ---------------------------------------------------------------- */
    486 /* -- Test functions and non-parameterisable test macros         -- */
    487 /* ---------------------------------------------------------------- */
    488 
    489 void test_UMINV ( void )
    490 {
    491   int i;
    492   V128 block[2];
    493 
    494   /* -- 4s -- */
    495 
    496   for (i = 0; i < 10; i++) {
    497     memset(&block, 0x55, sizeof(block));
    498     randV128(&block[0], TyS);
    499     randV128(&block[1], TyS);
    500     __asm__ __volatile__(
    501        "ldr   q7, [%0, #0]   ; "
    502        "uminv s8, v7.4s   ; "
    503        "str   q8, [%0, #16] "
    504        : : "r"(&block[0]) : "memory", "v7", "v8"
    505                          );
    506     printf("UMINV v8, v7.4s  ");
    507     showV128(&block[0]); printf("  ");
    508     showV128(&block[1]); printf("\n");
    509   }
    510 
    511   /* -- 8h -- */
    512 
    513   for (i = 0; i < 10; i++) {
    514     memset(&block, 0x55, sizeof(block));
    515     randV128(&block[0], TyH);
    516     randV128(&block[1], TyH);
    517     __asm__ __volatile__(
    518        "ldr   q7, [%0, #0]   ; "
    519        "uminv h8, v7.8h   ; "
    520        "str   q8, [%0, #16] "
    521        : : "r"(&block[0]) : "memory", "v7", "v8"
    522                          );
    523     printf("UMINV h8, v7.8h  ");
    524     showV128(&block[0]); printf("  ");
    525     showV128(&block[1]); printf("\n");
    526   }
    527 
    528   /* -- 4h -- */
    529 
    530   for (i = 0; i < 10; i++) {
    531     memset(&block, 0x55, sizeof(block));
    532     randV128(&block[0], TyH);
    533     randV128(&block[1], TyH);
    534     __asm__ __volatile__(
    535        "ldr   q7, [%0, #0]   ; "
    536        "uminv h8, v7.4h   ; "
    537        "str   q8, [%0, #16] "
    538        : : "r"(&block[0]) : "memory", "v7", "v8"
    539                          );
    540     printf("UMINV h8, v7.4h  ");
    541     showV128(&block[0]); printf("  ");
    542     showV128(&block[1]); printf("\n");
    543   }
    544 
    545   /* -- 16b -- */
    546 
    547   for (i = 0; i < 10; i++) {
    548     memset(&block, 0x55, sizeof(block));
    549     randV128(&block[0], TyB);
    550     randV128(&block[1], TyB);
    551     __asm__ __volatile__(
    552        "ldr   q7, [%0, #0]   ; "
    553        "uminv b8, v7.16b   ; "
    554        "str   q8, [%0, #16] "
    555        : : "r"(&block[0]) : "memory", "v7", "v8"
    556                          );
    557     printf("UMINV b8, v7.16b  ");
    558     showV128(&block[0]); printf("  ");
    559     showV128(&block[1]); printf("\n");
    560   }
    561 
    562   /* -- 8b -- */
    563 
    564   for (i = 0; i < 10; i++) {
    565     memset(&block, 0x55, sizeof(block));
    566     randV128(&block[0], TyB);
    567     randV128(&block[1], TyB);
    568     __asm__ __volatile__(
    569        "ldr   q7, [%0, #0]   ; "
    570        "uminv b8, v7.8b   ; "
    571        "str   q8, [%0, #16] "
    572        : : "r"(&block[0]) : "memory", "v7", "v8"
    573                          );
    574     printf("UMINV b8, v7.8b  ");
    575     showV128(&block[0]); printf("  ");
    576     showV128(&block[1]); printf("\n");
    577   }
    578 
    579 }
    580 
    581 
    582 void test_UMAXV ( void )
    583 {
    584   int i;
    585   V128 block[2];
    586 
    587   /* -- 4s -- */
    588 
    589   for (i = 0; i < 10; i++) {
    590     memset(&block, 0x55, sizeof(block));
    591     randV128(&block[0], TyS);
    592     randV128(&block[1], TyS);
    593     __asm__ __volatile__(
    594        "ldr   q7, [%0, #0]   ; "
    595        "umaxv s8, v7.4s   ; "
    596        "str   q8, [%0, #16] "
    597        : : "r"(&block[0]) : "memory", "v7", "v8"
    598                          );
    599     printf("UMAXV v8, v7.4s  ");
    600     showV128(&block[0]); printf("  ");
    601     showV128(&block[1]); printf("\n");
    602   }
    603 
    604   /* -- 8h -- */
    605 
    606   for (i = 0; i < 10; i++) {
    607     memset(&block, 0x55, sizeof(block));
    608     randV128(&block[0], TyH);
    609     randV128(&block[1], TyH);
    610     __asm__ __volatile__(
    611        "ldr   q7, [%0, #0]   ; "
    612        "umaxv h8, v7.8h   ; "
    613        "str   q8, [%0, #16] "
    614        : : "r"(&block[0]) : "memory", "v7", "v8"
    615                          );
    616     printf("UMAXV h8, v7.8h  ");
    617     showV128(&block[0]); printf("  ");
    618     showV128(&block[1]); printf("\n");
    619   }
    620 
    621   /* -- 4h -- */
    622 
    623   for (i = 0; i < 10; i++) {
    624     memset(&block, 0x55, sizeof(block));
    625     randV128(&block[0], TyH);
    626     randV128(&block[1], TyH);
    627     __asm__ __volatile__(
    628        "ldr   q7, [%0, #0]   ; "
    629        "umaxv h8, v7.4h   ; "
    630        "str   q8, [%0, #16] "
    631        : : "r"(&block[0]) : "memory", "v7", "v8"
    632                          );
    633     printf("UMAXV h8, v7.4h  ");
    634     showV128(&block[0]); printf("  ");
    635     showV128(&block[1]); printf("\n");
    636   }
    637 
    638   /* -- 16b -- */
    639 
    640   for (i = 0; i < 10; i++) {
    641     memset(&block, 0x55, sizeof(block));
    642     randV128(&block[0], TyB);
    643     randV128(&block[1], TyB);
    644     __asm__ __volatile__(
    645        "ldr   q7, [%0, #0]   ; "
    646        "umaxv b8, v7.16b   ; "
    647        "str   q8, [%0, #16] "
    648        : : "r"(&block[0]) : "memory", "v7", "v8"
    649                          );
    650     printf("UMAXV b8, v7.16b  ");
    651     showV128(&block[0]); printf("  ");
    652     showV128(&block[1]); printf("\n");
    653   }
    654 
    655   /* -- 8b -- */
    656 
    657   for (i = 0; i < 10; i++) {
    658     memset(&block, 0x55, sizeof(block));
    659     randV128(&block[0], TyB);
    660     randV128(&block[1], TyB);
    661     __asm__ __volatile__(
    662        "ldr   q7, [%0, #0]   ; "
    663        "umaxv b8, v7.8b   ; "
    664        "str   q8, [%0, #16] "
    665        : : "r"(&block[0]) : "memory", "v7", "v8"
    666                          );
    667     printf("UMAXV b8, v7.8b  ");
    668     showV128(&block[0]); printf("  ");
    669     showV128(&block[1]); printf("\n");
    670   }
    671 
    672 }
    673 
    674 
    675 void test_INS_general ( void )
    676 {
    677   V128 block[3];
    678 
    679   /* -- D[0..1] -- */
    680 
    681   memset(&block, 0x55, sizeof(block));
    682   block[1].u64[0] = randULong(TyD);
    683   __asm__ __volatile__(
    684      "ldr q7, [%0, #0]   ; "
    685      "ldr x19, [%0, #16] ; "
    686      "ins v7.d[0], x19   ; "
    687      "str q7, [%0, #32] "
    688      : : "r"(&block[0]) : "memory", "x19", "v7"
    689   );
    690   printf("INS v7.u64[0],x19  ");
    691   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    692   showV128(&block[2]); printf("\n");
    693 
    694   memset(&block, 0x55, sizeof(block));
    695   block[1].u64[0] = randULong(TyD);
    696   __asm__ __volatile__(
    697      "ldr q7, [%0, #0]   ; "
    698      "ldr x19, [%0, #16] ; "
    699      "ins v7.d[1], x19   ; "
    700      "str q7, [%0, #32] "
    701      : : "r"(&block[0]) : "memory", "x19", "v7"
    702   );
    703   printf("INS v7.d[1],x19  ");
    704   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    705   showV128(&block[2]); printf("\n");
    706 
    707   /* -- S[0..3] -- */
    708 
    709   memset(&block, 0x55, sizeof(block));
    710   block[1].u64[0] = randULong(TyS);
    711   __asm__ __volatile__(
    712      "ldr q7, [%0, #0]   ; "
    713      "ldr x19, [%0, #16] ; "
    714      "ins v7.s[0], w19   ; "
    715      "str q7, [%0, #32] "
    716      : : "r"(&block[0]) : "memory", "x19", "v7"
    717   );
    718   printf("INS v7.s[0],x19  ");
    719   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    720   showV128(&block[2]); printf("\n");
    721 
    722   memset(&block, 0x55, sizeof(block));
    723   block[1].u64[0] = randULong(TyS);
    724   __asm__ __volatile__(
    725      "ldr q7, [%0, #0]   ; "
    726      "ldr x19, [%0, #16] ; "
    727      "ins v7.s[1], w19   ; "
    728      "str q7, [%0, #32] "
    729      : : "r"(&block[0]) : "memory", "x19", "v7"
    730   );
    731   printf("INS v7.s[1],x19  ");
    732   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    733   showV128(&block[2]); printf("\n");
    734 
    735   memset(&block, 0x55, sizeof(block));
    736   block[1].u64[0] = randULong(TyS);
    737   __asm__ __volatile__(
    738      "ldr q7, [%0, #0]   ; "
    739      "ldr x19, [%0, #16] ; "
    740      "ins v7.s[2], w19   ; "
    741      "str q7, [%0, #32] "
    742      : : "r"(&block[0]) : "memory", "x19", "v7"
    743   );
    744   printf("INS v7.s[2],x19  ");
    745   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    746   showV128(&block[2]); printf("\n");
    747 
    748   memset(&block, 0x55, sizeof(block));
    749   block[1].u64[0] = randULong(TyS);
    750   __asm__ __volatile__(
    751      "ldr q7, [%0, #0]   ; "
    752      "ldr x19, [%0, #16] ; "
    753      "ins v7.s[3], w19   ; "
    754      "str q7, [%0, #32] "
    755      : : "r"(&block[0]) : "memory", "x19", "v7"
    756   );
    757   printf("INS v7.s[3],x19  ");
    758   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    759   showV128(&block[2]); printf("\n");
    760 
    761   /* -- H[0..7] -- */
    762 
    763   memset(&block, 0x55, sizeof(block));
    764   block[1].u64[0] = randULong(TyH);
    765   __asm__ __volatile__(
    766      "ldr q7, [%0, #0]   ; "
    767      "ldr x19, [%0, #16] ; "
    768      "ins v7.h[0], w19   ; "
    769      "str q7, [%0, #32] "
    770      : : "r"(&block[0]) : "memory", "x19", "v7"
    771   );
    772   printf("INS v7.h[0],x19  ");
    773   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    774   showV128(&block[2]); printf("\n");
    775 
    776   memset(&block, 0x55, sizeof(block));
    777   block[1].u64[0] = randULong(TyH);
    778   __asm__ __volatile__(
    779      "ldr q7, [%0, #0]   ; "
    780      "ldr x19, [%0, #16] ; "
    781      "ins v7.h[1], w19   ; "
    782      "str q7, [%0, #32] "
    783      : : "r"(&block[0]) : "memory", "x19", "v7"
    784   );
    785   printf("INS v7.h[1],x19  ");
    786   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    787   showV128(&block[2]); printf("\n");
    788 
    789   memset(&block, 0x55, sizeof(block));
    790   block[1].u64[0] = randULong(TyH);
    791   __asm__ __volatile__(
    792      "ldr q7, [%0, #0]   ; "
    793      "ldr x19, [%0, #16] ; "
    794      "ins v7.h[2], w19   ; "
    795      "str q7, [%0, #32] "
    796      : : "r"(&block[0]) : "memory", "x19", "v7"
    797   );
    798   printf("INS v7.h[2],x19  ");
    799   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    800   showV128(&block[2]); printf("\n");
    801 
    802   memset(&block, 0x55, sizeof(block));
    803   block[1].u64[0] = randULong(TyH);
    804   __asm__ __volatile__(
    805      "ldr q7, [%0, #0]   ; "
    806      "ldr x19, [%0, #16] ; "
    807      "ins v7.h[3], w19   ; "
    808      "str q7, [%0, #32] "
    809      : : "r"(&block[0]) : "memory", "x19", "v7"
    810   );
    811   printf("INS v7.h[3],x19  ");
    812   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    813   showV128(&block[2]); printf("\n");
    814 
    815   memset(&block, 0x55, sizeof(block));
    816   block[1].u64[0] = randULong(TyH);
    817   __asm__ __volatile__(
    818      "ldr q7, [%0, #0]   ; "
    819      "ldr x19, [%0, #16] ; "
    820      "ins v7.h[4], w19   ; "
    821      "str q7, [%0, #32] "
    822      : : "r"(&block[0]) : "memory", "x19", "v7"
    823   );
    824   printf("INS v7.h[4],x19  ");
    825   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    826   showV128(&block[2]); printf("\n");
    827 
    828   memset(&block, 0x55, sizeof(block));
    829   block[1].u64[0] = randULong(TyH);
    830   __asm__ __volatile__(
    831      "ldr q7, [%0, #0]   ; "
    832      "ldr x19, [%0, #16] ; "
    833      "ins v7.h[5], w19   ; "
    834      "str q7, [%0, #32] "
    835      : : "r"(&block[0]) : "memory", "x19", "v7"
    836   );
    837   printf("INS v7.h[5],x19  ");
    838   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    839   showV128(&block[2]); printf("\n");
    840 
    841   memset(&block, 0x55, sizeof(block));
    842   block[1].u64[0] = randULong(TyH);
    843   __asm__ __volatile__(
    844      "ldr q7, [%0, #0]   ; "
    845      "ldr x19, [%0, #16] ; "
    846      "ins v7.h[6], w19   ; "
    847      "str q7, [%0, #32] "
    848      : : "r"(&block[0]) : "memory", "x19", "v7"
    849   );
    850   printf("INS v7.h[6],x19  ");
    851   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    852   showV128(&block[2]); printf("\n");
    853 
    854   memset(&block, 0x55, sizeof(block));
    855   block[1].u64[0] = randULong(TyH);
    856   __asm__ __volatile__(
    857      "ldr q7, [%0, #0]   ; "
    858      "ldr x19, [%0, #16] ; "
    859      "ins v7.h[7], w19   ; "
    860      "str q7, [%0, #32] "
    861      : : "r"(&block[0]) : "memory", "x19", "v7"
    862   );
    863   printf("INS v7.h[7],x19  ");
    864   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    865   showV128(&block[2]); printf("\n");
    866 
    867   /* -- B[0,15] -- */
    868 
    869   memset(&block, 0x55, sizeof(block));
    870   block[1].u64[0] = randULong(TyB);
    871   __asm__ __volatile__(
    872      "ldr q7, [%0, #0]   ; "
    873      "ldr x19, [%0, #16] ; "
    874      "ins v7.b[0], w19   ; "
    875      "str q7, [%0, #32] "
    876      : : "r"(&block[0]) : "memory", "x19", "v7"
    877   );
    878   printf("INS v7.b[0],x19  ");
    879   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    880   showV128(&block[2]); printf("\n");
    881 
    882   memset(&block, 0x55, sizeof(block));
    883   block[1].u64[0] = randULong(TyB);
    884   __asm__ __volatile__(
    885      "ldr q7, [%0, #0]   ; "
    886      "ldr x19, [%0, #16] ; "
    887      "ins v7.b[15], w19   ; "
    888      "str q7, [%0, #32] "
    889      : : "r"(&block[0]) : "memory", "x19", "v7"
    890   );
    891   printf("INS v7.b[15],x19 ");
    892   showV128(&block[0]); printf("  %016llx  ", block[1].u64[0]);
    893   showV128(&block[2]); printf("\n");
    894 }
    895 
    896 
    897 
    898 void test_SMINV ( void )
    899 {
    900   int i;
    901   V128 block[2];
    902 
    903   /* -- 4s -- */
    904 
    905   for (i = 0; i < 10; i++) {
    906     memset(&block, 0x55, sizeof(block));
    907     randV128(&block[0], TyS);
    908     randV128(&block[1], TyS);
    909     __asm__ __volatile__(
    910        "ldr   q7, [%0, #0]   ; "
    911        "sminv s8, v7.4s   ; "
    912        "str   q8, [%0, #16] "
    913        : : "r"(&block[0]) : "memory", "v7", "v8"
    914                          );
    915     printf("SMINV v8, v7.4s  ");
    916     showV128(&block[0]); printf("  ");
    917     showV128(&block[1]); printf("\n");
    918   }
    919 
    920   /* -- 8h -- */
    921 
    922   for (i = 0; i < 10; i++) {
    923     memset(&block, 0x55, sizeof(block));
    924     randV128(&block[0], TyH);
    925     randV128(&block[1], TyH);
    926     __asm__ __volatile__(
    927        "ldr   q7, [%0, #0]   ; "
    928        "sminv h8, v7.8h   ; "
    929        "str   q8, [%0, #16] "
    930        : : "r"(&block[0]) : "memory", "v7", "v8"
    931                          );
    932     printf("SMINV h8, v7.8h  ");
    933     showV128(&block[0]); printf("  ");
    934     showV128(&block[1]); printf("\n");
    935   }
    936 
    937   /* -- 4h -- */
    938 
    939   for (i = 0; i < 10; i++) {
    940     memset(&block, 0x55, sizeof(block));
    941     randV128(&block[0], TyH);
    942     randV128(&block[1], TyH);
    943     __asm__ __volatile__(
    944        "ldr   q7, [%0, #0]   ; "
    945        "sminv h8, v7.4h   ; "
    946        "str   q8, [%0, #16] "
    947        : : "r"(&block[0]) : "memory", "v7", "v8"
    948                          );
    949     printf("SMINV h8, v7.4h  ");
    950     showV128(&block[0]); printf("  ");
    951     showV128(&block[1]); printf("\n");
    952   }
    953 
    954   /* -- 16b -- */
    955 
    956   for (i = 0; i < 10; i++) {
    957     memset(&block, 0x55, sizeof(block));
    958     randV128(&block[0], TyB);
    959     randV128(&block[1], TyB);
    960     __asm__ __volatile__(
    961        "ldr   q7, [%0, #0]   ; "
    962        "sminv b8, v7.16b   ; "
    963        "str   q8, [%0, #16] "
    964        : : "r"(&block[0]) : "memory", "v7", "v8"
    965                          );
    966     printf("SMINV b8, v7.16b  ");
    967     showV128(&block[0]); printf("  ");
    968     showV128(&block[1]); printf("\n");
    969   }
    970 
    971   /* -- 8b -- */
    972 
    973   for (i = 0; i < 10; i++) {
    974     memset(&block, 0x55, sizeof(block));
    975     randV128(&block[0], TyB);
    976     randV128(&block[1], TyB);
    977     __asm__ __volatile__(
    978        "ldr   q7, [%0, #0]   ; "
    979        "sminv b8, v7.8b   ; "
    980        "str   q8, [%0, #16] "
    981        : : "r"(&block[0]) : "memory", "v7", "v8"
    982                          );
    983     printf("SMINV b8, v7.8b  ");
    984     showV128(&block[0]); printf("  ");
    985     showV128(&block[1]); printf("\n");
    986   }
    987 
    988 }
    989 
    990 
    991 void test_SMAXV ( void )
    992 {
    993   int i;
    994   V128 block[2];
    995 
    996   /* -- 4s -- */
    997 
    998   for (i = 0; i < 10; i++) {
    999     memset(&block, 0x55, sizeof(block));
   1000     randV128(&block[0], TyS);
   1001     randV128(&block[1], TyS);
   1002     __asm__ __volatile__(
   1003        "ldr   q7, [%0, #0]   ; "
   1004        "smaxv s8, v7.4s   ; "
   1005        "str   q8, [%0, #16] "
   1006        : : "r"(&block[0]) : "memory", "v7", "v8"
   1007                          );
   1008     printf("SMAXV v8, v7.4s  ");
   1009     showV128(&block[0]); printf("  ");
   1010     showV128(&block[1]); printf("\n");
   1011   }
   1012 
   1013   /* -- 8h -- */
   1014 
   1015   for (i = 0; i < 10; i++) {
   1016     memset(&block, 0x55, sizeof(block));
   1017     randV128(&block[0], TyH);
   1018     randV128(&block[1], TyH);
   1019     __asm__ __volatile__(
   1020        "ldr   q7, [%0, #0]   ; "
   1021        "smaxv h8, v7.8h   ; "
   1022        "str   q8, [%0, #16] "
   1023        : : "r"(&block[0]) : "memory", "v7", "v8"
   1024                          );
   1025     printf("SMAXV h8, v7.8h  ");
   1026     showV128(&block[0]); printf("  ");
   1027     showV128(&block[1]); printf("\n");
   1028   }
   1029 
   1030   /* -- 4h -- */
   1031 
   1032   for (i = 0; i < 10; i++) {
   1033     memset(&block, 0x55, sizeof(block));
   1034     randV128(&block[0], TyH);
   1035     randV128(&block[1], TyH);
   1036     __asm__ __volatile__(
   1037        "ldr   q7, [%0, #0]   ; "
   1038        "smaxv h8, v7.4h   ; "
   1039        "str   q8, [%0, #16] "
   1040        : : "r"(&block[0]) : "memory", "v7", "v8"
   1041                          );
   1042     printf("SMAXV h8, v7.4h  ");
   1043     showV128(&block[0]); printf("  ");
   1044     showV128(&block[1]); printf("\n");
   1045   }
   1046 
   1047   /* -- 16b -- */
   1048 
   1049   for (i = 0; i < 10; i++) {
   1050     memset(&block, 0x55, sizeof(block));
   1051     randV128(&block[0], TyB);
   1052     randV128(&block[1], TyB);
   1053     __asm__ __volatile__(
   1054        "ldr   q7, [%0, #0]   ; "
   1055        "smaxv b8, v7.16b   ; "
   1056        "str   q8, [%0, #16] "
   1057        : : "r"(&block[0]) : "memory", "v7", "v8"
   1058                          );
   1059     printf("SMAXV b8, v7.16b  ");
   1060     showV128(&block[0]); printf("  ");
   1061     showV128(&block[1]); printf("\n");
   1062   }
   1063 
   1064   /* -- 8b -- */
   1065 
   1066   for (i = 0; i < 10; i++) {
   1067     memset(&block, 0x55, sizeof(block));
   1068     randV128(&block[0], TyB);
   1069     randV128(&block[1], TyB);
   1070     __asm__ __volatile__(
   1071        "ldr   q7, [%0, #0]   ; "
   1072        "smaxv b8, v7.8b   ; "
   1073        "str   q8, [%0, #16] "
   1074        : : "r"(&block[0]) : "memory", "v7", "v8"
   1075                          );
   1076     printf("SMAXV b8, v7.8b  ");
   1077     showV128(&block[0]); printf("  ");
   1078     showV128(&block[1]); printf("\n");
   1079   }
   1080 
   1081 }
   1082 
   1083 
   1084 //======== FCCMP_D ========//
   1085 
   1086 #define GEN_test_FCCMP_D_D_0xF_EQ \
   1087   __attribute__((noinline)) static void test_FCCMP_D_D_0xF_EQ ( void ) \
   1088   { \
   1089      V128 block[4]; \
   1090      randBlock_Doubles(&block[0], 3); \
   1091      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1092      showBlock("FCCMP_D_D_0xF_EQ before", &block[0], 4); \
   1093      __asm__ __volatile__( \
   1094         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1095         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1096         "fccmp d29, d11, #0xf, eq; " \
   1097         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1098         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1099         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1100      ); \
   1101      showBlock("FCCMP_D_D_0xF_EQ after", &block[0], 4); \
   1102      printf("\n"); \
   1103   }
   1104 
   1105 #define GEN_test_FCCMP_D_D_0xF_NE \
   1106   __attribute__((noinline)) static void test_FCCMP_D_D_0xF_NE ( void ) \
   1107   { \
   1108      V128 block[4]; \
   1109      randBlock_Doubles(&block[0], 3); \
   1110      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1111      showBlock("FCCMP_D_D_0xF_NE before", &block[0], 4); \
   1112      __asm__ __volatile__( \
   1113         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1114         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1115         "fccmp d29, d11, #0xf, ne; " \
   1116         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1117         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1118         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1119      ); \
   1120      showBlock("FCCMP_D_D_0xF_NE after", &block[0], 4); \
   1121      printf("\n"); \
   1122   }
   1123 
   1124 #define GEN_test_FCCMP_D_D_0x0_EQ \
   1125   __attribute__((noinline)) static void test_FCCMP_D_D_0x0_EQ ( void ) \
   1126   { \
   1127      V128 block[4]; \
   1128      randBlock_Doubles(&block[0], 3); \
   1129      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1130      showBlock("FCCMP_D_D_0x0_EQ before", &block[0], 4); \
   1131      __asm__ __volatile__( \
   1132         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1133         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1134         "fccmp d29, d11, #0x0, eq; " \
   1135         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1136         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1137         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1138      ); \
   1139      showBlock("FCCMP_D_D_0x0_EQ after", &block[0], 4); \
   1140      printf("\n"); \
   1141   }
   1142 
   1143 #define GEN_test_FCCMP_D_D_0x0_NE \
   1144   __attribute__((noinline)) static void test_FCCMP_D_D_0x0_NE ( void ) \
   1145   { \
   1146      V128 block[4]; \
   1147      randBlock_Doubles(&block[0], 3); \
   1148      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1149      showBlock("FCCMP_D_D_0x0_NE before", &block[0], 4); \
   1150      __asm__ __volatile__( \
   1151         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1152         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1153         "fccmp d29, d11, #0x0, ne; " \
   1154         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1155         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1156         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1157      ); \
   1158      showBlock("FCCMP_D_D_0x0_NE after", &block[0], 4); \
   1159      printf("\n"); \
   1160   }
   1161 
   1162 //======== FCCMP_S ========//
   1163 
   1164 #define GEN_test_FCCMP_S_S_0xF_EQ \
   1165   __attribute__((noinline)) static void test_FCCMP_S_S_0xF_EQ ( void ) \
   1166   { \
   1167      V128 block[4]; \
   1168      randBlock_Floats(&block[0], 3); \
   1169      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1170      showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
   1171      __asm__ __volatile__( \
   1172         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1173         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1174         "fccmp s29, s11, #0xf, eq; " \
   1175         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1176         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1177         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1178      ); \
   1179      showBlock("FCCMP_S_S_0xF_EQ after", &block[0], 4); \
   1180      printf("\n"); \
   1181   }
   1182 
   1183 #define GEN_test_FCCMP_S_S_0xF_NE \
   1184   __attribute__((noinline)) static void test_FCCMP_S_S_0xF_NE ( void ) \
   1185   { \
   1186      V128 block[4]; \
   1187      randBlock_Floats(&block[0], 3); \
   1188      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1189      showBlock("FCCMP_S_S_0xF_NE before", &block[0], 4); \
   1190      __asm__ __volatile__( \
   1191         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1192         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1193         "fccmp s29, s11, #0xf, ne; " \
   1194         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1195         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1196         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1197      ); \
   1198      showBlock("FCCMP_S_S_0xF_NE after", &block[0], 4); \
   1199      printf("\n"); \
   1200   }
   1201 
   1202 #define GEN_test_FCCMP_S_S_0x0_EQ \
   1203   __attribute__((noinline)) static void test_FCCMP_S_S_0x0_EQ ( void ) \
   1204   { \
   1205      V128 block[4]; \
   1206      randBlock_Floats(&block[0], 3); \
   1207      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1208      showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
   1209      __asm__ __volatile__( \
   1210         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1211         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1212         "fccmp s29, s11, #0x0, eq; " \
   1213         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1214         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1215         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1216      ); \
   1217      showBlock("FCCMP_S_S_0x0_EQ after", &block[0], 4); \
   1218      printf("\n"); \
   1219   }
   1220 
   1221 #define GEN_test_FCCMP_S_S_0x0_NE \
   1222   __attribute__((noinline)) static void test_FCCMP_S_S_0x0_NE ( void ) \
   1223   { \
   1224      V128 block[4]; \
   1225      randBlock_Floats(&block[0], 3); \
   1226      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1227      showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
   1228      __asm__ __volatile__( \
   1229         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1230         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1231         "fccmp s29, s11, #0x0, ne; " \
   1232         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1233         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1234         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1235      ); \
   1236      showBlock("FCCMP_S_S_0x0_NE after", &block[0], 4); \
   1237      printf("\n"); \
   1238   }
   1239 
   1240 //======== FCCMPE_D ========//
   1241 
   1242 #define GEN_test_FCCMPE_D_D_0xF_EQ \
   1243   __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_EQ ( void ) \
   1244   { \
   1245      V128 block[4]; \
   1246      randBlock_Doubles(&block[0], 3); \
   1247      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1248      showBlock("FCCMPE_D_D_0xF_EQ before", &block[0], 4); \
   1249      __asm__ __volatile__( \
   1250         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1251         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1252         "fccmpe d29, d11, #0xf, eq; " \
   1253         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1254         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1255         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1256      ); \
   1257      showBlock("FCCMPE_D_D_0xF_EQ after", &block[0], 4); \
   1258      printf("\n"); \
   1259   }
   1260 
   1261 #define GEN_test_FCCMPE_D_D_0xF_NE \
   1262   __attribute__((noinline)) static void test_FCCMPE_D_D_0xF_NE ( void ) \
   1263   { \
   1264      V128 block[4]; \
   1265      randBlock_Doubles(&block[0], 3); \
   1266      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1267      showBlock("FCCMPE_D_D_0xF_NE before", &block[0], 4); \
   1268      __asm__ __volatile__( \
   1269         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1270         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1271         "fccmpe d29, d11, #0xf, ne; " \
   1272         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1273         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1274         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1275      ); \
   1276      showBlock("FCCMPE_D_D_0xF_NE after", &block[0], 4); \
   1277      printf("\n"); \
   1278   }
   1279 
   1280 #define GEN_test_FCCMPE_D_D_0x0_EQ \
   1281   __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_EQ ( void ) \
   1282   { \
   1283      V128 block[4]; \
   1284      randBlock_Doubles(&block[0], 3); \
   1285      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1286      showBlock("FCCMPE_D_D_0x0_EQ before", &block[0], 4); \
   1287      __asm__ __volatile__( \
   1288         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1289         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1290         "fccmpe d29, d11, #0x0, eq; " \
   1291         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1292         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1293         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1294      ); \
   1295      showBlock("FCCMPE_D_D_0x0_EQ after", &block[0], 4); \
   1296      printf("\n"); \
   1297   }
   1298 
   1299 #define GEN_test_FCCMPE_D_D_0x0_NE \
   1300   __attribute__((noinline)) static void test_FCCMPE_D_D_0x0_NE ( void ) \
   1301   { \
   1302      V128 block[4]; \
   1303      randBlock_Doubles(&block[0], 3); \
   1304      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1305      showBlock("FCCMPE_D_D_0x0_NE before", &block[0], 4); \
   1306      __asm__ __volatile__( \
   1307         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1308         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1309         "fccmpe d29, d11, #0x0, ne; " \
   1310         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1311         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1312         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1313      ); \
   1314      showBlock("FCCMPE_D_D_0x0_NE after", &block[0], 4); \
   1315      printf("\n"); \
   1316   }
   1317 
   1318 //======== FCCMPE_S ========//
   1319 
   1320 #define GEN_test_FCCMPE_S_S_0xF_EQ \
   1321   __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_EQ ( void ) \
   1322   { \
   1323      V128 block[4]; \
   1324      randBlock_Floats(&block[0], 3); \
   1325      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1326      showBlock("FCCMP_S_S_0xF_EQ before", &block[0], 4); \
   1327      __asm__ __volatile__( \
   1328         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1329         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1330         "fccmpe s29, s11, #0xf, eq; " \
   1331         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1332         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1333         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1334      ); \
   1335      showBlock("FCCMPE_S_S_0xF_EQ after", &block[0], 4); \
   1336      printf("\n"); \
   1337   }
   1338 
   1339 #define GEN_test_FCCMPE_S_S_0xF_NE \
   1340   __attribute__((noinline)) static void test_FCCMPE_S_S_0xF_NE ( void ) \
   1341   { \
   1342      V128 block[4]; \
   1343      randBlock_Floats(&block[0], 3); \
   1344      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1345      showBlock("FCCMPE_S_S_0xF_NE before", &block[0], 4); \
   1346      __asm__ __volatile__( \
   1347         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1348         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1349         "fccmpe s29, s11, #0xf, ne; " \
   1350         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1351         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1352         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1353      ); \
   1354      showBlock("FCCMPE_S_S_0xF_NE after", &block[0], 4); \
   1355      printf("\n"); \
   1356   }
   1357 
   1358 #define GEN_test_FCCMPE_S_S_0x0_EQ \
   1359   __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_EQ ( void ) \
   1360   { \
   1361      V128 block[4]; \
   1362      randBlock_Floats(&block[0], 3); \
   1363      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1364      showBlock("FCCMP_S_S_0x0_EQ before", &block[0], 4); \
   1365      __asm__ __volatile__( \
   1366         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1367         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1368         "fccmpe s29, s11, #0x0, eq; " \
   1369         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1370         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1371         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1372      ); \
   1373      showBlock("FCCMPE_S_S_0x0_EQ after", &block[0], 4); \
   1374      printf("\n"); \
   1375   }
   1376 
   1377 #define GEN_test_FCCMPE_S_S_0x0_NE \
   1378   __attribute__((noinline)) static void test_FCCMPE_S_S_0x0_NE ( void ) \
   1379   { \
   1380      V128 block[4]; \
   1381      randBlock_Floats(&block[0], 3); \
   1382      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1383      showBlock("FCCMP_S_S_0x0_NE before", &block[0], 4); \
   1384      __asm__ __volatile__( \
   1385         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1386         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1387         "fccmpe s29, s11, #0x0, ne; " \
   1388         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1389         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1390         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1391      ); \
   1392      showBlock("FCCMPE_S_S_0x0_NE after", &block[0], 4); \
   1393      printf("\n"); \
   1394   }
   1395 
   1396 //======== FCMEQ_D_D_D ========//
   1397 
   1398 #define GEN_test_FCMEQ_D_D_D \
   1399   __attribute__((noinline)) static void test_FCMEQ_D_D_D ( void ) \
   1400   { \
   1401      V128 block[4]; \
   1402      randBlock_Doubles(&block[0], 3); \
   1403      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1404      showBlock("FCMEQ_D_D_D before", &block[0], 4); \
   1405      __asm__ __volatile__( \
   1406         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1407         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1408         "fcmeq d29, d11, d9; " \
   1409         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1410         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1411         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1412      ); \
   1413      showBlock("FCMEQ_D_D_D after", &block[0], 4); \
   1414      printf("\n"); \
   1415   }
   1416 
   1417 //======== FCMEQ_S_S_S ========//
   1418 
   1419 #define GEN_test_FCMEQ_S_S_S \
   1420   __attribute__((noinline)) static void test_FCMEQ_S_S_S ( void ) \
   1421   { \
   1422      V128 block[4]; \
   1423      randBlock_Floats(&block[0], 3); \
   1424      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1425      showBlock("FCMEQ_S_S_S before", &block[0], 4); \
   1426      __asm__ __volatile__( \
   1427         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1428         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1429         "fcmeq s29, s11, s9; " \
   1430         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1431         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1432         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1433      ); \
   1434      showBlock("FCMEQ_S_S_S after", &block[0], 4); \
   1435      printf("\n"); \
   1436   }
   1437 
   1438 //======== FCMGE_D_D_D ========//
   1439 
   1440 #define GEN_test_FCMGE_D_D_D \
   1441   __attribute__((noinline)) static void test_FCMGE_D_D_D ( void ) \
   1442   { \
   1443      V128 block[4]; \
   1444      randBlock_Doubles(&block[0], 3); \
   1445      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1446      showBlock("FCMGE_D_D_D before", &block[0], 4); \
   1447      __asm__ __volatile__( \
   1448         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1449         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1450         "fcmge d29, d11, d9; " \
   1451         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1452         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1453         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1454      ); \
   1455      showBlock("FCMGE_D_D_D after", &block[0], 4); \
   1456      printf("\n"); \
   1457   }
   1458 
   1459 //======== FCMGE_S_S_S ========//
   1460 
   1461 #define GEN_test_FCMGE_S_S_S \
   1462   __attribute__((noinline)) static void test_FCMGE_S_S_S ( void ) \
   1463   { \
   1464      V128 block[4]; \
   1465      randBlock_Floats(&block[0], 3); \
   1466      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1467      showBlock("FCMGE_S_S_S before", &block[0], 4); \
   1468      __asm__ __volatile__( \
   1469         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1470         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1471         "fcmge s29, s11, s9; " \
   1472         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1473         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1474         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1475      ); \
   1476      showBlock("FCMGE_S_S_S after", &block[0], 4); \
   1477      printf("\n"); \
   1478   }
   1479 
   1480 //======== FCMGT_D_D_D ========//
   1481 
   1482 #define GEN_test_FCMGT_D_D_D \
   1483   __attribute__((noinline)) static void test_FCMGT_D_D_D ( void ) \
   1484   { \
   1485      V128 block[4]; \
   1486      randBlock_Doubles(&block[0], 3); \
   1487      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1488      showBlock("FCMGT_D_D_D before", &block[0], 4); \
   1489      __asm__ __volatile__( \
   1490         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1491         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1492         "fcmgt d29, d11, d9; " \
   1493         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1494         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1495         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1496      ); \
   1497      showBlock("FCMGT_D_D_D after", &block[0], 4); \
   1498      printf("\n"); \
   1499   }
   1500 
   1501 //======== FCMGT_S_S_S ========//
   1502 
   1503 #define GEN_test_FCMGT_S_S_S \
   1504   __attribute__((noinline)) static void test_FCMGT_S_S_S ( void ) \
   1505   { \
   1506      V128 block[4]; \
   1507      randBlock_Floats(&block[0], 3); \
   1508      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1509      showBlock("FCMGT_S_S_S before", &block[0], 4); \
   1510      __asm__ __volatile__( \
   1511         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1512         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1513         "fcmgt s29, s11, s9; " \
   1514         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1515         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1516         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1517      ); \
   1518      showBlock("FCMGT_S_S_S after", &block[0], 4); \
   1519      printf("\n"); \
   1520   }
   1521 
   1522 //======== FACGT_D_D_D ========//
   1523 
   1524 #define GEN_test_FACGT_D_D_D \
   1525   __attribute__((noinline)) static void test_FACGT_D_D_D ( void ) \
   1526   { \
   1527      V128 block[4]; \
   1528      randBlock_Doubles(&block[0], 3); \
   1529      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1530      showBlock("FACGT_D_D_D before", &block[0], 4); \
   1531      __asm__ __volatile__( \
   1532         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1533         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1534         "facgt d29, d11, d9; " \
   1535         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1536         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1537         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1538      ); \
   1539      showBlock("FACGT_D_D_D after", &block[0], 4); \
   1540      printf("\n"); \
   1541   }
   1542 
   1543 //======== FACGT_S_S_S ========//
   1544 
   1545 #define GEN_test_FACGT_S_S_S \
   1546   __attribute__((noinline)) static void test_FACGT_S_S_S ( void ) \
   1547   { \
   1548      V128 block[4]; \
   1549      randBlock_Floats(&block[0], 3); \
   1550      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1551      showBlock("FACGT_S_S_S before", &block[0], 4); \
   1552      __asm__ __volatile__( \
   1553         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1554         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1555         "facgt s29, s11, s9; " \
   1556         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1557         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1558         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1559      ); \
   1560      showBlock("FACGT_S_S_S after", &block[0], 4); \
   1561      printf("\n"); \
   1562   }
   1563 
   1564 //======== FACGE_D_D_D ========//
   1565 
        /* Expands to test_FACGE_D_D_D(), which performs one execution of
           "facge d29, d11, d9" (FP absolute compare greater than or equal,
           scalar double-precision form).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  showBlock is called on all four entries before
           and after the asm. */
   1566 #define GEN_test_FACGE_D_D_D \
   1567   __attribute__((noinline)) static void test_FACGE_D_D_D ( void ) \
   1568   { \
   1569      V128 block[4]; \
   1570      randBlock_Doubles(&block[0], 3); \
   1571      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1572      showBlock("FACGE_D_D_D before", &block[0], 4); \
   1573      __asm__ __volatile__( \
   1574         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1575         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1576         "facge d29, d11, d9; " \
   1577         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1578         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1579         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1580      ); \
   1581      showBlock("FACGE_D_D_D after", &block[0], 4); \
   1582      printf("\n"); \
   1583   }
   1584 
   1585 //======== FACGE_S_S_S ========//
   1586 
        /* Expands to test_FACGE_S_S_S(), which performs one execution of
           "facge s29, s11, s9" (FP absolute compare greater than or equal,
           scalar single-precision form).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  showBlock is called on all four entries before
           and after the asm. */
   1587 #define GEN_test_FACGE_S_S_S \
   1588   __attribute__((noinline)) static void test_FACGE_S_S_S ( void ) \
   1589   { \
   1590      V128 block[4]; \
   1591      randBlock_Floats(&block[0], 3); \
   1592      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1593      showBlock("FACGE_S_S_S before", &block[0], 4); \
   1594      __asm__ __volatile__( \
   1595         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1596         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1597         "facge s29, s11, s9; " \
   1598         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1599         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1600         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1601      ); \
   1602      showBlock("FACGE_S_S_S after", &block[0], 4); \
   1603      printf("\n"); \
   1604   }
   1605 
   1606 //======== FCMEQ_D_D_Z ========//
   1607 
        /* Expands to test_FCMEQ_D_D_Z(), which performs one execution of
           "fcmeq d29, d11, #0" (FP compare equal to zero, scalar
           double-precision form).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1608 #define GEN_test_FCMEQ_D_D_Z \
   1609   __attribute__((noinline)) static void test_FCMEQ_D_D_Z ( void ) \
   1610   { \
   1611      V128 block[4]; \
   1612      randBlock_Doubles(&block[0], 3); \
   1613      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1614      showBlock("FCMEQ_D_D_Z before", &block[0], 4); \
   1615      __asm__ __volatile__( \
   1616         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1617         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1618         "fcmeq d29, d11, #0; " \
   1619         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1620         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1621         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1622      ); \
   1623      showBlock("FCMEQ_D_D_Z after", &block[0], 4); \
   1624      printf("\n"); \
   1625   }
   1626 
   1627 //======== FCMEQ_S_S_Z ========//
   1628 
        /* Expands to test_FCMEQ_S_S_Z(), which performs one execution of
           "fcmeq s29, s11, #0" (FP compare equal to zero, scalar
           single-precision form).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1629 #define GEN_test_FCMEQ_S_S_Z \
   1630   __attribute__((noinline)) static void test_FCMEQ_S_S_Z ( void ) \
   1631   { \
   1632      V128 block[4]; \
   1633      randBlock_Floats(&block[0], 3); \
   1634      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1635      showBlock("FCMEQ_S_S_Z before", &block[0], 4); \
   1636      __asm__ __volatile__( \
   1637         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1638         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1639         "fcmeq s29, s11, #0; " \
   1640         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1641         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1642         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1643      ); \
   1644      showBlock("FCMEQ_S_S_Z after", &block[0], 4); \
   1645      printf("\n"); \
   1646   }
   1647 
   1648 //======== FCMGE_D_D_Z ========//
   1649 
        /* Expands to test_FCMGE_D_D_Z(), which performs one execution of
           "fcmge d29, d11, #0" (FP compare greater than or equal to zero,
           scalar double-precision form).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1650 #define GEN_test_FCMGE_D_D_Z \
   1651   __attribute__((noinline)) static void test_FCMGE_D_D_Z ( void ) \
   1652   { \
   1653      V128 block[4]; \
   1654      randBlock_Doubles(&block[0], 3); \
   1655      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1656      showBlock("FCMGE_D_D_Z before", &block[0], 4); \
   1657      __asm__ __volatile__( \
   1658         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1659         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1660         "fcmge d29, d11, #0; " \
   1661         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1662         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1663         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1664      ); \
   1665      showBlock("FCMGE_D_D_Z after", &block[0], 4); \
   1666      printf("\n"); \
   1667   }
   1668 
   1669 //======== FCMGE_S_S_Z ========//
   1670 
        /* Expands to test_FCMGE_S_S_Z(), which performs one execution of
           "fcmge s29, s11, #0" (FP compare greater than or equal to zero,
           scalar single-precision form).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1671 #define GEN_test_FCMGE_S_S_Z \
   1672   __attribute__((noinline)) static void test_FCMGE_S_S_Z ( void ) \
   1673   { \
   1674      V128 block[4]; \
   1675      randBlock_Floats(&block[0], 3); \
   1676      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1677      showBlock("FCMGE_S_S_Z before", &block[0], 4); \
   1678      __asm__ __volatile__( \
   1679         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1680         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1681         "fcmge s29, s11, #0; " \
   1682         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1683         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1684         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1685      ); \
   1686      showBlock("FCMGE_S_S_Z after", &block[0], 4); \
   1687      printf("\n"); \
   1688   }
   1689 
   1690 //======== FCMGT_D_D_Z ========//
   1691 
        /* Expands to test_FCMGT_D_D_Z(), which performs one execution of
           "fcmgt d29, d11, #0" (FP compare greater than zero, scalar
           double-precision form).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1692 #define GEN_test_FCMGT_D_D_Z \
   1693   __attribute__((noinline)) static void test_FCMGT_D_D_Z ( void ) \
   1694   { \
   1695      V128 block[4]; \
   1696      randBlock_Doubles(&block[0], 3); \
   1697      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1698      showBlock("FCMGT_D_D_Z before", &block[0], 4); \
   1699      __asm__ __volatile__( \
   1700         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1701         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1702         "fcmgt d29, d11, #0; " \
   1703         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1704         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1705         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1706      ); \
   1707      showBlock("FCMGT_D_D_Z after", &block[0], 4); \
   1708      printf("\n"); \
   1709   }
   1710 
   1711 //======== FCMGT_S_S_Z ========//
   1712 
        /* Expands to test_FCMGT_S_S_Z(), which performs one execution of
           "fcmgt s29, s11, #0" (FP compare greater than zero, scalar
           single-precision form).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1713 #define GEN_test_FCMGT_S_S_Z \
   1714   __attribute__((noinline)) static void test_FCMGT_S_S_Z ( void ) \
   1715   { \
   1716      V128 block[4]; \
   1717      randBlock_Floats(&block[0], 3); \
   1718      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1719      showBlock("FCMGT_S_S_Z before", &block[0], 4); \
   1720      __asm__ __volatile__( \
   1721         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1722         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1723         "fcmgt s29, s11, #0; " \
   1724         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1725         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1726         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1727      ); \
   1728      showBlock("FCMGT_S_S_Z after", &block[0], 4); \
   1729      printf("\n"); \
   1730   }
   1731 
   1732 //======== FCMLE_D_D_Z ========//
   1733 
        /* Expands to test_FCMLE_D_D_Z(), which performs one execution of
           "fcmle d29, d11, #0" (FP compare less than or equal to zero,
           scalar double-precision form).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1734 #define GEN_test_FCMLE_D_D_Z \
   1735   __attribute__((noinline)) static void test_FCMLE_D_D_Z ( void ) \
   1736   { \
   1737      V128 block[4]; \
   1738      randBlock_Doubles(&block[0], 3); \
   1739      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1740      showBlock("FCMLE_D_D_Z before", &block[0], 4); \
   1741      __asm__ __volatile__( \
   1742         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1743         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1744         "fcmle d29, d11, #0; " \
   1745         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1746         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1747         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1748      ); \
   1749      showBlock("FCMLE_D_D_Z after", &block[0], 4); \
   1750      printf("\n"); \
   1751   }
   1752 
   1753 //======== FCMLE_S_S_Z ========//
   1754 
        /* Expands to test_FCMLE_S_S_Z(), which performs one execution of
           "fcmle s29, s11, #0" (FP compare less than or equal to zero,
           scalar single-precision form).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1755 #define GEN_test_FCMLE_S_S_Z \
   1756   __attribute__((noinline)) static void test_FCMLE_S_S_Z ( void ) \
   1757   { \
   1758      V128 block[4]; \
   1759      randBlock_Floats(&block[0], 3); \
   1760      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1761      showBlock("FCMLE_S_S_Z before", &block[0], 4); \
   1762      __asm__ __volatile__( \
   1763         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1764         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1765         "fcmle s29, s11, #0; " \
   1766         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1767         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1768         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1769      ); \
   1770      showBlock("FCMLE_S_S_Z after", &block[0], 4); \
   1771      printf("\n"); \
   1772   }
   1773 
   1774 //======== FCMLT_D_D_Z ========//
   1775 
        /* Expands to test_FCMLT_D_D_Z(), which performs one execution of
           "fcmlt d29, d11, #0" (FP compare less than zero, scalar
           double-precision form).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1776 #define GEN_test_FCMLT_D_D_Z \
   1777   __attribute__((noinline)) static void test_FCMLT_D_D_Z ( void ) \
   1778   { \
   1779      V128 block[4]; \
   1780      randBlock_Doubles(&block[0], 3); \
   1781      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1782      showBlock("FCMLT_D_D_Z before", &block[0], 4); \
   1783      __asm__ __volatile__( \
   1784         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1785         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1786         "fcmlt d29, d11, #0; " \
   1787         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1788         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1789         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1790      ); \
   1791      showBlock("FCMLT_D_D_Z after", &block[0], 4); \
   1792      printf("\n"); \
   1793   }
   1794 
   1795 //======== FCMLT_S_S_Z ========//
   1796 
        /* Expands to test_FCMLT_S_S_Z(), which performs one execution of
           "fcmlt s29, s11, #0" (FP compare less than zero, scalar
           single-precision form).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  q9 is loaded and stored but is not an operand of
           the instruction under test.  showBlock is called on all four
           entries before and after the asm. */
   1797 #define GEN_test_FCMLT_S_S_Z \
   1798   __attribute__((noinline)) static void test_FCMLT_S_S_Z ( void ) \
   1799   { \
   1800      V128 block[4]; \
   1801      randBlock_Floats(&block[0], 3); \
   1802      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1803      showBlock("FCMLT_S_S_Z before", &block[0], 4); \
   1804      __asm__ __volatile__( \
   1805         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1806         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1807         "fcmlt s29, s11, #0; " \
   1808         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1809         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1810         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1811      ); \
   1812      showBlock("FCMLT_S_S_Z after", &block[0], 4); \
   1813      printf("\n"); \
   1814   }
   1815 
   1816 //======== FCMP_D_D ========//
   1817 
        /* Expands to test_FCMP_D_D(), which performs one execution of
           "fcmp d29, d11" (FP compare, double-precision registers).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMP reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q9 is
           loaded and stored but is not an operand.  showBlock is called on
           all four entries before and after the asm. */
   1818 #define GEN_test_FCMP_D_D \
   1819   __attribute__((noinline)) static void test_FCMP_D_D ( void ) \
   1820   { \
   1821      V128 block[4]; \
   1822      randBlock_Doubles(&block[0], 3); \
   1823      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1824      showBlock("FCMP_D_D before", &block[0], 4); \
   1825      __asm__ __volatile__( \
   1826         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1827         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1828         "fcmp d29, d11; " \
   1829         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1830         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1831         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1832      ); \
   1833      showBlock("FCMP_D_D after", &block[0], 4); \
   1834      printf("\n"); \
   1835   }
   1836 
   1837 //======== FCMP_S_S ========//
   1838 
        /* Expands to test_FCMP_S_S(), which performs one execution of
           "fcmp s29, s11" (FP compare, single-precision registers).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMP reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q9 is
           loaded and stored but is not an operand.  showBlock is called on
           all four entries before and after the asm. */
   1839 #define GEN_test_FCMP_S_S \
   1840   __attribute__((noinline)) static void test_FCMP_S_S ( void ) \
   1841   { \
   1842      V128 block[4]; \
   1843      randBlock_Floats(&block[0], 3); \
   1844      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1845      showBlock("FCMP_S_S before", &block[0], 4); \
   1846      __asm__ __volatile__( \
   1847         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1848         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1849         "fcmp s29, s11; " \
   1850         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1851         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1852         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1853      ); \
   1854      showBlock("FCMP_S_S after", &block[0], 4); \
   1855      printf("\n"); \
   1856   }
   1857 
   1858 //======== FCMPE_D_D ========//
   1859 
        /* Expands to test_FCMPE_D_D(), which performs one execution of
           "fcmpe d29, d11" (FP signaling compare, double-precision
           registers).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMPE reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q9 is
           loaded and stored but is not an operand.  showBlock is called on
           all four entries before and after the asm. */
   1860 #define GEN_test_FCMPE_D_D \
   1861   __attribute__((noinline)) static void test_FCMPE_D_D ( void ) \
   1862   { \
   1863      V128 block[4]; \
   1864      randBlock_Doubles(&block[0], 3); \
   1865      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1866      showBlock("FCMPE_D_D before", &block[0], 4); \
   1867      __asm__ __volatile__( \
   1868         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1869         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1870         "fcmpe d29, d11; " \
   1871         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1872         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1873         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1874      ); \
   1875      showBlock("FCMPE_D_D after", &block[0], 4); \
   1876      printf("\n"); \
   1877   }
   1878 
   1879 //======== FCMPE_S_S ========//
   1880 
        /* Expands to test_FCMPE_S_S(), which performs one execution of
           "fcmpe s29, s11" (FP signaling compare, single-precision
           registers).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMPE reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q9 is
           loaded and stored but is not an operand.  showBlock is called on
           all four entries before and after the asm. */
   1881 #define GEN_test_FCMPE_S_S \
   1882   __attribute__((noinline)) static void test_FCMPE_S_S ( void ) \
   1883   { \
   1884      V128 block[4]; \
   1885      randBlock_Floats(&block[0], 3); \
   1886      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1887      showBlock("FCMPE_S_S before", &block[0], 4); \
   1888      __asm__ __volatile__( \
   1889         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1890         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1891         "fcmpe s29, s11; " \
   1892         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1893         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1894         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1895      ); \
   1896      showBlock("FCMPE_S_S after", &block[0], 4); \
   1897      printf("\n"); \
   1898   }
   1899 
   1900 //======== FCMP_D_Z ========//
   1901 
        /* Expands to test_FCMP_D_Z(), which performs one execution of
           "fcmp d29, #0" (FP compare against zero, double-precision
           register).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMP reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q11 and
           q9 are loaded and stored but are not operands.  showBlock is
           called on all four entries before and after the asm. */
   1902 #define GEN_test_FCMP_D_Z \
   1903   __attribute__((noinline)) static void test_FCMP_D_Z ( void ) \
   1904   { \
   1905      V128 block[4]; \
   1906      randBlock_Doubles(&block[0], 3); \
   1907      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1908      showBlock("FCMP_D_Z before", &block[0], 4); \
   1909      __asm__ __volatile__( \
   1910         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1911         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1912         "fcmp d29, #0; " \
   1913         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1914         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1915         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1916      ); \
   1917      showBlock("FCMP_D_Z after", &block[0], 4); \
   1918      printf("\n"); \
   1919   }
   1920 
   1921 //======== FCMP_S_Z ========//
   1922 
        /* Expands to test_FCMP_S_Z(), which performs one execution of
           "fcmp s29, #0" (FP compare against zero, single-precision
           register).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMP reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q11 and
           q9 are loaded and stored but are not operands.  showBlock is
           called on all four entries before and after the asm. */
   1923 #define GEN_test_FCMP_S_Z \
   1924   __attribute__((noinline)) static void test_FCMP_S_Z ( void ) \
   1925   { \
   1926      V128 block[4]; \
   1927      randBlock_Floats(&block[0], 3); \
   1928      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1929      showBlock("FCMP_S_Z before", &block[0], 4); \
   1930      __asm__ __volatile__( \
   1931         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1932         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1933         "fcmp s29, #0; " \
   1934         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1935         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1936         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1937      ); \
   1938      showBlock("FCMP_S_Z after", &block[0], 4); \
   1939      printf("\n"); \
   1940   }
   1941 
   1942 //======== FCMPE_D_Z ========//
   1943 
        /* Expands to test_FCMPE_D_Z(), which performs one execution of
           "fcmpe d29, #0" (FP signaling compare against zero,
           double-precision register).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMPE reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q11 and
           q9 are loaded and stored but are not operands.  showBlock is
           called on all four entries before and after the asm. */
   1944 #define GEN_test_FCMPE_D_Z \
   1945   __attribute__((noinline)) static void test_FCMPE_D_Z ( void ) \
   1946   { \
   1947      V128 block[4]; \
   1948      randBlock_Doubles(&block[0], 3); \
   1949      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1950      showBlock("FCMPE_D_Z before", &block[0], 4); \
   1951      __asm__ __volatile__( \
   1952         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1953         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1954         "fcmpe d29, #0; " \
   1955         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1956         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1957         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1958      ); \
   1959      showBlock("FCMPE_D_Z after", &block[0], 4); \
   1960      printf("\n"); \
   1961   }
   1962 
   1963 //======== FCMPE_S_Z ========//
   1964 
        /* Expands to test_FCMPE_S_Z(), which performs one execution of
           "fcmpe s29, #0" (FP signaling compare against zero,
           single-precision register).
           Setup: block[0..2] are filled by randBlock_Floats (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           loads block[0], block[1], block[2] into q29, q11, q9, runs the
           instruction, then stores NZCV and the three q registers back to
           the same slots.  FCMPE reports its result through NZCV, so the
           value of interest lands in the low 64 bits of block[3]; q11 and
           q9 are loaded and stored but are not operands.  showBlock is
           called on all four entries before and after the asm. */
   1965 #define GEN_test_FCMPE_S_Z \
   1966   __attribute__((noinline)) static void test_FCMPE_S_Z ( void ) \
   1967   { \
   1968      V128 block[4]; \
   1969      randBlock_Floats(&block[0], 3); \
   1970      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1971      showBlock("FCMPE_S_Z before", &block[0], 4); \
   1972      __asm__ __volatile__( \
   1973         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1974         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1975         "fcmpe s29, #0; " \
   1976         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1977         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1978         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   1979      ); \
   1980      showBlock("FCMPE_S_Z after", &block[0], 4); \
   1981      printf("\n"); \
   1982   }
   1983 
   1984 //======== FCSEL_D_D_D_EQ ========//
   1985 
        /* Expands to test_FCSEL_D_D_D_EQ(), which performs one execution of
           "fcsel d29, d11, d9, eq" (FP conditional select, double-precision
           registers, condition EQ).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           which supplies the flags the condition is evaluated against, and
           loads block[0], block[1], block[2] into q29, q11, q9.  After the
           instruction it stores NZCV and the three q registers back to the
           same slots.  showBlock is called on all four entries before and
           after the asm. */
   1986 #define GEN_test_FCSEL_D_D_D_EQ \
   1987   __attribute__((noinline)) static void test_FCSEL_D_D_D_EQ ( void ) \
   1988   { \
   1989      V128 block[4]; \
   1990      randBlock_Doubles(&block[0], 3); \
   1991      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   1992      showBlock("FCSEL_D_D_D_EQ before", &block[0], 4); \
   1993      __asm__ __volatile__( \
   1994         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   1995         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   1996         "fcsel d29, d11, d9, eq; " \
   1997         "mrs x9, nzcv; str x9, [%0, 48]; " \
   1998         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   1999         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2000      ); \
   2001      showBlock("FCSEL_D_D_D_EQ after", &block[0], 4); \
   2002      printf("\n"); \
   2003   }
   2004 
   2005 //======== FCSEL_D_D_D_NE ========//
   2006 
        /* Expands to test_FCSEL_D_D_D_NE(), which performs one execution of
           "fcsel d29, d11, d9, ne" (FP conditional select, double-precision
           registers, condition NE).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           which supplies the flags the condition is evaluated against, and
           loads block[0], block[1], block[2] into q29, q11, q9.  After the
           instruction it stores NZCV and the three q registers back to the
           same slots.  showBlock is called on all four entries before and
           after the asm. */
   2007 #define GEN_test_FCSEL_D_D_D_NE \
   2008   __attribute__((noinline)) static void test_FCSEL_D_D_D_NE ( void ) \
   2009   { \
   2010      V128 block[4]; \
   2011      randBlock_Doubles(&block[0], 3); \
   2012      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   2013      showBlock("FCSEL_D_D_D_NE before", &block[0], 4); \
   2014      __asm__ __volatile__( \
   2015         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   2016         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   2017         "fcsel d29, d11, d9, ne; " \
   2018         "mrs x9, nzcv; str x9, [%0, 48]; " \
   2019         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   2020         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2021      ); \
   2022      showBlock("FCSEL_D_D_D_NE after", &block[0], 4); \
   2023      printf("\n"); \
   2024   }
   2025 
   2026 //======== FCSEL_S_S_S_EQ ========//
   2027 
        /* Expands to test_FCSEL_S_S_S_EQ(), which performs one execution of
           "fcsel s29, s11, s9, eq" (FP conditional select, single-precision
           registers, condition EQ).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           which supplies the flags the condition is evaluated against, and
           loads block[0], block[1], block[2] into q29, q11, q9.  After the
           instruction it stores NZCV and the three q registers back to the
           same slots.  showBlock is called on all four entries before and
           after the asm.
           NOTE(review): this S-register test seeds its inputs with
           randBlock_Doubles, whereas the other S-form scalar tests nearby
           use randBlock_Floats.  Switching generators would presumably
           change the values showBlock prints, so check any recorded
           expected output before altering it. */
   2028 #define GEN_test_FCSEL_S_S_S_EQ \
   2029   __attribute__((noinline)) static void test_FCSEL_S_S_S_EQ ( void ) \
   2030   { \
   2031      V128 block[4]; \
   2032      randBlock_Doubles(&block[0], 3); \
   2033      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   2034      showBlock("FCSEL_S_S_S_EQ before", &block[0], 4); \
   2035      __asm__ __volatile__( \
   2036         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   2037         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   2038         "fcsel s29, s11, s9, eq; " \
   2039         "mrs x9, nzcv; str x9, [%0, 48]; " \
   2040         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   2041         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2042      ); \
   2043      showBlock("FCSEL_S_S_S_EQ after", &block[0], 4); \
   2044      printf("\n"); \
   2045   }
   2046 
   2047 //======== FCSEL_S_S_S_NE ========//
   2048 
        /* Expands to test_FCSEL_S_S_S_NE(), which performs one execution of
           "fcsel s29, s11, s9, ne" (FP conditional select, single-precision
           registers, condition NE).
           Setup: block[0..2] are filled by randBlock_Doubles (defined
           elsewhere); block[3] is set from dup4x16(0x5)/dup4x16(0xA).
           The asm copies the low 64 bits of block[3] (offset 48) into NZCV,
           which supplies the flags the condition is evaluated against, and
           loads block[0], block[1], block[2] into q29, q11, q9.  After the
           instruction it stores NZCV and the three q registers back to the
           same slots.  showBlock is called on all four entries before and
           after the asm.
           NOTE(review): this S-register test seeds its inputs with
           randBlock_Doubles, whereas the other S-form scalar tests nearby
           use randBlock_Floats.  Switching generators would presumably
           change the values showBlock prints, so check any recorded
           expected output before altering it. */
   2049 #define GEN_test_FCSEL_S_S_S_NE \
   2050   __attribute__((noinline)) static void test_FCSEL_S_S_S_NE ( void ) \
   2051   { \
   2052      V128 block[4]; \
   2053      randBlock_Doubles(&block[0], 3); \
   2054      block[3].u64[0] = dup4x16(0x5); block[3].u64[1] = dup4x16(0xA); \
   2055      showBlock("FCSEL_S_S_S_NE before", &block[0], 4); \
   2056      __asm__ __volatile__( \
   2057         "ldr x9,  [%0, 48];  msr nzcv, x9; " \
   2058         "ldr q29, [%0, #0];  ldr q11, [%0, #16];  ldr q9, [%0, #32]; " \
   2059         "fcsel s29, s11, s9, ne; " \
   2060         "mrs x9, nzcv; str x9, [%0, 48]; " \
   2061         "str q29, [%0, #0];  str q11, [%0, #16];  str q9, [%0, #32]; " \
   2062         ::"r"(&block[0]) : "x9","cc","memory","v9","v11","v29" \
   2063      ); \
   2064      showBlock("FCSEL_S_S_S_NE after", &block[0], 4); \
   2065      printf("\n"); \
   2066   }
   2067 
   2068 
   2069 /* ---------------------------------------------------------------- */
   2070 /* -- Tests, in the same order that they appear in main()        -- */
   2071 /* ---------------------------------------------------------------- */
   2072 
   2073 // ======================== FP ========================
   2074 
        // One-source FP operations (fabs, fneg, fsqrt) in scalar D/S and
        // vector 2d/4s/2s forms.  GEN_TWOVEC_TEST is defined elsewhere in
        // this file; each use passes a test name, the instruction text, and
        // the two vector register numbers that the text refers to.
   2075 GEN_TWOVEC_TEST(fabs_d_d,   "fabs d22,    d23",    22, 23)
   2076 GEN_TWOVEC_TEST(fabs_s_s,   "fabs s22,    s23",    22, 23)
   2077 GEN_TWOVEC_TEST(fabs_2d_2d, "fabs v22.2d, v23.2d", 22, 23)
   2078 GEN_TWOVEC_TEST(fabs_4s_4s, "fabs v22.4s, v23.4s", 22, 23)
   2079 GEN_TWOVEC_TEST(fabs_2s_2s, "fabs v22.2s, v23.2s", 22, 23)
   2080 
   2081 GEN_TWOVEC_TEST(fneg_d_d,   "fneg d22, d23",       22, 23)
   2082 GEN_TWOVEC_TEST(fneg_s_s,   "fneg s22, s23",       22, 23)
   2083 GEN_TWOVEC_TEST(fneg_2d_2d, "fneg v22.2d, v23.2d", 22, 23)
   2084 GEN_TWOVEC_TEST(fneg_4s_4s, "fneg v22.4s, v23.4s", 22, 23)
   2085 GEN_TWOVEC_TEST(fneg_2s_2s, "fneg v22.2s, v23.2s", 22, 23)
   2086 
   2087 GEN_TWOVEC_TEST(fsqrt_d_d,   "fsqrt d22, d23",       22, 23)
   2088 GEN_TWOVEC_TEST(fsqrt_s_s,   "fsqrt s22, s23",       22, 23)
   2089 GEN_TWOVEC_TEST(fsqrt_2d_2d, "fsqrt v22.2d, v23.2d", 22, 23)
   2090 GEN_TWOVEC_TEST(fsqrt_4s_4s, "fsqrt v22.4s, v23.4s", 22, 23)
   2091 GEN_TWOVEC_TEST(fsqrt_2s_2s, "fsqrt v22.2s, v23.2s", 22, 23)
   2092 
        // Scalar add/subtract.  GEN_THREEVEC_TEST (defined elsewhere) takes a
        // test name, the instruction text, and the three vector register
        // numbers that the text refers to.
   2093 GEN_THREEVEC_TEST(fadd_d_d_d,  "fadd d2, d11, d29", 2, 11, 29)
   2094 GEN_THREEVEC_TEST(fadd_s_s_s,  "fadd s2, s11, s29", 2, 11, 29)
   2095 GEN_THREEVEC_TEST(fsub_d_d_d,  "fsub d2, d11, d29", 2, 11, 29)
   2096 GEN_THREEVEC_TEST(fsub_s_s_s,  "fsub s2, s11, s29", 2, 11, 29)
   2097 
        // Vector add/subtract.  GEN_BINARY_TEST (defined elsewhere) takes the
        // mnemonic followed by three arrangement suffixes.
        // NOTE(review): presumably destination, first source, second source,
        // with registers chosen inside the macro -- confirm at its definition.
   2098 GEN_BINARY_TEST(fadd, 2d, 2d, 2d)
   2099 GEN_BINARY_TEST(fadd, 4s, 4s, 4s)
   2100 GEN_BINARY_TEST(fadd, 2s, 2s, 2s)
   2101 GEN_BINARY_TEST(fsub, 2d, 2d, 2d)
   2102 GEN_BINARY_TEST(fsub, 4s, 4s, 4s)
   2103 GEN_BINARY_TEST(fsub, 2s, 2s, 2s)
   2104 
        // Absolute difference: scalar forms first, then vector forms.
   2105 GEN_THREEVEC_TEST(fabd_d_d_d,  "fabd d2, d11, d29", 2, 11, 29)
   2106 GEN_THREEVEC_TEST(fabd_s_s_s,  "fabd s2, s11, s29", 2, 11, 29)
   2107 GEN_BINARY_TEST(fabd, 2d, 2d, 2d)
   2108 GEN_BINARY_TEST(fabd, 4s, 4s, 4s)
   2109 GEN_BINARY_TEST(fabd, 2s, 2s, 2s)
   2110 
        // Pairwise add.  The first two have a scalar destination and a single
        // vector source, hence GEN_TWOVEC_TEST; the rest are three-register
        // vector forms.
   2111 GEN_TWOVEC_TEST(faddp_d_2d,     "faddp d2, v23.2d",    2, 23)
   2112 GEN_TWOVEC_TEST(faddp_s_2s,     "faddp s2, v23.2s",    2, 23)
   2113 GEN_THREEVEC_TEST(faddp_2d_2d_2d, "faddp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2114 GEN_THREEVEC_TEST(faddp_4s_4s_4s, "faddp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2115 GEN_THREEVEC_TEST(faddp_2s_2s_2s, "faddp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2116 
        // Conditional-compare tests.  Each bare GEN_test_* name is a macro
        // defined earlier in this file.
        // NOTE(review): presumably each expands to a complete test_<NAME>
        // function definition, as the GEN_test_* macros for the other scalar
        // compares do -- confirm at the definitions.
        // The names encode instruction (FCCMP/FCCMPE), operand size (D/S),
        // an immediate (0xF or 0x0) and a condition (EQ/NE).
   2117 GEN_test_FCCMP_D_D_0xF_EQ
   2118 GEN_test_FCCMP_D_D_0xF_NE
   2119 GEN_test_FCCMP_D_D_0x0_EQ
   2120 GEN_test_FCCMP_D_D_0x0_NE
   2121 GEN_test_FCCMP_S_S_0xF_EQ
   2122 GEN_test_FCCMP_S_S_0xF_NE
   2123 GEN_test_FCCMP_S_S_0x0_EQ
   2124 GEN_test_FCCMP_S_S_0x0_NE
   2125 GEN_test_FCCMPE_D_D_0xF_EQ
   2126 GEN_test_FCCMPE_D_D_0xF_NE
   2127 GEN_test_FCCMPE_D_D_0x0_EQ
   2128 GEN_test_FCCMPE_D_D_0x0_NE
   2129 GEN_test_FCCMPE_S_S_0xF_EQ
   2130 GEN_test_FCCMPE_S_S_0xF_NE
   2131 GEN_test_FCCMPE_S_S_0x0_EQ
   2132 GEN_test_FCCMPE_S_S_0x0_NE
   2133 
        // Scalar register-register compares.  Each GEN_test_* name is an
        // object-like macro defined earlier in this file; using it here emits
        // the corresponding static test_<NAME> function definition.
   2134 GEN_test_FCMEQ_D_D_D
   2135 GEN_test_FCMEQ_S_S_S
   2136 GEN_test_FCMGE_D_D_D
   2137 GEN_test_FCMGE_S_S_S
   2138 GEN_test_FCMGT_D_D_D
   2139 GEN_test_FCMGT_S_S_S
   2140 GEN_test_FACGT_D_D_D
   2141 GEN_test_FACGT_S_S_S
   2142 GEN_test_FACGE_D_D_D
   2143 GEN_test_FACGE_S_S_S
   2144 
        // Vector register-register compares in 2d/4s/2s arrangements.
        // GEN_THREEVEC_TEST (defined elsewhere) takes a test name, the
        // instruction text, and the three register numbers used in the text.
   2145 GEN_THREEVEC_TEST(fcmeq_2d_2d_2d, "fcmeq v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2146 GEN_THREEVEC_TEST(fcmeq_4s_4s_4s, "fcmeq v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2147 GEN_THREEVEC_TEST(fcmeq_2s_2s_2s, "fcmeq v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2148 GEN_THREEVEC_TEST(fcmge_2d_2d_2d, "fcmge v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2149 GEN_THREEVEC_TEST(fcmge_4s_4s_4s, "fcmge v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2150 GEN_THREEVEC_TEST(fcmge_2s_2s_2s, "fcmge v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2151 GEN_THREEVEC_TEST(fcmgt_2d_2d_2d, "fcmgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2152 GEN_THREEVEC_TEST(fcmgt_4s_4s_4s, "fcmgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2153 GEN_THREEVEC_TEST(fcmgt_2s_2s_2s, "fcmgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2154 GEN_THREEVEC_TEST(facge_2d_2d_2d, "facge v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2155 GEN_THREEVEC_TEST(facge_4s_4s_4s, "facge v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2156 GEN_THREEVEC_TEST(facge_2s_2s_2s, "facge v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2157 GEN_THREEVEC_TEST(facgt_2d_2d_2d, "facgt v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2158 GEN_THREEVEC_TEST(facgt_4s_4s_4s, "facgt v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2159 GEN_THREEVEC_TEST(facgt_2s_2s_2s, "facgt v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2160 
        // Scalar compares against zero.  Each GEN_test_*_Z name is an
        // object-like macro defined earlier in this file; using it here emits
        // the corresponding static test_<NAME> function definition.
   2161 GEN_test_FCMEQ_D_D_Z
   2162 GEN_test_FCMEQ_S_S_Z
   2163 GEN_test_FCMGE_D_D_Z
   2164 GEN_test_FCMGE_S_S_Z
   2165 GEN_test_FCMGT_D_D_Z
   2166 GEN_test_FCMGT_S_S_Z
   2167 GEN_test_FCMLE_D_D_Z
   2168 GEN_test_FCMLE_S_S_Z
   2169 GEN_test_FCMLT_D_D_Z
   2170 GEN_test_FCMLT_S_S_Z
   2171 
        // Vector compares against zero in 2d/4s/2s arrangements.  Only two
        // registers appear in each instruction, so these use GEN_TWOVEC_TEST
        // (defined elsewhere): test name, instruction text, two register
        // numbers.
   2172 GEN_TWOVEC_TEST(fcmeq_z_2d_2d, "fcmeq v2.2d, v23.2d, #0", 2, 23)
   2173 GEN_TWOVEC_TEST(fcmeq_z_4s_4s, "fcmeq v2.4s, v23.4s, #0", 2, 23)
   2174 GEN_TWOVEC_TEST(fcmeq_z_2s_2s, "fcmeq v2.2s, v23.2s, #0", 2, 23)
   2175 GEN_TWOVEC_TEST(fcmge_z_2d_2d, "fcmge v2.2d, v23.2d, #0", 2, 23)
   2176 GEN_TWOVEC_TEST(fcmge_z_4s_4s, "fcmge v2.4s, v23.4s, #0", 2, 23)
   2177 GEN_TWOVEC_TEST(fcmge_z_2s_2s, "fcmge v2.2s, v23.2s, #0", 2, 23)
   2178 GEN_TWOVEC_TEST(fcmgt_z_2d_2d, "fcmgt v2.2d, v23.2d, #0", 2, 23)
   2179 GEN_TWOVEC_TEST(fcmgt_z_4s_4s, "fcmgt v2.4s, v23.4s, #0", 2, 23)
   2180 GEN_TWOVEC_TEST(fcmgt_z_2s_2s, "fcmgt v2.2s, v23.2s, #0", 2, 23)
   2181 GEN_TWOVEC_TEST(fcmle_z_2d_2d, "fcmle v2.2d, v23.2d, #0", 2, 23)
   2182 GEN_TWOVEC_TEST(fcmle_z_4s_4s, "fcmle v2.4s, v23.4s, #0", 2, 23)
   2183 GEN_TWOVEC_TEST(fcmle_z_2s_2s, "fcmle v2.2s, v23.2s, #0", 2, 23)
   2184 GEN_TWOVEC_TEST(fcmlt_z_2d_2d, "fcmlt v2.2d, v23.2d, #0", 2, 23)
   2185 GEN_TWOVEC_TEST(fcmlt_z_4s_4s, "fcmlt v2.4s, v23.4s, #0", 2, 23)
   2186 GEN_TWOVEC_TEST(fcmlt_z_2s_2s, "fcmlt v2.2s, v23.2s, #0", 2, 23)
   2187 
        // Flag-setting compares (FCMP/FCMPE), against zero and then
        // register-register.  Each name is an object-like macro defined
        // earlier in this file; using it here emits the corresponding static
        // test_<NAME> function definition.
   2188 GEN_test_FCMP_D_Z
   2189 GEN_test_FCMP_S_Z
   2190 GEN_test_FCMPE_D_Z
   2191 GEN_test_FCMPE_S_Z
   2192 GEN_test_FCMP_D_D
   2193 GEN_test_FCMP_S_S
   2194 GEN_test_FCMPE_D_D
   2195 GEN_test_FCMPE_S_S
   2196 
        // Conditional select, D and S forms, with EQ and NE conditions.
   2197 GEN_test_FCSEL_D_D_D_EQ
   2198 GEN_test_FCSEL_D_D_D_NE
   2199 GEN_test_FCSEL_S_S_S_EQ
   2200 GEN_test_FCSEL_S_S_S_NE
   2201 
        // Divide: scalar forms via GEN_THREEVEC_TEST (test name, instruction
        // text, three register numbers), vector forms via GEN_BINARY_TEST
        // (mnemonic plus three arrangement suffixes).  Both macros are
        // defined elsewhere in this file.
   2202 GEN_THREEVEC_TEST(fdiv_d_d_d,  "fdiv d2, d11, d29", 2, 11, 29)
   2203 GEN_THREEVEC_TEST(fdiv_s_s_s,  "fdiv s2, s11, s29", 2, 11, 29)
   2204 GEN_BINARY_TEST(fdiv, 2d, 2d, 2d)
   2205 GEN_BINARY_TEST(fdiv, 4s, 4s, 4s)
   2206 GEN_BINARY_TEST(fdiv, 2s, 2s, 2s)
   2207 
        // Scalar fused multiply-add family.  These instructions name four
        // registers, so they use GEN_FOURVEC_TEST (defined elsewhere): test
        // name, instruction text, four register numbers.
   2208 GEN_FOURVEC_TEST(fmadd_d_d_d_d,  "fmadd  d2, d11, d29, d3", 2, 11, 29, 3)
   2209 GEN_FOURVEC_TEST(fmadd_s_s_s_s,  "fmadd  s2, s11, s29, s3", 2, 11, 29, 3)
   2210 GEN_FOURVEC_TEST(fnmadd_d_d_d_d, "fnmadd d2, d11, d29, d3", 2, 11, 29, 3)
   2211 GEN_FOURVEC_TEST(fnmadd_s_s_s_s, "fnmadd s2, s11, s29, s3", 2, 11, 29, 3)
   2212 GEN_FOURVEC_TEST(fmsub_d_d_d_d,  "fmsub  d2, d11, d29, d3", 2, 11, 29, 3)
   2213 GEN_FOURVEC_TEST(fmsub_s_s_s_s,  "fmsub  s2, s11, s29, s3", 2, 11, 29, 3)
   2214 GEN_FOURVEC_TEST(fnmsub_d_d_d_d, "fnmsub d2, d11, d29, d3", 2, 11, 29, 3)
   2215 GEN_FOURVEC_TEST(fnmsub_s_s_s_s, "fnmsub s2, s11, s29, s3", 2, 11, 29, 3)
   2216 
        // Scalar negated multiply.
   2217 GEN_THREEVEC_TEST(fnmul_d_d_d, "fnmul d2, d11, d29", 2, 11, 29)
   2218 GEN_THREEVEC_TEST(fnmul_s_s_s, "fnmul s2, s11, s29", 2, 11, 29)
   2219 
   2220 GEN_THREEVEC_TEST(fmax_d_d_d,  "fmax d2, d11, d29", 2, 11, 29)
   2221 GEN_THREEVEC_TEST(fmax_s_s_s,  "fmax s2, s11, s29", 2, 11, 29)
   2222 GEN_THREEVEC_TEST(fmin_d_d_d,  "fmin d2, d11, d29", 2, 11, 29)
   2223 GEN_THREEVEC_TEST(fmin_s_s_s,  "fmin s2, s11, s29", 2, 11, 29)
   2224 GEN_THREEVEC_TEST(fmaxnm_d_d_d,  "fmaxnm d2, d11, d29", 2, 11, 29)
   2225 GEN_THREEVEC_TEST(fmaxnm_s_s_s,  "fmaxnm s2, s11, s29", 2, 11, 29)
   2226 GEN_THREEVEC_TEST(fminnm_d_d_d,  "fminnm d2, d11, d29", 2, 11, 29)
   2227 GEN_THREEVEC_TEST(fminnm_s_s_s,  "fminnm s2, s11, s29", 2, 11, 29)
   2228 
   2229 GEN_THREEVEC_TEST(fmax_2d_2d_2d, "fmax v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2230 GEN_THREEVEC_TEST(fmax_4s_4s_4s, "fmax v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2231 GEN_THREEVEC_TEST(fmax_2s_2s_2s, "fmax v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2232 GEN_THREEVEC_TEST(fmin_2d_2d_2d, "fmin v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2233 GEN_THREEVEC_TEST(fmin_4s_4s_4s, "fmin v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2234 GEN_THREEVEC_TEST(fmin_2s_2s_2s, "fmin v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2235 GEN_THREEVEC_TEST(fmaxnm_2d_2d_2d, "fmaxnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2236 GEN_THREEVEC_TEST(fmaxnm_4s_4s_4s, "fmaxnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2237 GEN_THREEVEC_TEST(fmaxnm_2s_2s_2s, "fmaxnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2238 GEN_THREEVEC_TEST(fminnm_2d_2d_2d, "fminnm v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2239 GEN_THREEVEC_TEST(fminnm_4s_4s_4s, "fminnm v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2240 GEN_THREEVEC_TEST(fminnm_2s_2s_2s, "fminnm v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2241 
   2242 GEN_TWOVEC_TEST(fmaxnmp_d_2d, "fmaxnmp d2, v23.2d", 2, 23)
   2243 GEN_TWOVEC_TEST(fmaxnmp_s_2s, "fmaxnmp s2, v23.2s", 2, 23)
   2244 GEN_TWOVEC_TEST(fminnmp_d_2d, "fminnmp d2, v23.2d", 2, 23)
   2245 GEN_TWOVEC_TEST(fminnmp_s_2s, "fminnmp s2, v23.2s", 2, 23)
   2246 
   2247 GEN_THREEVEC_TEST(fmaxnmp_2d_2d_2d, "fmaxnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2248 GEN_THREEVEC_TEST(fmaxnmp_4s_4s_4s, "fmaxnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2249 GEN_THREEVEC_TEST(fmaxnmp_2s_2s_2s, "fmaxnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2250 GEN_THREEVEC_TEST(fminnmp_2d_2d_2d, "fminnmp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2251 GEN_THREEVEC_TEST(fminnmp_4s_4s_4s, "fminnmp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2252 GEN_THREEVEC_TEST(fminnmp_2s_2s_2s, "fminnmp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2253 
   2254 GEN_TWOVEC_TEST(fmaxnmv_s_4s, "fmaxnmv s2, v23.4s", 2, 23)
   2255 GEN_TWOVEC_TEST(fminnmv_s_4s, "fminnmv s2, v23.4s", 2, 23)
   2256 
   /* Scalar pairwise FMAXP/FMINP: a two-element source vector (v23) is
      reduced into a scalar destination (d2 or s2). */
   2257 GEN_TWOVEC_TEST(fmaxp_d_2d, "fmaxp d2, v23.2d", 2, 23)
   2258 GEN_TWOVEC_TEST(fmaxp_s_2s, "fmaxp s2, v23.2s", 2, 23)
   2259 GEN_TWOVEC_TEST(fminp_d_2d, "fminp d2, v23.2d", 2, 23)
   2260 GEN_TWOVEC_TEST(fminp_s_2s, "fminp s2, v23.2s", 2, 23)
   2261 
   /* Vector pairwise FMAXP/FMINP with two source vectors, in the 2d, 4s and
      2s arrangements. */
   2262 GEN_THREEVEC_TEST(fmaxp_2d_2d_2d, "fmaxp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2263 GEN_THREEVEC_TEST(fmaxp_4s_4s_4s, "fmaxp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2264 GEN_THREEVEC_TEST(fmaxp_2s_2s_2s, "fmaxp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2265 GEN_THREEVEC_TEST(fminp_2d_2d_2d, "fminp v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2266 GEN_THREEVEC_TEST(fminp_4s_4s_4s, "fminp v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2267 GEN_THREEVEC_TEST(fminp_2s_2s_2s, "fminp v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2268 
   /* Across-vector FMAXV/FMINV: the four single-precision lanes of v23 are
      reduced into scalar s2. */
   2269 GEN_TWOVEC_TEST(fmaxv_s_4s, "fmaxv s2, v23.4s", 2, 23)
   2270 GEN_TWOVEC_TEST(fminv_s_4s, "fminv s2, v23.4s", 2, 23)
   2271 
   /* Vector fused multiply-accumulate (FMLA) and multiply-subtract (FMLS),
      in the 2d, 4s and 2s arrangements.  The destination v2 is also an
      input, since these instructions accumulate into it. */
   2272 GEN_THREEVEC_TEST(fmla_2d_2d_2d, "fmla v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2273 GEN_THREEVEC_TEST(fmla_4s_4s_4s, "fmla v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2274 GEN_THREEVEC_TEST(fmla_2s_2s_2s, "fmla v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2275 GEN_THREEVEC_TEST(fmls_2d_2d_2d, "fmls v2.2d, v23.2d, v11.2d", 2, 23, 11)
   2276 GEN_THREEVEC_TEST(fmls_4s_4s_4s, "fmls v2.4s, v23.4s, v11.4s", 2, 23, 11)
   2277 GEN_THREEVEC_TEST(fmls_2s_2s_2s, "fmls v2.2s, v23.2s, v11.2s", 2, 23, 11)
   2278 
   /* Scalar FMLA/FMLS by element: the multiplier is a single lane of v29.
      The lowest and highest lane index are covered for each element size
      (d[0]/d[1] and s[0]/s[3]). */
   2279 GEN_THREEVEC_TEST(fmla_d_d_d0, "fmla d2, d11, v29.d[0]", 2, 11, 29)
   2280 GEN_THREEVEC_TEST(fmla_d_d_d1, "fmla d2, d11, v29.d[1]", 2, 11, 29)
   2281 GEN_THREEVEC_TEST(fmla_s_s_s0, "fmla s2, s11, v29.s[0]", 2, 11, 29)
   2282 GEN_THREEVEC_TEST(fmla_s_s_s3, "fmla s2, s11, v29.s[3]", 2, 11, 29)
   2283 GEN_THREEVEC_TEST(fmls_d_d_d0, "fmls d2, d11, v29.d[0]", 2, 11, 29)
   2284 GEN_THREEVEC_TEST(fmls_d_d_d1, "fmls d2, d11, v29.d[1]", 2, 11, 29)
   2285 GEN_THREEVEC_TEST(fmls_s_s_s0, "fmls s2, s11, v29.s[0]", 2, 11, 29)
   2286 GEN_THREEVEC_TEST(fmls_s_s_s3, "fmls s2, s11, v29.s[3]", 2, 11, 29)
   2287 
   /* Vector FMLA by element: every lane of v11 is multiplied by one selected
      lane of v29 (lowest and highest index per element size). */
   2288 GEN_THREEVEC_TEST(fmla_2d_2d_d0, "fmla v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2289 GEN_THREEVEC_TEST(fmla_2d_2d_d1, "fmla v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2290 GEN_THREEVEC_TEST(fmla_4s_4s_s0, "fmla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2291 GEN_THREEVEC_TEST(fmla_4s_4s_s3, "fmla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2292 GEN_THREEVEC_TEST(fmla_2s_2s_s0, "fmla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2293 GEN_THREEVEC_TEST(fmla_2s_2s_s3, "fmla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2294 
   /* Vector FMLS by element: same operand shapes and lane choices as the
      vector-by-element FMLA cases. */
   2295 GEN_THREEVEC_TEST(fmls_2d_2d_d0, "fmls v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2296 GEN_THREEVEC_TEST(fmls_2d_2d_d1, "fmls v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2297 GEN_THREEVEC_TEST(fmls_4s_4s_s0, "fmls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2298 GEN_THREEVEC_TEST(fmls_4s_4s_s3, "fmls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2299 GEN_THREEVEC_TEST(fmls_2s_2s_s0, "fmls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2300 GEN_THREEVEC_TEST(fmls_2s_2s_s3, "fmls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2301 
   /* Vector FMOV (immediate) for the 2d, 4s and 2s arrangements with the
      immediates 0.125, -4.0 and 1.0.  The instruction only names v22; the
      second register number (23) is passed because the two-vector macro
      takes two register arguments. */
   2302 GEN_TWOVEC_TEST(fmov_2d_imm_01, "fmov v22.2d, #0.125", 22, 23)
   2303 GEN_TWOVEC_TEST(fmov_2d_imm_02, "fmov v22.2d, #-4.0",  22, 23)
   2304 GEN_TWOVEC_TEST(fmov_2d_imm_03, "fmov v22.2d, #1.0",   22, 23)
   2305 GEN_TWOVEC_TEST(fmov_4s_imm_01, "fmov v22.4s, #0.125", 22, 23)
   2306 GEN_TWOVEC_TEST(fmov_4s_imm_02, "fmov v22.4s, #-4.0",  22, 23)
   2307 GEN_TWOVEC_TEST(fmov_4s_imm_03, "fmov v22.4s, #1.0",   22, 23)
   2308 GEN_TWOVEC_TEST(fmov_2s_imm_01, "fmov v22.2s, #0.125", 22, 23)
   2309 GEN_TWOVEC_TEST(fmov_2s_imm_02, "fmov v22.2s, #-4.0",  22, 23)
   2310 GEN_TWOVEC_TEST(fmov_2s_imm_03, "fmov v22.2s, #1.0",   22, 23)
   2311 
   /* Scalar FMOV (register): FP register to FP register, double and single. */
   2312 GEN_TWOVEC_TEST(fmov_d_d,  "fmov d22, d23",   22, 23)
   2313 GEN_TWOVEC_TEST(fmov_s_s,  "fmov s22, s23",   22, 23)
   2314 
   /* FMOV between a general-purpose register and an FP/SIMD register, in
      both directions, including the upper 64-bit lane (v7.d[1]).  For this
      macro the integer register number (15) comes first and the vector
      register number (7) second, regardless of operand order in the
      instruction string. */
   2315 GEN_ONEINT_ONEVEC_TEST(fmov_s_w,  "fmov s7,      w15", 15, 7)
   2316 GEN_ONEINT_ONEVEC_TEST(fmov_d_x,  "fmov d7,      x15", 15, 7)
   2317 GEN_ONEINT_ONEVEC_TEST(fmov_d1_x, "fmov v7.d[1], x15", 15, 7)
   2318 GEN_ONEINT_ONEVEC_TEST(fmov_w_s,  "fmov w15,      s7", 15, 7)
   2319 GEN_ONEINT_ONEVEC_TEST(fmov_x_d,  "fmov x15,      d7", 15, 7)
   2320 GEN_ONEINT_ONEVEC_TEST(fmov_x_d1, "fmov x15, v7.d[1]", 15, 7)
   2321 
   /* Scalar FMOV (immediate) into d22 / s22.  Only one register is named in
      each instruction, hence the remark below about the two-vector macro. */
   2322 /* overkill -- don't need two vecs, only one */
   2323 GEN_TWOVEC_TEST(fmov_d_imm_01, "fmov d22, #0.125", 22, 23)
   2324 GEN_TWOVEC_TEST(fmov_d_imm_02, "fmov d22, #-4.0",  22, 23)
   2325 GEN_TWOVEC_TEST(fmov_d_imm_03, "fmov d22, #1.0",   22, 23)
   2326 GEN_TWOVEC_TEST(fmov_s_imm_01, "fmov s22, #0.125", 22, 23)
   2327 GEN_TWOVEC_TEST(fmov_s_imm_02, "fmov s22, #-4.0",  22, 23)
   /* NOTE(review): this case uses #-1.0 whereas every other *_imm_03 case
      uses #1.0 -- confirm whether that is intentional.  Changing the string
      would also change the instruction text of this test, so any reference
      output would need regenerating. */
   2328 GEN_TWOVEC_TEST(fmov_s_imm_03, "fmov s22, #-1.0",   22, 23)
   2329 
   /* Scalar FMUL by element: d11/s11 multiplied by one lane of v29 (lowest
      and highest lane index per element size). */
   2330 GEN_THREEVEC_TEST(fmul_d_d_d0, "fmul d2, d11, v29.d[0]", 2, 11, 29)
   2331 GEN_THREEVEC_TEST(fmul_d_d_d1, "fmul d2, d11, v29.d[1]", 2, 11, 29)
   2332 GEN_THREEVEC_TEST(fmul_s_s_s0, "fmul s2, s11, v29.s[0]", 2, 11, 29)
   2333 GEN_THREEVEC_TEST(fmul_s_s_s3, "fmul s2, s11, v29.s[3]", 2, 11, 29)
   2334 
   /* Vector FMUL by element: every lane of v11 multiplied by one selected
      lane of v29, in the 2d, 4s and 2s arrangements. */
   2335 GEN_THREEVEC_TEST(fmul_2d_2d_d0, "fmul v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2336 GEN_THREEVEC_TEST(fmul_2d_2d_d1, "fmul v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2337 GEN_THREEVEC_TEST(fmul_4s_4s_s0, "fmul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2338 GEN_THREEVEC_TEST(fmul_4s_4s_s3, "fmul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2339 GEN_THREEVEC_TEST(fmul_2s_2s_s0, "fmul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2340 GEN_THREEVEC_TEST(fmul_2s_2s_s3, "fmul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2341 
   /* Plain FMUL: scalar double and single, then the 2d, 4s and 2s vector
      arrangements. */
   2342 GEN_THREEVEC_TEST(fmul_d_d_d,    "fmul d2, d11, d29", 2, 11, 29)
   2343 GEN_THREEVEC_TEST(fmul_s_s_s,    "fmul s2, s11, s29", 2, 11, 29)
   2344 GEN_THREEVEC_TEST(fmul_2d_2d_2d, "fmul v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2345 GEN_THREEVEC_TEST(fmul_4s_4s_4s, "fmul v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2346 GEN_THREEVEC_TEST(fmul_2s_2s_2s, "fmul v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2347 
   /* FMULX (multiply extended) by element: scalar forms first, then the 2d,
      4s and 2s vector forms, each with the lowest and highest lane index of
      v29. */
   2348 GEN_THREEVEC_TEST(fmulx_d_d_d0, "fmulx d2, d11, v29.d[0]", 2, 11, 29)
   2349 GEN_THREEVEC_TEST(fmulx_d_d_d1, "fmulx d2, d11, v29.d[1]", 2, 11, 29)
   2350 GEN_THREEVEC_TEST(fmulx_s_s_s0, "fmulx s2, s11, v29.s[0]", 2, 11, 29)
   2351 GEN_THREEVEC_TEST(fmulx_s_s_s3, "fmulx s2, s11, v29.s[3]", 2, 11, 29)
   2352 GEN_THREEVEC_TEST(fmulx_2d_2d_d0, "fmulx v2.2d, v11.2d, v29.d[0]", 2, 11, 29)
   2353 GEN_THREEVEC_TEST(fmulx_2d_2d_d1, "fmulx v2.2d, v11.2d, v29.d[1]", 2, 11, 29)
   2354 GEN_THREEVEC_TEST(fmulx_4s_4s_s0, "fmulx v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   2355 GEN_THREEVEC_TEST(fmulx_4s_4s_s3, "fmulx v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   2356 GEN_THREEVEC_TEST(fmulx_2s_2s_s0, "fmulx v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   2357 GEN_THREEVEC_TEST(fmulx_2s_2s_s3, "fmulx v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   2358 
   /* Plain FMULX: scalar double and single, then the 2d, 4s and 2s vector
      arrangements. */
   2359 GEN_THREEVEC_TEST(fmulx_d_d_d,    "fmulx d2, d11, d29", 2, 11, 29)
   2360 GEN_THREEVEC_TEST(fmulx_s_s_s,    "fmulx s2, s11, s29", 2, 11, 29)
   2361 GEN_THREEVEC_TEST(fmulx_2d_2d_2d, "fmulx v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2362 GEN_THREEVEC_TEST(fmulx_4s_4s_4s, "fmulx v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2363 GEN_THREEVEC_TEST(fmulx_2s_2s_2s, "fmulx v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2364 
   /* FRECPE (reciprocal estimate): scalar double/single and the 2d, 4s and
      2s vector arrangements. */
   2365 GEN_TWOVEC_TEST(frecpe_d_d,   "frecpe d22, d23",       22, 23)
   2366 GEN_TWOVEC_TEST(frecpe_s_s,   "frecpe s22, s23",       22, 23)
   2367 GEN_TWOVEC_TEST(frecpe_2d_2d, "frecpe v22.2d, v23.2d", 22, 23)
   2368 GEN_TWOVEC_TEST(frecpe_4s_4s, "frecpe v22.4s, v23.4s", 22, 23)
   2369 GEN_TWOVEC_TEST(frecpe_2s_2s, "frecpe v22.2s, v23.2s", 22, 23)
   2370 
   /* FRECPS (reciprocal step): scalar double/single and the 2d, 4s and 2s
      vector arrangements. */
   2371 GEN_THREEVEC_TEST(frecps_d_d_d,    "frecps d2, d11, d29", 2, 11, 29)
   2372 GEN_THREEVEC_TEST(frecps_s_s_s,    "frecps s2, s11, s29", 2, 11, 29)
   2373 GEN_THREEVEC_TEST(frecps_2d_2d_2d, "frecps v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2374 GEN_THREEVEC_TEST(frecps_4s_4s_4s, "frecps v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2375 GEN_THREEVEC_TEST(frecps_2s_2s_2s, "frecps v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2376 
   /* FRECPX (reciprocal exponent): scalar double and single forms only. */
   2377 GEN_TWOVEC_TEST(frecpx_d_d,   "frecpx d22, d23",       22, 23)
   2378 GEN_TWOVEC_TEST(frecpx_s_s,   "frecpx s22, s23",       22, 23)
   2379 
   /* Scalar FRINT* (round to integral, result stays in FP format), double
      and single.  The mnemonic suffix selects the rounding behaviour:
        a = to nearest, ties away from zero   i = current rounding mode
        m = toward minus infinity             n = to nearest, ties to even
        p = toward plus infinity              x = current mode, exact
        z = toward zero */
   2380 GEN_TWOVEC_TEST(frinta_d_d,   "frinta d22, d23",       22, 23)
   2381 GEN_TWOVEC_TEST(frinta_s_s,   "frinta s22, s23",       22, 23)
   2382 GEN_TWOVEC_TEST(frinti_d_d,   "frinti d22, d23",       22, 23)
   2383 GEN_TWOVEC_TEST(frinti_s_s,   "frinti s22, s23",       22, 23)
   2384 GEN_TWOVEC_TEST(frintm_d_d,   "frintm d22, d23",       22, 23)
   2385 GEN_TWOVEC_TEST(frintm_s_s,   "frintm s22, s23",       22, 23)
   2386 GEN_TWOVEC_TEST(frintn_d_d,   "frintn d22, d23",       22, 23)
   2387 GEN_TWOVEC_TEST(frintn_s_s,   "frintn s22, s23",       22, 23)
   2388 GEN_TWOVEC_TEST(frintp_d_d,   "frintp d22, d23",       22, 23)
   2389 GEN_TWOVEC_TEST(frintp_s_s,   "frintp s22, s23",       22, 23)
   2390 GEN_TWOVEC_TEST(frintx_d_d,   "frintx d22, d23",       22, 23)
   2391 GEN_TWOVEC_TEST(frintx_s_s,   "frintx s22, s23",       22, 23)
   2392 GEN_TWOVEC_TEST(frintz_d_d,   "frintz d22, d23",       22, 23)
   2393 GEN_TWOVEC_TEST(frintz_s_s,   "frintz s22, s23",       22, 23)
   2394 
   /* Vector FRINT* (round to integral) for all seven rounding variants
      (a, i, m, n, p, x, z), each in the 2d, 4s and 2s arrangements. */
   2395 GEN_TWOVEC_TEST(frinta_2d_2d, "frinta v2.2d, v11.2d", 2, 11)
   2396 GEN_TWOVEC_TEST(frinta_4s_4s, "frinta v2.4s, v11.4s", 2, 11)
   2397 GEN_TWOVEC_TEST(frinta_2s_2s, "frinta v2.2s, v11.2s", 2, 11)
   2398 GEN_TWOVEC_TEST(frinti_2d_2d, "frinti v2.2d, v11.2d", 2, 11)
   2399 GEN_TWOVEC_TEST(frinti_4s_4s, "frinti v2.4s, v11.4s", 2, 11)
   2400 GEN_TWOVEC_TEST(frinti_2s_2s, "frinti v2.2s, v11.2s", 2, 11)
   2401 GEN_TWOVEC_TEST(frintm_2d_2d, "frintm v2.2d, v11.2d", 2, 11)
   2402 GEN_TWOVEC_TEST(frintm_4s_4s, "frintm v2.4s, v11.4s", 2, 11)
   2403 GEN_TWOVEC_TEST(frintm_2s_2s, "frintm v2.2s, v11.2s", 2, 11)
   2404 GEN_TWOVEC_TEST(frintn_2d_2d, "frintn v2.2d, v11.2d", 2, 11)
   2405 GEN_TWOVEC_TEST(frintn_4s_4s, "frintn v2.4s, v11.4s", 2, 11)
   2406 GEN_TWOVEC_TEST(frintn_2s_2s, "frintn v2.2s, v11.2s", 2, 11)
   2407 GEN_TWOVEC_TEST(frintp_2d_2d, "frintp v2.2d, v11.2d", 2, 11)
   2408 GEN_TWOVEC_TEST(frintp_4s_4s, "frintp v2.4s, v11.4s", 2, 11)
   2409 GEN_TWOVEC_TEST(frintp_2s_2s, "frintp v2.2s, v11.2s", 2, 11)
   2410 GEN_TWOVEC_TEST(frintx_2d_2d, "frintx v2.2d, v11.2d", 2, 11)
   2411 GEN_TWOVEC_TEST(frintx_4s_4s, "frintx v2.4s, v11.4s", 2, 11)
   2412 GEN_TWOVEC_TEST(frintx_2s_2s, "frintx v2.2s, v11.2s", 2, 11)
   2413 GEN_TWOVEC_TEST(frintz_2d_2d, "frintz v2.2d, v11.2d", 2, 11)
   2414 GEN_TWOVEC_TEST(frintz_4s_4s, "frintz v2.4s, v11.4s", 2, 11)
   2415 GEN_TWOVEC_TEST(frintz_2s_2s, "frintz v2.2s, v11.2s", 2, 11)
   2416 
   /* FRSQRTE (reciprocal square root estimate): scalar double/single and the
      2d, 4s and 2s vector arrangements. */
   2417 GEN_TWOVEC_TEST(frsqrte_d_d,   "frsqrte d22, d23",       22, 23)
   2418 GEN_TWOVEC_TEST(frsqrte_s_s,   "frsqrte s22, s23",       22, 23)
   2419 GEN_TWOVEC_TEST(frsqrte_2d_2d, "frsqrte v22.2d, v23.2d", 22, 23)
   2420 GEN_TWOVEC_TEST(frsqrte_4s_4s, "frsqrte v22.4s, v23.4s", 22, 23)
   2421 GEN_TWOVEC_TEST(frsqrte_2s_2s, "frsqrte v22.2s, v23.2s", 22, 23)
   2422 
   /* FRSQRTS (reciprocal square root step): scalar double/single and the 2d,
      4s and 2s vector arrangements. */
   2423 GEN_THREEVEC_TEST(frsqrts_d_d_d,    "frsqrts d2, d11, d29", 2, 11, 29)
   2424 GEN_THREEVEC_TEST(frsqrts_s_s_s,    "frsqrts s2, s11, s29", 2, 11, 29)
   2425 GEN_THREEVEC_TEST(frsqrts_2d_2d_2d, "frsqrts v2.2d, v11.2d, v29.2d", 2, 11, 29)
   2426 GEN_THREEVEC_TEST(frsqrts_4s_4s_4s, "frsqrts v2.4s, v11.4s, v29.4s", 2, 11, 29)
   2427 GEN_THREEVEC_TEST(frsqrts_2s_2s_2s, "frsqrts v2.2s, v11.2s, v29.2s", 2, 11, 29)
   2428 
   2429 // ======================== CONV ========================
   2430 
   /* Scalar FCVT precision conversions: all six ordered pairs among half
      (h), single (s) and double (d).  Test names read destination_source. */
   2431 GEN_TWOVEC_TEST(fcvt_s_h, "fcvt s7, h16", 7, 16)
   2432 GEN_TWOVEC_TEST(fcvt_d_h, "fcvt d7, h16", 7, 16)
   2433 GEN_TWOVEC_TEST(fcvt_h_s, "fcvt h7, s16", 7, 16)
   2434 GEN_TWOVEC_TEST(fcvt_d_s, "fcvt d7, s16", 7, 16)
   2435 GEN_TWOVEC_TEST(fcvt_h_d, "fcvt h7, d16", 7, 16)
   2436 GEN_TWOVEC_TEST(fcvt_s_d, "fcvt s7, d16", 7, 16)
   2437 
   /* FCVTL / FCVTL2 (convert to higher precision, long).  The test names do
      not carry the "2" suffix; the 8h and 4s source arrangements identify
      the FCVTL2 cases. */
   2438 GEN_TWOVEC_TEST(fcvtl_4s_4h, "fcvtl  v11.4s, v29.4h", 11, 29)
   2439 GEN_TWOVEC_TEST(fcvtl_4s_8h, "fcvtl2 v11.4s, v29.8h", 11, 29)
   2440 GEN_TWOVEC_TEST(fcvtl_2d_2s, "fcvtl  v11.2d, v29.2s", 11, 29)
   2441 GEN_TWOVEC_TEST(fcvtl_2d_4s, "fcvtl2 v11.2d, v29.4s", 11, 29)
   2442 
   /* FCVTN / FCVTN2 (convert to lower precision, narrow).  The test names do
      not carry the "2" suffix; the 8h and 4s destination arrangements
      identify the FCVTN2 cases. */
   2443 GEN_TWOVEC_TEST(fcvtn_4h_4s, "fcvtn  v22.4h, v23.4s", 22, 23)
   2444 GEN_TWOVEC_TEST(fcvtn_8h_4s, "fcvtn2 v22.8h, v23.4s", 22, 23)
   2445 GEN_TWOVEC_TEST(fcvtn_2s_2d, "fcvtn  v22.2s, v23.2d", 22, 23)
   2446 GEN_TWOVEC_TEST(fcvtn_4s_2d, "fcvtn2 v22.4s, v23.2d", 22, 23)
   2447 
   /* FCVTAS / FCVTAU: FP to signed / unsigned integer, rounding to nearest
      with ties away from zero.  Covers scalar, vector (2d, 4s, 2s) and
      general-register destinations (w/x from s/d).  Note that the scalar
      unsigned cases swap the two registers (d21 <- d10) relative to the
      signed ones, and the register-number arguments follow suit. */
   2448 GEN_TWOVEC_TEST(fcvtas_d_d,   "fcvtas d10, d21",       10, 21)
   2449 GEN_TWOVEC_TEST(fcvtau_d_d,   "fcvtau d21, d10",       21, 10)
   2450 GEN_TWOVEC_TEST(fcvtas_s_s,   "fcvtas s10, s21",       10, 21)
   2451 GEN_TWOVEC_TEST(fcvtau_s_s,   "fcvtau s21, s10",       21, 10)
   2452 GEN_TWOVEC_TEST(fcvtas_2d_2d, "fcvtas v10.2d, v21.2d", 10, 21)
   2453 GEN_TWOVEC_TEST(fcvtau_2d_2d, "fcvtau v10.2d, v21.2d", 10, 21)
   2454 GEN_TWOVEC_TEST(fcvtas_4s_4s, "fcvtas v10.4s, v21.4s", 10, 21)
   2455 GEN_TWOVEC_TEST(fcvtau_4s_4s, "fcvtau v10.4s, v21.4s", 10, 21)
   2456 GEN_TWOVEC_TEST(fcvtas_2s_2s, "fcvtas v10.2s, v21.2s", 10, 21)
   2457 GEN_TWOVEC_TEST(fcvtau_2s_2s, "fcvtau v10.2s, v21.2s", 10, 21)
   2458 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_s, "fcvtas w21, s10", 21, 10)
   2459 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_s, "fcvtau w21, s10", 21, 10)
   2460 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_s, "fcvtas x21, s10", 21, 10)
   2461 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_s, "fcvtau x21, s10", 21, 10)
   2462 GEN_ONEINT_ONEVEC_TEST(fcvtas_w_d, "fcvtas w21, d10", 21, 10)
   2463 GEN_ONEINT_ONEVEC_TEST(fcvtau_w_d, "fcvtau w21, d10", 21, 10)
   2464 GEN_ONEINT_ONEVEC_TEST(fcvtas_x_d, "fcvtas x21, d10", 21, 10)
   2465 GEN_ONEINT_ONEVEC_TEST(fcvtau_x_d, "fcvtau x21, d10", 21, 10)
   2466 
   /* FCVTMS / FCVTMU: FP to signed / unsigned integer, rounding toward minus
      infinity.  Same operand shapes as the other FCVT?S/FCVT?U groups:
      scalar, vector (2d, 4s, 2s) and general-register destinations. */
   2467 GEN_TWOVEC_TEST(fcvtms_d_d,   "fcvtms d10, d21",       10, 21)
   2468 GEN_TWOVEC_TEST(fcvtmu_d_d,   "fcvtmu d21, d10",       21, 10)
   2469 GEN_TWOVEC_TEST(fcvtms_s_s,   "fcvtms s10, s21",       10, 21)
   2470 GEN_TWOVEC_TEST(fcvtmu_s_s,   "fcvtmu s21, s10",       21, 10)
   2471 GEN_TWOVEC_TEST(fcvtms_2d_2d, "fcvtms v10.2d, v21.2d", 10, 21)
   2472 GEN_TWOVEC_TEST(fcvtmu_2d_2d, "fcvtmu v10.2d, v21.2d", 10, 21)
   2473 GEN_TWOVEC_TEST(fcvtms_4s_4s, "fcvtms v10.4s, v21.4s", 10, 21)
   2474 GEN_TWOVEC_TEST(fcvtmu_4s_4s, "fcvtmu v10.4s, v21.4s", 10, 21)
   2475 GEN_TWOVEC_TEST(fcvtms_2s_2s, "fcvtms v10.2s, v21.2s", 10, 21)
   2476 GEN_TWOVEC_TEST(fcvtmu_2s_2s, "fcvtmu v10.2s, v21.2s", 10, 21)
   2477 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_s, "fcvtms w21, s10", 21, 10)
   2478 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_s, "fcvtmu w21, s10", 21, 10)
   2479 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_s, "fcvtms x21, s10", 21, 10)
   2480 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_s, "fcvtmu x21, s10", 21, 10)
   2481 GEN_ONEINT_ONEVEC_TEST(fcvtms_w_d, "fcvtms w21, d10", 21, 10)
   2482 GEN_ONEINT_ONEVEC_TEST(fcvtmu_w_d, "fcvtmu w21, d10", 21, 10)
   2483 GEN_ONEINT_ONEVEC_TEST(fcvtms_x_d, "fcvtms x21, d10", 21, 10)
   2484 GEN_ONEINT_ONEVEC_TEST(fcvtmu_x_d, "fcvtmu x21, d10", 21, 10)
   2485 
   /* FCVTNS / FCVTNU: FP to signed / unsigned integer, rounding to nearest
      with ties to even.  Scalar, vector (2d, 4s, 2s) and general-register
      destinations. */
   2486 GEN_TWOVEC_TEST(fcvtns_d_d,   "fcvtns d10, d21",       10, 21)
   2487 GEN_TWOVEC_TEST(fcvtnu_d_d,   "fcvtnu d21, d10",       21, 10)
   2488 GEN_TWOVEC_TEST(fcvtns_s_s,   "fcvtns s10, s21",       10, 21)
   2489 GEN_TWOVEC_TEST(fcvtnu_s_s,   "fcvtnu s21, s10",       21, 10)
   2490 GEN_TWOVEC_TEST(fcvtns_2d_2d, "fcvtns v10.2d, v21.2d", 10, 21)
   2491 GEN_TWOVEC_TEST(fcvtnu_2d_2d, "fcvtnu v10.2d, v21.2d", 10, 21)
   2492 GEN_TWOVEC_TEST(fcvtns_4s_4s, "fcvtns v10.4s, v21.4s", 10, 21)
   2493 GEN_TWOVEC_TEST(fcvtnu_4s_4s, "fcvtnu v10.4s, v21.4s", 10, 21)
   2494 GEN_TWOVEC_TEST(fcvtns_2s_2s, "fcvtns v10.2s, v21.2s", 10, 21)
   2495 GEN_TWOVEC_TEST(fcvtnu_2s_2s, "fcvtnu v10.2s, v21.2s", 10, 21)
   2496 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_s, "fcvtns w21, s10", 21, 10)
   2497 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_s, "fcvtnu w21, s10", 21, 10)
   2498 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_s, "fcvtns x21, s10", 21, 10)
   2499 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_s, "fcvtnu x21, s10", 21, 10)
   2500 GEN_ONEINT_ONEVEC_TEST(fcvtns_w_d, "fcvtns w21, d10", 21, 10)
   2501 GEN_ONEINT_ONEVEC_TEST(fcvtnu_w_d, "fcvtnu w21, d10", 21, 10)
   2502 GEN_ONEINT_ONEVEC_TEST(fcvtns_x_d, "fcvtns x21, d10", 21, 10)
   2503 GEN_ONEINT_ONEVEC_TEST(fcvtnu_x_d, "fcvtnu x21, d10", 21, 10)
   2504 
   /* FCVTPS / FCVTPU: FP to signed / unsigned integer, rounding toward plus
      infinity.  Scalar, vector (2d, 4s, 2s) and general-register
      destinations. */
   2505 GEN_TWOVEC_TEST(fcvtps_d_d,   "fcvtps d10, d21",       10, 21)
   2506 GEN_TWOVEC_TEST(fcvtpu_d_d,   "fcvtpu d21, d10",       21, 10)
   2507 GEN_TWOVEC_TEST(fcvtps_s_s,   "fcvtps s10, s21",       10, 21)
   2508 GEN_TWOVEC_TEST(fcvtpu_s_s,   "fcvtpu s21, s10",       21, 10)
   2509 GEN_TWOVEC_TEST(fcvtps_2d_2d, "fcvtps v10.2d, v21.2d", 10, 21)
   2510 GEN_TWOVEC_TEST(fcvtpu_2d_2d, "fcvtpu v10.2d, v21.2d", 10, 21)
   2511 GEN_TWOVEC_TEST(fcvtps_4s_4s, "fcvtps v10.4s, v21.4s", 10, 21)
   2512 GEN_TWOVEC_TEST(fcvtpu_4s_4s, "fcvtpu v10.4s, v21.4s", 10, 21)
   2513 GEN_TWOVEC_TEST(fcvtps_2s_2s, "fcvtps v10.2s, v21.2s", 10, 21)
   2514 GEN_TWOVEC_TEST(fcvtpu_2s_2s, "fcvtpu v10.2s, v21.2s", 10, 21)
   2515 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_s, "fcvtps w21, s10", 21, 10)
   2516 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_s, "fcvtpu w21, s10", 21, 10)
   2517 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_s, "fcvtps x21, s10", 21, 10)
   2518 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_s, "fcvtpu x21, s10", 21, 10)
   2519 GEN_ONEINT_ONEVEC_TEST(fcvtps_w_d, "fcvtps w21, d10", 21, 10)
   2520 GEN_ONEINT_ONEVEC_TEST(fcvtpu_w_d, "fcvtpu w21, d10", 21, 10)
   2521 GEN_ONEINT_ONEVEC_TEST(fcvtps_x_d, "fcvtps x21, d10", 21, 10)
   2522 GEN_ONEINT_ONEVEC_TEST(fcvtpu_x_d, "fcvtpu x21, d10", 21, 10)
   2523 
   /* FCVTZS / FCVTZU (integer forms, no #fbits): FP to signed / unsigned
      integer, rounding toward zero.  Scalar, vector (2d, 4s, 2s) and
      general-register destinations. */
   2524 GEN_TWOVEC_TEST(fcvtzs_d_d,   "fcvtzs d10, d21",       10, 21)
   2525 GEN_TWOVEC_TEST(fcvtzu_d_d,   "fcvtzu d21, d10",       21, 10)
   2526 GEN_TWOVEC_TEST(fcvtzs_s_s,   "fcvtzs s10, s21",       10, 21)
   2527 GEN_TWOVEC_TEST(fcvtzu_s_s,   "fcvtzu s21, s10",       21, 10)
   2528 GEN_TWOVEC_TEST(fcvtzs_2d_2d, "fcvtzs v10.2d, v21.2d", 10, 21)
   2529 GEN_TWOVEC_TEST(fcvtzu_2d_2d, "fcvtzu v10.2d, v21.2d", 10, 21)
   2530 GEN_TWOVEC_TEST(fcvtzs_4s_4s, "fcvtzs v10.4s, v21.4s", 10, 21)
   2531 GEN_TWOVEC_TEST(fcvtzu_4s_4s, "fcvtzu v10.4s, v21.4s", 10, 21)
   2532 GEN_TWOVEC_TEST(fcvtzs_2s_2s, "fcvtzs v10.2s, v21.2s", 10, 21)
   2533 GEN_TWOVEC_TEST(fcvtzu_2s_2s, "fcvtzu v10.2s, v21.2s", 10, 21)
   2534 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s, "fcvtzs w21, s10", 21, 10)
   2535 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s, "fcvtzu w21, s10", 21, 10)
   2536 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s, "fcvtzs x21, s10", 21, 10)
   2537 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s, "fcvtzu x21, s10", 21, 10)
   2538 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d, "fcvtzs w21, d10", 21, 10)
   2539 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d, "fcvtzu w21, d10", 21, 10)
   2540 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d, "fcvtzs x21, d10", 21, 10)
   2541 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d, "fcvtzu x21, d10", 21, 10)
   2542 
   /* FCVTZS / FCVTZU fixed-point forms: FP to signed / unsigned fixed-point
      with #fbits fractional bits, rounding toward zero.  Each operand shape
      is tested with three #fbits values: 1, a mid value, and a value equal
      to the destination width (16/32 or 32/64 as listed).  Covers scalar
      (d, s), vector (2d, 4s, 2s) and general-register (w/x from s/d)
      destinations. */
   2543 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits1,    "fcvtzs d10, d21, #1",   10, 21)
   2544 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits32,   "fcvtzs d10, d21, #32",  10, 21)
   2545 GEN_TWOVEC_TEST(fcvtzs_d_d_fbits64,   "fcvtzs d10, d21, #64",  10, 21)
   2546 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits1,    "fcvtzu d10, d21, #1",   10, 21)
   2547 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits32,   "fcvtzu d10, d21, #32",  10, 21)
   2548 GEN_TWOVEC_TEST(fcvtzu_d_d_fbits64,   "fcvtzu d10, d21, #64",  10, 21)
   2549 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits1,    "fcvtzs s10, s21, #1",   10, 21)
   2550 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits16,   "fcvtzs s10, s21, #16",  10, 21)
   2551 GEN_TWOVEC_TEST(fcvtzs_s_s_fbits32,   "fcvtzs s10, s21, #32",  10, 21)
   2552 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits1,    "fcvtzu s10, s21, #1",   10, 21)
   2553 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits16,   "fcvtzu s10, s21, #16",  10, 21)
   2554 GEN_TWOVEC_TEST(fcvtzu_s_s_fbits32,   "fcvtzu s10, s21, #32",  10, 21)
   2555 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits1,  "fcvtzs v10.2d, v21.2d, #1",  10, 21)
   2556 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits32, "fcvtzs v10.2d, v21.2d, #32", 10, 21)
   2557 GEN_TWOVEC_TEST(fcvtzs_2d_2d_fbits64, "fcvtzs v10.2d, v21.2d, #64", 10, 21)
   2558 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits1,  "fcvtzu v10.2d, v21.2d, #1",  10, 21)
   2559 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits32, "fcvtzu v10.2d, v21.2d, #32", 10, 21)
   2560 GEN_TWOVEC_TEST(fcvtzu_2d_2d_fbits64, "fcvtzu v10.2d, v21.2d, #64", 10, 21)
   2561 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits1,  "fcvtzs v10.4s, v21.4s, #1",  10, 21)
   2562 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits16, "fcvtzs v10.4s, v21.4s, #16", 10, 21)
   2563 GEN_TWOVEC_TEST(fcvtzs_4s_4s_fbits32, "fcvtzs v10.4s, v21.4s, #32", 10, 21)
   2564 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits1,  "fcvtzu v10.4s, v21.4s, #1",  10, 21)
   2565 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits16, "fcvtzu v10.4s, v21.4s, #16", 10, 21)
   2566 GEN_TWOVEC_TEST(fcvtzu_4s_4s_fbits32, "fcvtzu v10.4s, v21.4s, #32", 10, 21)
   2567 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits1,  "fcvtzs v10.2s, v21.2s, #1",  10, 21)
   2568 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits16, "fcvtzs v10.2s, v21.2s, #16", 10, 21)
   2569 GEN_TWOVEC_TEST(fcvtzs_2s_2s_fbits32, "fcvtzs v10.2s, v21.2s, #32", 10, 21)
   2570 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits1,  "fcvtzu v10.2s, v21.2s, #1",  10, 21)
   2571 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits16, "fcvtzu v10.2s, v21.2s, #16", 10, 21)
   2572 GEN_TWOVEC_TEST(fcvtzu_2s_2s_fbits32, "fcvtzu v10.2s, v21.2s, #32", 10, 21)
   2573 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits1,  "fcvtzs w21, s10, #1",  21, 10)
   2574 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits16, "fcvtzs w21, s10, #16", 21, 10)
   2575 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_s_fbits32, "fcvtzs w21, s10, #32", 21, 10)
   2576 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits1,  "fcvtzu w21, s10, #1",  21, 10)
   2577 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits16, "fcvtzu w21, s10, #16", 21, 10)
   2578 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_s_fbits32, "fcvtzu w21, s10, #32", 21, 10)
   2579 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits1,  "fcvtzs x21, s10, #1",  21, 10)
   2580 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits32, "fcvtzs x21, s10, #32", 21, 10)
   2581 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_s_fbits64, "fcvtzs x21, s10, #64", 21, 10)
   2582 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits1,  "fcvtzu x21, s10, #1",  21, 10)
   2583 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits32, "fcvtzu x21, s10, #32", 21, 10)
   2584 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_s_fbits64, "fcvtzu x21, s10, #64", 21, 10)
   2585 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits1,  "fcvtzs w21, d10, #1",  21, 10)
   2586 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits16, "fcvtzs w21, d10, #16", 21, 10)
   2587 GEN_ONEINT_ONEVEC_TEST(fcvtzs_w_d_fbits32, "fcvtzs w21, d10, #32", 21, 10)
   2588 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits1,  "fcvtzu w21, d10, #1",  21, 10)
   2589 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits16, "fcvtzu w21, d10, #16", 21, 10)
   2590 GEN_ONEINT_ONEVEC_TEST(fcvtzu_w_d_fbits32, "fcvtzu w21, d10, #32", 21, 10)
   2591 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits1,  "fcvtzs x21, d10, #1",  21, 10)
   2592 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits32, "fcvtzs x21, d10, #32", 21, 10)
   2593 GEN_ONEINT_ONEVEC_TEST(fcvtzs_x_d_fbits64, "fcvtzs x21, d10, #64", 21, 10)
   2594 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits1,  "fcvtzu x21, d10, #1",  21, 10)
   2595 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits32, "fcvtzu x21, d10, #32", 21, 10)
   2596 GEN_ONEINT_ONEVEC_TEST(fcvtzu_x_d_fbits64, "fcvtzu x21, d10, #64", 21, 10)
   2597 
   /* FCVTXN / FCVTXN2 (double to single, narrow, round-to-odd): scalar form
      plus the two vector forms.  The FCVTXN2 case is named fcvtxn_4s_2d. */
   2598 GEN_TWOVEC_TEST(fcvtxn_s_d,   "fcvtxn s10, d21", 10, 21)
   2599 GEN_TWOVEC_TEST(fcvtxn_2s_2d, "fcvtxn  v10.2s, v21.2d", 10, 21)
   2600 GEN_TWOVEC_TEST(fcvtxn_4s_2d, "fcvtxn2 v10.4s, v21.2d", 10, 21)
   2601 
   /* SCVTF / UCVTF fixed-point forms within the FP/SIMD register file:
      signed / unsigned fixed-point (with #fbits fractional bits) to FP.
      Scalar (d, s) and vector (2d, 4s, 2s) shapes, each with #fbits of 1, a
      mid value, and the element width.  The scalar unsigned cases swap the
      two registers (21 <- 10) relative to the signed ones. */
   2602 GEN_TWOVEC_TEST(scvtf_d_d_fbits1,    "scvtf d10, d21      , #1",  10, 21)
   2603 GEN_TWOVEC_TEST(scvtf_d_d_fbits32,   "scvtf d10, d21      , #32", 10, 21)
   2604 GEN_TWOVEC_TEST(scvtf_d_d_fbits64,   "scvtf d10, d21      , #64", 10, 21)
   2605 GEN_TWOVEC_TEST(ucvtf_d_d_fbits1,    "ucvtf d21, d10      , #1",  21, 10)
   2606 GEN_TWOVEC_TEST(ucvtf_d_d_fbits32,   "ucvtf d21, d10      , #32", 21, 10)
   2607 GEN_TWOVEC_TEST(ucvtf_d_d_fbits64,   "ucvtf d21, d10      , #64", 21, 10)
   2608 GEN_TWOVEC_TEST(scvtf_s_s_fbits1,    "scvtf s10, s21      , #1",  10, 21)
   2609 GEN_TWOVEC_TEST(scvtf_s_s_fbits16,   "scvtf s10, s21      , #16", 10, 21)
   2610 GEN_TWOVEC_TEST(scvtf_s_s_fbits32,   "scvtf s10, s21      , #32", 10, 21)
   2611 GEN_TWOVEC_TEST(ucvtf_s_s_fbits1,    "ucvtf s21, s10      , #1",  21, 10)
   2612 GEN_TWOVEC_TEST(ucvtf_s_s_fbits16,   "ucvtf s21, s10      , #16", 21, 10)
   2613 GEN_TWOVEC_TEST(ucvtf_s_s_fbits32,   "ucvtf s21, s10      , #32", 21, 10)
   2614 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits1,  "scvtf v10.2d, v21.2d, #1",  10, 21)
   2615 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits32, "scvtf v10.2d, v21.2d, #32", 10, 21)
   2616 GEN_TWOVEC_TEST(scvtf_2d_2d_fbits64, "scvtf v10.2d, v21.2d, #64", 10, 21)
   2617 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits1,  "ucvtf v10.2d, v21.2d, #1",  10, 21)
   2618 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits32, "ucvtf v10.2d, v21.2d, #32", 10, 21)
   2619 GEN_TWOVEC_TEST(ucvtf_2d_2d_fbits64, "ucvtf v10.2d, v21.2d, #64", 10, 21)
   2620 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits1,  "scvtf v10.4s, v21.4s, #1",  10, 21)
   2621 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits16, "scvtf v10.4s, v21.4s, #16", 10, 21)
   2622 GEN_TWOVEC_TEST(scvtf_4s_4s_fbits32, "scvtf v10.4s, v21.4s, #32", 10, 21)
   2623 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits1,  "ucvtf v10.4s, v21.4s, #1",  10, 21)
   2624 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits16, "ucvtf v10.4s, v21.4s, #16", 10, 21)
   2625 GEN_TWOVEC_TEST(ucvtf_4s_4s_fbits32, "ucvtf v10.4s, v21.4s, #32", 10, 21)
   2626 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits1,  "scvtf v10.2s, v21.2s, #1",  10, 21)
   2627 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits16, "scvtf v10.2s, v21.2s, #16", 10, 21)
   2628 GEN_TWOVEC_TEST(scvtf_2s_2s_fbits32, "scvtf v10.2s, v21.2s, #32", 10, 21)
   2629 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits1,  "ucvtf v10.2s, v21.2s, #1",  10, 21)
   2630 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits16, "ucvtf v10.2s, v21.2s, #16", 10, 21)
   2631 GEN_TWOVEC_TEST(ucvtf_2s_2s_fbits32, "ucvtf v10.2s, v21.2s, #32", 10, 21)
   2632 
   /* SCVTF / UCVTF integer forms within the FP/SIMD register file: signed /
      unsigned integer to FP, scalar (d, s) and vector (2d, 4s, 2s). */
   2633 GEN_TWOVEC_TEST(scvtf_d_d,   "scvtf d10, d21",       10, 21)
   2634 GEN_TWOVEC_TEST(ucvtf_d_d,   "ucvtf d21, d10",       21, 10)
   2635 GEN_TWOVEC_TEST(scvtf_s_s,   "scvtf s10, s21",       10, 21)
   2636 GEN_TWOVEC_TEST(ucvtf_s_s,   "ucvtf s21, s10",       21, 10)
   2637 GEN_TWOVEC_TEST(scvtf_2d_2d, "scvtf v10.2d, v21.2d", 10, 21)
   2638 GEN_TWOVEC_TEST(ucvtf_2d_2d, "ucvtf v10.2d, v21.2d", 10, 21)
   2639 GEN_TWOVEC_TEST(scvtf_4s_4s, "scvtf v10.4s, v21.4s", 10, 21)
   2640 GEN_TWOVEC_TEST(ucvtf_4s_4s, "ucvtf v10.4s, v21.4s", 10, 21)
   2641 GEN_TWOVEC_TEST(scvtf_2s_2s, "scvtf v10.2s, v21.2s", 10, 21)
   2642 GEN_TWOVEC_TEST(ucvtf_2s_2s, "ucvtf v10.2s, v21.2s", 10, 21)
   2643 
   /* SCVTF / UCVTF fixed-point forms with a general-purpose source register:
      w15/x15 (fixed-point, #fbits fractional bits) converted to s7/d7.
      Each source/destination pairing is tested with #fbits of 1, a mid
      value, and the source register width (32 for w, 64 for x).  The integer
      register number (15) is passed before the vector register number (7). */
   2644 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits1,  "scvtf s7, w15, #1",  15, 7)
   2645 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits16, "scvtf s7, w15, #16", 15, 7)
   2646 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w_fbits32, "scvtf s7, w15, #32", 15, 7)
   2647 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits1,  "scvtf d7, w15, #1",  15, 7)
   2648 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits16, "scvtf d7, w15, #16", 15, 7)
   2649 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w_fbits32, "scvtf d7, w15, #32", 15, 7)
   2650 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits1,  "scvtf s7, x15, #1",  15, 7)
   2651 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits32, "scvtf s7, x15, #32", 15, 7)
   2652 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x_fbits64, "scvtf s7, x15, #64", 15, 7)
   2653 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits1,  "scvtf d7, x15, #1",  15, 7)
   2654 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits32, "scvtf d7, x15, #32", 15, 7)
   2655 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x_fbits64, "scvtf d7, x15, #64", 15, 7)
   2656 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits1,  "ucvtf s7, w15, #1",  15, 7)
   2657 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits16, "ucvtf s7, w15, #16", 15, 7)
   2658 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w_fbits32, "ucvtf s7, w15, #32", 15, 7)
   2659 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits1,  "ucvtf d7, w15, #1",  15, 7)
   2660 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits16, "ucvtf d7, w15, #16", 15, 7)
   2661 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w_fbits32, "ucvtf d7, w15, #32", 15, 7)
   2662 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits1,  "ucvtf s7, x15, #1",  15, 7)
   2663 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits32, "ucvtf s7, x15, #32", 15, 7)
   2664 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x_fbits64, "ucvtf s7, x15, #64", 15, 7)
   2665 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits1,  "ucvtf d7, x15, #1",  15, 7)
   2666 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits32, "ucvtf d7, x15, #32", 15, 7)
   2667 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x_fbits64, "ucvtf d7, x15, #64", 15, 7)
   2668 
   /* SCVTF / UCVTF integer forms with a general-purpose source register:
      all four combinations of w15/x15 source and s7/d7 destination, signed
      and unsigned. */
   2669 GEN_ONEINT_ONEVEC_TEST(scvtf_s_w, "scvtf s7, w15", 15, 7)
   2670 GEN_ONEINT_ONEVEC_TEST(scvtf_d_w, "scvtf d7, w15", 15, 7)
   2671 GEN_ONEINT_ONEVEC_TEST(scvtf_s_x, "scvtf s7, x15", 15, 7)
   2672 GEN_ONEINT_ONEVEC_TEST(scvtf_d_x, "scvtf d7, x15", 15, 7)
   2673 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_w, "ucvtf s7, w15", 15, 7)
   2674 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_w, "ucvtf d7, w15", 15, 7)
   2675 GEN_ONEINT_ONEVEC_TEST(ucvtf_s_x, "ucvtf s7, x15", 15, 7)
   2676 GEN_ONEINT_ONEVEC_TEST(ucvtf_d_x, "ucvtf d7, x15", 15, 7)
   2677 
   2678 // ======================== INT ========================
   2679 
   /* Scalar integer ABS and NEG on a 64-bit (d) register. */
   2680 GEN_TWOVEC_TEST(abs_d_d,  "abs d22, d23",   22, 23)
   2681 GEN_TWOVEC_TEST(neg_d_d,  "neg d22, d23",   22, 23)
   2682 
   /* Vector integer ABS and NEG over all seven arrangements (2d .. 8b).
      GEN_UNARY_TEST takes the mnemonic plus destination and source
      arrangement suffixes; the macro is defined elsewhere in this file, so
      the registers it uses are not visible here -- presumably it builds the
      test name and instruction string from these tokens (TODO confirm). */
   2683 GEN_UNARY_TEST(abs, 2d, 2d)
   2684 GEN_UNARY_TEST(abs, 4s, 4s)
   2685 GEN_UNARY_TEST(abs, 2s, 2s)
   2686 GEN_UNARY_TEST(abs, 8h, 8h)
   2687 GEN_UNARY_TEST(abs, 4h, 4h)
   2688 GEN_UNARY_TEST(abs, 16b, 16b)
   2689 GEN_UNARY_TEST(abs, 8b, 8b)
   2690 GEN_UNARY_TEST(neg, 2d, 2d)
   2691 GEN_UNARY_TEST(neg, 4s, 4s)
   2692 GEN_UNARY_TEST(neg, 2s, 2s)
   2693 GEN_UNARY_TEST(neg, 8h, 8h)
   2694 GEN_UNARY_TEST(neg, 4h, 4h)
   2695 GEN_UNARY_TEST(neg, 16b, 16b)
   2696 GEN_UNARY_TEST(neg, 8b,  8b)
   2697 
   /* Scalar integer ADD and SUB on 64-bit (d) registers. */
   2698 GEN_THREEVEC_TEST(add_d_d_d, "add d21, d22, d23", 21, 22, 23)
   2699 GEN_THREEVEC_TEST(sub_d_d_d, "sub d21, d22, d23", 21, 22, 23)
   2700 
   /* Vector integer ADD and SUB over all seven arrangements (2d .. 8b).
      GEN_BINARY_TEST takes the mnemonic followed by the destination and the
      two source arrangement suffixes; the macro is defined elsewhere in this
      file, so the registers it uses are not visible here. */
   2701 GEN_BINARY_TEST(add, 2d, 2d, 2d)
   2702 GEN_BINARY_TEST(add, 4s, 4s, 4s)
   2703 GEN_BINARY_TEST(add, 2s, 2s, 2s)
   2704 GEN_BINARY_TEST(add, 8h, 8h, 8h)
   2705 GEN_BINARY_TEST(add, 4h, 4h, 4h)
   2706 GEN_BINARY_TEST(add, 16b, 16b, 16b)
   2707 GEN_BINARY_TEST(add, 8b, 8b, 8b)
   2708 GEN_BINARY_TEST(sub, 2d, 2d, 2d)
   2709 GEN_BINARY_TEST(sub, 4s, 4s, 4s)
   2710 GEN_BINARY_TEST(sub, 2s, 2s, 2s)
   2711 GEN_BINARY_TEST(sub, 8h, 8h, 8h)
   2712 GEN_BINARY_TEST(sub, 4h, 4h, 4h)
   2713 GEN_BINARY_TEST(sub, 16b, 16b, 16b)
   2714 GEN_BINARY_TEST(sub, 8b, 8b, 8b)
   2715 
   /* Narrowing high-half add/subtract: ADDHN, SUBHN and their rounding
      variants RADDHN, RSUBHN, together with the "2" forms that target the
      upper half of the destination.  In every case the destination
      arrangement has elements half the width of the source elements
      (2d -> 2s/4s, 4s -> 4h/8h, 8h -> 8b/16b). */
   2716 GEN_BINARY_TEST(addhn,   2s, 2d, 2d)
   2717 GEN_BINARY_TEST(addhn2,  4s, 2d, 2d)
   2718 GEN_BINARY_TEST(addhn,   4h, 4s, 4s)
   2719 GEN_BINARY_TEST(addhn2,  8h, 4s, 4s)
   2720 GEN_BINARY_TEST(addhn,   8b, 8h, 8h)
   2721 GEN_BINARY_TEST(addhn2,  16b, 8h, 8h)
   2722 GEN_BINARY_TEST(subhn,   2s, 2d, 2d)
   2723 GEN_BINARY_TEST(subhn2,  4s, 2d, 2d)
   2724 GEN_BINARY_TEST(subhn,   4h, 4s, 4s)
   2725 GEN_BINARY_TEST(subhn2,  8h, 4s, 4s)
   2726 GEN_BINARY_TEST(subhn,   8b, 8h, 8h)
   2727 GEN_BINARY_TEST(subhn2,  16b, 8h, 8h)
   2728 GEN_BINARY_TEST(raddhn,  2s, 2d, 2d)
   2729 GEN_BINARY_TEST(raddhn2, 4s, 2d, 2d)
   2730 GEN_BINARY_TEST(raddhn,  4h, 4s, 4s)
   2731 GEN_BINARY_TEST(raddhn2, 8h, 4s, 4s)
   2732 GEN_BINARY_TEST(raddhn,  8b, 8h, 8h)
   2733 GEN_BINARY_TEST(raddhn2, 16b, 8h, 8h)
   2734 GEN_BINARY_TEST(rsubhn,  2s, 2d, 2d)
   2735 GEN_BINARY_TEST(rsubhn2, 4s, 2d, 2d)
   2736 GEN_BINARY_TEST(rsubhn,  4h, 4s, 4s)
   2737 GEN_BINARY_TEST(rsubhn2, 8h, 4s, 4s)
   2738 GEN_BINARY_TEST(rsubhn,  8b, 8h, 8h)
   2739 GEN_BINARY_TEST(rsubhn2, 16b, 8h, 8h)
   2740 
   /* Scalar pairwise ADDP: the two 64-bit lanes of v23 are added into d22. */
   2741 GEN_TWOVEC_TEST(addp_d_2d,  "addp d22, v23.2d",   22, 23)
   2742 
   /* Vector pairwise ADDP over all seven arrangements (2d .. 8b). */
   2743 GEN_BINARY_TEST(addp, 2d, 2d, 2d)
   2744 GEN_BINARY_TEST(addp, 4s, 4s, 4s)
   2745 GEN_BINARY_TEST(addp, 2s, 2s, 2s)
   2746 GEN_BINARY_TEST(addp, 8h, 8h, 8h)
   2747 GEN_BINARY_TEST(addp, 4h, 4h, 4h)
   2748 GEN_BINARY_TEST(addp, 16b, 16b, 16b)
   2749 GEN_BINARY_TEST(addp, 8b, 8b, 8b)
   2750 
   /* Across-vector ADDV: all lanes of v23 are summed into a scalar of the
      same element size (s, h or b) in register 22. */
   2751 GEN_TWOVEC_TEST(addv_s_4s,  "addv s22, v23.4s",  22, 23)
   2752 GEN_TWOVEC_TEST(addv_h_8h,  "addv h22, v23.8h",  22, 23)
   2753 GEN_TWOVEC_TEST(addv_h_4h,  "addv h22, v23.4h",  22, 23)
   2754 GEN_TWOVEC_TEST(addv_b_16b, "addv b22, v23.16b", 22, 23)
   2755 GEN_TWOVEC_TEST(addv_b_8b,  "addv b22, v23.8b",  22, 23)
   2756 
        /* Bitwise AND, BIC, ORR and ORN, register form.  Only the byte
           arrangements 16b (128-bit) and 8b (64-bit) are listed for each. */
   2757 GEN_BINARY_TEST(and, 16b, 16b, 16b)
   2758 GEN_BINARY_TEST(and, 8b, 8b, 8b)
   2759 GEN_BINARY_TEST(bic, 16b, 16b, 16b)
   2760 GEN_BINARY_TEST(bic, 8b, 8b, 8b)
   2761 GEN_BINARY_TEST(orr, 16b, 16b, 16b)
   2762 GEN_BINARY_TEST(orr, 8b, 8b, 8b)
   2763 GEN_BINARY_TEST(orn, 16b, 16b, 16b)
   2764 GEN_BINARY_TEST(orn, 8b, 8b, 8b)
   2765 
   2766 /* overkill -- don't need two vecs, only one */
        /* ORR and BIC, immediate form.  Each asm string names only v22; 23 is
           still passed as the second register number, which is the "overkill"
           noted above.  Shifts covered: LSL #0/#8 for 16-bit lanes (8h, 4h) and
           LSL #0/#8/#16/#24 for 32-bit lanes (4s, 2s).  The test name encodes
           arrangement, immediate and shift. */
   2767 GEN_TWOVEC_TEST(orr_8h_0x5A_lsl0, "orr v22.8h, #0x5A, LSL #0", 22, 23)
   2768 GEN_TWOVEC_TEST(orr_8h_0xA5_lsl8, "orr v22.8h, #0xA5, LSL #8", 22, 23)
   2769 GEN_TWOVEC_TEST(orr_4h_0x5A_lsl0, "orr v22.4h, #0x5A, LSL #0", 22, 23)
   2770 GEN_TWOVEC_TEST(orr_4h_0xA5_lsl8, "orr v22.4h, #0xA5, LSL #8", 22, 23)
   2771 GEN_TWOVEC_TEST(orr_4s_0x5A_lsl0,  "orr v22.4s, #0x5A, LSL #0",  22, 23)
   2772 GEN_TWOVEC_TEST(orr_4s_0x6B_lsl8,  "orr v22.4s, #0x6B, LSL #8",  22, 23)
   2773 GEN_TWOVEC_TEST(orr_4s_0x49_lsl16, "orr v22.4s, #0x49, LSL #16", 22, 23)
   2774 GEN_TWOVEC_TEST(orr_4s_0x3D_lsl24, "orr v22.4s, #0x3D, LSL #24", 22, 23)
   2775 GEN_TWOVEC_TEST(orr_2s_0x5A_lsl0,  "orr v22.2s, #0x5A, LSL #0",  22, 23)
   2776 GEN_TWOVEC_TEST(orr_2s_0x6B_lsl8,  "orr v22.2s, #0x6B, LSL #8",  22, 23)
   2777 GEN_TWOVEC_TEST(orr_2s_0x49_lsl16, "orr v22.2s, #0x49, LSL #16", 22, 23)
   2778 GEN_TWOVEC_TEST(orr_2s_0x3D_lsl24, "orr v22.2s, #0x3D, LSL #24", 22, 23)
   2779 GEN_TWOVEC_TEST(bic_8h_0x5A_lsl0, "bic v22.8h, #0x5A, LSL #0", 22, 23)
   2780 GEN_TWOVEC_TEST(bic_8h_0xA5_lsl8, "bic v22.8h, #0xA5, LSL #8", 22, 23)
   2781 GEN_TWOVEC_TEST(bic_4h_0x5A_lsl0, "bic v22.4h, #0x5A, LSL #0", 22, 23)
   2782 GEN_TWOVEC_TEST(bic_4h_0xA5_lsl8, "bic v22.4h, #0xA5, LSL #8", 22, 23)
   2783 GEN_TWOVEC_TEST(bic_4s_0x5A_lsl0,  "bic v22.4s, #0x5A, LSL #0",  22, 23)
   2784 GEN_TWOVEC_TEST(bic_4s_0x6B_lsl8,  "bic v22.4s, #0x6B, LSL #8",  22, 23)
   2785 GEN_TWOVEC_TEST(bic_4s_0x49_lsl16, "bic v22.4s, #0x49, LSL #16", 22, 23)
   2786 GEN_TWOVEC_TEST(bic_4s_0x3D_lsl24, "bic v22.4s, #0x3D, LSL #24", 22, 23)
   2787 GEN_TWOVEC_TEST(bic_2s_0x5A_lsl0,  "bic v22.2s, #0x5A, LSL #0",  22, 23)
   2788 GEN_TWOVEC_TEST(bic_2s_0x6B_lsl8,  "bic v22.2s, #0x6B, LSL #8",  22, 23)
   2789 GEN_TWOVEC_TEST(bic_2s_0x49_lsl16, "bic v22.2s, #0x49, LSL #16", 22, 23)
   2790 GEN_TWOVEC_TEST(bic_2s_0x3D_lsl24, "bic v22.2s, #0x3D, LSL #24", 22, 23)
   2791 
        /* Bitwise insert/select BIF, BIT, BSL, plus EOR.  As with the other
           bitwise register ops, only the 16b and 8b arrangements are listed. */
   2792 GEN_BINARY_TEST(bif, 16b, 16b, 16b)
   2793 GEN_BINARY_TEST(bif, 8b, 8b, 8b)
   2794 GEN_BINARY_TEST(bit, 16b, 16b, 16b)
   2795 GEN_BINARY_TEST(bit, 8b, 8b, 8b)
   2796 GEN_BINARY_TEST(bsl, 16b, 16b, 16b)
   2797 GEN_BINARY_TEST(bsl, 8b, 8b, 8b)
   2798 GEN_BINARY_TEST(eor, 16b, 16b, 16b)
   2799 GEN_BINARY_TEST(eor, 8b, 8b, 8b)
   2800 
        /* CLS (count leading sign bits) and CLZ (count leading zero bits).
           32-, 16- and 8-bit lanes in both 128-bit and 64-bit arrangements;
           no 64-bit-lane (2d) case is listed. */
   2801 GEN_UNARY_TEST(cls, 4s, 4s)
   2802 GEN_UNARY_TEST(cls, 2s, 2s)
   2803 GEN_UNARY_TEST(cls, 8h, 8h)
   2804 GEN_UNARY_TEST(cls, 4h, 4h)
   2805 GEN_UNARY_TEST(cls, 16b, 16b)
   2806 GEN_UNARY_TEST(cls, 8b, 8b)
   2807 GEN_UNARY_TEST(clz, 4s, 4s)
   2808 GEN_UNARY_TEST(clz, 2s, 2s)
   2809 GEN_UNARY_TEST(clz, 8h, 8h)
   2810 GEN_UNARY_TEST(clz, 4h, 4h)
   2811 GEN_UNARY_TEST(clz, 16b, 16b)
   2812 GEN_UNARY_TEST(clz, 8b, 8b)
   2813 
        /* Integer compares, scalar register form: destination d2, sources d11
           and d29 (the trailing 2, 11, 29).  Covers CMEQ, CMGE, CMGT, CMHI,
           CMHS and CMTST. */
   2814 GEN_THREEVEC_TEST(cmeq_d_d_d,  "cmeq  d2, d11, d29", 2, 11, 29)
   2815 GEN_THREEVEC_TEST(cmge_d_d_d,  "cmge  d2, d11, d29", 2, 11, 29)
   2816 GEN_THREEVEC_TEST(cmgt_d_d_d,  "cmgt  d2, d11, d29", 2, 11, 29)
   2817 GEN_THREEVEC_TEST(cmhi_d_d_d,  "cmhi  d2, d11, d29", 2, 11, 29)
   2818 GEN_THREEVEC_TEST(cmhs_d_d_d,  "cmhs  d2, d11, d29", 2, 11, 29)
   2819 GEN_THREEVEC_TEST(cmtst_d_d_d, "cmtst d2, d11, d29", 2, 11, 29)
   2820 
        /* Integer compares, vector register form.  Each of CMEQ, CMGE, CMGT,
           CMHI, CMHS and CMTST is instantiated for all seven arrangements
           (2d, 4s, 2s, 8h, 4h, 16b, 8b), with identical arrangement for all
           three operands. */
   2821 GEN_BINARY_TEST(cmeq, 2d, 2d, 2d)
   2822 GEN_BINARY_TEST(cmeq, 4s, 4s, 4s)
   2823 GEN_BINARY_TEST(cmeq, 2s, 2s, 2s)
   2824 GEN_BINARY_TEST(cmeq, 8h, 8h, 8h)
   2825 GEN_BINARY_TEST(cmeq, 4h, 4h, 4h)
   2826 GEN_BINARY_TEST(cmeq, 16b, 16b, 16b)
   2827 GEN_BINARY_TEST(cmeq, 8b, 8b, 8b)
   2828 GEN_BINARY_TEST(cmge, 2d, 2d, 2d)
   2829 GEN_BINARY_TEST(cmge, 4s, 4s, 4s)
   2830 GEN_BINARY_TEST(cmge, 2s, 2s, 2s)
   2831 GEN_BINARY_TEST(cmge, 8h, 8h, 8h)
   2832 GEN_BINARY_TEST(cmge, 4h, 4h, 4h)
   2833 GEN_BINARY_TEST(cmge, 16b, 16b, 16b)
   2834 GEN_BINARY_TEST(cmge, 8b, 8b, 8b)
   2835 GEN_BINARY_TEST(cmgt, 2d, 2d, 2d)
   2836 GEN_BINARY_TEST(cmgt, 4s, 4s, 4s)
   2837 GEN_BINARY_TEST(cmgt, 2s, 2s, 2s)
   2838 GEN_BINARY_TEST(cmgt, 8h, 8h, 8h)
   2839 GEN_BINARY_TEST(cmgt, 4h, 4h, 4h)
   2840 GEN_BINARY_TEST(cmgt, 16b, 16b, 16b)
   2841 GEN_BINARY_TEST(cmgt, 8b, 8b, 8b)
   2842 GEN_BINARY_TEST(cmhi, 2d, 2d, 2d)
   2843 GEN_BINARY_TEST(cmhi, 4s, 4s, 4s)
   2844 GEN_BINARY_TEST(cmhi, 2s, 2s, 2s)
   2845 GEN_BINARY_TEST(cmhi, 8h, 8h, 8h)
   2846 GEN_BINARY_TEST(cmhi, 4h, 4h, 4h)
   2847 GEN_BINARY_TEST(cmhi, 16b, 16b, 16b)
   2848 GEN_BINARY_TEST(cmhi, 8b, 8b, 8b)
   2849 GEN_BINARY_TEST(cmhs, 2d, 2d, 2d)
   2850 GEN_BINARY_TEST(cmhs, 4s, 4s, 4s)
   2851 GEN_BINARY_TEST(cmhs, 2s, 2s, 2s)
   2852 GEN_BINARY_TEST(cmhs, 8h, 8h, 8h)
   2853 GEN_BINARY_TEST(cmhs, 4h, 4h, 4h)
   2854 GEN_BINARY_TEST(cmhs, 16b, 16b, 16b)
   2855 GEN_BINARY_TEST(cmhs, 8b, 8b, 8b)
   2856 GEN_BINARY_TEST(cmtst, 2d, 2d, 2d)
   2857 GEN_BINARY_TEST(cmtst, 4s, 4s, 4s)
   2858 GEN_BINARY_TEST(cmtst, 2s, 2s, 2s)
   2859 GEN_BINARY_TEST(cmtst, 8h, 8h, 8h)
   2860 GEN_BINARY_TEST(cmtst, 4h, 4h, 4h)
   2861 GEN_BINARY_TEST(cmtst, 16b, 16b, 16b)
   2862 GEN_BINARY_TEST(cmtst, 8b, 8b, 8b)
   2863 
        /* Compare against immediate #0, scalar form: destination d2, source
           d11.  Covers CMEQ, CMGE, CMGT, CMLE and CMLT. */
   2864 GEN_TWOVEC_TEST(cmeq_zero_d_d,  "cmeq  d2, d11, #0", 2, 11)
   2865 GEN_TWOVEC_TEST(cmge_zero_d_d,  "cmge  d2, d11, #0", 2, 11)
   2866 GEN_TWOVEC_TEST(cmgt_zero_d_d,  "cmgt  d2, d11, #0", 2, 11)
   2867 GEN_TWOVEC_TEST(cmle_zero_d_d,  "cmle  d2, d11, #0", 2, 11)
   2868 GEN_TWOVEC_TEST(cmlt_zero_d_d,  "cmlt  d2, d11, #0", 2, 11)
   2869 
        /* Compare against immediate #0, vector form: destination v5, source
           v22.  Each of CMEQ, CMGE, CMGT, CMLE and CMLT is instantiated for all
           seven arrangements, 2d down to 8b. */
   2870 GEN_TWOVEC_TEST(cmeq_zero_2d_2d,   "cmeq v5.2d,  v22.2d,  #0", 5, 22)
   2871 GEN_TWOVEC_TEST(cmeq_zero_4s_4s,   "cmeq v5.4s,  v22.4s,  #0", 5, 22)
   2872 GEN_TWOVEC_TEST(cmeq_zero_2s_2s,   "cmeq v5.2s,  v22.2s,  #0", 5, 22)
   2873 GEN_TWOVEC_TEST(cmeq_zero_8h_8h,   "cmeq v5.8h,  v22.8h,  #0", 5, 22)
   2874 GEN_TWOVEC_TEST(cmeq_zero_4h_4h,   "cmeq v5.4h,  v22.4h,  #0", 5, 22)
   2875 GEN_TWOVEC_TEST(cmeq_zero_16b_16b, "cmeq v5.16b, v22.16b, #0", 5, 22)
   2876 GEN_TWOVEC_TEST(cmeq_zero_8b_8b,   "cmeq v5.8b,  v22.8b,  #0", 5, 22)
   2877 GEN_TWOVEC_TEST(cmge_zero_2d_2d,   "cmge v5.2d,  v22.2d,  #0", 5, 22)
   2878 GEN_TWOVEC_TEST(cmge_zero_4s_4s,   "cmge v5.4s,  v22.4s,  #0", 5, 22)
   2879 GEN_TWOVEC_TEST(cmge_zero_2s_2s,   "cmge v5.2s,  v22.2s,  #0", 5, 22)
   2880 GEN_TWOVEC_TEST(cmge_zero_8h_8h,   "cmge v5.8h,  v22.8h,  #0", 5, 22)
   2881 GEN_TWOVEC_TEST(cmge_zero_4h_4h,   "cmge v5.4h,  v22.4h,  #0", 5, 22)
   2882 GEN_TWOVEC_TEST(cmge_zero_16b_16b, "cmge v5.16b, v22.16b, #0", 5, 22)
   2883 GEN_TWOVEC_TEST(cmge_zero_8b_8b,   "cmge v5.8b,  v22.8b,  #0", 5, 22)
   2884 GEN_TWOVEC_TEST(cmgt_zero_2d_2d,   "cmgt v5.2d,  v22.2d,  #0", 5, 22)
   2885 GEN_TWOVEC_TEST(cmgt_zero_4s_4s,   "cmgt v5.4s,  v22.4s,  #0", 5, 22)
   2886 GEN_TWOVEC_TEST(cmgt_zero_2s_2s,   "cmgt v5.2s,  v22.2s,  #0", 5, 22)
   2887 GEN_TWOVEC_TEST(cmgt_zero_8h_8h,   "cmgt v5.8h,  v22.8h,  #0", 5, 22)
   2888 GEN_TWOVEC_TEST(cmgt_zero_4h_4h,   "cmgt v5.4h,  v22.4h,  #0", 5, 22)
   2889 GEN_TWOVEC_TEST(cmgt_zero_16b_16b, "cmgt v5.16b, v22.16b, #0", 5, 22)
   2890 GEN_TWOVEC_TEST(cmgt_zero_8b_8b,   "cmgt v5.8b,  v22.8b,  #0", 5, 22)
   2891 GEN_TWOVEC_TEST(cmle_zero_2d_2d,   "cmle v5.2d,  v22.2d,  #0", 5, 22)
   2892 GEN_TWOVEC_TEST(cmle_zero_4s_4s,   "cmle v5.4s,  v22.4s,  #0", 5, 22)
   2893 GEN_TWOVEC_TEST(cmle_zero_2s_2s,   "cmle v5.2s,  v22.2s,  #0", 5, 22)
   2894 GEN_TWOVEC_TEST(cmle_zero_8h_8h,   "cmle v5.8h,  v22.8h,  #0", 5, 22)
   2895 GEN_TWOVEC_TEST(cmle_zero_4h_4h,   "cmle v5.4h,  v22.4h,  #0", 5, 22)
   2896 GEN_TWOVEC_TEST(cmle_zero_16b_16b, "cmle v5.16b, v22.16b, #0", 5, 22)
   2897 GEN_TWOVEC_TEST(cmle_zero_8b_8b,   "cmle v5.8b,  v22.8b,  #0", 5, 22)
   2898 GEN_TWOVEC_TEST(cmlt_zero_2d_2d,   "cmlt v5.2d,  v22.2d,  #0", 5, 22)
   2899 GEN_TWOVEC_TEST(cmlt_zero_4s_4s,   "cmlt v5.4s,  v22.4s,  #0", 5, 22)
   2900 GEN_TWOVEC_TEST(cmlt_zero_2s_2s,   "cmlt v5.2s,  v22.2s,  #0", 5, 22)
   2901 GEN_TWOVEC_TEST(cmlt_zero_8h_8h,   "cmlt v5.8h,  v22.8h,  #0", 5, 22)
   2902 GEN_TWOVEC_TEST(cmlt_zero_4h_4h,   "cmlt v5.4h,  v22.4h,  #0", 5, 22)
   2903 GEN_TWOVEC_TEST(cmlt_zero_16b_16b, "cmlt v5.16b, v22.16b, #0", 5, 22)
   2904 GEN_TWOVEC_TEST(cmlt_zero_8b_8b,   "cmlt v5.8b,  v22.8b,  #0", 5, 22)
   2905 
        /* CNT (per-byte population count): byte arrangements 16b and 8b. */
   2906 GEN_UNARY_TEST(cnt, 16b, 16b)
   2907 GEN_UNARY_TEST(cnt, 8b, 8b)
   2908 
        /* DUP (element), scalar destination: one lane of v23 is copied into the
           d/s/h/b view of register 22.  Two lane indices are exercised per
           element size (lowest lane plus a higher one).
           Every asm string names register 22 as destination so that it agrees
           with the destination register number (22) handed to
           GEN_TWOVEC_TEST, as all other cases in this file do.  The byte
           cases previously wrote b0 / b13, leaving v22 untouched by the
           instruction under test and modifying registers the macro was not
           told about.  Regenerate the expected-output files after this
           change. */
   2909 GEN_TWOVEC_TEST(dup_d_d0,  "dup d22, v23.d[0]", 22, 23)
   2910 GEN_TWOVEC_TEST(dup_d_d1,  "dup d22, v23.d[1]", 22, 23)
   2911 GEN_TWOVEC_TEST(dup_s_s0,  "dup s22, v23.s[0]", 22, 23)
   2912 GEN_TWOVEC_TEST(dup_s_s3,  "dup s22, v23.s[3]", 22, 23)
   2913 GEN_TWOVEC_TEST(dup_h_h0,  "dup h22, v23.h[0]", 22, 23)
   2914 GEN_TWOVEC_TEST(dup_h_h6,  "dup h22, v23.h[6]", 22, 23)
   2915 GEN_TWOVEC_TEST(dup_b_b0,  "dup b22, v23.b[0]",  22, 23)
   2916 GEN_TWOVEC_TEST(dup_b_b13, "dup b22, v23.b[13]", 22, 23)
   2917 
        /* DUP (element), vector destination: one lane of v17 is replicated
           into every lane of v9.  Two source-lane indices are exercised for
           each of the seven destination arrangements; the test name records
           the destination arrangement and the source lane. */
   2918 GEN_TWOVEC_TEST(dup_2d_d0,  "dup v9.2d, v17.d[0]", 9, 17)
   2919 GEN_TWOVEC_TEST(dup_2d_d1,  "dup v9.2d, v17.d[1]", 9, 17)
   2920 GEN_TWOVEC_TEST(dup_4s_s0,  "dup v9.4s, v17.s[0]", 9, 17)
   2921 GEN_TWOVEC_TEST(dup_4s_s3,  "dup v9.4s, v17.s[3]", 9, 17)
   2922 GEN_TWOVEC_TEST(dup_2s_s0,  "dup v9.2s, v17.s[0]", 9, 17)
   2923 GEN_TWOVEC_TEST(dup_2s_s2,  "dup v9.2s, v17.s[2]", 9, 17)
   2924 GEN_TWOVEC_TEST(dup_8h_h0,  "dup v9.8h, v17.h[0]", 9, 17)
   2925 GEN_TWOVEC_TEST(dup_8h_h6,  "dup v9.8h, v17.h[6]", 9, 17)
   2926 GEN_TWOVEC_TEST(dup_4h_h1,  "dup v9.4h, v17.h[1]", 9, 17)
   2927 GEN_TWOVEC_TEST(dup_4h_h5,  "dup v9.4h, v17.h[5]", 9, 17)
   2928 GEN_TWOVEC_TEST(dup_16b_b2,  "dup v9.16b, v17.b[2]", 9, 17)
   2929 GEN_TWOVEC_TEST(dup_16b_b12, "dup v9.16b, v17.b[12]", 9, 17)
   2930 GEN_TWOVEC_TEST(dup_8b_b3,  "dup v9.8b, v17.b[3]", 9, 17)
   2931 GEN_TWOVEC_TEST(dup_8b_b13, "dup v9.8b, v17.b[13]", 9, 17)
   2932 
        /* DUP (general): each case is a two-instruction sequence.  The low
           64 bits of v17 are first moved into x10, then x10 (2d case) or w10
           (all other cases) is replicated into every lane of v9.
           NOTE(review): every sequence overwrites x10 -- confirm that the asm
           statement generated by GEN_TWOVEC_TEST accounts for that (clobber
           list or save/restore); the macro is not visible from here. */
   2933 GEN_TWOVEC_TEST(dup_2d_x,  "mov x10, v17.d[0];  dup v9.2d,  x10", 9, 17)
   2934 GEN_TWOVEC_TEST(dup_4s_w,  "mov x10, v17.d[0];  dup v9.4s,  w10", 9, 17)
   2935 GEN_TWOVEC_TEST(dup_2s_w,  "mov x10, v17.d[0];  dup v9.2s,  w10", 9, 17)
   2936 GEN_TWOVEC_TEST(dup_8h_w,  "mov x10, v17.d[0];  dup v9.8h,  w10",  9, 17)
   2937 GEN_TWOVEC_TEST(dup_4h_w,  "mov x10, v17.d[0];  dup v9.4h,  w10",  9, 17)
   2938 GEN_TWOVEC_TEST(dup_16b_w, "mov x10, v17.d[0];  dup v9.16b, w10", 9, 17)
   2939 GEN_TWOVEC_TEST(dup_8b_w,  "mov x10, v17.d[0];  dup v9.8b,  w10",  9, 17)
   2940 
        /* EXT (extract from a pair of vectors): destination v2, sources v11
           and v29.  Every byte offset is covered: #0..#15 for the 16b
           arrangement and #0..#7 for the 8b arrangement.  The test-name suffix
           is the offset in hex (0xA..0xF correspond to #10..#15). */
   2941 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x0,
   2942                   "ext  v2.16b, v11.16b, v29.16b, #0", 2, 11, 29)
   2943 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x1,
   2944                   "ext  v2.16b, v11.16b, v29.16b, #1", 2, 11, 29)
   2945 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x2,
   2946                   "ext  v2.16b, v11.16b, v29.16b, #2", 2, 11, 29)
   2947 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x3,
   2948                   "ext  v2.16b, v11.16b, v29.16b, #3", 2, 11, 29)
   2949 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x4,
   2950                   "ext  v2.16b, v11.16b, v29.16b, #4", 2, 11, 29)
   2951 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x5,
   2952                   "ext  v2.16b, v11.16b, v29.16b, #5", 2, 11, 29)
   2953 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x6,
   2954                   "ext  v2.16b, v11.16b, v29.16b, #6", 2, 11, 29)
   2955 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x7,
   2956                   "ext  v2.16b, v11.16b, v29.16b, #7", 2, 11, 29)
   2957 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x8,
   2958                   "ext  v2.16b, v11.16b, v29.16b, #8", 2, 11, 29)
   2959 GEN_THREEVEC_TEST(ext_16b_16b_16b_0x9,
   2960                   "ext  v2.16b, v11.16b, v29.16b, #9", 2, 11, 29)
   2961 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xA,
   2962                   "ext  v2.16b, v11.16b, v29.16b, #10", 2, 11, 29)
   2963 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xB,
   2964                   "ext  v2.16b, v11.16b, v29.16b, #11", 2, 11, 29)
   2965 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xC,
   2966                   "ext  v2.16b, v11.16b, v29.16b, #12", 2, 11, 29)
   2967 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xD,
   2968                   "ext  v2.16b, v11.16b, v29.16b, #13", 2, 11, 29)
   2969 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xE,
   2970                   "ext  v2.16b, v11.16b, v29.16b, #14", 2, 11, 29)
   2971 GEN_THREEVEC_TEST(ext_16b_16b_16b_0xF,
   2972                   "ext  v2.16b, v11.16b, v29.16b, #15", 2, 11, 29)
   2973 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x0,
   2974                   "ext  v2.8b, v11.8b, v29.8b, #0", 2, 11, 29)
   2975 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x1,
   2976                   "ext  v2.8b, v11.8b, v29.8b, #1", 2, 11, 29)
   2977 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x2,
   2978                   "ext  v2.8b, v11.8b, v29.8b, #2", 2, 11, 29)
   2979 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x3,
   2980                   "ext  v2.8b, v11.8b, v29.8b, #3", 2, 11, 29)
   2981 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x4,
   2982                   "ext  v2.8b, v11.8b, v29.8b, #4", 2, 11, 29)
   2983 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x5,
   2984                   "ext  v2.8b, v11.8b, v29.8b, #5", 2, 11, 29)
   2985 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x6,
   2986                   "ext  v2.8b, v11.8b, v29.8b, #6", 2, 11, 29)
   2987 GEN_THREEVEC_TEST(ext_8b_8b_8b_0x7,
   2988                   "ext  v2.8b, v11.8b, v29.8b, #7", 2, 11, 29)
   2989 
        /* INS (element): copy one lane of v24 into one lane of v3.  Four
           destination/source lane pairings are exercised for each element size
           (d, s, h, b); the test name reads ins_<dest lane>_<source lane>. */
   2990 GEN_TWOVEC_TEST(ins_d0_d0, "ins v3.d[0], v24.d[0]", 3, 24)
   2991 GEN_TWOVEC_TEST(ins_d0_d1, "ins v3.d[0], v24.d[1]", 3, 24)
   2992 GEN_TWOVEC_TEST(ins_d1_d0, "ins v3.d[1], v24.d[0]", 3, 24)
   2993 GEN_TWOVEC_TEST(ins_d1_d1, "ins v3.d[1], v24.d[1]", 3, 24)
   2994 GEN_TWOVEC_TEST(ins_s0_s2, "ins v3.s[0], v24.s[2]", 3, 24)
   2995 GEN_TWOVEC_TEST(ins_s3_s0, "ins v3.s[3], v24.s[0]", 3, 24)
   2996 GEN_TWOVEC_TEST(ins_s2_s1, "ins v3.s[2], v24.s[1]", 3, 24)
   2997 GEN_TWOVEC_TEST(ins_s1_s3, "ins v3.s[1], v24.s[3]", 3, 24)
   2998 GEN_TWOVEC_TEST(ins_h0_h6, "ins v3.h[0], v24.h[6]", 3, 24)
   2999 GEN_TWOVEC_TEST(ins_h7_h0, "ins v3.h[7], v24.h[0]", 3, 24)
   3000 GEN_TWOVEC_TEST(ins_h6_h1, "ins v3.h[6], v24.h[1]", 3, 24)
   3001 GEN_TWOVEC_TEST(ins_h1_h7, "ins v3.h[1], v24.h[7]", 3, 24)
   3002 GEN_TWOVEC_TEST(ins_b0_b14, "ins v3.b[0],  v24.b[14]", 3, 24)
   3003 GEN_TWOVEC_TEST(ins_b15_b8, "ins v3.b[15], v24.b[8]",  3, 24)
   3004 GEN_TWOVEC_TEST(ins_b13_b9, "ins v3.b[13], v24.b[9]",  3, 24)
   3005 GEN_TWOVEC_TEST(ins_b5_b12, "ins v3.b[5],  v24.b[12]", 3, 24)
   3006 
   3007 // test_INS_general is a handwritten function
   3008 
        /* MLA, MLS and MUL, by-element form: destination v2, first source v11,
           and a single lane of the third register as multiplier.
           For 32-bit lanes the multiplier lane comes from v29.  For 16-bit
           lanes the element register must be one of v0..v15, so register 9 is
           passed instead, and the asm text uses v9 accordingly.  The 'h' cases
           previously named v2.h[N], i.e. the destination itself, so v9 was
           set up but never read and a distinct multiplier register was never
           exercised.  Regenerate the expected-output files after this
           change. */
   3009 GEN_THREEVEC_TEST(mla_4s_4s_s0, "mla v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3010 GEN_THREEVEC_TEST(mla_4s_4s_s3, "mla v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3011 GEN_THREEVEC_TEST(mla_2s_2s_s0, "mla v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3012 GEN_THREEVEC_TEST(mla_2s_2s_s3, "mla v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3013 // For the 'h' version of these, Rm can only be <= 15 (!)
   3014 GEN_THREEVEC_TEST(mla_8h_8h_h1, "mla v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3015 GEN_THREEVEC_TEST(mla_8h_8h_h5, "mla v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3016 GEN_THREEVEC_TEST(mla_4h_4h_h2, "mla v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3017 GEN_THREEVEC_TEST(mla_4h_4h_h7, "mla v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3018 GEN_THREEVEC_TEST(mls_4s_4s_s0, "mls v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3019 GEN_THREEVEC_TEST(mls_4s_4s_s3, "mls v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3020 GEN_THREEVEC_TEST(mls_2s_2s_s0, "mls v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3021 GEN_THREEVEC_TEST(mls_2s_2s_s3, "mls v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3022 // For the 'h' version of these, Rm can only be <= 15 (!)
   3023 GEN_THREEVEC_TEST(mls_8h_8h_h1, "mls v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3024 GEN_THREEVEC_TEST(mls_8h_8h_h5, "mls v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3025 GEN_THREEVEC_TEST(mls_4h_4h_h2, "mls v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3026 GEN_THREEVEC_TEST(mls_4h_4h_h7, "mls v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3027 GEN_THREEVEC_TEST(mul_4s_4s_s0, "mul v2.4s, v11.4s, v29.s[0]", 2, 11, 29)
   3028 GEN_THREEVEC_TEST(mul_4s_4s_s3, "mul v2.4s, v11.4s, v29.s[3]", 2, 11, 29)
   3029 GEN_THREEVEC_TEST(mul_2s_2s_s0, "mul v2.2s, v11.2s, v29.s[0]", 2, 11, 29)
   3030 GEN_THREEVEC_TEST(mul_2s_2s_s3, "mul v2.2s, v11.2s, v29.s[3]", 2, 11, 29)
   3031 // For the 'h' version of these, Rm can only be <= 15 (!)
   3032 GEN_THREEVEC_TEST(mul_8h_8h_h1, "mul v2.8h, v11.8h, v9.h[1]", 2, 11, 9)
   3033 GEN_THREEVEC_TEST(mul_8h_8h_h5, "mul v2.8h, v11.8h, v9.h[5]", 2, 11, 9)
   3034 GEN_THREEVEC_TEST(mul_4h_4h_h2, "mul v2.4h, v11.4h, v9.h[2]", 2, 11, 9)
   3035 GEN_THREEVEC_TEST(mul_4h_4h_h7, "mul v2.4h, v11.4h, v9.h[7]", 2, 11, 9)
   3036 
        /* MLA, MLS and MUL, vector form: 32-, 16- and 8-bit lanes in both
           128-bit and 64-bit arrangements.  No 64-bit-lane (2d) case is
           listed. */
   3037 GEN_BINARY_TEST(mla, 4s, 4s, 4s)
   3038 GEN_BINARY_TEST(mla, 2s, 2s, 2s)
   3039 GEN_BINARY_TEST(mla, 8h, 8h, 8h)
   3040 GEN_BINARY_TEST(mla, 4h, 4h, 4h)
   3041 GEN_BINARY_TEST(mla, 16b, 16b, 16b)
   3042 GEN_BINARY_TEST(mla, 8b, 8b, 8b)
   3043 GEN_BINARY_TEST(mls, 4s, 4s, 4s)
   3044 GEN_BINARY_TEST(mls, 2s, 2s, 2s)
   3045 GEN_BINARY_TEST(mls, 8h, 8h, 8h)
   3046 GEN_BINARY_TEST(mls, 4h, 4h, 4h)
   3047 GEN_BINARY_TEST(mls, 16b, 16b, 16b)
   3048 GEN_BINARY_TEST(mls, 8b, 8b, 8b)
   3049 GEN_BINARY_TEST(mul, 4s, 4s, 4s)
   3050 GEN_BINARY_TEST(mul, 2s, 2s, 2s)
   3051 GEN_BINARY_TEST(mul, 8h, 8h, 8h)
   3052 GEN_BINARY_TEST(mul, 4h, 4h, 4h)
   3053 GEN_BINARY_TEST(mul, 16b, 16b, 16b)
   3054 GEN_BINARY_TEST(mul, 8b, 8b, 8b)
   3055 
   3056 /* overkill -- don't need two vecs, only one */
        /* MOVI with byte lanes (16b, 8b), LSL #0.  Only v22 appears in the asm
           text; 23 is still passed as the second register number. */
   3057 GEN_TWOVEC_TEST(movi_16b_0x9C_lsl0, "movi v22.16b, #0x9C, LSL #0", 22, 23)
   3058 GEN_TWOVEC_TEST(movi_8b_0x8B_lsl0,  "movi v22.8b,  #0x8B, LSL #0", 22, 23)
   3059 
        /* MOVI and MVNI with 16-bit lanes (8h, 4h), LSL #0 and LSL #8. */
   3060 GEN_TWOVEC_TEST(movi_8h_0x5A_lsl0,  "movi v22.8h,  #0x5A, LSL #0", 22, 23)
   3061 GEN_TWOVEC_TEST(movi_8h_0xA5_lsl8,  "movi v22.8h,  #0xA5, LSL #8", 22, 23)
   3062 GEN_TWOVEC_TEST(movi_4h_0x5A_lsl0,  "movi v22.4h,  #0x5A, LSL #0", 22, 23)
   3063 GEN_TWOVEC_TEST(movi_4h_0xA5_lsl8,  "movi v22.4h,  #0xA5, LSL #8", 22, 23)
   3064 GEN_TWOVEC_TEST(mvni_8h_0x5A_lsl0,  "mvni v22.8h,  #0x5A, LSL #0", 22, 23)
   3065 GEN_TWOVEC_TEST(mvni_8h_0xA5_lsl8,  "mvni v22.8h,  #0xA5, LSL #8", 22, 23)
   3066 GEN_TWOVEC_TEST(mvni_4h_0x5A_lsl0,  "mvni v22.4h,  #0x5A, LSL #0", 22, 23)
   3067 GEN_TWOVEC_TEST(mvni_4h_0xA5_lsl8,  "mvni v22.4h,  #0xA5, LSL #8", 22, 23)
   3068 
        /* MOVI and MVNI with 32-bit lanes (4s, 2s): one case for each of the
           shifts LSL #0, #8, #16 and #24, with a different immediate per
           shift.  Only v22 appears in the asm text. */
   3069 GEN_TWOVEC_TEST(movi_4s_0x5A_lsl0,  "movi v22.4s,  #0x5A, LSL #0",  22, 23)
   3070 GEN_TWOVEC_TEST(movi_4s_0x6B_lsl8,  "movi v22.4s,  #0x6B, LSL #8",  22, 23)
   3071 GEN_TWOVEC_TEST(movi_4s_0x49_lsl16, "movi v22.4s,  #0x49, LSL #16", 22, 23)
   3072 GEN_TWOVEC_TEST(movi_4s_0x3D_lsl24, "movi v22.4s,  #0x3D, LSL #24", 22, 23)
   3073 GEN_TWOVEC_TEST(movi_2s_0x5A_lsl0,  "movi v22.2s,  #0x5A, LSL #0",  22, 23)
   3074 GEN_TWOVEC_TEST(movi_2s_0x6B_lsl8,  "movi v22.2s,  #0x6B, LSL #8",  22, 23)
   3075 GEN_TWOVEC_TEST(movi_2s_0x49_lsl16, "movi v22.2s,  #0x49, LSL #16", 22, 23)
   3076 GEN_TWOVEC_TEST(movi_2s_0x3D_lsl24, "movi v22.2s,  #0x3D, LSL #24", 22, 23)
   3077 GEN_TWOVEC_TEST(mvni_4s_0x5A_lsl0,  "mvni v22.4s,  #0x5A, LSL #0",  22, 23)
   3078 GEN_TWOVEC_TEST(mvni_4s_0x6B_lsl8,  "mvni v22.4s,  #0x6B, LSL #8",  22, 23)
   3079 GEN_TWOVEC_TEST(mvni_4s_0x49_lsl16, "mvni v22.4s,  #0x49, LSL #16", 22, 23)
   3080 GEN_TWOVEC_TEST(mvni_4s_0x3D_lsl24, "mvni v22.4s,  #0x3D, LSL #24", 22, 23)
   3081 GEN_TWOVEC_TEST(mvni_2s_0x5A_lsl0,  "mvni v22.2s,  #0x5A, LSL #0",  22, 23)
   3082 GEN_TWOVEC_TEST(mvni_2s_0x6B_lsl8,  "mvni v22.2s,  #0x6B, LSL #8",  22, 23)
   3083 GEN_TWOVEC_TEST(mvni_2s_0x49_lsl16, "mvni v22.2s,  #0x49, LSL #16", 22, 23)
   3084 GEN_TWOVEC_TEST(mvni_2s_0x3D_lsl24, "mvni v22.2s,  #0x3D, LSL #24", 22, 23)
   3085 
   3086 /* overkill -- don't need two vecs, only one */
        /* MOVI and MVNI with 32-bit lanes and the MSL shift, #8 and #16. */
   3087 GEN_TWOVEC_TEST(movi_4s_0x6B_msl8,  "movi v22.4s,  #0x6B, MSL #8", 22, 23)
   3088 GEN_TWOVEC_TEST(movi_4s_0x94_msl16, "movi v22.4s,  #0x94, MSL #16", 22, 23)
   3089 GEN_TWOVEC_TEST(movi_2s_0x7A_msl8,  "movi v22.2s,  #0x7A, MSL #8", 22, 23)
   3090 GEN_TWOVEC_TEST(movi_2s_0xA5_msl16, "movi v22.2s,  #0xA5, MSL #16", 22, 23)
   3091 GEN_TWOVEC_TEST(mvni_4s_0x6B_msl8,  "mvni v22.4s,  #0x6B, MSL #8", 22, 23)
   3092 GEN_TWOVEC_TEST(mvni_4s_0x94_msl16, "mvni v22.4s,  #0x94, MSL #16", 22, 23)
   3093 GEN_TWOVEC_TEST(mvni_2s_0x7A_msl8,  "mvni v22.2s,  #0x7A, MSL #8", 22, 23)
   3094 GEN_TWOVEC_TEST(mvni_2s_0xA5_msl16, "mvni v22.2s,  #0xA5, MSL #16", 22, 23)
   3095 
        /* MOVI with a 64-bit immediate, scalar (d22) and vector (v22.2d).
           The hex value in each test name is the byte mask of the immediate:
           reading the immediate's bytes from most to least significant, a
           0xFF byte gives a 1 bit and a 0x00 byte a 0 bit, so
           0xFF00FF0000FF00FF -> 10100101b = 0xA5 and
           0xFF00FFFF00FF0000 -> 10110100b = 0xB4. */
   3096 GEN_TWOVEC_TEST(movi_d_0xA5,  "movi d22,    #0xFF00FF0000FF00FF", 22, 23)
   3097 GEN_TWOVEC_TEST(movi_2d_0xB4, "movi v22.2d, #0xFF00FFFF00FF0000", 22, 23)
   3098 
        /* NOT (bitwise): byte arrangements 16b and 8b. */
   3099 GEN_UNARY_TEST(not, 16b, 16b)
   3100 GEN_UNARY_TEST(not, 8b,  8b)
   3101 
        /* PMUL (polynomial multiply): byte arrangements 16b and 8b. */
   3102 GEN_BINARY_TEST(pmul, 16b, 16b, 16b)
   3103 GEN_BINARY_TEST(pmul, 8b, 8b, 8b)
   3104 
        /* PMULL / PMULL2 (polynomial multiply long): byte sources giving an 8h
           result, and 64-bit sources (1d / 2d) giving a single 128-bit (1q)
           result. */
   3105 GEN_BINARY_TEST(pmull,  8h, 8b,  8b)
   3106 GEN_BINARY_TEST(pmull2, 8h, 16b, 16b)
   3107 GEN_BINARY_TEST(pmull,  1q, 1d,  1d)
   3108 GEN_BINARY_TEST(pmull2, 1q, 2d,  2d)
   3109 
        /* RBIT and the REV16 / REV32 / REV64 element-reversal instructions.
           Each REVn is listed only with lane sizes smaller than n bits:
           REV16 with bytes; REV32 with bytes and halfwords; REV64 with bytes,
           halfwords and words.  RBIT is listed with byte arrangements only. */
   3110 GEN_UNARY_TEST(rbit, 16b, 16b)
   3111 GEN_UNARY_TEST(rbit, 8b, 8b)
   3112 GEN_UNARY_TEST(rev16, 16b, 16b)
   3113 GEN_UNARY_TEST(rev16, 8b, 8b)
   3114 GEN_UNARY_TEST(rev32, 16b, 16b)
   3115 GEN_UNARY_TEST(rev32, 8b, 8b)
   3116 GEN_UNARY_TEST(rev32, 8h, 8h)
   3117 GEN_UNARY_TEST(rev32, 4h, 4h)
   3118 GEN_UNARY_TEST(rev64, 16b, 16b)
   3119 GEN_UNARY_TEST(rev64, 8b, 8b)
   3120 GEN_UNARY_TEST(rev64, 8h, 8h)
   3121 GEN_UNARY_TEST(rev64, 4h, 4h)
   3122 GEN_UNARY_TEST(rev64, 4s, 4s)
   3123 GEN_UNARY_TEST(rev64, 2s, 2s)
   3124 
        /* SABA / UABA (signed / unsigned absolute difference and accumulate):
           32-, 16- and 8-bit lanes, 128-bit and 64-bit arrangements. */
   3125 GEN_BINARY_TEST(saba, 4s, 4s, 4s)
   3126 GEN_BINARY_TEST(saba, 2s, 2s, 2s)
   3127 GEN_BINARY_TEST(saba, 8h, 8h, 8h)
   3128 GEN_BINARY_TEST(saba, 4h, 4h, 4h)
   3129 GEN_BINARY_TEST(saba, 16b, 16b, 16b)
   3130 GEN_BINARY_TEST(saba, 8b, 8b, 8b)
   3131 GEN_BINARY_TEST(uaba, 4s, 4s, 4s)
   3132 GEN_BINARY_TEST(uaba, 2s, 2s, 2s)
   3133 GEN_BINARY_TEST(uaba, 8h, 8h, 8h)
   3134 GEN_BINARY_TEST(uaba, 4h, 4h, 4h)
   3135 GEN_BINARY_TEST(uaba, 16b, 16b, 16b)
   3136 GEN_BINARY_TEST(uaba, 8b, 8b, 8b)
   3137 
        /* SABAL / UABAL and their "2" forms (absolute difference and
           accumulate, long): destination v2, sources v11 and v29.  The
           destination lanes are twice as wide as the source lanes; the "2"
           forms take the 128-bit source arrangements (4s, 8h, 16b). */
   3138 GEN_THREEVEC_TEST(sabal_2d_2s_2s,  "sabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3139 GEN_THREEVEC_TEST(sabal2_2d_4s_4s, "sabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3140 GEN_THREEVEC_TEST(sabal_4s_4h_4h,  "sabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3141 GEN_THREEVEC_TEST(sabal2_4s_8h_8h, "sabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3142 GEN_THREEVEC_TEST(sabal_8h_8b_8b,  "sabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3143 GEN_THREEVEC_TEST(sabal2_8h_16b_16b,
   3144                                    "sabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3145 GEN_THREEVEC_TEST(uabal_2d_2s_2s,  "uabal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3146 GEN_THREEVEC_TEST(uabal2_2d_4s_4s, "uabal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3147 GEN_THREEVEC_TEST(uabal_4s_4h_4h,  "uabal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3148 GEN_THREEVEC_TEST(uabal2_4s_8h_8h, "uabal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3149 GEN_THREEVEC_TEST(uabal_8h_8b_8b,  "uabal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3150 GEN_THREEVEC_TEST(uabal2_8h_16b_16b,
   3151                                    "uabal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3152 
        /* SABD / UABD (signed / unsigned absolute difference): destination v2,
           sources v11 and v29, same arrangement for all three.  32-, 16- and
           8-bit lanes, 128-bit and 64-bit arrangements. */
   3153 GEN_THREEVEC_TEST(sabd_4s_4s_4s,    "sabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
   3154 GEN_THREEVEC_TEST(sabd_2s_2s_2s,    "sabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
   3155 GEN_THREEVEC_TEST(sabd_8h_8h_8h,    "sabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
   3156 GEN_THREEVEC_TEST(sabd_4h_4h_4h,    "sabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
   3157 GEN_THREEVEC_TEST(sabd_16b_16b_16b, "sabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3158 GEN_THREEVEC_TEST(sabd_8b_8b_8b,    "sabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
   3159 GEN_THREEVEC_TEST(uabd_4s_4s_4s,    "uabd v2.4s, v11.4s, v29.4s", 2, 11, 29)
   3160 GEN_THREEVEC_TEST(uabd_2s_2s_2s,    "uabd v2.2s, v11.2s, v29.2s", 2, 11, 29)
   3161 GEN_THREEVEC_TEST(uabd_8h_8h_8h,    "uabd v2.8h, v11.8h, v29.8h", 2, 11, 29)
   3162 GEN_THREEVEC_TEST(uabd_4h_4h_4h,    "uabd v2.4h, v11.4h, v29.4h", 2, 11, 29)
   3163 GEN_THREEVEC_TEST(uabd_16b_16b_16b, "uabd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3164 GEN_THREEVEC_TEST(uabd_8b_8b_8b,    "uabd v2.8b, v11.8b, v29.8b", 2, 11, 29)
   3165 
        /* SABDL / UABDL and their "2" forms (absolute difference, long):
           destination v2, sources v11 and v29.  The destination lanes are
           twice as wide as the source lanes; the "2" forms take the 128-bit
           source arrangements. */
   3166 GEN_THREEVEC_TEST(sabdl_2d_2s_2s,  "sabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3167 GEN_THREEVEC_TEST(sabdl2_2d_4s_4s, "sabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3168 GEN_THREEVEC_TEST(sabdl_4s_4h_4h,  "sabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3169 GEN_THREEVEC_TEST(sabdl2_4s_8h_8h, "sabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3170 GEN_THREEVEC_TEST(sabdl_8h_8b_8b,  "sabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3171 GEN_THREEVEC_TEST(sabdl2_8h_16b_16b,
   3172                                    "sabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3173 GEN_THREEVEC_TEST(uabdl_2d_2s_2s,  "uabdl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3174 GEN_THREEVEC_TEST(uabdl2_2d_4s_4s, "uabdl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3175 GEN_THREEVEC_TEST(uabdl_4s_4h_4h,  "uabdl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3176 GEN_THREEVEC_TEST(uabdl2_4s_8h_8h, "uabdl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3177 GEN_THREEVEC_TEST(uabdl_8h_8b_8b,  "uabdl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3178 GEN_THREEVEC_TEST(uabdl2_8h_16b_16b,
   3179                                    "uabdl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3180 
        /* SADALP / UADALP (add and accumulate long pairwise): destination v3,
           source v19.  The destination has half as many lanes as the source,
           each twice as wide (e.g. 4h from 8b, 1d from 2s). */
   3181 GEN_TWOVEC_TEST(sadalp_4h_8b,  "sadalp v3.4h, v19.8b",  3, 19)
   3182 GEN_TWOVEC_TEST(sadalp_8h_16b, "sadalp v3.8h, v19.16b", 3, 19)
   3183 GEN_TWOVEC_TEST(sadalp_2s_4h,  "sadalp v3.2s, v19.4h",  3, 19)
   3184 GEN_TWOVEC_TEST(sadalp_4s_8h,  "sadalp v3.4s, v19.8h",  3, 19)
   3185 GEN_TWOVEC_TEST(sadalp_1d_2s,  "sadalp v3.1d, v19.2s",  3, 19)
   3186 GEN_TWOVEC_TEST(sadalp_2d_4s,  "sadalp v3.2d, v19.4s",  3, 19)
   3187 GEN_TWOVEC_TEST(uadalp_4h_8b,  "uadalp v3.4h, v19.8b",  3, 19)
   3188 GEN_TWOVEC_TEST(uadalp_8h_16b, "uadalp v3.8h, v19.16b", 3, 19)
   3189 GEN_TWOVEC_TEST(uadalp_2s_4h,  "uadalp v3.2s, v19.4h",  3, 19)
   3190 GEN_TWOVEC_TEST(uadalp_4s_8h,  "uadalp v3.4s, v19.8h",  3, 19)
   3191 GEN_TWOVEC_TEST(uadalp_1d_2s,  "uadalp v3.1d, v19.2s",  3, 19)
   3192 GEN_TWOVEC_TEST(uadalp_2d_4s,  "uadalp v3.2d, v19.4s",  3, 19)
   3193 
        /* Long add/subtract: SADDL, UADDL, SSUBL, USUBL and their "2" forms.
           Destination v2, sources v11 and v29; both sources share a narrow
           arrangement and the destination lanes are twice as wide.  The "2"
           forms take the 128-bit source arrangements. */
   3194 GEN_THREEVEC_TEST(saddl_2d_2s_2s,  "saddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3195 GEN_THREEVEC_TEST(saddl2_2d_4s_4s, "saddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3196 GEN_THREEVEC_TEST(saddl_4s_4h_4h,  "saddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3197 GEN_THREEVEC_TEST(saddl2_4s_8h_8h, "saddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3198 GEN_THREEVEC_TEST(saddl_8h_8b_8b,  "saddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3199 GEN_THREEVEC_TEST(saddl2_8h_16b_16b,
   3200                                    "saddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3201 GEN_THREEVEC_TEST(uaddl_2d_2s_2s,  "uaddl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3202 GEN_THREEVEC_TEST(uaddl2_2d_4s_4s, "uaddl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3203 GEN_THREEVEC_TEST(uaddl_4s_4h_4h,  "uaddl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3204 GEN_THREEVEC_TEST(uaddl2_4s_8h_8h, "uaddl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3205 GEN_THREEVEC_TEST(uaddl_8h_8b_8b,  "uaddl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3206 GEN_THREEVEC_TEST(uaddl2_8h_16b_16b,
   3207                                    "uaddl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3208 GEN_THREEVEC_TEST(ssubl_2d_2s_2s,  "ssubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3209 GEN_THREEVEC_TEST(ssubl2_2d_4s_4s, "ssubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3210 GEN_THREEVEC_TEST(ssubl_4s_4h_4h,  "ssubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3211 GEN_THREEVEC_TEST(ssubl2_4s_8h_8h, "ssubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3212 GEN_THREEVEC_TEST(ssubl_8h_8b_8b,  "ssubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3213 GEN_THREEVEC_TEST(ssubl2_8h_16b_16b,
   3214                                    "ssubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3215 GEN_THREEVEC_TEST(usubl_2d_2s_2s,  "usubl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3216 GEN_THREEVEC_TEST(usubl2_2d_4s_4s, "usubl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3217 GEN_THREEVEC_TEST(usubl_4s_4h_4h,  "usubl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3218 GEN_THREEVEC_TEST(usubl2_4s_8h_8h, "usubl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3219 GEN_THREEVEC_TEST(usubl_8h_8b_8b,  "usubl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3220 GEN_THREEVEC_TEST(usubl2_8h_16b_16b,
   3221                                    "usubl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3222 
        /* SADDLP / UADDLP (add long pairwise): destination v3, source v19.
           The destination has half as many lanes as the source, each twice
           as wide. */
   3223 GEN_TWOVEC_TEST(saddlp_4h_8b,  "saddlp v3.4h, v19.8b",  3, 19)
   3224 GEN_TWOVEC_TEST(saddlp_8h_16b, "saddlp v3.8h, v19.16b", 3, 19)
   3225 GEN_TWOVEC_TEST(saddlp_2s_4h,  "saddlp v3.2s, v19.4h",  3, 19)
   3226 GEN_TWOVEC_TEST(saddlp_4s_8h,  "saddlp v3.4s, v19.8h",  3, 19)
   3227 GEN_TWOVEC_TEST(saddlp_1d_2s,  "saddlp v3.1d, v19.2s",  3, 19)
   3228 GEN_TWOVEC_TEST(saddlp_2d_4s,  "saddlp v3.2d, v19.4s",  3, 19)
   3229 GEN_TWOVEC_TEST(uaddlp_4h_8b,  "uaddlp v3.4h, v19.8b",  3, 19)
   3230 GEN_TWOVEC_TEST(uaddlp_8h_16b, "uaddlp v3.8h, v19.16b", 3, 19)
   3231 GEN_TWOVEC_TEST(uaddlp_2s_4h,  "uaddlp v3.2s, v19.4h",  3, 19)
   3232 GEN_TWOVEC_TEST(uaddlp_4s_8h,  "uaddlp v3.4s, v19.8h",  3, 19)
   3233 GEN_TWOVEC_TEST(uaddlp_1d_2s,  "uaddlp v3.1d, v19.2s",  3, 19)
   3234 GEN_TWOVEC_TEST(uaddlp_2d_4s,  "uaddlp v3.2d, v19.4s",  3, 19)
   3235 
        /* SADDLV / UADDLV (add long across vector): source v19, destination
           the scalar view of register 3 one size wider than the source lanes
           (h from bytes, s from halfwords, d from words). */
   3236 GEN_TWOVEC_TEST(saddlv_h_16b, "saddlv h3, v19.16b",  3, 19)
   3237 GEN_TWOVEC_TEST(saddlv_h_8b,  "saddlv h3, v19.8b",   3, 19)
   3238 GEN_TWOVEC_TEST(saddlv_s_8h,  "saddlv s3, v19.8h",   3, 19)
   3239 GEN_TWOVEC_TEST(saddlv_s_4h,  "saddlv s3, v19.4h",   3, 19)
   3240 GEN_TWOVEC_TEST(saddlv_d_4s,  "saddlv d3, v19.4s",   3, 19)
   3241 GEN_TWOVEC_TEST(uaddlv_h_16b, "uaddlv h3, v19.16b",  3, 19)
   3242 GEN_TWOVEC_TEST(uaddlv_h_8b,  "uaddlv h3, v19.8b",   3, 19)
   3243 GEN_TWOVEC_TEST(uaddlv_s_8h,  "uaddlv s3, v19.8h",   3, 19)
   3244 GEN_TWOVEC_TEST(uaddlv_s_4h,  "uaddlv s3, v19.4h",   3, 19)
   3245 GEN_TWOVEC_TEST(uaddlv_d_4s,  "uaddlv d3, v19.4s",   3, 19)
   3246 
        /* Wide add/subtract: SADDW, UADDW, SSUBW, USUBW and their "2" forms.
           Destination v5, sources v13 and v31.  The first source has the same
           (wide) arrangement as the destination; only the second source is
           narrow.  The "2" forms take the 128-bit narrow arrangement. */
   3247 GEN_THREEVEC_TEST(saddw2_8h_8h_16b, "saddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3248 GEN_THREEVEC_TEST(saddw_8h_8h_8b,   "saddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3249 GEN_THREEVEC_TEST(saddw2_4s_4s_8h,  "saddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3250 GEN_THREEVEC_TEST(saddw_4s_4s_4h,   "saddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3251 GEN_THREEVEC_TEST(saddw2_2d_2d_4s,  "saddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3252 GEN_THREEVEC_TEST(saddw_2d_2d_2s,   "saddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3253 GEN_THREEVEC_TEST(uaddw2_8h_8h_16b, "uaddw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3254 GEN_THREEVEC_TEST(uaddw_8h_8h_8b,   "uaddw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3255 GEN_THREEVEC_TEST(uaddw2_4s_4s_8h,  "uaddw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3256 GEN_THREEVEC_TEST(uaddw_4s_4s_4h,   "uaddw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3257 GEN_THREEVEC_TEST(uaddw2_2d_2d_4s,  "uaddw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3258 GEN_THREEVEC_TEST(uaddw_2d_2d_2s,   "uaddw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3259 GEN_THREEVEC_TEST(ssubw2_8h_8h_16b, "ssubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3260 GEN_THREEVEC_TEST(ssubw_8h_8h_8b,   "ssubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3261 GEN_THREEVEC_TEST(ssubw2_4s_4s_8h,  "ssubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3262 GEN_THREEVEC_TEST(ssubw_4s_4s_4h,   "ssubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3263 GEN_THREEVEC_TEST(ssubw2_2d_2d_4s,  "ssubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3264 GEN_THREEVEC_TEST(ssubw_2d_2d_2s,   "ssubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3265 GEN_THREEVEC_TEST(usubw2_8h_8h_16b, "usubw2 v5.8h, v13.8h, v31.16b", 5, 13, 31)
   3266 GEN_THREEVEC_TEST(usubw_8h_8h_8b,   "usubw  v5.8h, v13.8h, v31.8b",  5, 13, 31)
   3267 GEN_THREEVEC_TEST(usubw2_4s_4s_8h,  "usubw2 v5.4s, v13.4s, v31.8h",  5, 13, 31)
   3268 GEN_THREEVEC_TEST(usubw_4s_4s_4h,   "usubw  v5.4s, v13.4s, v31.4h",  5, 13, 31)
   3269 GEN_THREEVEC_TEST(usubw2_2d_2d_4s,  "usubw2 v5.2d, v13.2d, v31.4s",  5, 13, 31)
   3270 GEN_THREEVEC_TEST(usubw_2d_2d_2s,   "usubw  v5.2d, v13.2d, v31.2s",  5, 13, 31)
   3271 
   /* SHADD/UHADD/SHSUB/UHSUB tests over the 4s, 2s, 8h, 4h, 16b and 8b
      arrangements (no 2d case is listed).  All cases use v2, v11 and v29,
      matching the trailing 2, 11, 29 arguments. */
   3272 GEN_THREEVEC_TEST(shadd_4s_4s_4s,   "shadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3273 GEN_THREEVEC_TEST(shadd_2s_2s_2s,   "shadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3274 GEN_THREEVEC_TEST(shadd_8h_8h_8h,   "shadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3275 GEN_THREEVEC_TEST(shadd_4h_4h_4h,   "shadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3276 GEN_THREEVEC_TEST(shadd_16b_16b_16b,"shadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3277 GEN_THREEVEC_TEST(shadd_8b_8b_8b,   "shadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3278 GEN_THREEVEC_TEST(uhadd_4s_4s_4s,   "uhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3279 GEN_THREEVEC_TEST(uhadd_2s_2s_2s,   "uhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3280 GEN_THREEVEC_TEST(uhadd_8h_8h_8h,   "uhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3281 GEN_THREEVEC_TEST(uhadd_4h_4h_4h,   "uhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3282 GEN_THREEVEC_TEST(uhadd_16b_16b_16b,"uhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3283 GEN_THREEVEC_TEST(uhadd_8b_8b_8b,   "uhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3284 GEN_THREEVEC_TEST(shsub_4s_4s_4s,   "shsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3285 GEN_THREEVEC_TEST(shsub_2s_2s_2s,   "shsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3286 GEN_THREEVEC_TEST(shsub_8h_8h_8h,   "shsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3287 GEN_THREEVEC_TEST(shsub_4h_4h_4h,   "shsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3288 GEN_THREEVEC_TEST(shsub_16b_16b_16b,"shsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3289 GEN_THREEVEC_TEST(shsub_8b_8b_8b,   "shsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3290 GEN_THREEVEC_TEST(uhsub_4s_4s_4s,   "uhsub v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   3291 GEN_THREEVEC_TEST(uhsub_2s_2s_2s,   "uhsub v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   3292 GEN_THREEVEC_TEST(uhsub_8h_8h_8h,   "uhsub v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   3293 GEN_THREEVEC_TEST(uhsub_4h_4h_4h,   "uhsub v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   3294 GEN_THREEVEC_TEST(uhsub_16b_16b_16b,"uhsub v2.16b, v11.16b, v29.16b", 2, 11, 29)
   3295 GEN_THREEVEC_TEST(uhsub_8b_8b_8b,   "uhsub v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   3296 
   /* SHLL/SHLL2 tests on v3 and v24.  In every case the immediate equals
      the lane width, in bits, of the second operand: #8 for b lanes, #16
      for h lanes, #32 for s lanes. */
   3297 GEN_TWOVEC_TEST(shll_8h_8b_8,   "shll  v3.8h, v24.8b,  #8", 3, 24)
   3298 GEN_TWOVEC_TEST(shll2_8h_16b_8, "shll2 v3.8h, v24.16b, #8", 3, 24)
   3299 GEN_TWOVEC_TEST(shll_4s_4h_16,  "shll  v3.4s, v24.4h, #16", 3, 24)
   3300 GEN_TWOVEC_TEST(shll2_4s_8h_16, "shll2 v3.4s, v24.8h, #16", 3, 24)
   3301 GEN_TWOVEC_TEST(shll_2d_2s_32,  "shll  v3.2d, v24.2s, #32", 3, 24)
   3302 GEN_TWOVEC_TEST(shll2_2d_4s_32, "shll2 v3.2d, v24.4s, #32", 3, 24)
   3303 
   /* SHRN/SHRN2 and RSHRN/RSHRN2 tests on v4 and v29.  Each arrangement
      pairing is exercised with two shift amounts: #1, and a shift equal to
      the lane width, in bits, of the first operand (#32 for s lanes, #16
      for h lanes, #8 for b lanes). */
   3304 GEN_TWOVEC_TEST(shrn_2s_2d_1,   "shrn  v4.2s,  v29.2d, #1",  4, 29)
   3305 GEN_TWOVEC_TEST(shrn_2s_2d_32,  "shrn  v4.2s,  v29.2d, #32", 4, 29)
   3306 GEN_TWOVEC_TEST(shrn2_4s_2d_1,  "shrn2 v4.4s,  v29.2d, #1",  4, 29)
   3307 GEN_TWOVEC_TEST(shrn2_4s_2d_32, "shrn2 v4.4s,  v29.2d, #32", 4, 29)
   3308 GEN_TWOVEC_TEST(shrn_4h_4s_1,   "shrn  v4.4h,  v29.4s, #1",  4, 29)
   3309 GEN_TWOVEC_TEST(shrn_4h_4s_16,  "shrn  v4.4h,  v29.4s, #16", 4, 29)
   3310 GEN_TWOVEC_TEST(shrn2_8h_4s_1,  "shrn2 v4.8h,  v29.4s, #1",  4, 29)
   3311 GEN_TWOVEC_TEST(shrn2_8h_4s_16, "shrn2 v4.8h,  v29.4s, #16", 4, 29)
   3312 GEN_TWOVEC_TEST(shrn_8b_8h_1,   "shrn  v4.8b,  v29.8h, #1",  4, 29)
   3313 GEN_TWOVEC_TEST(shrn_8b_8h_8,   "shrn  v4.8b,  v29.8h, #8",  4, 29)
   3314 GEN_TWOVEC_TEST(shrn2_16b_8h_1, "shrn2 v4.16b, v29.8h, #1",  4, 29)
   3315 GEN_TWOVEC_TEST(shrn2_16b_8h_8, "shrn2 v4.16b, v29.8h, #8",  4, 29)
   3316 GEN_TWOVEC_TEST(rshrn_2s_2d_1,   "rshrn  v4.2s,  v29.2d, #1",  4, 29)
   3317 GEN_TWOVEC_TEST(rshrn_2s_2d_32,  "rshrn  v4.2s,  v29.2d, #32", 4, 29)
   3318 GEN_TWOVEC_TEST(rshrn2_4s_2d_1,  "rshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3319 GEN_TWOVEC_TEST(rshrn2_4s_2d_32, "rshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3320 GEN_TWOVEC_TEST(rshrn_4h_4s_1,   "rshrn  v4.4h,  v29.4s, #1",  4, 29)
   3321 GEN_TWOVEC_TEST(rshrn_4h_4s_16,  "rshrn  v4.4h,  v29.4s, #16", 4, 29)
   3322 GEN_TWOVEC_TEST(rshrn2_8h_4s_1,  "rshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3323 GEN_TWOVEC_TEST(rshrn2_8h_4s_16, "rshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3324 GEN_TWOVEC_TEST(rshrn_8b_8h_1,   "rshrn  v4.8b,  v29.8h, #1",  4, 29)
   3325 GEN_TWOVEC_TEST(rshrn_8b_8h_8,   "rshrn  v4.8b,  v29.8h, #8",  4, 29)
   3326 GEN_TWOVEC_TEST(rshrn2_16b_8h_1, "rshrn2 v4.16b, v29.8h, #1",  4, 29)
   3327 GEN_TWOVEC_TEST(rshrn2_16b_8h_8, "rshrn2 v4.16b, v29.8h, #8",  4, 29)
   3328 
   /* Scalar SLI and SRI tests on d5 and d28, three shift amounts each:
      #0, #32, #63 for sli and #1, #33, #64 for sri. */
   3329 GEN_TWOVEC_TEST(sli_d_d_0,  "sli d5, d28, #0",  5, 28)
   3330 GEN_TWOVEC_TEST(sli_d_d_32, "sli d5, d28, #32", 5, 28)
   3331 GEN_TWOVEC_TEST(sli_d_d_63, "sli d5, d28, #63", 5, 28)
   3332 GEN_TWOVEC_TEST(sri_d_d_1,  "sri d5, d28, #1",  5, 28)
   3333 GEN_TWOVEC_TEST(sri_d_d_33, "sri d5, d28, #33", 5, 28)
   3334 GEN_TWOVEC_TEST(sri_d_d_64, "sri d5, d28, #64", 5, 28)
   3335 
   /* Vector SLI and SRI tests on v6 and v27, covering the 2d, 4s, 2s, 8h,
      4h, 16b and 8b arrangements with three shift amounts each.  The sli
      shifts run from #0 up to (lane width - 1); the sri shifts run from #1
      up to the full lane width. */
   3336 GEN_TWOVEC_TEST(sli_2d_2d_0,   "sli v6.2d,  v27.2d, #0",  6, 27)
   3337 GEN_TWOVEC_TEST(sli_2d_2d_32,  "sli v6.2d,  v27.2d, #32", 6, 27)
   3338 GEN_TWOVEC_TEST(sli_2d_2d_63,  "sli v6.2d,  v27.2d, #63", 6, 27)
   3339 GEN_TWOVEC_TEST(sli_4s_4s_0,   "sli v6.4s,  v27.4s, #0",  6, 27)
   3340 GEN_TWOVEC_TEST(sli_4s_4s_16,  "sli v6.4s,  v27.4s, #16", 6, 27)
   3341 GEN_TWOVEC_TEST(sli_4s_4s_31,  "sli v6.4s,  v27.4s, #31", 6, 27)
   3342 GEN_TWOVEC_TEST(sli_2s_2s_0,   "sli v6.2s,  v27.2s, #0",  6, 27)
   3343 GEN_TWOVEC_TEST(sli_2s_2s_16,  "sli v6.2s,  v27.2s, #16", 6, 27)
   3344 GEN_TWOVEC_TEST(sli_2s_2s_31,  "sli v6.2s,  v27.2s, #31", 6, 27)
   3345 GEN_TWOVEC_TEST(sli_8h_8h_0,   "sli v6.8h,  v27.8h, #0",  6, 27)
   3346 GEN_TWOVEC_TEST(sli_8h_8h_8,   "sli v6.8h,  v27.8h, #8",  6, 27)
   3347 GEN_TWOVEC_TEST(sli_8h_8h_15,  "sli v6.8h,  v27.8h, #15", 6, 27)
   3348 GEN_TWOVEC_TEST(sli_4h_4h_0,   "sli v6.4h,  v27.4h, #0",  6, 27)
   3349 GEN_TWOVEC_TEST(sli_4h_4h_8,   "sli v6.4h,  v27.4h, #8",  6, 27)
   3350 GEN_TWOVEC_TEST(sli_4h_4h_15,  "sli v6.4h,  v27.4h, #15", 6, 27)
   3351 GEN_TWOVEC_TEST(sli_16b_16b_0, "sli v6.16b, v27.16b, #0", 6, 27)
   3352 GEN_TWOVEC_TEST(sli_16b_16b_3, "sli v6.16b, v27.16b, #3", 6, 27)
   3353 GEN_TWOVEC_TEST(sli_16b_16b_7, "sli v6.16b, v27.16b, #7", 6, 27)
   3354 GEN_TWOVEC_TEST(sli_8b_8b_0,   "sli v6.8b,  v27.8b, #0",  6, 27)
   3355 GEN_TWOVEC_TEST(sli_8b_8b_3,   "sli v6.8b,  v27.8b, #3",  6, 27)
   3356 GEN_TWOVEC_TEST(sli_8b_8b_7,   "sli v6.8b,  v27.8b, #7",  6, 27)
   3357 GEN_TWOVEC_TEST(sri_2d_2d_1,   "sri v6.2d,  v27.2d,  #1",  6, 27)
   3358 GEN_TWOVEC_TEST(sri_2d_2d_33,  "sri v6.2d,  v27.2d,  #33", 6, 27)
   3359 GEN_TWOVEC_TEST(sri_2d_2d_64,  "sri v6.2d,  v27.2d,  #64", 6, 27)
   3360 GEN_TWOVEC_TEST(sri_4s_4s_1,   "sri v6.4s,  v27.4s,  #1",  6, 27)
   3361 GEN_TWOVEC_TEST(sri_4s_4s_17,  "sri v6.4s,  v27.4s,  #17", 6, 27)
   3362 GEN_TWOVEC_TEST(sri_4s_4s_32,  "sri v6.4s,  v27.4s,  #32", 6, 27)
   3363 GEN_TWOVEC_TEST(sri_2s_2s_1,   "sri v6.2s,  v27.2s,  #1",  6, 27)
   3364 GEN_TWOVEC_TEST(sri_2s_2s_17,  "sri v6.2s,  v27.2s,  #17", 6, 27)
   3365 GEN_TWOVEC_TEST(sri_2s_2s_32,  "sri v6.2s,  v27.2s,  #32", 6, 27)
   3366 GEN_TWOVEC_TEST(sri_8h_8h_1,   "sri v6.8h,  v27.8h,  #1",  6, 27)
   3367 GEN_TWOVEC_TEST(sri_8h_8h_8,   "sri v6.8h,  v27.8h,  #8",  6, 27)
   3368 GEN_TWOVEC_TEST(sri_8h_8h_16,  "sri v6.8h,  v27.8h,  #16", 6, 27)
   3369 GEN_TWOVEC_TEST(sri_4h_4h_1,   "sri v6.4h,  v27.4h,  #1",  6, 27)
   3370 GEN_TWOVEC_TEST(sri_4h_4h_8,   "sri v6.4h,  v27.4h,  #8",  6, 27)
   3371 GEN_TWOVEC_TEST(sri_4h_4h_16,  "sri v6.4h,  v27.4h,  #16", 6, 27)
   3372 GEN_TWOVEC_TEST(sri_16b_16b_1, "sri v6.16b, v27.16b, #1", 6, 27)
   3373 GEN_TWOVEC_TEST(sri_16b_16b_4, "sri v6.16b, v27.16b, #4", 6, 27)
   3374 GEN_TWOVEC_TEST(sri_16b_16b_8, "sri v6.16b, v27.16b, #8", 6, 27)
   3375 GEN_TWOVEC_TEST(sri_8b_8b_1,   "sri v6.8b,  v27.8b,  #1",  6, 27)
   3376 GEN_TWOVEC_TEST(sri_8b_8b_4,   "sri v6.8b,  v27.8b,  #4",  6, 27)
   3377 GEN_TWOVEC_TEST(sri_8b_8b_8,   "sri v6.8b,  v27.8b,  #8",  6, 27)
   3378 
   /* SMAX/UMAX/SMIN/UMIN tests for the 4s, 2s, 8h, 4h, 16b and 8b
      arrangements (no 2d case is listed).  GEN_BINARY_TEST is given only
      the mnemonic and three arrangement suffixes; the macro is defined
      outside this section, so how it chooses registers and builds the
      instruction text is not visible here. */
   3379 GEN_BINARY_TEST(smax, 4s, 4s, 4s)
   3380 GEN_BINARY_TEST(smax, 2s, 2s, 2s)
   3381 GEN_BINARY_TEST(smax, 8h, 8h, 8h)
   3382 GEN_BINARY_TEST(smax, 4h, 4h, 4h)
   3383 GEN_BINARY_TEST(smax, 16b, 16b, 16b)
   3384 GEN_BINARY_TEST(smax, 8b, 8b, 8b)
   3385 GEN_BINARY_TEST(umax, 4s, 4s, 4s)
   3386 GEN_BINARY_TEST(umax, 2s, 2s, 2s)
   3387 GEN_BINARY_TEST(umax, 8h, 8h, 8h)
   3388 GEN_BINARY_TEST(umax, 4h, 4h, 4h)
   3389 GEN_BINARY_TEST(umax, 16b, 16b, 16b)
   3390 GEN_BINARY_TEST(umax, 8b, 8b, 8b)
   3391 GEN_BINARY_TEST(smin, 4s, 4s, 4s)
   3392 GEN_BINARY_TEST(smin, 2s, 2s, 2s)
   3393 GEN_BINARY_TEST(smin, 8h, 8h, 8h)
   3394 GEN_BINARY_TEST(smin, 4h, 4h, 4h)
   3395 GEN_BINARY_TEST(smin, 16b, 16b, 16b)
   3396 GEN_BINARY_TEST(smin, 8b, 8b, 8b)
   3397 GEN_BINARY_TEST(umin, 4s, 4s, 4s)
   3398 GEN_BINARY_TEST(umin, 2s, 2s, 2s)
   3399 GEN_BINARY_TEST(umin, 8h, 8h, 8h)
   3400 GEN_BINARY_TEST(umin, 4h, 4h, 4h)
   3401 GEN_BINARY_TEST(umin, 16b, 16b, 16b)
   3402 GEN_BINARY_TEST(umin, 8b, 8b, 8b)
   3403 
   /* SMAXP/UMAXP/SMINP/UMINP tests for the 4s, 2s, 8h, 4h, 16b and 8b
      arrangements, mirroring the arrangement list used for the
      smax/umax/smin/umin cases.  Only the mnemonic and three arrangement
      suffixes are passed; GEN_BINARY_TEST is defined outside this
      section. */
   3404 GEN_BINARY_TEST(smaxp, 4s, 4s, 4s)
   3405 GEN_BINARY_TEST(smaxp, 2s, 2s, 2s)
   3406 GEN_BINARY_TEST(smaxp, 8h, 8h, 8h)
   3407 GEN_BINARY_TEST(smaxp, 4h, 4h, 4h)
   3408 GEN_BINARY_TEST(smaxp, 16b, 16b, 16b)
   3409 GEN_BINARY_TEST(smaxp, 8b, 8b, 8b)
   3410 GEN_BINARY_TEST(umaxp, 4s, 4s, 4s)
   3411 GEN_BINARY_TEST(umaxp, 2s, 2s, 2s)
   3412 GEN_BINARY_TEST(umaxp, 8h, 8h, 8h)
   3413 GEN_BINARY_TEST(umaxp, 4h, 4h, 4h)
   3414 GEN_BINARY_TEST(umaxp, 16b, 16b, 16b)
   3415 GEN_BINARY_TEST(umaxp, 8b, 8b, 8b)
   3416 GEN_BINARY_TEST(sminp, 4s, 4s, 4s)
   3417 GEN_BINARY_TEST(sminp, 2s, 2s, 2s)
   3418 GEN_BINARY_TEST(sminp, 8h, 8h, 8h)
   3419 GEN_BINARY_TEST(sminp, 4h, 4h, 4h)
   3420 GEN_BINARY_TEST(sminp, 16b, 16b, 16b)
   3421 GEN_BINARY_TEST(sminp, 8b, 8b, 8b)
   3422 GEN_BINARY_TEST(uminp, 4s, 4s, 4s)
   3423 GEN_BINARY_TEST(uminp, 2s, 2s, 2s)
   3424 GEN_BINARY_TEST(uminp, 8h, 8h, 8h)
   3425 GEN_BINARY_TEST(uminp, 4h, 4h, 4h)
   3426 GEN_BINARY_TEST(uminp, 16b, 16b, 16b)
   3427 GEN_BINARY_TEST(uminp, 8b, 8b, 8b)
   3428 
   3429 // test_SMAXV is a handwritten function
   3430 // test_UMAXV is a handwritten function
   3431 // test_SMINV is a handwritten function
   3432 // test_UMINV is a handwritten function
   3433 
   /* By-element SMLAL, UMLAL, SMLSL, UMLSL, SMULL and UMULL tests, with
      their "2" variants, on v29, v20 and v3.  Each mnemonic gets eight
      cases whose name suffix gives the element selected from v3: s[0] and
      s[3] for the 2s form, s[1] and s[2] for the 4s form, h[0] and h[7]
      for the 4h form, h[1] and h[4] for the 8h form.  The *_h4 cases
      select v3.h[4]; selecting h[1] there would merely repeat the *_h1
      case and leave lane 4 unexercised.
      NOTE(review): any recorded expected output for the *_h4 cases has to
      be regenerated to match the h[4] instruction text and results. */
   3434 GEN_THREEVEC_TEST(smlal_2d_2s_s0,  "smlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3435 GEN_THREEVEC_TEST(smlal_2d_2s_s3,  "smlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3436 GEN_THREEVEC_TEST(smlal2_2d_4s_s1, "smlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3437 GEN_THREEVEC_TEST(smlal2_2d_4s_s2, "smlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3438 GEN_THREEVEC_TEST(smlal_4s_4h_h0,  "smlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3439 GEN_THREEVEC_TEST(smlal_4s_4h_h7,  "smlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3440 GEN_THREEVEC_TEST(smlal2_4s_8h_h1, "smlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3441 GEN_THREEVEC_TEST(smlal2_4s_8h_h4, "smlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3442 GEN_THREEVEC_TEST(umlal_2d_2s_s0,  "umlal  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3443 GEN_THREEVEC_TEST(umlal_2d_2s_s3,  "umlal  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3444 GEN_THREEVEC_TEST(umlal2_2d_4s_s1, "umlal2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3445 GEN_THREEVEC_TEST(umlal2_2d_4s_s2, "umlal2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3446 GEN_THREEVEC_TEST(umlal_4s_4h_h0,  "umlal  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3447 GEN_THREEVEC_TEST(umlal_4s_4h_h7,  "umlal  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3448 GEN_THREEVEC_TEST(umlal2_4s_8h_h1, "umlal2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3449 GEN_THREEVEC_TEST(umlal2_4s_8h_h4, "umlal2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3450 GEN_THREEVEC_TEST(smlsl_2d_2s_s0,  "smlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3451 GEN_THREEVEC_TEST(smlsl_2d_2s_s3,  "smlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3452 GEN_THREEVEC_TEST(smlsl2_2d_4s_s1, "smlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3453 GEN_THREEVEC_TEST(smlsl2_2d_4s_s2, "smlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3454 GEN_THREEVEC_TEST(smlsl_4s_4h_h0,  "smlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3455 GEN_THREEVEC_TEST(smlsl_4s_4h_h7,  "smlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3456 GEN_THREEVEC_TEST(smlsl2_4s_8h_h1, "smlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3457 GEN_THREEVEC_TEST(smlsl2_4s_8h_h4, "smlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3458 GEN_THREEVEC_TEST(umlsl_2d_2s_s0,  "umlsl  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3459 GEN_THREEVEC_TEST(umlsl_2d_2s_s3,  "umlsl  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3460 GEN_THREEVEC_TEST(umlsl2_2d_4s_s1, "umlsl2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3461 GEN_THREEVEC_TEST(umlsl2_2d_4s_s2, "umlsl2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3462 GEN_THREEVEC_TEST(umlsl_4s_4h_h0,  "umlsl  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3463 GEN_THREEVEC_TEST(umlsl_4s_4h_h7,  "umlsl  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3464 GEN_THREEVEC_TEST(umlsl2_4s_8h_h1, "umlsl2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3465 GEN_THREEVEC_TEST(umlsl2_4s_8h_h4, "umlsl2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3466 GEN_THREEVEC_TEST(smull_2d_2s_s0,  "smull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3467 GEN_THREEVEC_TEST(smull_2d_2s_s3,  "smull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3468 GEN_THREEVEC_TEST(smull2_2d_4s_s1, "smull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3469 GEN_THREEVEC_TEST(smull2_2d_4s_s2, "smull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3470 GEN_THREEVEC_TEST(smull_4s_4h_h0,  "smull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3471 GEN_THREEVEC_TEST(smull_4s_4h_h7,  "smull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3472 GEN_THREEVEC_TEST(smull2_4s_8h_h1, "smull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3473 GEN_THREEVEC_TEST(smull2_4s_8h_h4, "smull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3474 GEN_THREEVEC_TEST(umull_2d_2s_s0,  "umull  v29.2d, v20.2s, v3.s[0]", 29, 20, 3)
   3475 GEN_THREEVEC_TEST(umull_2d_2s_s3,  "umull  v29.2d, v20.2s, v3.s[3]", 29, 20, 3)
   3476 GEN_THREEVEC_TEST(umull2_2d_4s_s1, "umull2 v29.2d, v20.4s, v3.s[1]", 29, 20, 3)
   3477 GEN_THREEVEC_TEST(umull2_2d_4s_s2, "umull2 v29.2d, v20.4s, v3.s[2]", 29, 20, 3)
   3478 GEN_THREEVEC_TEST(umull_4s_4h_h0,  "umull  v29.4s, v20.4h, v3.h[0]", 29, 20, 3)
   3479 GEN_THREEVEC_TEST(umull_4s_4h_h7,  "umull  v29.4s, v20.4h, v3.h[7]", 29, 20, 3)
   3480 GEN_THREEVEC_TEST(umull2_4s_8h_h1, "umull2 v29.4s, v20.8h, v3.h[1]", 29, 20, 3)
   3481 GEN_THREEVEC_TEST(umull2_4s_8h_h4, "umull2 v29.4s, v20.8h, v3.h[4]", 29, 20, 3)
   3482 
   /* Vector (non-indexed) SMLAL, UMLAL, SMLSL, UMLSL, SMULL and UMULL
      tests on v2, v11 and v29.  Six cases per mnemonic: 2d from 2s, 2d
      from 4s ("2" form), 4s from 4h, 4s from 8h ("2" form), 8h from 8b and
      8h from 16b ("2" form).  The 16b invocations are wrapped over two
      lines. */
   3483 GEN_THREEVEC_TEST(smlal_2d_2s_2s,  "smlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3484 GEN_THREEVEC_TEST(smlal2_2d_4s_4s, "smlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3485 GEN_THREEVEC_TEST(smlal_4s_4h_4h,  "smlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3486 GEN_THREEVEC_TEST(smlal2_4s_8h_8h, "smlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3487 GEN_THREEVEC_TEST(smlal_8h_8b_8b,  "smlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3488 GEN_THREEVEC_TEST(smlal2_8h_16b_16b,
   3489                                    "smlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3490 GEN_THREEVEC_TEST(umlal_2d_2s_2s,  "umlal  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3491 GEN_THREEVEC_TEST(umlal2_2d_4s_4s, "umlal2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3492 GEN_THREEVEC_TEST(umlal_4s_4h_4h,  "umlal  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3493 GEN_THREEVEC_TEST(umlal2_4s_8h_8h, "umlal2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3494 GEN_THREEVEC_TEST(umlal_8h_8b_8b,  "umlal  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3495 GEN_THREEVEC_TEST(umlal2_8h_16b_16b,
   3496                                    "umlal2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3497 GEN_THREEVEC_TEST(smlsl_2d_2s_2s,  "smlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3498 GEN_THREEVEC_TEST(smlsl2_2d_4s_4s, "smlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3499 GEN_THREEVEC_TEST(smlsl_4s_4h_4h,  "smlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3500 GEN_THREEVEC_TEST(smlsl2_4s_8h_8h, "smlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3501 GEN_THREEVEC_TEST(smlsl_8h_8b_8b,  "smlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3502 GEN_THREEVEC_TEST(smlsl2_8h_16b_16b,
   3503                                    "smlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3504 GEN_THREEVEC_TEST(umlsl_2d_2s_2s,  "umlsl  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3505 GEN_THREEVEC_TEST(umlsl2_2d_4s_4s, "umlsl2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3506 GEN_THREEVEC_TEST(umlsl_4s_4h_4h,  "umlsl  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3507 GEN_THREEVEC_TEST(umlsl2_4s_8h_8h, "umlsl2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3508 GEN_THREEVEC_TEST(umlsl_8h_8b_8b,  "umlsl  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3509 GEN_THREEVEC_TEST(umlsl2_8h_16b_16b,
   3510                                    "umlsl2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3511 GEN_THREEVEC_TEST(smull_2d_2s_2s,  "smull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3512 GEN_THREEVEC_TEST(smull2_2d_4s_4s, "smull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3513 GEN_THREEVEC_TEST(smull_4s_4h_4h,  "smull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3514 GEN_THREEVEC_TEST(smull2_4s_8h_8h, "smull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3515 GEN_THREEVEC_TEST(smull_8h_8b_8b,  "smull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3516 GEN_THREEVEC_TEST(smull2_8h_16b_16b,
   3517                                    "smull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3518 GEN_THREEVEC_TEST(umull_2d_2s_2s,  "umull  v2.2d, v11.2s, v29.2s", 2, 11, 29)
   3519 GEN_THREEVEC_TEST(umull2_2d_4s_4s, "umull2 v2.2d, v11.4s, v29.4s", 2, 11, 29)
   3520 GEN_THREEVEC_TEST(umull_4s_4h_4h,  "umull  v2.4s, v11.4h, v29.4h", 2, 11, 29)
   3521 GEN_THREEVEC_TEST(umull2_4s_8h_8h, "umull2 v2.4s, v11.8h, v29.8h", 2, 11, 29)
   3522 GEN_THREEVEC_TEST(umull_8h_8b_8b,  "umull  v2.8h, v11.8b, v29.8b", 2, 11, 29)
   3523 GEN_THREEVEC_TEST(umull2_8h_16b_16b,
   3524                                    "umull2 v2.8h, v11.16b, v29.16b", 2, 11, 29)
   3525 
   /* UMOV/SMOV tests involving one lane of v10 and the integer register
      x9/w9.  For each register-width/lane-size combination listed, the
      lowest and the highest lane index are exercised (d[0]/d[1],
      s[0]/s[3], h[0]/h[7], b[0]/b[15]).  Arguments as used here: an
      identifier for the test, the instruction text, then 9 and 10, the
      integer and vector register numbers that text refers to
      (GEN_ONEINT_ONEVEC_TEST is defined outside this section). */
   3526 GEN_ONEINT_ONEVEC_TEST(umov_x_d0,  "umov x9, v10.d[0]", 9, 10)
   3527 GEN_ONEINT_ONEVEC_TEST(umov_x_d1,  "umov x9, v10.d[1]", 9, 10)
   3528 GEN_ONEINT_ONEVEC_TEST(umov_w_s0,  "umov w9, v10.s[0]", 9, 10)
   3529 GEN_ONEINT_ONEVEC_TEST(umov_w_s3,  "umov w9, v10.s[3]", 9, 10)
   3530 GEN_ONEINT_ONEVEC_TEST(umov_w_h0,  "umov w9, v10.h[0]", 9, 10)
   3531 GEN_ONEINT_ONEVEC_TEST(umov_w_h7,  "umov w9, v10.h[7]", 9, 10)
   3532 GEN_ONEINT_ONEVEC_TEST(umov_w_b0,  "umov w9, v10.b[0]", 9, 10)
   3533 GEN_ONEINT_ONEVEC_TEST(umov_w_b15, "umov w9, v10.b[15]", 9, 10)
   3534 GEN_ONEINT_ONEVEC_TEST(smov_x_s0,  "smov x9, v10.s[0]", 9, 10)
   3535 GEN_ONEINT_ONEVEC_TEST(smov_x_s3,  "smov x9, v10.s[3]", 9, 10)
   3536 GEN_ONEINT_ONEVEC_TEST(smov_x_h0,  "smov x9, v10.h[0]", 9, 10)
   3537 GEN_ONEINT_ONEVEC_TEST(smov_x_h7,  "smov x9, v10.h[7]", 9, 10)
   3538 GEN_ONEINT_ONEVEC_TEST(smov_w_h0,  "smov w9, v10.h[0]", 9, 10)
   3539 GEN_ONEINT_ONEVEC_TEST(smov_w_h7,  "smov w9, v10.h[7]", 9, 10)
   3540 GEN_ONEINT_ONEVEC_TEST(smov_x_b0,  "smov x9, v10.b[0]", 9, 10)
   3541 GEN_ONEINT_ONEVEC_TEST(smov_x_b15, "smov x9, v10.b[15]", 9, 10)
   3542 GEN_ONEINT_ONEVEC_TEST(smov_w_b0,  "smov w9, v10.b[0]", 9, 10)
   3543 GEN_ONEINT_ONEVEC_TEST(smov_w_b15, "smov w9, v10.b[15]", 9, 10)
   3544 
   /* Scalar SQABS and SQNEG tests, one each for the d, s, h and b views of
      registers 7 and 30. */
   3545 GEN_TWOVEC_TEST(sqabs_d_d, "sqabs d7, d30", 7, 30)
   3546 GEN_TWOVEC_TEST(sqabs_s_s, "sqabs s7, s30", 7, 30)
   3547 GEN_TWOVEC_TEST(sqabs_h_h, "sqabs h7, h30", 7, 30)
   3548 GEN_TWOVEC_TEST(sqabs_b_b, "sqabs b7, b30", 7, 30)
   3549 GEN_TWOVEC_TEST(sqneg_d_d, "sqneg d7, d30", 7, 30)
   3550 GEN_TWOVEC_TEST(sqneg_s_s, "sqneg s7, s30", 7, 30)
   3551 GEN_TWOVEC_TEST(sqneg_h_h, "sqneg h7, h30", 7, 30)
   3552 GEN_TWOVEC_TEST(sqneg_b_b, "sqneg b7, b30", 7, 30)
   3553 
   /* Vector SQABS and SQNEG tests for the 2d, 4s, 2s, 8h, 4h, 16b and 8b
      arrangements.  GEN_UNARY_TEST receives only the mnemonic and two
      arrangement suffixes; the macro is defined outside this section, so
      its register choice and generated instruction text are not visible
      here. */
   3554 GEN_UNARY_TEST(sqabs, 2d, 2d)
   3555 GEN_UNARY_TEST(sqabs, 4s, 4s)
   3556 GEN_UNARY_TEST(sqabs, 2s, 2s)
   3557 GEN_UNARY_TEST(sqabs, 8h, 8h)
   3558 GEN_UNARY_TEST(sqabs, 4h, 4h)
   3559 GEN_UNARY_TEST(sqabs, 16b, 16b)
   3560 GEN_UNARY_TEST(sqabs, 8b, 8b)
   3561 GEN_UNARY_TEST(sqneg, 2d, 2d)
   3562 GEN_UNARY_TEST(sqneg, 4s, 4s)
   3563 GEN_UNARY_TEST(sqneg, 2s, 2s)
   3564 GEN_UNARY_TEST(sqneg, 8h, 8h)
   3565 GEN_UNARY_TEST(sqneg, 4h, 4h)
   3566 GEN_UNARY_TEST(sqneg, 16b, 16b)
   3567 GEN_UNARY_TEST(sqneg, 8b, 8b)
   3568 
   /* Scalar SQADD, UQADD, SQSUB and UQSUB tests, one each for the d, s, h
      and b views of registers 1, 2 and 4. */
   3569 GEN_THREEVEC_TEST(sqadd_d_d_d, "sqadd d1, d2, d4", 1, 2, 4)
   3570 GEN_THREEVEC_TEST(sqadd_s_s_s, "sqadd s1, s2, s4", 1, 2, 4)
   3571 GEN_THREEVEC_TEST(sqadd_h_h_h, "sqadd h1, h2, h4", 1, 2, 4)
   3572 GEN_THREEVEC_TEST(sqadd_b_b_b, "sqadd b1, b2, b4", 1, 2, 4)
   3573 GEN_THREEVEC_TEST(uqadd_d_d_d, "uqadd d1, d2, d4", 1, 2, 4)
   3574 GEN_THREEVEC_TEST(uqadd_s_s_s, "uqadd s1, s2, s4", 1, 2, 4)
   3575 GEN_THREEVEC_TEST(uqadd_h_h_h, "uqadd h1, h2, h4", 1, 2, 4)
   3576 GEN_THREEVEC_TEST(uqadd_b_b_b, "uqadd b1, b2, b4", 1, 2, 4)
   3577 GEN_THREEVEC_TEST(sqsub_d_d_d, "sqsub d1, d2, d4", 1, 2, 4)
   3578 GEN_THREEVEC_TEST(sqsub_s_s_s, "sqsub s1, s2, s4", 1, 2, 4)
   3579 GEN_THREEVEC_TEST(sqsub_h_h_h, "sqsub h1, h2, h4", 1, 2, 4)
   3580 GEN_THREEVEC_TEST(sqsub_b_b_b, "sqsub b1, b2, b4", 1, 2, 4)
   3581 GEN_THREEVEC_TEST(uqsub_d_d_d, "uqsub d1, d2, d4", 1, 2, 4)
   3582 GEN_THREEVEC_TEST(uqsub_s_s_s, "uqsub s1, s2, s4", 1, 2, 4)
   3583 GEN_THREEVEC_TEST(uqsub_h_h_h, "uqsub h1, h2, h4", 1, 2, 4)
   3584 GEN_THREEVEC_TEST(uqsub_b_b_b, "uqsub b1, b2, b4", 1, 2, 4)
   3585 
   /* Vector SQADD, UQADD, SQSUB and UQSUB tests on v1, v2 and v4, covering
      the 2d, 4s, 2s, 8h, 4h, 16b and 8b arrangements for each mnemonic. */
   3586 GEN_THREEVEC_TEST(sqadd_2d_2d_2d,    "sqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3587 GEN_THREEVEC_TEST(sqadd_4s_4s_4s,    "sqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3588 GEN_THREEVEC_TEST(sqadd_2s_2s_2s,    "sqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3589 GEN_THREEVEC_TEST(sqadd_8h_8h_8h,    "sqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3590 GEN_THREEVEC_TEST(sqadd_4h_4h_4h,    "sqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3591 GEN_THREEVEC_TEST(sqadd_16b_16b_16b, "sqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3592 GEN_THREEVEC_TEST(sqadd_8b_8b_8b,    "sqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3593 GEN_THREEVEC_TEST(uqadd_2d_2d_2d,    "uqadd v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3594 GEN_THREEVEC_TEST(uqadd_4s_4s_4s,    "uqadd v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3595 GEN_THREEVEC_TEST(uqadd_2s_2s_2s,    "uqadd v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3596 GEN_THREEVEC_TEST(uqadd_8h_8h_8h,    "uqadd v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3597 GEN_THREEVEC_TEST(uqadd_4h_4h_4h,    "uqadd v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3598 GEN_THREEVEC_TEST(uqadd_16b_16b_16b, "uqadd v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3599 GEN_THREEVEC_TEST(uqadd_8b_8b_8b,    "uqadd v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3600 GEN_THREEVEC_TEST(sqsub_2d_2d_2d,    "sqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3601 GEN_THREEVEC_TEST(sqsub_4s_4s_4s,    "sqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3602 GEN_THREEVEC_TEST(sqsub_2s_2s_2s,    "sqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3603 GEN_THREEVEC_TEST(sqsub_8h_8h_8h,    "sqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3604 GEN_THREEVEC_TEST(sqsub_4h_4h_4h,    "sqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3605 GEN_THREEVEC_TEST(sqsub_16b_16b_16b, "sqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3606 GEN_THREEVEC_TEST(sqsub_8b_8b_8b,    "sqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3607 GEN_THREEVEC_TEST(uqsub_2d_2d_2d,    "uqsub v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3608 GEN_THREEVEC_TEST(uqsub_4s_4s_4s,    "uqsub v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3609 GEN_THREEVEC_TEST(uqsub_2s_2s_2s,    "uqsub v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3610 GEN_THREEVEC_TEST(uqsub_8h_8h_8h,    "uqsub v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3611 GEN_THREEVEC_TEST(uqsub_4h_4h_4h,    "uqsub v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3612 GEN_THREEVEC_TEST(uqsub_16b_16b_16b, "uqsub v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3613 GEN_THREEVEC_TEST(uqsub_8b_8b_8b,    "uqsub v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3614 
   /* Scalar by-element SQDMLAL, SQDMLSL and SQDMULL tests.  The d/s cases
      index v29 at s[0] and s[3]; the s/h cases index v13 at h[1] and h[5].
      The last macro argument (29 or 13) follows whichever register the
      instruction text indexes; the first two are always 31 and 30. */
   3615 GEN_THREEVEC_TEST(sqdmlal_d_s_s0, "sqdmlal d31, s30, v29.s[0]", 31,30,29)
   3616 GEN_THREEVEC_TEST(sqdmlal_d_s_s3, "sqdmlal d31, s30, v29.s[3]", 31,30,29)
   3617 GEN_THREEVEC_TEST(sqdmlal_s_h_h1, "sqdmlal s31, h30, v13.h[1]", 31,30,13)
   3618 GEN_THREEVEC_TEST(sqdmlal_s_h_h5, "sqdmlal s31, h30, v13.h[5]", 31,30,13)
   3619 GEN_THREEVEC_TEST(sqdmlsl_d_s_s0, "sqdmlsl d31, s30, v29.s[0]", 31,30,29)
   3620 GEN_THREEVEC_TEST(sqdmlsl_d_s_s3, "sqdmlsl d31, s30, v29.s[3]", 31,30,29)
   3621 GEN_THREEVEC_TEST(sqdmlsl_s_h_h1, "sqdmlsl s31, h30, v13.h[1]", 31,30,13)
   3622 GEN_THREEVEC_TEST(sqdmlsl_s_h_h5, "sqdmlsl s31, h30, v13.h[5]", 31,30,13)
   3623 GEN_THREEVEC_TEST(sqdmull_d_s_s0, "sqdmull d31, s30, v29.s[0]", 31,30,29)
   3624 GEN_THREEVEC_TEST(sqdmull_d_s_s3, "sqdmull d31, s30, v29.s[3]", 31,30,29)
   3625 GEN_THREEVEC_TEST(sqdmull_s_h_h1, "sqdmull s31, h30, v13.h[1]", 31,30,13)
   3626 GEN_THREEVEC_TEST(sqdmull_s_h_h5, "sqdmull s31, h30, v13.h[5]", 31,30,13)
   3627 
   /* Vector by-element SQDMLAL, SQDMLSL and SQDMULL tests, with their "2"
      variants, on v29, v20 and v3.  Each mnemonic gets eight cases whose
      name suffix gives the element selected from v3: s[0] and s[3] for the
      2s form, s[1] and s[2] for the 4s form, h[0] and h[7] for the 4h
      form, h[1] and h[4] for the 8h form.  The *_h4 cases select v3.h[4];
      selecting h[1] there would merely repeat the *_h1 case and leave
      lane 4 unexercised.
      NOTE(review): any recorded expected output for the *_h4 cases has to
      be regenerated to match the h[4] instruction text and results. */
   3628 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s0, "sqdmlal  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3629 GEN_THREEVEC_TEST(sqdmlal_2d_2s_s3, "sqdmlal  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3630 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s1,"sqdmlal2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3631 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_s2,"sqdmlal2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3632 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h0, "sqdmlal  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3633 GEN_THREEVEC_TEST(sqdmlal_4s_4h_h7, "sqdmlal  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3634 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h1,"sqdmlal2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3635 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_h4,"sqdmlal2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3636 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s0, "sqdmlsl  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3637 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_s3, "sqdmlsl  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3638 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s1,"sqdmlsl2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3639 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_s2,"sqdmlsl2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3640 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h0, "sqdmlsl  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3641 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_h7, "sqdmlsl  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3642 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h1,"sqdmlsl2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3643 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_h4,"sqdmlsl2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3644 GEN_THREEVEC_TEST(sqdmull_2d_2s_s0, "sqdmull  v29.2d, v20.2s, v3.s[0]",29,20,3)
   3645 GEN_THREEVEC_TEST(sqdmull_2d_2s_s3, "sqdmull  v29.2d, v20.2s, v3.s[3]",29,20,3)
   3646 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s1,"sqdmull2 v29.2d, v20.4s, v3.s[1]",29,20,3)
   3647 GEN_THREEVEC_TEST(sqdmull2_2d_4s_s2,"sqdmull2 v29.2d, v20.4s, v3.s[2]",29,20,3)
   3648 GEN_THREEVEC_TEST(sqdmull_4s_4h_h0, "sqdmull  v29.4s, v20.4h, v3.h[0]",29,20,3)
   3649 GEN_THREEVEC_TEST(sqdmull_4s_4h_h7, "sqdmull  v29.4s, v20.4h, v3.h[7]",29,20,3)
   3650 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h1,"sqdmull2 v29.4s, v20.8h, v3.h[1]",29,20,3)
   3651 GEN_THREEVEC_TEST(sqdmull2_4s_8h_h4,"sqdmull2 v29.4s, v20.8h, v3.h[4]",29,20,3)
   3652 
   /* Scalar (non-indexed) SQDMLAL, SQDMLSL and SQDMULL tests on registers
      0, 8 and 16: a d/s/s case and an s/h/h case for each mnemonic. */
   3653 GEN_THREEVEC_TEST(sqdmlal_d_s_s, "sqdmlal d0, s8, s16", 0, 8, 16)
   3654 GEN_THREEVEC_TEST(sqdmlal_s_h_h, "sqdmlal s0, h8, h16", 0, 8, 16)
   3655 GEN_THREEVEC_TEST(sqdmlsl_d_s_s, "sqdmlsl d0, s8, s16", 0, 8, 16)
   3656 GEN_THREEVEC_TEST(sqdmlsl_s_h_h, "sqdmlsl s0, h8, h16", 0, 8, 16)
   3657 GEN_THREEVEC_TEST(sqdmull_d_s_s, "sqdmull d0, s8, s16", 0, 8, 16)
   3658 GEN_THREEVEC_TEST(sqdmull_s_h_h, "sqdmull s0, h8, h16", 0, 8, 16)
   3659 
   /* Vector (non-indexed) SQDMLAL, SQDMLSL and SQDMULL tests on v2, v11
      and v29.  Four cases per mnemonic: 2d from 2s, 2d from 4s ("2" form),
      4s from 4h and 4s from 8h ("2" form). */
   3660 GEN_THREEVEC_TEST(sqdmlal_2d_2s_2s,  "sqdmlal  v2.2d, v11.2s, v29.2s", 2,11,29)
   3661 GEN_THREEVEC_TEST(sqdmlal2_2d_4s_4s, "sqdmlal2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3662 GEN_THREEVEC_TEST(sqdmlal_4s_4h_4h,  "sqdmlal  v2.4s, v11.4h, v29.4h", 2,11,29)
   3663 GEN_THREEVEC_TEST(sqdmlal2_4s_8h_8h, "sqdmlal2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3664 GEN_THREEVEC_TEST(sqdmlsl_2d_2s_2s,  "sqdmlsl  v2.2d, v11.2s, v29.2s", 2,11,29)
   3665 GEN_THREEVEC_TEST(sqdmlsl2_2d_4s_4s, "sqdmlsl2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3666 GEN_THREEVEC_TEST(sqdmlsl_4s_4h_4h,  "sqdmlsl  v2.4s, v11.4h, v29.4h", 2,11,29)
   3667 GEN_THREEVEC_TEST(sqdmlsl2_4s_8h_8h, "sqdmlsl2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3668 GEN_THREEVEC_TEST(sqdmull_2d_2s_2s,  "sqdmull  v2.2d, v11.2s, v29.2s", 2,11,29)
   3669 GEN_THREEVEC_TEST(sqdmull2_2d_4s_4s, "sqdmull2 v2.2d, v11.4s, v29.4s", 2,11,29)
   3670 GEN_THREEVEC_TEST(sqdmull_4s_4h_4h,  "sqdmull  v2.4s, v11.4h, v29.4h", 2,11,29)
   3671 GEN_THREEVEC_TEST(sqdmull2_4s_8h_8h, "sqdmull2 v2.4s, v11.8h, v29.8h", 2,11,29)
   3672 
   /* Scalar by-element SQDMULH and SQRDMULH tests on registers 0, 1 and 2.
      The s cases index v2 at s[1] and s[3]; the h cases index v2 at h[2]
      and h[7]. */
   3673 GEN_THREEVEC_TEST(sqdmulh_s_s_s1, "sqdmulh s0, s1, v2.s[1]", 0,1,2)
   3674 GEN_THREEVEC_TEST(sqdmulh_s_s_s3, "sqdmulh s0, s1, v2.s[3]", 0,1,2)
   3675 GEN_THREEVEC_TEST(sqdmulh_h_h_h2, "sqdmulh h0, h1, v2.h[2]", 0,1,2)
   3676 GEN_THREEVEC_TEST(sqdmulh_h_h_h7, "sqdmulh h0, h1, v2.h[7]", 0,1,2)
   3677 GEN_THREEVEC_TEST(sqrdmulh_s_s_s1, "sqrdmulh s0, s1, v2.s[1]", 0,1,2)
   3678 GEN_THREEVEC_TEST(sqrdmulh_s_s_s3, "sqrdmulh s0, s1, v2.s[3]", 0,1,2)
   3679 GEN_THREEVEC_TEST(sqrdmulh_h_h_h2, "sqrdmulh h0, h1, v2.h[2]", 0,1,2)
   3680 GEN_THREEVEC_TEST(sqrdmulh_h_h_h7, "sqrdmulh h0, h1, v2.h[7]", 0,1,2)
   3681 
   /* Vector by-element SQDMULH and SQRDMULH tests on v0, v1 and v2.  The
      4s and 2s arrangements index v2 at s[1] and s[3]; the 8h and 4h
      arrangements index v2 at h[2] and h[7]. */
   3682 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s1, "sqdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
   3683 GEN_THREEVEC_TEST(sqdmulh_4s_4s_s3, "sqdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
   3684 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s1, "sqdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
   3685 GEN_THREEVEC_TEST(sqdmulh_2s_2s_s3, "sqdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
   3686 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h2, "sqdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
   3687 GEN_THREEVEC_TEST(sqdmulh_8h_8h_h7, "sqdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
   3688 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h2, "sqdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
   3689 GEN_THREEVEC_TEST(sqdmulh_4h_4h_h7, "sqdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
   3690 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s1, "sqrdmulh v0.4s, v1.4s, v2.s[1]", 0,1,2)
   3691 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_s3, "sqrdmulh v0.4s, v1.4s, v2.s[3]", 0,1,2)
   3692 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s1, "sqrdmulh v0.2s, v1.2s, v2.s[1]", 0,1,2)
   3693 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_s3, "sqrdmulh v0.2s, v1.2s, v2.s[3]", 0,1,2)
   3694 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h2, "sqrdmulh v0.8h, v1.8h, v2.h[2]", 0,1,2)
   3695 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_h7, "sqrdmulh v0.8h, v1.8h, v2.h[7]", 0,1,2)
   3696 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h2, "sqrdmulh v0.4h, v1.4h, v2.h[2]", 0,1,2)
   3697 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_h7, "sqrdmulh v0.4h, v1.4h, v2.h[7]", 0,1,2)
   3698 
   /* Scalar (non-indexed) SQDMULH and SQRDMULH tests on registers 1, 2 and
      4, one s case and one h case per mnemonic. */
   3699 GEN_THREEVEC_TEST(sqdmulh_s_s_s,  "sqdmulh  s1, s2, s4", 1, 2, 4)
   3700 GEN_THREEVEC_TEST(sqdmulh_h_h_h,  "sqdmulh  h1, h2, h4", 1, 2, 4)
   3701 GEN_THREEVEC_TEST(sqrdmulh_s_s_s, "sqrdmulh s1, s2, s4", 1, 2, 4)
   3702 GEN_THREEVEC_TEST(sqrdmulh_h_h_h, "sqrdmulh h1, h2, h4", 1, 2, 4)
   3703 
   /* Vector (non-indexed) SQDMULH and SQRDMULH tests on v1, v2 and v4 for
      the 4s, 2s, 8h and 4h arrangements. */
   3704 GEN_THREEVEC_TEST(sqdmulh_4s_4s_4s, "sqdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3705 GEN_THREEVEC_TEST(sqdmulh_2s_2s_2s, "sqdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3706 GEN_THREEVEC_TEST(sqdmulh_8h_8h_8h, "sqdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3707 GEN_THREEVEC_TEST(sqdmulh_4h_4h_4h, "sqdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3708 GEN_THREEVEC_TEST(sqrdmulh_4s_4s_4s, "sqrdmulh v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3709 GEN_THREEVEC_TEST(sqrdmulh_2s_2s_2s, "sqrdmulh v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3710 GEN_THREEVEC_TEST(sqrdmulh_8h_8h_8h, "sqrdmulh v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3711 GEN_THREEVEC_TEST(sqrdmulh_4h_4h_4h, "sqrdmulh v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3712 
   /* Scalar three-register SQSHL, UQSHL, SQRSHL and UQRSHL tests, one each
      for the d, s, h and b views of registers 1, 2 and 4. */
   3713 GEN_THREEVEC_TEST(sqshl_d_d_d, "sqshl d1, d2, d4", 1, 2, 4)
   3714 GEN_THREEVEC_TEST(sqshl_s_s_s, "sqshl s1, s2, s4", 1, 2, 4)
   3715 GEN_THREEVEC_TEST(sqshl_h_h_h, "sqshl h1, h2, h4", 1, 2, 4)
   3716 GEN_THREEVEC_TEST(sqshl_b_b_b, "sqshl b1, b2, b4", 1, 2, 4)
   3717 GEN_THREEVEC_TEST(uqshl_d_d_d, "uqshl d1, d2, d4", 1, 2, 4)
   3718 GEN_THREEVEC_TEST(uqshl_s_s_s, "uqshl s1, s2, s4", 1, 2, 4)
   3719 GEN_THREEVEC_TEST(uqshl_h_h_h, "uqshl h1, h2, h4", 1, 2, 4)
   3720 GEN_THREEVEC_TEST(uqshl_b_b_b, "uqshl b1, b2, b4", 1, 2, 4)
   3721 GEN_THREEVEC_TEST(sqrshl_d_d_d, "sqrshl d1, d2, d4", 1, 2, 4)
   3722 GEN_THREEVEC_TEST(sqrshl_s_s_s, "sqrshl s1, s2, s4", 1, 2, 4)
   3723 GEN_THREEVEC_TEST(sqrshl_h_h_h, "sqrshl h1, h2, h4", 1, 2, 4)
   3724 GEN_THREEVEC_TEST(sqrshl_b_b_b, "sqrshl b1, b2, b4", 1, 2, 4)
   3725 GEN_THREEVEC_TEST(uqrshl_d_d_d, "uqrshl d1, d2, d4", 1, 2, 4)
   3726 GEN_THREEVEC_TEST(uqrshl_s_s_s, "uqrshl s1, s2, s4", 1, 2, 4)
   3727 GEN_THREEVEC_TEST(uqrshl_h_h_h, "uqrshl h1, h2, h4", 1, 2, 4)
   3728 GEN_THREEVEC_TEST(uqrshl_b_b_b, "uqrshl b1, b2, b4", 1, 2, 4)
   3729 
   /* sqshl / uqshl / sqrshl / uqrshl, vector three-register forms.
      Each mnemonic is listed for the arrangements 2d, 4s, 2s, 8h, 4h,
      16b and 8b, with all three operands sharing the arrangement.
      Registers are v1, v2 and v4 throughout. */
   3730 GEN_THREEVEC_TEST(sqshl_2d_2d_2d,    "sqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3731 GEN_THREEVEC_TEST(sqshl_4s_4s_4s,    "sqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3732 GEN_THREEVEC_TEST(sqshl_2s_2s_2s,    "sqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3733 GEN_THREEVEC_TEST(sqshl_8h_8h_8h,    "sqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3734 GEN_THREEVEC_TEST(sqshl_4h_4h_4h,    "sqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3735 GEN_THREEVEC_TEST(sqshl_16b_16b_16b, "sqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3736 GEN_THREEVEC_TEST(sqshl_8b_8b_8b,    "sqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3737 GEN_THREEVEC_TEST(uqshl_2d_2d_2d,    "uqshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3738 GEN_THREEVEC_TEST(uqshl_4s_4s_4s,    "uqshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3739 GEN_THREEVEC_TEST(uqshl_2s_2s_2s,    "uqshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3740 GEN_THREEVEC_TEST(uqshl_8h_8h_8h,    "uqshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3741 GEN_THREEVEC_TEST(uqshl_4h_4h_4h,    "uqshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3742 GEN_THREEVEC_TEST(uqshl_16b_16b_16b, "uqshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3743 GEN_THREEVEC_TEST(uqshl_8b_8b_8b,    "uqshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3744 GEN_THREEVEC_TEST(sqrshl_2d_2d_2d,    "sqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3745 GEN_THREEVEC_TEST(sqrshl_4s_4s_4s,    "sqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3746 GEN_THREEVEC_TEST(sqrshl_2s_2s_2s,    "sqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3747 GEN_THREEVEC_TEST(sqrshl_8h_8h_8h,    "sqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3748 GEN_THREEVEC_TEST(sqrshl_4h_4h_4h,    "sqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3749 GEN_THREEVEC_TEST(sqrshl_16b_16b_16b, "sqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3750 GEN_THREEVEC_TEST(sqrshl_8b_8b_8b,    "sqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3751 GEN_THREEVEC_TEST(uqrshl_2d_2d_2d,    "uqrshl v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   3752 GEN_THREEVEC_TEST(uqrshl_4s_4s_4s,    "uqrshl v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   3753 GEN_THREEVEC_TEST(uqrshl_2s_2s_2s,    "uqrshl v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   3754 GEN_THREEVEC_TEST(uqrshl_8h_8h_8h,    "uqrshl v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   3755 GEN_THREEVEC_TEST(uqrshl_4h_4h_4h,    "uqrshl v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   3756 GEN_THREEVEC_TEST(uqrshl_16b_16b_16b, "uqrshl v1.16b, v2.16b, v4.16b", 1, 2, 4)
   3757 GEN_THREEVEC_TEST(uqrshl_8b_8b_8b,    "uqrshl v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   3758 
   /* Scalar shift-right-by-immediate tests for sqrshrn, uqrshrn,
      sqshrn, uqshrn, sqrshrun and sqshrun.  For each mnemonic the
      destination/source register classes are s<-d, h<-s and b<-h, and
      three immediates are listed per pair: #1, a middle value
      (#17 / #9 / #4) and the largest one used here (#32 / #16 / #8).
      Destination is register 2 and source is register 5, matching the
      trailing 2, 5.
      NOTE(review): GEN_TWOVEC_TEST is defined outside this part of the
      file; presumably one test routine per invocation -- confirm. */
   3759 GEN_TWOVEC_TEST(sqrshrn_s_d_1,  "sqrshrn s2, d5, #1",  2, 5)
   3760 GEN_TWOVEC_TEST(sqrshrn_s_d_17, "sqrshrn s2, d5, #17", 2, 5)
   3761 GEN_TWOVEC_TEST(sqrshrn_s_d_32, "sqrshrn s2, d5, #32", 2, 5)
   3762 GEN_TWOVEC_TEST(sqrshrn_h_s_1,  "sqrshrn h2, s5, #1",  2, 5)
   3763 GEN_TWOVEC_TEST(sqrshrn_h_s_9,  "sqrshrn h2, s5, #9",  2, 5)
   3764 GEN_TWOVEC_TEST(sqrshrn_h_s_16, "sqrshrn h2, s5, #16", 2, 5)
   3765 GEN_TWOVEC_TEST(sqrshrn_b_h_1,  "sqrshrn b2, h5, #1",  2, 5)
   3766 GEN_TWOVEC_TEST(sqrshrn_b_h_4,  "sqrshrn b2, h5, #4",  2, 5)
   3767 GEN_TWOVEC_TEST(sqrshrn_b_h_8,  "sqrshrn b2, h5, #8",  2, 5)
   3768 GEN_TWOVEC_TEST(uqrshrn_s_d_1,  "uqrshrn s2, d5, #1",  2, 5)
   3769 GEN_TWOVEC_TEST(uqrshrn_s_d_17, "uqrshrn s2, d5, #17", 2, 5)
   3770 GEN_TWOVEC_TEST(uqrshrn_s_d_32, "uqrshrn s2, d5, #32", 2, 5)
   3771 GEN_TWOVEC_TEST(uqrshrn_h_s_1,  "uqrshrn h2, s5, #1",  2, 5)
   3772 GEN_TWOVEC_TEST(uqrshrn_h_s_9,  "uqrshrn h2, s5, #9",  2, 5)
   3773 GEN_TWOVEC_TEST(uqrshrn_h_s_16, "uqrshrn h2, s5, #16", 2, 5)
   3774 GEN_TWOVEC_TEST(uqrshrn_b_h_1,  "uqrshrn b2, h5, #1",  2, 5)
   3775 GEN_TWOVEC_TEST(uqrshrn_b_h_4,  "uqrshrn b2, h5, #4",  2, 5)
   3776 GEN_TWOVEC_TEST(uqrshrn_b_h_8,  "uqrshrn b2, h5, #8",  2, 5)
   3777 GEN_TWOVEC_TEST(sqshrn_s_d_1,  "sqshrn s2, d5, #1",  2, 5)
   3778 GEN_TWOVEC_TEST(sqshrn_s_d_17, "sqshrn s2, d5, #17", 2, 5)
   3779 GEN_TWOVEC_TEST(sqshrn_s_d_32, "sqshrn s2, d5, #32", 2, 5)
   3780 GEN_TWOVEC_TEST(sqshrn_h_s_1,  "sqshrn h2, s5, #1",  2, 5)
   3781 GEN_TWOVEC_TEST(sqshrn_h_s_9,  "sqshrn h2, s5, #9",  2, 5)
   3782 GEN_TWOVEC_TEST(sqshrn_h_s_16, "sqshrn h2, s5, #16", 2, 5)
   3783 GEN_TWOVEC_TEST(sqshrn_b_h_1,  "sqshrn b2, h5, #1",  2, 5)
   3784 GEN_TWOVEC_TEST(sqshrn_b_h_4,  "sqshrn b2, h5, #4",  2, 5)
   3785 GEN_TWOVEC_TEST(sqshrn_b_h_8,  "sqshrn b2, h5, #8",  2, 5)
   3786 GEN_TWOVEC_TEST(uqshrn_s_d_1,  "uqshrn s2, d5, #1",  2, 5)
   3787 GEN_TWOVEC_TEST(uqshrn_s_d_17, "uqshrn s2, d5, #17", 2, 5)
   3788 GEN_TWOVEC_TEST(uqshrn_s_d_32, "uqshrn s2, d5, #32", 2, 5)
   3789 GEN_TWOVEC_TEST(uqshrn_h_s_1,  "uqshrn h2, s5, #1",  2, 5)
   3790 GEN_TWOVEC_TEST(uqshrn_h_s_9,  "uqshrn h2, s5, #9",  2, 5)
   3791 GEN_TWOVEC_TEST(uqshrn_h_s_16, "uqshrn h2, s5, #16", 2, 5)
   3792 GEN_TWOVEC_TEST(uqshrn_b_h_1,  "uqshrn b2, h5, #1",  2, 5)
   3793 GEN_TWOVEC_TEST(uqshrn_b_h_4,  "uqshrn b2, h5, #4",  2, 5)
   3794 GEN_TWOVEC_TEST(uqshrn_b_h_8,  "uqshrn b2, h5, #8",  2, 5)
   3795 GEN_TWOVEC_TEST(sqrshrun_s_d_1,  "sqrshrun s2, d5, #1",  2, 5)
   3796 GEN_TWOVEC_TEST(sqrshrun_s_d_17, "sqrshrun s2, d5, #17", 2, 5)
   3797 GEN_TWOVEC_TEST(sqrshrun_s_d_32, "sqrshrun s2, d5, #32", 2, 5)
   3798 GEN_TWOVEC_TEST(sqrshrun_h_s_1,  "sqrshrun h2, s5, #1",  2, 5)
   3799 GEN_TWOVEC_TEST(sqrshrun_h_s_9,  "sqrshrun h2, s5, #9",  2, 5)
   3800 GEN_TWOVEC_TEST(sqrshrun_h_s_16, "sqrshrun h2, s5, #16", 2, 5)
   3801 GEN_TWOVEC_TEST(sqrshrun_b_h_1,  "sqrshrun b2, h5, #1",  2, 5)
   3802 GEN_TWOVEC_TEST(sqrshrun_b_h_4,  "sqrshrun b2, h5, #4",  2, 5)
   3803 GEN_TWOVEC_TEST(sqrshrun_b_h_8,  "sqrshrun b2, h5, #8",  2, 5)
   3804 GEN_TWOVEC_TEST(sqshrun_s_d_1,  "sqshrun s2, d5, #1",  2, 5)
   3805 GEN_TWOVEC_TEST(sqshrun_s_d_17, "sqshrun s2, d5, #17", 2, 5)
   3806 GEN_TWOVEC_TEST(sqshrun_s_d_32, "sqshrun s2, d5, #32", 2, 5)
   3807 GEN_TWOVEC_TEST(sqshrun_h_s_1,  "sqshrun h2, s5, #1",  2, 5)
   3808 GEN_TWOVEC_TEST(sqshrun_h_s_9,  "sqshrun h2, s5, #9",  2, 5)
   3809 GEN_TWOVEC_TEST(sqshrun_h_s_16, "sqshrun h2, s5, #16", 2, 5)
   3810 GEN_TWOVEC_TEST(sqshrun_b_h_1,  "sqshrun b2, h5, #1",  2, 5)
   3811 GEN_TWOVEC_TEST(sqshrun_b_h_4,  "sqshrun b2, h5, #4",  2, 5)
   3812 GEN_TWOVEC_TEST(sqshrun_b_h_8,  "sqshrun b2, h5, #8",  2, 5)
   3813 
   /* Vector shift-right-by-immediate tests for sqrshrn, uqrshrn,
      sqshrn, uqshrn, sqrshrun and sqshrun, each together with its
      "2"-suffixed variant.  Arrangement pairs are 2s/4s <- 2d,
      4h/8h <- 4s and 8b/16b <- 8h, with the same three immediates per
      pair as listed in the test names (#1, a middle value, and
      #32 / #16 / #8).  Destination is v4 and source is v29 throughout,
      matching the trailing 4, 29. */
   3814 GEN_TWOVEC_TEST(sqrshrn_2s_2d_1,   "sqrshrn  v4.2s,  v29.2d, #1",  4, 29)
   3815 GEN_TWOVEC_TEST(sqrshrn_2s_2d_17,  "sqrshrn  v4.2s,  v29.2d, #17", 4, 29)
   3816 GEN_TWOVEC_TEST(sqrshrn_2s_2d_32,  "sqrshrn  v4.2s,  v29.2d, #32", 4, 29)
   3817 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_1,  "sqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3818 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_17, "sqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3819 GEN_TWOVEC_TEST(sqrshrn2_4s_2d_32, "sqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3820 GEN_TWOVEC_TEST(sqrshrn_4h_4s_1,   "sqrshrn  v4.4h,  v29.4s, #1",  4, 29)
   3821 GEN_TWOVEC_TEST(sqrshrn_4h_4s_9,   "sqrshrn  v4.4h,  v29.4s, #9",  4, 29)
   3822 GEN_TWOVEC_TEST(sqrshrn_4h_4s_16,  "sqrshrn  v4.4h,  v29.4s, #16", 4, 29)
   3823 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_1,  "sqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3824 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_9,  "sqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3825 GEN_TWOVEC_TEST(sqrshrn2_8h_4s_16, "sqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3826 GEN_TWOVEC_TEST(sqrshrn_8b_8h_1,   "sqrshrn  v4.8b,  v29.8h, #1",  4, 29)
   3827 GEN_TWOVEC_TEST(sqrshrn_8b_8h_4,   "sqrshrn  v4.8b,  v29.8h, #4",  4, 29)
   3828 GEN_TWOVEC_TEST(sqrshrn_8b_8h_8,   "sqrshrn  v4.8b,  v29.8h, #8",  4, 29)
   3829 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_1, "sqrshrn2 v4.16b, v29.8h, #1",  4, 29)
   3830 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_4, "sqrshrn2 v4.16b, v29.8h, #4",  4, 29)
   3831 GEN_TWOVEC_TEST(sqrshrn2_16b_8h_8, "sqrshrn2 v4.16b, v29.8h, #8",  4, 29)
   3832 GEN_TWOVEC_TEST(uqrshrn_2s_2d_1,   "uqrshrn  v4.2s,  v29.2d, #1",  4, 29)
   3833 GEN_TWOVEC_TEST(uqrshrn_2s_2d_17,  "uqrshrn  v4.2s,  v29.2d, #17", 4, 29)
   3834 GEN_TWOVEC_TEST(uqrshrn_2s_2d_32,  "uqrshrn  v4.2s,  v29.2d, #32", 4, 29)
   3835 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_1,  "uqrshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3836 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_17, "uqrshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3837 GEN_TWOVEC_TEST(uqrshrn2_4s_2d_32, "uqrshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3838 GEN_TWOVEC_TEST(uqrshrn_4h_4s_1,   "uqrshrn  v4.4h,  v29.4s, #1",  4, 29)
   3839 GEN_TWOVEC_TEST(uqrshrn_4h_4s_9,   "uqrshrn  v4.4h,  v29.4s, #9",  4, 29)
   3840 GEN_TWOVEC_TEST(uqrshrn_4h_4s_16,  "uqrshrn  v4.4h,  v29.4s, #16", 4, 29)
   3841 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_1,  "uqrshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3842 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_9,  "uqrshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3843 GEN_TWOVEC_TEST(uqrshrn2_8h_4s_16, "uqrshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3844 GEN_TWOVEC_TEST(uqrshrn_8b_8h_1,   "uqrshrn  v4.8b,  v29.8h, #1",  4, 29)
   3845 GEN_TWOVEC_TEST(uqrshrn_8b_8h_4,   "uqrshrn  v4.8b,  v29.8h, #4",  4, 29)
   3846 GEN_TWOVEC_TEST(uqrshrn_8b_8h_8,   "uqrshrn  v4.8b,  v29.8h, #8",  4, 29)
   3847 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_1, "uqrshrn2 v4.16b, v29.8h, #1",  4, 29)
   3848 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_4, "uqrshrn2 v4.16b, v29.8h, #4",  4, 29)
   3849 GEN_TWOVEC_TEST(uqrshrn2_16b_8h_8, "uqrshrn2 v4.16b, v29.8h, #8",  4, 29)
   3850 GEN_TWOVEC_TEST(sqshrn_2s_2d_1,   "sqshrn  v4.2s,  v29.2d, #1",  4, 29)
   3851 GEN_TWOVEC_TEST(sqshrn_2s_2d_17,  "sqshrn  v4.2s,  v29.2d, #17", 4, 29)
   3852 GEN_TWOVEC_TEST(sqshrn_2s_2d_32,  "sqshrn  v4.2s,  v29.2d, #32", 4, 29)
   3853 GEN_TWOVEC_TEST(sqshrn2_4s_2d_1,  "sqshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3854 GEN_TWOVEC_TEST(sqshrn2_4s_2d_17, "sqshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3855 GEN_TWOVEC_TEST(sqshrn2_4s_2d_32, "sqshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3856 GEN_TWOVEC_TEST(sqshrn_4h_4s_1,   "sqshrn  v4.4h,  v29.4s, #1",  4, 29)
   3857 GEN_TWOVEC_TEST(sqshrn_4h_4s_9,   "sqshrn  v4.4h,  v29.4s, #9",  4, 29)
   3858 GEN_TWOVEC_TEST(sqshrn_4h_4s_16,  "sqshrn  v4.4h,  v29.4s, #16", 4, 29)
   3859 GEN_TWOVEC_TEST(sqshrn2_8h_4s_1,  "sqshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3860 GEN_TWOVEC_TEST(sqshrn2_8h_4s_9,  "sqshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3861 GEN_TWOVEC_TEST(sqshrn2_8h_4s_16, "sqshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3862 GEN_TWOVEC_TEST(sqshrn_8b_8h_1,   "sqshrn  v4.8b,  v29.8h, #1",  4, 29)
   3863 GEN_TWOVEC_TEST(sqshrn_8b_8h_4,   "sqshrn  v4.8b,  v29.8h, #4",  4, 29)
   3864 GEN_TWOVEC_TEST(sqshrn_8b_8h_8,   "sqshrn  v4.8b,  v29.8h, #8",  4, 29)
   3865 GEN_TWOVEC_TEST(sqshrn2_16b_8h_1, "sqshrn2 v4.16b, v29.8h, #1",  4, 29)
   3866 GEN_TWOVEC_TEST(sqshrn2_16b_8h_4, "sqshrn2 v4.16b, v29.8h, #4",  4, 29)
   3867 GEN_TWOVEC_TEST(sqshrn2_16b_8h_8, "sqshrn2 v4.16b, v29.8h, #8",  4, 29)
   3868 GEN_TWOVEC_TEST(uqshrn_2s_2d_1,   "uqshrn  v4.2s,  v29.2d, #1",  4, 29)
   3869 GEN_TWOVEC_TEST(uqshrn_2s_2d_17,  "uqshrn  v4.2s,  v29.2d, #17", 4, 29)
   3870 GEN_TWOVEC_TEST(uqshrn_2s_2d_32,  "uqshrn  v4.2s,  v29.2d, #32", 4, 29)
   3871 GEN_TWOVEC_TEST(uqshrn2_4s_2d_1,  "uqshrn2 v4.4s,  v29.2d, #1",  4, 29)
   3872 GEN_TWOVEC_TEST(uqshrn2_4s_2d_17, "uqshrn2 v4.4s,  v29.2d, #17", 4, 29)
   3873 GEN_TWOVEC_TEST(uqshrn2_4s_2d_32, "uqshrn2 v4.4s,  v29.2d, #32", 4, 29)
   3874 GEN_TWOVEC_TEST(uqshrn_4h_4s_1,   "uqshrn  v4.4h,  v29.4s, #1",  4, 29)
   3875 GEN_TWOVEC_TEST(uqshrn_4h_4s_9,   "uqshrn  v4.4h,  v29.4s, #9",  4, 29)
   3876 GEN_TWOVEC_TEST(uqshrn_4h_4s_16,  "uqshrn  v4.4h,  v29.4s, #16", 4, 29)
   3877 GEN_TWOVEC_TEST(uqshrn2_8h_4s_1,  "uqshrn2 v4.8h,  v29.4s, #1",  4, 29)
   3878 GEN_TWOVEC_TEST(uqshrn2_8h_4s_9,  "uqshrn2 v4.8h,  v29.4s, #9",  4, 29)
   3879 GEN_TWOVEC_TEST(uqshrn2_8h_4s_16, "uqshrn2 v4.8h,  v29.4s, #16", 4, 29)
   3880 GEN_TWOVEC_TEST(uqshrn_8b_8h_1,   "uqshrn  v4.8b,  v29.8h, #1",  4, 29)
   3881 GEN_TWOVEC_TEST(uqshrn_8b_8h_4,   "uqshrn  v4.8b,  v29.8h, #4",  4, 29)
   3882 GEN_TWOVEC_TEST(uqshrn_8b_8h_8,   "uqshrn  v4.8b,  v29.8h, #8",  4, 29)
   3883 GEN_TWOVEC_TEST(uqshrn2_16b_8h_1, "uqshrn2 v4.16b, v29.8h, #1",  4, 29)
   3884 GEN_TWOVEC_TEST(uqshrn2_16b_8h_4, "uqshrn2 v4.16b, v29.8h, #4",  4, 29)
   3885 GEN_TWOVEC_TEST(uqshrn2_16b_8h_8, "uqshrn2 v4.16b, v29.8h, #8",  4, 29)
   3886 GEN_TWOVEC_TEST(sqrshrun_2s_2d_1,   "sqrshrun  v4.2s,  v29.2d, #1",  4, 29)
   3887 GEN_TWOVEC_TEST(sqrshrun_2s_2d_17,  "sqrshrun  v4.2s,  v29.2d, #17", 4, 29)
   3888 GEN_TWOVEC_TEST(sqrshrun_2s_2d_32,  "sqrshrun  v4.2s,  v29.2d, #32", 4, 29)
   3889 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_1,  "sqrshrun2 v4.4s,  v29.2d, #1",  4, 29)
   3890 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_17, "sqrshrun2 v4.4s,  v29.2d, #17", 4, 29)
   3891 GEN_TWOVEC_TEST(sqrshrun2_4s_2d_32, "sqrshrun2 v4.4s,  v29.2d, #32", 4, 29)
   3892 GEN_TWOVEC_TEST(sqrshrun_4h_4s_1,   "sqrshrun  v4.4h,  v29.4s, #1",  4, 29)
   3893 GEN_TWOVEC_TEST(sqrshrun_4h_4s_9,   "sqrshrun  v4.4h,  v29.4s, #9",  4, 29)
   3894 GEN_TWOVEC_TEST(sqrshrun_4h_4s_16,  "sqrshrun  v4.4h,  v29.4s, #16", 4, 29)
   3895 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_1,  "sqrshrun2 v4.8h,  v29.4s, #1",  4, 29)
   3896 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_9,  "sqrshrun2 v4.8h,  v29.4s, #9",  4, 29)
   3897 GEN_TWOVEC_TEST(sqrshrun2_8h_4s_16, "sqrshrun2 v4.8h,  v29.4s, #16", 4, 29)
   3898 GEN_TWOVEC_TEST(sqrshrun_8b_8h_1,   "sqrshrun  v4.8b,  v29.8h, #1",  4, 29)
   3899 GEN_TWOVEC_TEST(sqrshrun_8b_8h_4,   "sqrshrun  v4.8b,  v29.8h, #4",  4, 29)
   3900 GEN_TWOVEC_TEST(sqrshrun_8b_8h_8,   "sqrshrun  v4.8b,  v29.8h, #8",  4, 29)
   3901 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_1, "sqrshrun2 v4.16b, v29.8h, #1",  4, 29)
   3902 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_4, "sqrshrun2 v4.16b, v29.8h, #4",  4, 29)
   3903 GEN_TWOVEC_TEST(sqrshrun2_16b_8h_8, "sqrshrun2 v4.16b, v29.8h, #8",  4, 29)
   3904 GEN_TWOVEC_TEST(sqshrun_2s_2d_1,   "sqshrun  v4.2s,  v29.2d, #1",  4, 29)
   3905 GEN_TWOVEC_TEST(sqshrun_2s_2d_17,  "sqshrun  v4.2s,  v29.2d, #17", 4, 29)
   3906 GEN_TWOVEC_TEST(sqshrun_2s_2d_32,  "sqshrun  v4.2s,  v29.2d, #32", 4, 29)
   3907 GEN_TWOVEC_TEST(sqshrun2_4s_2d_1,  "sqshrun2 v4.4s,  v29.2d, #1",  4, 29)
   3908 GEN_TWOVEC_TEST(sqshrun2_4s_2d_17, "sqshrun2 v4.4s,  v29.2d, #17", 4, 29)
   3909 GEN_TWOVEC_TEST(sqshrun2_4s_2d_32, "sqshrun2 v4.4s,  v29.2d, #32", 4, 29)
   3910 GEN_TWOVEC_TEST(sqshrun_4h_4s_1,   "sqshrun  v4.4h,  v29.4s, #1",  4, 29)
   3911 GEN_TWOVEC_TEST(sqshrun_4h_4s_9,   "sqshrun  v4.4h,  v29.4s, #9",  4, 29)
   3912 GEN_TWOVEC_TEST(sqshrun_4h_4s_16,  "sqshrun  v4.4h,  v29.4s, #16", 4, 29)
   3913 GEN_TWOVEC_TEST(sqshrun2_8h_4s_1,  "sqshrun2 v4.8h,  v29.4s, #1",  4, 29)
   3914 GEN_TWOVEC_TEST(sqshrun2_8h_4s_9,  "sqshrun2 v4.8h,  v29.4s, #9",  4, 29)
   3915 GEN_TWOVEC_TEST(sqshrun2_8h_4s_16, "sqshrun2 v4.8h,  v29.4s, #16", 4, 29)
   3916 GEN_TWOVEC_TEST(sqshrun_8b_8h_1,   "sqshrun  v4.8b,  v29.8h, #1",  4, 29)
   3917 GEN_TWOVEC_TEST(sqshrun_8b_8h_4,   "sqshrun  v4.8b,  v29.8h, #4",  4, 29)
   3918 GEN_TWOVEC_TEST(sqshrun_8b_8h_8,   "sqshrun  v4.8b,  v29.8h, #8",  4, 29)
   3919 GEN_TWOVEC_TEST(sqshrun2_16b_8h_1, "sqshrun2 v4.16b, v29.8h, #1",  4, 29)
   3920 GEN_TWOVEC_TEST(sqshrun2_16b_8h_4, "sqshrun2 v4.16b, v29.8h, #4",  4, 29)
   3921 GEN_TWOVEC_TEST(sqshrun2_16b_8h_8, "sqshrun2 v4.16b, v29.8h, #8",  4, 29)
   3922 
   /* sqshl / uqshl / sqshlu, scalar shift-by-immediate forms.
      Immediates per register class: d uses #0, #32, #63; s uses #0,
      #16, #31; h uses #0, #8, #15.  For b registers, sqshl and uqshl
      use #0, #1, #4, #6, #7, while sqshlu lists every value #0..#7.
      Destination is register 5 and source is register 28. */
   3923 GEN_TWOVEC_TEST(sqshl_d_d_0,  "sqshl d5, d28, #0",  5, 28)
   3924 GEN_TWOVEC_TEST(sqshl_d_d_32, "sqshl d5, d28, #32", 5, 28)
   3925 GEN_TWOVEC_TEST(sqshl_d_d_63, "sqshl d5, d28, #63", 5, 28)
   3926 GEN_TWOVEC_TEST(sqshl_s_s_0,  "sqshl s5, s28, #0",  5, 28)
   3927 GEN_TWOVEC_TEST(sqshl_s_s_16, "sqshl s5, s28, #16", 5, 28)
   3928 GEN_TWOVEC_TEST(sqshl_s_s_31, "sqshl s5, s28, #31", 5, 28)
   3929 GEN_TWOVEC_TEST(sqshl_h_h_0,  "sqshl h5, h28, #0",  5, 28)
   3930 GEN_TWOVEC_TEST(sqshl_h_h_8,  "sqshl h5, h28, #8",  5, 28)
   3931 GEN_TWOVEC_TEST(sqshl_h_h_15, "sqshl h5, h28, #15", 5, 28)
   3932 GEN_TWOVEC_TEST(sqshl_b_b_0,  "sqshl b5, b28, #0",  5, 28)
   3933 GEN_TWOVEC_TEST(sqshl_b_b_1,  "sqshl b5, b28, #1",  5, 28)
   3934 GEN_TWOVEC_TEST(sqshl_b_b_4,  "sqshl b5, b28, #4",  5, 28)
   3935 GEN_TWOVEC_TEST(sqshl_b_b_6,  "sqshl b5, b28, #6",  5, 28)
   3936 GEN_TWOVEC_TEST(sqshl_b_b_7,  "sqshl b5, b28, #7",  5, 28)
   3937 GEN_TWOVEC_TEST(uqshl_d_d_0,  "uqshl d5, d28, #0",  5, 28)
   3938 GEN_TWOVEC_TEST(uqshl_d_d_32, "uqshl d5, d28, #32", 5, 28)
   3939 GEN_TWOVEC_TEST(uqshl_d_d_63, "uqshl d5, d28, #63", 5, 28)
   3940 GEN_TWOVEC_TEST(uqshl_s_s_0,  "uqshl s5, s28, #0",  5, 28)
   3941 GEN_TWOVEC_TEST(uqshl_s_s_16, "uqshl s5, s28, #16", 5, 28)
   3942 GEN_TWOVEC_TEST(uqshl_s_s_31, "uqshl s5, s28, #31", 5, 28)
   3943 GEN_TWOVEC_TEST(uqshl_h_h_0,  "uqshl h5, h28, #0",  5, 28)
   3944 GEN_TWOVEC_TEST(uqshl_h_h_8,  "uqshl h5, h28, #8",  5, 28)
   3945 GEN_TWOVEC_TEST(uqshl_h_h_15, "uqshl h5, h28, #15", 5, 28)
   3946 GEN_TWOVEC_TEST(uqshl_b_b_0,  "uqshl b5, b28, #0",  5, 28)
   3947 GEN_TWOVEC_TEST(uqshl_b_b_1,  "uqshl b5, b28, #1",  5, 28)
   3948 GEN_TWOVEC_TEST(uqshl_b_b_4,  "uqshl b5, b28, #4",  5, 28)
   3949 GEN_TWOVEC_TEST(uqshl_b_b_6,  "uqshl b5, b28, #6",  5, 28)
   3950 GEN_TWOVEC_TEST(uqshl_b_b_7,  "uqshl b5, b28, #7",  5, 28)
   3951 GEN_TWOVEC_TEST(sqshlu_d_d_0,  "sqshlu d5, d28, #0",  5, 28)
   3952 GEN_TWOVEC_TEST(sqshlu_d_d_32, "sqshlu d5, d28, #32", 5, 28)
   3953 GEN_TWOVEC_TEST(sqshlu_d_d_63, "sqshlu d5, d28, #63", 5, 28)
   3954 GEN_TWOVEC_TEST(sqshlu_s_s_0,  "sqshlu s5, s28, #0",  5, 28)
   3955 GEN_TWOVEC_TEST(sqshlu_s_s_16, "sqshlu s5, s28, #16", 5, 28)
   3956 GEN_TWOVEC_TEST(sqshlu_s_s_31, "sqshlu s5, s28, #31", 5, 28)
   3957 GEN_TWOVEC_TEST(sqshlu_h_h_0,  "sqshlu h5, h28, #0",  5, 28)
   3958 GEN_TWOVEC_TEST(sqshlu_h_h_8,  "sqshlu h5, h28, #8",  5, 28)
   3959 GEN_TWOVEC_TEST(sqshlu_h_h_15, "sqshlu h5, h28, #15", 5, 28)
   3960 GEN_TWOVEC_TEST(sqshlu_b_b_0,  "sqshlu b5, b28, #0",  5, 28)
   3961 GEN_TWOVEC_TEST(sqshlu_b_b_1,  "sqshlu b5, b28, #1",  5, 28)
   3962 GEN_TWOVEC_TEST(sqshlu_b_b_2,  "sqshlu b5, b28, #2",  5, 28)
   3963 GEN_TWOVEC_TEST(sqshlu_b_b_3,  "sqshlu b5, b28, #3",  5, 28)
   3964 GEN_TWOVEC_TEST(sqshlu_b_b_4,  "sqshlu b5, b28, #4",  5, 28)
   3965 GEN_TWOVEC_TEST(sqshlu_b_b_5,  "sqshlu b5, b28, #5",  5, 28)
   3966 GEN_TWOVEC_TEST(sqshlu_b_b_6,  "sqshlu b5, b28, #6",  5, 28)
   3967 GEN_TWOVEC_TEST(sqshlu_b_b_7,  "sqshlu b5, b28, #7",  5, 28)
   3968 
   /* sqshl / uqshl / sqshlu, vector shift-by-immediate forms.
      Each mnemonic is listed for the arrangements 2d, 4s, 2s, 8h, 4h,
      16b and 8b with three immediates per arrangement: #0, a middle
      value (#32 / #16 / #8 / #3) and the largest one used here
      (#63 / #31 / #15 / #7).  Destination is v6 and source is v27. */
   3969 GEN_TWOVEC_TEST(sqshl_2d_2d_0,   "sqshl v6.2d,  v27.2d, #0",  6, 27)
   3970 GEN_TWOVEC_TEST(sqshl_2d_2d_32,  "sqshl v6.2d,  v27.2d, #32", 6, 27)
   3971 GEN_TWOVEC_TEST(sqshl_2d_2d_63,  "sqshl v6.2d,  v27.2d, #63", 6, 27)
   3972 GEN_TWOVEC_TEST(sqshl_4s_4s_0,   "sqshl v6.4s,  v27.4s, #0",  6, 27)
   3973 GEN_TWOVEC_TEST(sqshl_4s_4s_16,  "sqshl v6.4s,  v27.4s, #16", 6, 27)
   3974 GEN_TWOVEC_TEST(sqshl_4s_4s_31,  "sqshl v6.4s,  v27.4s, #31", 6, 27)
   3975 GEN_TWOVEC_TEST(sqshl_2s_2s_0,   "sqshl v6.2s,  v27.2s, #0",  6, 27)
   3976 GEN_TWOVEC_TEST(sqshl_2s_2s_16,  "sqshl v6.2s,  v27.2s, #16", 6, 27)
   3977 GEN_TWOVEC_TEST(sqshl_2s_2s_31,  "sqshl v6.2s,  v27.2s, #31", 6, 27)
   3978 GEN_TWOVEC_TEST(sqshl_8h_8h_0,   "sqshl v6.8h,  v27.8h, #0",  6, 27)
   3979 GEN_TWOVEC_TEST(sqshl_8h_8h_8,   "sqshl v6.8h,  v27.8h, #8",  6, 27)
   3980 GEN_TWOVEC_TEST(sqshl_8h_8h_15,  "sqshl v6.8h,  v27.8h, #15", 6, 27)
   3981 GEN_TWOVEC_TEST(sqshl_4h_4h_0,   "sqshl v6.4h,  v27.4h, #0",  6, 27)
   3982 GEN_TWOVEC_TEST(sqshl_4h_4h_8,   "sqshl v6.4h,  v27.4h, #8",  6, 27)
   3983 GEN_TWOVEC_TEST(sqshl_4h_4h_15,  "sqshl v6.4h,  v27.4h, #15", 6, 27)
   3984 GEN_TWOVEC_TEST(sqshl_16b_16b_0, "sqshl v6.16b, v27.16b, #0", 6, 27)
   3985 GEN_TWOVEC_TEST(sqshl_16b_16b_3, "sqshl v6.16b, v27.16b, #3", 6, 27)
   3986 GEN_TWOVEC_TEST(sqshl_16b_16b_7, "sqshl v6.16b, v27.16b, #7", 6, 27)
   3987 GEN_TWOVEC_TEST(sqshl_8b_8b_0,   "sqshl v6.8b,  v27.8b, #0",  6, 27)
   3988 GEN_TWOVEC_TEST(sqshl_8b_8b_3,   "sqshl v6.8b,  v27.8b, #3",  6, 27)
   3989 GEN_TWOVEC_TEST(sqshl_8b_8b_7,   "sqshl v6.8b,  v27.8b, #7",  6, 27)
   3990 GEN_TWOVEC_TEST(uqshl_2d_2d_0,   "uqshl v6.2d,  v27.2d, #0",  6, 27)
   3991 GEN_TWOVEC_TEST(uqshl_2d_2d_32,  "uqshl v6.2d,  v27.2d, #32", 6, 27)
   3992 GEN_TWOVEC_TEST(uqshl_2d_2d_63,  "uqshl v6.2d,  v27.2d, #63", 6, 27)
   3993 GEN_TWOVEC_TEST(uqshl_4s_4s_0,   "uqshl v6.4s,  v27.4s, #0",  6, 27)
   3994 GEN_TWOVEC_TEST(uqshl_4s_4s_16,  "uqshl v6.4s,  v27.4s, #16", 6, 27)
   3995 GEN_TWOVEC_TEST(uqshl_4s_4s_31,  "uqshl v6.4s,  v27.4s, #31", 6, 27)
   3996 GEN_TWOVEC_TEST(uqshl_2s_2s_0,   "uqshl v6.2s,  v27.2s, #0",  6, 27)
   3997 GEN_TWOVEC_TEST(uqshl_2s_2s_16,  "uqshl v6.2s,  v27.2s, #16", 6, 27)
   3998 GEN_TWOVEC_TEST(uqshl_2s_2s_31,  "uqshl v6.2s,  v27.2s, #31", 6, 27)
   3999 GEN_TWOVEC_TEST(uqshl_8h_8h_0,   "uqshl v6.8h,  v27.8h, #0",  6, 27)
   4000 GEN_TWOVEC_TEST(uqshl_8h_8h_8,   "uqshl v6.8h,  v27.8h, #8",  6, 27)
   4001 GEN_TWOVEC_TEST(uqshl_8h_8h_15,  "uqshl v6.8h,  v27.8h, #15", 6, 27)
   4002 GEN_TWOVEC_TEST(uqshl_4h_4h_0,   "uqshl v6.4h,  v27.4h, #0",  6, 27)
   4003 GEN_TWOVEC_TEST(uqshl_4h_4h_8,   "uqshl v6.4h,  v27.4h, #8",  6, 27)
   4004 GEN_TWOVEC_TEST(uqshl_4h_4h_15,  "uqshl v6.4h,  v27.4h, #15", 6, 27)
   4005 GEN_TWOVEC_TEST(uqshl_16b_16b_0, "uqshl v6.16b, v27.16b, #0", 6, 27)
   4006 GEN_TWOVEC_TEST(uqshl_16b_16b_3, "uqshl v6.16b, v27.16b, #3", 6, 27)
   4007 GEN_TWOVEC_TEST(uqshl_16b_16b_7, "uqshl v6.16b, v27.16b, #7", 6, 27)
   4008 GEN_TWOVEC_TEST(uqshl_8b_8b_0,   "uqshl v6.8b,  v27.8b, #0",  6, 27)
   4009 GEN_TWOVEC_TEST(uqshl_8b_8b_3,   "uqshl v6.8b,  v27.8b, #3",  6, 27)
   4010 GEN_TWOVEC_TEST(uqshl_8b_8b_7,   "uqshl v6.8b,  v27.8b, #7",  6, 27)
   4011 GEN_TWOVEC_TEST(sqshlu_2d_2d_0,   "sqshlu v6.2d,  v27.2d, #0",  6, 27)
   4012 GEN_TWOVEC_TEST(sqshlu_2d_2d_32,  "sqshlu v6.2d,  v27.2d, #32", 6, 27)
   4013 GEN_TWOVEC_TEST(sqshlu_2d_2d_63,  "sqshlu v6.2d,  v27.2d, #63", 6, 27)
   4014 GEN_TWOVEC_TEST(sqshlu_4s_4s_0,   "sqshlu v6.4s,  v27.4s, #0",  6, 27)
   4015 GEN_TWOVEC_TEST(sqshlu_4s_4s_16,  "sqshlu v6.4s,  v27.4s, #16", 6, 27)
   4016 GEN_TWOVEC_TEST(sqshlu_4s_4s_31,  "sqshlu v6.4s,  v27.4s, #31", 6, 27)
   4017 GEN_TWOVEC_TEST(sqshlu_2s_2s_0,   "sqshlu v6.2s,  v27.2s, #0",  6, 27)
   4018 GEN_TWOVEC_TEST(sqshlu_2s_2s_16,  "sqshlu v6.2s,  v27.2s, #16", 6, 27)
   4019 GEN_TWOVEC_TEST(sqshlu_2s_2s_31,  "sqshlu v6.2s,  v27.2s, #31", 6, 27)
   4020 GEN_TWOVEC_TEST(sqshlu_8h_8h_0,   "sqshlu v6.8h,  v27.8h, #0",  6, 27)
   4021 GEN_TWOVEC_TEST(sqshlu_8h_8h_8,   "sqshlu v6.8h,  v27.8h, #8",  6, 27)
   4022 GEN_TWOVEC_TEST(sqshlu_8h_8h_15,  "sqshlu v6.8h,  v27.8h, #15", 6, 27)
   4023 GEN_TWOVEC_TEST(sqshlu_4h_4h_0,   "sqshlu v6.4h,  v27.4h, #0",  6, 27)
   4024 GEN_TWOVEC_TEST(sqshlu_4h_4h_8,   "sqshlu v6.4h,  v27.4h, #8",  6, 27)
   4025 GEN_TWOVEC_TEST(sqshlu_4h_4h_15,  "sqshlu v6.4h,  v27.4h, #15", 6, 27)
   4026 GEN_TWOVEC_TEST(sqshlu_16b_16b_0, "sqshlu v6.16b, v27.16b, #0", 6, 27)
   4027 GEN_TWOVEC_TEST(sqshlu_16b_16b_3, "sqshlu v6.16b, v27.16b, #3", 6, 27)
   4028 GEN_TWOVEC_TEST(sqshlu_16b_16b_7, "sqshlu v6.16b, v27.16b, #7", 6, 27)
   4029 GEN_TWOVEC_TEST(sqshlu_8b_8b_0,   "sqshlu v6.8b,  v27.8b, #0",  6, 27)
   4030 GEN_TWOVEC_TEST(sqshlu_8b_8b_3,   "sqshlu v6.8b,  v27.8b, #3",  6, 27)
   4031 GEN_TWOVEC_TEST(sqshlu_8b_8b_7,   "sqshlu v6.8b,  v27.8b, #7",  6, 27)
   4032 
   /* sqxtn / uqxtn / sqxtun, scalar forms.  Destination/source register
      classes are s<-d, h<-s and b<-h for each mnemonic.  Destination is
      register 31 and source is register 0, matching the trailing
      31, 0. */
   4033 GEN_TWOVEC_TEST(sqxtn_s_d,  "sqxtn s31,  d0", 31, 0)
   4034 GEN_TWOVEC_TEST(sqxtn_h_s,  "sqxtn h31,  s0", 31, 0)
   4035 GEN_TWOVEC_TEST(sqxtn_b_h,  "sqxtn b31,  h0", 31, 0)
   4036 GEN_TWOVEC_TEST(uqxtn_s_d,  "uqxtn s31,  d0", 31, 0)
   4037 GEN_TWOVEC_TEST(uqxtn_h_s,  "uqxtn h31,  s0", 31, 0)
   4038 GEN_TWOVEC_TEST(uqxtn_b_h,  "uqxtn b31,  h0", 31, 0)
   4039 GEN_TWOVEC_TEST(sqxtun_s_d, "sqxtun s31, d0", 31, 0)
   4040 GEN_TWOVEC_TEST(sqxtun_h_s, "sqxtun h31, s0", 31, 0)
   4041 GEN_TWOVEC_TEST(sqxtun_b_h, "sqxtun b31, h0", 31, 0)
   4042 
   /* sqxtn / uqxtn / sqxtun and their "2"-suffixed variants, vector
      forms.  Arrangement pairs listed: 2s/4s with 2d, 4h/8h with 4s,
      8b/16b with 8h.
      NOTE(review): GEN_UNARY_TEST is defined outside this part of the
      file.  Its arguments look like (mnemonic, destination arrangement,
      source arrangement), with the instruction text and register
      numbers presumably built inside the macro -- confirm against the
      macro definition. */
   4043 GEN_UNARY_TEST(sqxtn,   2s, 2d)
   4044 GEN_UNARY_TEST(sqxtn2,  4s, 2d)
   4045 GEN_UNARY_TEST(sqxtn,   4h, 4s)
   4046 GEN_UNARY_TEST(sqxtn2,  8h, 4s)
   4047 GEN_UNARY_TEST(sqxtn,   8b, 8h)
   4048 GEN_UNARY_TEST(sqxtn2, 16b, 8h)
   4049 GEN_UNARY_TEST(uqxtn,   2s, 2d)
   4050 GEN_UNARY_TEST(uqxtn2,  4s, 2d)
   4051 GEN_UNARY_TEST(uqxtn,   4h, 4s)
   4052 GEN_UNARY_TEST(uqxtn2,  8h, 4s)
   4053 GEN_UNARY_TEST(uqxtn,   8b, 8h)
   4054 GEN_UNARY_TEST(uqxtn2, 16b, 8h)
   4055 GEN_UNARY_TEST(sqxtun,   2s, 2d)
   4056 GEN_UNARY_TEST(sqxtun2,  4s, 2d)
   4057 GEN_UNARY_TEST(sqxtun,   4h, 4s)
   4058 GEN_UNARY_TEST(sqxtun2,  8h, 4s)
   4059 GEN_UNARY_TEST(sqxtun,   8b, 8h)
   4060 GEN_UNARY_TEST(sqxtun2, 16b, 8h)
   4061 
   /* srhadd / urhadd, vector three-register forms for the arrangements
      4s, 2s, 8h, 4h, 16b and 8b.  Registers are v2, v11 and v29
      throughout, matching the trailing 2, 11, 29.  The 16b invocations
      are wrapped over two lines. */
   4062 GEN_THREEVEC_TEST(srhadd_4s_4s_4s,"srhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   4063 GEN_THREEVEC_TEST(srhadd_2s_2s_2s,"srhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   4064 GEN_THREEVEC_TEST(srhadd_8h_8h_8h,"srhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   4065 GEN_THREEVEC_TEST(srhadd_4h_4h_4h,"srhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   4066 GEN_THREEVEC_TEST(srhadd_16b_16b_16b,
   4067                                   "srhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   4068 GEN_THREEVEC_TEST(srhadd_8b_8b_8b,"srhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   4069 GEN_THREEVEC_TEST(urhadd_4s_4s_4s,"urhadd v2.4s,  v11.4s,  v29.4s", 2, 11, 29)
   4070 GEN_THREEVEC_TEST(urhadd_2s_2s_2s,"urhadd v2.2s,  v11.2s,  v29.2s", 2, 11, 29)
   4071 GEN_THREEVEC_TEST(urhadd_8h_8h_8h,"urhadd v2.8h,  v11.8h,  v29.8h", 2, 11, 29)
   4072 GEN_THREEVEC_TEST(urhadd_4h_4h_4h,"urhadd v2.4h,  v11.4h,  v29.4h", 2, 11, 29)
   4073 GEN_THREEVEC_TEST(urhadd_16b_16b_16b,
   4074                                   "urhadd v2.16b, v11.16b, v29.16b", 2, 11, 29)
   4075 GEN_THREEVEC_TEST(urhadd_8b_8b_8b,"urhadd v2.8b,  v11.8b,  v29.8b", 2, 11, 29)
   4076 
   /* sshl / ushl, scalar three-register forms.  Only d registers are
      listed here (d29, d28, d27). */
   4077 GEN_THREEVEC_TEST(sshl_d_d_d, "sshl d29, d28, d27", 29, 28, 27)
   4078 GEN_THREEVEC_TEST(ushl_d_d_d, "ushl d29, d28, d27", 29, 28, 27)
   4079 
   /* sshl / ushl, vector three-register forms for the arrangements 2d,
      4s, 2s, 8h, 4h, 16b and 8b.  Registers are v29, v28 and v27
      throughout, matching the trailing 29,28,27. */
   4080 GEN_THREEVEC_TEST(sshl_2d_2d_2d,    "sshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4081 GEN_THREEVEC_TEST(sshl_4s_4s_4s,    "sshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4082 GEN_THREEVEC_TEST(sshl_2s_2s_2s,    "sshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4083 GEN_THREEVEC_TEST(sshl_8h_8h_8h,    "sshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4084 GEN_THREEVEC_TEST(sshl_4h_4h_4h,    "sshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4085 GEN_THREEVEC_TEST(sshl_16b_16b_16b, "sshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4086 GEN_THREEVEC_TEST(sshl_8b_8b_8b,    "sshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4087 GEN_THREEVEC_TEST(ushl_2d_2d_2d,    "ushl v29.2d, v28.2d, v27.2d", 29,28,27)
   4088 GEN_THREEVEC_TEST(ushl_4s_4s_4s,    "ushl v29.4s, v28.4s, v27.4s", 29,28,27)
   4089 GEN_THREEVEC_TEST(ushl_2s_2s_2s,    "ushl v29.2s, v28.2s, v27.2s", 29,28,27)
   4090 GEN_THREEVEC_TEST(ushl_8h_8h_8h,    "ushl v29.8h, v28.8h, v27.8h", 29,28,27)
   4091 GEN_THREEVEC_TEST(ushl_4h_4h_4h,    "ushl v29.4h, v28.4h, v27.4h", 29,28,27)
   4092 GEN_THREEVEC_TEST(ushl_16b_16b_16b, "ushl v29.16b, v28.16b, v27.16b", 29,28,27)
   4093 GEN_THREEVEC_TEST(ushl_8b_8b_8b,    "ushl v29.8b, v28.8b, v27.8b", 29,28,27)
   4094 
   /* shl / sshr / ushr, scalar shift-by-immediate forms on d registers.
      shl uses #0, #32 and #63; sshr and ushr use #1, #32 and #64.
      Destination is d5 and source is d28. */
   4095 GEN_TWOVEC_TEST(shl_d_d_0,  "shl d5, d28, #0",  5, 28)
   4096 GEN_TWOVEC_TEST(shl_d_d_32, "shl d5, d28, #32", 5, 28)
   4097 GEN_TWOVEC_TEST(shl_d_d_63, "shl d5, d28, #63", 5, 28)
   4098 GEN_TWOVEC_TEST(sshr_d_d_1,  "sshr d5, d28, #1",  5, 28)
   4099 GEN_TWOVEC_TEST(sshr_d_d_32, "sshr d5, d28, #32", 5, 28)
   4100 GEN_TWOVEC_TEST(sshr_d_d_64, "sshr d5, d28, #64", 5, 28)
   4101 GEN_TWOVEC_TEST(ushr_d_d_1,  "ushr d5, d28, #1",  5, 28)
   4102 GEN_TWOVEC_TEST(ushr_d_d_32, "ushr d5, d28, #32", 5, 28)
   4103 GEN_TWOVEC_TEST(ushr_d_d_64, "ushr d5, d28, #64", 5, 28)
   4104 
   /* shl / sshr / ushr, vector shift-by-immediate forms.
      shl amounts: 0, 13 and 63 / 31 / 15 for the d / s / h
      arrangements, and 0 and 7 for the b arrangements.
      sshr and ushr amounts: 1, 13 and 64 / 32 / 16 for the d / s / h
      arrangements, and 1 and 8 for the b arrangements.
      NOTE(review): GEN_SHIFT_TEST is defined outside this part of the
      file.  Its arguments look like (mnemonic, destination arrangement,
      source arrangement, shift amount), with the instruction text and
      register numbers presumably built inside the macro -- confirm
      against the macro definition. */
   4105 GEN_SHIFT_TEST(shl,  2d, 2d, 0)
   4106 GEN_SHIFT_TEST(shl,  2d, 2d, 13)
   4107 GEN_SHIFT_TEST(shl,  2d, 2d, 63)
   4108 GEN_SHIFT_TEST(shl,  4s, 4s, 0)
   4109 GEN_SHIFT_TEST(shl,  4s, 4s, 13)
   4110 GEN_SHIFT_TEST(shl,  4s, 4s, 31)
   4111 GEN_SHIFT_TEST(shl,  2s, 2s, 0)
   4112 GEN_SHIFT_TEST(shl,  2s, 2s, 13)
   4113 GEN_SHIFT_TEST(shl,  2s, 2s, 31)
   4114 GEN_SHIFT_TEST(shl,  8h, 8h, 0)
   4115 GEN_SHIFT_TEST(shl,  8h, 8h, 13)
   4116 GEN_SHIFT_TEST(shl,  8h, 8h, 15)
   4117 GEN_SHIFT_TEST(shl,  4h, 4h, 0)
   4118 GEN_SHIFT_TEST(shl,  4h, 4h, 13)
   4119 GEN_SHIFT_TEST(shl,  4h, 4h, 15)
   4120 GEN_SHIFT_TEST(shl,  16b, 16b, 0)
   4121 GEN_SHIFT_TEST(shl,  16b, 16b, 7)
   4122 GEN_SHIFT_TEST(shl,  8b, 8b, 0)
   4123 GEN_SHIFT_TEST(shl,  8b, 8b, 7)
   4124 GEN_SHIFT_TEST(sshr, 2d, 2d, 1)
   4125 GEN_SHIFT_TEST(sshr, 2d, 2d, 13)
   4126 GEN_SHIFT_TEST(sshr, 2d, 2d, 64)
   4127 GEN_SHIFT_TEST(sshr, 4s, 4s, 1)
   4128 GEN_SHIFT_TEST(sshr, 4s, 4s, 13)
   4129 GEN_SHIFT_TEST(sshr, 4s, 4s, 32)
   4130 GEN_SHIFT_TEST(sshr, 2s, 2s, 1)
   4131 GEN_SHIFT_TEST(sshr, 2s, 2s, 13)
   4132 GEN_SHIFT_TEST(sshr, 2s, 2s, 32)
   4133 GEN_SHIFT_TEST(sshr, 8h, 8h, 1)
   4134 GEN_SHIFT_TEST(sshr, 8h, 8h, 13)
   4135 GEN_SHIFT_TEST(sshr, 8h, 8h, 16)
   4136 GEN_SHIFT_TEST(sshr, 4h, 4h, 1)
   4137 GEN_SHIFT_TEST(sshr, 4h, 4h, 13)
   4138 GEN_SHIFT_TEST(sshr, 4h, 4h, 16)
   4139 GEN_SHIFT_TEST(sshr, 16b, 16b, 1)
   4140 GEN_SHIFT_TEST(sshr, 16b, 16b, 8)
   4141 GEN_SHIFT_TEST(sshr, 8b, 8b, 1)
   4142 GEN_SHIFT_TEST(sshr, 8b, 8b, 8)
   4143 GEN_SHIFT_TEST(ushr, 2d, 2d, 1)
   4144 GEN_SHIFT_TEST(ushr, 2d, 2d, 13)
   4145 GEN_SHIFT_TEST(ushr, 2d, 2d, 64)
   4146 GEN_SHIFT_TEST(ushr, 4s, 4s, 1)
   4147 GEN_SHIFT_TEST(ushr, 4s, 4s, 13)
   4148 GEN_SHIFT_TEST(ushr, 4s, 4s, 32)
   4149 GEN_SHIFT_TEST(ushr, 2s, 2s, 1)
   4150 GEN_SHIFT_TEST(ushr, 2s, 2s, 13)
   4151 GEN_SHIFT_TEST(ushr, 2s, 2s, 32)
   4152 GEN_SHIFT_TEST(ushr, 8h, 8h, 1)
   4153 GEN_SHIFT_TEST(ushr, 8h, 8h, 13)
   4154 GEN_SHIFT_TEST(ushr, 8h, 8h, 16)
   4155 GEN_SHIFT_TEST(ushr, 4h, 4h, 1)
   4156 GEN_SHIFT_TEST(ushr, 4h, 4h, 13)
   4157 GEN_SHIFT_TEST(ushr, 4h, 4h, 16)
   4158 GEN_SHIFT_TEST(ushr, 16b, 16b, 1)
   4159 GEN_SHIFT_TEST(ushr, 16b, 16b, 8)
   4160 GEN_SHIFT_TEST(ushr, 8b, 8b, 1)
   4161 GEN_SHIFT_TEST(ushr, 8b, 8b, 8)
   4162 
   /* ssra / usra, scalar shift-by-immediate forms on d registers with
      immediates #1, #32 and #64.  Registers are d5 and d28, matching
      the trailing 5, 28. */
   4163 GEN_TWOVEC_TEST(ssra_d_d_1,  "ssra d5, d28, #1",  5, 28)
   4164 GEN_TWOVEC_TEST(ssra_d_d_32, "ssra d5, d28, #32", 5, 28)
   4165 GEN_TWOVEC_TEST(ssra_d_d_64, "ssra d5, d28, #64", 5, 28)
   4166 GEN_TWOVEC_TEST(usra_d_d_1,  "usra d5, d28, #1",  5, 28)
   4167 GEN_TWOVEC_TEST(usra_d_d_32, "usra d5, d28, #32", 5, 28)
   4168 GEN_TWOVEC_TEST(usra_d_d_64, "usra d5, d28, #64", 5, 28)
   4169 
   // SSRA / USRA, vector forms, using v6 as destination and v27 as source.
   // Every arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b) is exercised with three
   // shift amounts: 1, a mid-range value, and the lane width in bits.
   4170 GEN_TWOVEC_TEST(ssra_2d_2d_1,   "ssra v6.2d,  v27.2d, #1",  6, 27)
   4171 GEN_TWOVEC_TEST(ssra_2d_2d_32,  "ssra v6.2d,  v27.2d, #32", 6, 27)
   4172 GEN_TWOVEC_TEST(ssra_2d_2d_64,  "ssra v6.2d,  v27.2d, #64", 6, 27)
   4173 GEN_TWOVEC_TEST(ssra_4s_4s_1,   "ssra v6.4s,  v27.4s, #1",  6, 27)
   4174 GEN_TWOVEC_TEST(ssra_4s_4s_16,  "ssra v6.4s,  v27.4s, #16", 6, 27)
   4175 GEN_TWOVEC_TEST(ssra_4s_4s_32,  "ssra v6.4s,  v27.4s, #32", 6, 27)
   4176 GEN_TWOVEC_TEST(ssra_2s_2s_1,   "ssra v6.2s,  v27.2s, #1",  6, 27)
   4177 GEN_TWOVEC_TEST(ssra_2s_2s_16,  "ssra v6.2s,  v27.2s, #16", 6, 27)
   4178 GEN_TWOVEC_TEST(ssra_2s_2s_32,  "ssra v6.2s,  v27.2s, #32", 6, 27)
   4179 GEN_TWOVEC_TEST(ssra_8h_8h_1,   "ssra v6.8h,  v27.8h, #1",  6, 27)
   4180 GEN_TWOVEC_TEST(ssra_8h_8h_8,   "ssra v6.8h,  v27.8h, #8",  6, 27)
   4181 GEN_TWOVEC_TEST(ssra_8h_8h_16,  "ssra v6.8h,  v27.8h, #16", 6, 27)
   4182 GEN_TWOVEC_TEST(ssra_4h_4h_1,   "ssra v6.4h,  v27.4h, #1",  6, 27)
   4183 GEN_TWOVEC_TEST(ssra_4h_4h_8,   "ssra v6.4h,  v27.4h, #8",  6, 27)
   4184 GEN_TWOVEC_TEST(ssra_4h_4h_16,  "ssra v6.4h,  v27.4h, #16", 6, 27)
   4185 GEN_TWOVEC_TEST(ssra_16b_16b_1, "ssra v6.16b, v27.16b, #1", 6, 27)
   4186 GEN_TWOVEC_TEST(ssra_16b_16b_3, "ssra v6.16b, v27.16b, #3", 6, 27)
   4187 GEN_TWOVEC_TEST(ssra_16b_16b_8, "ssra v6.16b, v27.16b, #8", 6, 27)
   4188 GEN_TWOVEC_TEST(ssra_8b_8b_1,   "ssra v6.8b,  v27.8b, #1",  6, 27)
   4189 GEN_TWOVEC_TEST(ssra_8b_8b_3,   "ssra v6.8b,  v27.8b, #3",  6, 27)
   4190 GEN_TWOVEC_TEST(ssra_8b_8b_8,   "ssra v6.8b,  v27.8b, #8",  6, 27)
   4191 GEN_TWOVEC_TEST(usra_2d_2d_1,   "usra v6.2d,  v27.2d, #1",  6, 27)
   4192 GEN_TWOVEC_TEST(usra_2d_2d_32,  "usra v6.2d,  v27.2d, #32", 6, 27)
   4193 GEN_TWOVEC_TEST(usra_2d_2d_64,  "usra v6.2d,  v27.2d, #64", 6, 27)
   4194 GEN_TWOVEC_TEST(usra_4s_4s_1,   "usra v6.4s,  v27.4s, #1",  6, 27)
   4195 GEN_TWOVEC_TEST(usra_4s_4s_16,  "usra v6.4s,  v27.4s, #16", 6, 27)
   4196 GEN_TWOVEC_TEST(usra_4s_4s_32,  "usra v6.4s,  v27.4s, #32", 6, 27)
   4197 GEN_TWOVEC_TEST(usra_2s_2s_1,   "usra v6.2s,  v27.2s, #1",  6, 27)
   4198 GEN_TWOVEC_TEST(usra_2s_2s_16,  "usra v6.2s,  v27.2s, #16", 6, 27)
   4199 GEN_TWOVEC_TEST(usra_2s_2s_32,  "usra v6.2s,  v27.2s, #32", 6, 27)
   4200 GEN_TWOVEC_TEST(usra_8h_8h_1,   "usra v6.8h,  v27.8h, #1",  6, 27)
   4201 GEN_TWOVEC_TEST(usra_8h_8h_8,   "usra v6.8h,  v27.8h, #8",  6, 27)
   4202 GEN_TWOVEC_TEST(usra_8h_8h_16,  "usra v6.8h,  v27.8h, #16", 6, 27)
   4203 GEN_TWOVEC_TEST(usra_4h_4h_1,   "usra v6.4h,  v27.4h, #1",  6, 27)
   4204 GEN_TWOVEC_TEST(usra_4h_4h_8,   "usra v6.4h,  v27.4h, #8",  6, 27)
   4205 GEN_TWOVEC_TEST(usra_4h_4h_16,  "usra v6.4h,  v27.4h, #16", 6, 27)
   4206 GEN_TWOVEC_TEST(usra_16b_16b_1, "usra v6.16b, v27.16b, #1", 6, 27)
   4207 GEN_TWOVEC_TEST(usra_16b_16b_3, "usra v6.16b, v27.16b, #3", 6, 27)
   4208 GEN_TWOVEC_TEST(usra_16b_16b_8, "usra v6.16b, v27.16b, #8", 6, 27)
   4209 GEN_TWOVEC_TEST(usra_8b_8b_1,   "usra v6.8b,  v27.8b, #1",  6, 27)
   4210 GEN_TWOVEC_TEST(usra_8b_8b_3,   "usra v6.8b,  v27.8b, #3",  6, 27)
   4211 GEN_TWOVEC_TEST(usra_8b_8b_8,   "usra v6.8b,  v27.8b, #8",  6, 27)
   4212 
   // SRSHL / URSHL (signed / unsigned rounding shift left by register),
   // scalar D-register form.  Each invocation passes a test name, the
   // instruction text, and the three vector register numbers used in it.
   4213 GEN_THREEVEC_TEST(srshl_d_d_d, "srshl d29, d28, d27", 29, 28, 27)
   4214 GEN_THREEVEC_TEST(urshl_d_d_d, "urshl d29, d28, d27", 29, 28, 27)
   4215 
   // SRSHL / URSHL, vector forms: one test per arrangement
   // (2d, 4s, 2s, 8h, 4h, 16b, 8b), all on registers v29, v28, v27.
   4216 GEN_THREEVEC_TEST(srshl_2d_2d_2d,   "srshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4217 GEN_THREEVEC_TEST(srshl_4s_4s_4s,   "srshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4218 GEN_THREEVEC_TEST(srshl_2s_2s_2s,   "srshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4219 GEN_THREEVEC_TEST(srshl_8h_8h_8h,   "srshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4220 GEN_THREEVEC_TEST(srshl_4h_4h_4h,   "srshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4221 GEN_THREEVEC_TEST(srshl_16b_16b_16b,"srshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4222 GEN_THREEVEC_TEST(srshl_8b_8b_8b,   "srshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4223 GEN_THREEVEC_TEST(urshl_2d_2d_2d,   "urshl v29.2d, v28.2d, v27.2d", 29,28,27)
   4224 GEN_THREEVEC_TEST(urshl_4s_4s_4s,   "urshl v29.4s, v28.4s, v27.4s", 29,28,27)
   4225 GEN_THREEVEC_TEST(urshl_2s_2s_2s,   "urshl v29.2s, v28.2s, v27.2s", 29,28,27)
   4226 GEN_THREEVEC_TEST(urshl_8h_8h_8h,   "urshl v29.8h, v28.8h, v27.8h", 29,28,27)
   4227 GEN_THREEVEC_TEST(urshl_4h_4h_4h,   "urshl v29.4h, v28.4h, v27.4h", 29,28,27)
   4228 GEN_THREEVEC_TEST(urshl_16b_16b_16b,"urshl v29.16b, v28.16b, v27.16b", 29,28,27)
   4229 GEN_THREEVEC_TEST(urshl_8b_8b_8b,   "urshl v29.8b, v28.8b, v27.8b", 29,28,27)
   4230 
   // SRSHR / URSHR (signed / unsigned rounding shift right by immediate),
   // scalar D-register form, with shift amounts 1, 32 and 64.
   4231 GEN_TWOVEC_TEST(srshr_d_d_1,  "srshr d5, d28, #1",  5, 28)
   4232 GEN_TWOVEC_TEST(srshr_d_d_32, "srshr d5, d28, #32", 5, 28)
   4233 GEN_TWOVEC_TEST(srshr_d_d_64, "srshr d5, d28, #64", 5, 28)
   4234 GEN_TWOVEC_TEST(urshr_d_d_1,  "urshr d5, d28, #1",  5, 28)
   4235 GEN_TWOVEC_TEST(urshr_d_d_32, "urshr d5, d28, #32", 5, 28)
   4236 GEN_TWOVEC_TEST(urshr_d_d_64, "urshr d5, d28, #64", 5, 28)
   4237 
   // SRSHR / URSHR, vector forms (v6 <- v27).  Every arrangement is
   // exercised with three shift amounts: 1, a mid-range value, and the lane
   // width in bits.
   4238 GEN_TWOVEC_TEST(srshr_2d_2d_1,   "srshr v6.2d,  v27.2d, #1",  6, 27)
   4239 GEN_TWOVEC_TEST(srshr_2d_2d_32,  "srshr v6.2d,  v27.2d, #32", 6, 27)
   4240 GEN_TWOVEC_TEST(srshr_2d_2d_64,  "srshr v6.2d,  v27.2d, #64", 6, 27)
   4241 GEN_TWOVEC_TEST(srshr_4s_4s_1,   "srshr v6.4s,  v27.4s, #1",  6, 27)
   4242 GEN_TWOVEC_TEST(srshr_4s_4s_16,  "srshr v6.4s,  v27.4s, #16", 6, 27)
   4243 GEN_TWOVEC_TEST(srshr_4s_4s_32,  "srshr v6.4s,  v27.4s, #32", 6, 27)
   4244 GEN_TWOVEC_TEST(srshr_2s_2s_1,   "srshr v6.2s,  v27.2s, #1",  6, 27)
   4245 GEN_TWOVEC_TEST(srshr_2s_2s_16,  "srshr v6.2s,  v27.2s, #16", 6, 27)
   4246 GEN_TWOVEC_TEST(srshr_2s_2s_32,  "srshr v6.2s,  v27.2s, #32", 6, 27)
   4247 GEN_TWOVEC_TEST(srshr_8h_8h_1,   "srshr v6.8h,  v27.8h, #1",  6, 27)
   4248 GEN_TWOVEC_TEST(srshr_8h_8h_8,   "srshr v6.8h,  v27.8h, #8",  6, 27)
   4249 GEN_TWOVEC_TEST(srshr_8h_8h_16,  "srshr v6.8h,  v27.8h, #16", 6, 27)
   4250 GEN_TWOVEC_TEST(srshr_4h_4h_1,   "srshr v6.4h,  v27.4h, #1",  6, 27)
   4251 GEN_TWOVEC_TEST(srshr_4h_4h_8,   "srshr v6.4h,  v27.4h, #8",  6, 27)
   4252 GEN_TWOVEC_TEST(srshr_4h_4h_16,  "srshr v6.4h,  v27.4h, #16", 6, 27)
   4253 GEN_TWOVEC_TEST(srshr_16b_16b_1, "srshr v6.16b, v27.16b, #1", 6, 27)
   4254 GEN_TWOVEC_TEST(srshr_16b_16b_3, "srshr v6.16b, v27.16b, #3", 6, 27)
   4255 GEN_TWOVEC_TEST(srshr_16b_16b_8, "srshr v6.16b, v27.16b, #8", 6, 27)
   4256 GEN_TWOVEC_TEST(srshr_8b_8b_1,   "srshr v6.8b,  v27.8b, #1",  6, 27)
   4257 GEN_TWOVEC_TEST(srshr_8b_8b_3,   "srshr v6.8b,  v27.8b, #3",  6, 27)
   4258 GEN_TWOVEC_TEST(srshr_8b_8b_8,   "srshr v6.8b,  v27.8b, #8",  6, 27)
   4259 GEN_TWOVEC_TEST(urshr_2d_2d_1,   "urshr v6.2d,  v27.2d, #1",  6, 27)
   4260 GEN_TWOVEC_TEST(urshr_2d_2d_32,  "urshr v6.2d,  v27.2d, #32", 6, 27)
   4261 GEN_TWOVEC_TEST(urshr_2d_2d_64,  "urshr v6.2d,  v27.2d, #64", 6, 27)
   4262 GEN_TWOVEC_TEST(urshr_4s_4s_1,   "urshr v6.4s,  v27.4s, #1",  6, 27)
   4263 GEN_TWOVEC_TEST(urshr_4s_4s_16,  "urshr v6.4s,  v27.4s, #16", 6, 27)
   4264 GEN_TWOVEC_TEST(urshr_4s_4s_32,  "urshr v6.4s,  v27.4s, #32", 6, 27)
   4265 GEN_TWOVEC_TEST(urshr_2s_2s_1,   "urshr v6.2s,  v27.2s, #1",  6, 27)
   4266 GEN_TWOVEC_TEST(urshr_2s_2s_16,  "urshr v6.2s,  v27.2s, #16", 6, 27)
   4267 GEN_TWOVEC_TEST(urshr_2s_2s_32,  "urshr v6.2s,  v27.2s, #32", 6, 27)
   4268 GEN_TWOVEC_TEST(urshr_8h_8h_1,   "urshr v6.8h,  v27.8h, #1",  6, 27)
   4269 GEN_TWOVEC_TEST(urshr_8h_8h_8,   "urshr v6.8h,  v27.8h, #8",  6, 27)
   4270 GEN_TWOVEC_TEST(urshr_8h_8h_16,  "urshr v6.8h,  v27.8h, #16", 6, 27)
   4271 GEN_TWOVEC_TEST(urshr_4h_4h_1,   "urshr v6.4h,  v27.4h, #1",  6, 27)
   4272 GEN_TWOVEC_TEST(urshr_4h_4h_8,   "urshr v6.4h,  v27.4h, #8",  6, 27)
   4273 GEN_TWOVEC_TEST(urshr_4h_4h_16,  "urshr v6.4h,  v27.4h, #16", 6, 27)
   4274 GEN_TWOVEC_TEST(urshr_16b_16b_1, "urshr v6.16b, v27.16b, #1", 6, 27)
   4275 GEN_TWOVEC_TEST(urshr_16b_16b_3, "urshr v6.16b, v27.16b, #3", 6, 27)
   4276 GEN_TWOVEC_TEST(urshr_16b_16b_8, "urshr v6.16b, v27.16b, #8", 6, 27)
   4277 GEN_TWOVEC_TEST(urshr_8b_8b_1,   "urshr v6.8b,  v27.8b, #1",  6, 27)
   4278 GEN_TWOVEC_TEST(urshr_8b_8b_3,   "urshr v6.8b,  v27.8b, #3",  6, 27)
   4279 GEN_TWOVEC_TEST(urshr_8b_8b_8,   "urshr v6.8b,  v27.8b, #8",  6, 27)
   4280 
   // SRSRA / URSRA (signed / unsigned rounding shift right by immediate and
   // accumulate), scalar D-register form, with shift amounts 1, 32 and 64.
   // NOTE(review): as accumulating instructions these also read d5;
   // presumably the macro initialises it -- confirm in GEN_TWOVEC_TEST.
   4281 GEN_TWOVEC_TEST(srsra_d_d_1,  "srsra d5, d28, #1",  5, 28)
   4282 GEN_TWOVEC_TEST(srsra_d_d_32, "srsra d5, d28, #32", 5, 28)
   4283 GEN_TWOVEC_TEST(srsra_d_d_64, "srsra d5, d28, #64", 5, 28)
   4284 GEN_TWOVEC_TEST(ursra_d_d_1,  "ursra d5, d28, #1",  5, 28)
   4285 GEN_TWOVEC_TEST(ursra_d_d_32, "ursra d5, d28, #32", 5, 28)
   4286 GEN_TWOVEC_TEST(ursra_d_d_64, "ursra d5, d28, #64", 5, 28)
   4287 
   // SRSRA / URSRA, vector forms (v6 accumulates a shifted v27).  Every
   // arrangement is exercised with three shift amounts: 1, a mid-range
   // value, and the lane width in bits.
   4288 GEN_TWOVEC_TEST(srsra_2d_2d_1,   "srsra v6.2d,  v27.2d, #1",  6, 27)
   4289 GEN_TWOVEC_TEST(srsra_2d_2d_32,  "srsra v6.2d,  v27.2d, #32", 6, 27)
   4290 GEN_TWOVEC_TEST(srsra_2d_2d_64,  "srsra v6.2d,  v27.2d, #64", 6, 27)
   4291 GEN_TWOVEC_TEST(srsra_4s_4s_1,   "srsra v6.4s,  v27.4s, #1",  6, 27)
   4292 GEN_TWOVEC_TEST(srsra_4s_4s_16,  "srsra v6.4s,  v27.4s, #16", 6, 27)
   4293 GEN_TWOVEC_TEST(srsra_4s_4s_32,  "srsra v6.4s,  v27.4s, #32", 6, 27)
   4294 GEN_TWOVEC_TEST(srsra_2s_2s_1,   "srsra v6.2s,  v27.2s, #1",  6, 27)
   4295 GEN_TWOVEC_TEST(srsra_2s_2s_16,  "srsra v6.2s,  v27.2s, #16", 6, 27)
   4296 GEN_TWOVEC_TEST(srsra_2s_2s_32,  "srsra v6.2s,  v27.2s, #32", 6, 27)
   4297 GEN_TWOVEC_TEST(srsra_8h_8h_1,   "srsra v6.8h,  v27.8h, #1",  6, 27)
   4298 GEN_TWOVEC_TEST(srsra_8h_8h_8,   "srsra v6.8h,  v27.8h, #8",  6, 27)
   4299 GEN_TWOVEC_TEST(srsra_8h_8h_16,  "srsra v6.8h,  v27.8h, #16", 6, 27)
   4300 GEN_TWOVEC_TEST(srsra_4h_4h_1,   "srsra v6.4h,  v27.4h, #1",  6, 27)
   4301 GEN_TWOVEC_TEST(srsra_4h_4h_8,   "srsra v6.4h,  v27.4h, #8",  6, 27)
   4302 GEN_TWOVEC_TEST(srsra_4h_4h_16,  "srsra v6.4h,  v27.4h, #16", 6, 27)
   4303 GEN_TWOVEC_TEST(srsra_16b_16b_1, "srsra v6.16b, v27.16b, #1", 6, 27)
   4304 GEN_TWOVEC_TEST(srsra_16b_16b_3, "srsra v6.16b, v27.16b, #3", 6, 27)
   4305 GEN_TWOVEC_TEST(srsra_16b_16b_8, "srsra v6.16b, v27.16b, #8", 6, 27)
   4306 GEN_TWOVEC_TEST(srsra_8b_8b_1,   "srsra v6.8b,  v27.8b, #1",  6, 27)
   4307 GEN_TWOVEC_TEST(srsra_8b_8b_3,   "srsra v6.8b,  v27.8b, #3",  6, 27)
   4308 GEN_TWOVEC_TEST(srsra_8b_8b_8,   "srsra v6.8b,  v27.8b, #8",  6, 27)
   4309 GEN_TWOVEC_TEST(ursra_2d_2d_1,   "ursra v6.2d,  v27.2d, #1",  6, 27)
   4310 GEN_TWOVEC_TEST(ursra_2d_2d_32,  "ursra v6.2d,  v27.2d, #32", 6, 27)
   4311 GEN_TWOVEC_TEST(ursra_2d_2d_64,  "ursra v6.2d,  v27.2d, #64", 6, 27)
   4312 GEN_TWOVEC_TEST(ursra_4s_4s_1,   "ursra v6.4s,  v27.4s, #1",  6, 27)
   4313 GEN_TWOVEC_TEST(ursra_4s_4s_16,  "ursra v6.4s,  v27.4s, #16", 6, 27)
   4314 GEN_TWOVEC_TEST(ursra_4s_4s_32,  "ursra v6.4s,  v27.4s, #32", 6, 27)
   4315 GEN_TWOVEC_TEST(ursra_2s_2s_1,   "ursra v6.2s,  v27.2s, #1",  6, 27)
   4316 GEN_TWOVEC_TEST(ursra_2s_2s_16,  "ursra v6.2s,  v27.2s, #16", 6, 27)
   4317 GEN_TWOVEC_TEST(ursra_2s_2s_32,  "ursra v6.2s,  v27.2s, #32", 6, 27)
   4318 GEN_TWOVEC_TEST(ursra_8h_8h_1,   "ursra v6.8h,  v27.8h, #1",  6, 27)
   4319 GEN_TWOVEC_TEST(ursra_8h_8h_8,   "ursra v6.8h,  v27.8h, #8",  6, 27)
   4320 GEN_TWOVEC_TEST(ursra_8h_8h_16,  "ursra v6.8h,  v27.8h, #16", 6, 27)
   4321 GEN_TWOVEC_TEST(ursra_4h_4h_1,   "ursra v6.4h,  v27.4h, #1",  6, 27)
   4322 GEN_TWOVEC_TEST(ursra_4h_4h_8,   "ursra v6.4h,  v27.4h, #8",  6, 27)
   4323 GEN_TWOVEC_TEST(ursra_4h_4h_16,  "ursra v6.4h,  v27.4h, #16", 6, 27)
   4324 GEN_TWOVEC_TEST(ursra_16b_16b_1, "ursra v6.16b, v27.16b, #1", 6, 27)
   4325 GEN_TWOVEC_TEST(ursra_16b_16b_3, "ursra v6.16b, v27.16b, #3", 6, 27)
   4326 GEN_TWOVEC_TEST(ursra_16b_16b_8, "ursra v6.16b, v27.16b, #8", 6, 27)
   4327 GEN_TWOVEC_TEST(ursra_8b_8b_1,   "ursra v6.8b,  v27.8b, #1",  6, 27)
   4328 GEN_TWOVEC_TEST(ursra_8b_8b_3,   "ursra v6.8b,  v27.8b, #3",  6, 27)
   4329 GEN_TWOVEC_TEST(ursra_8b_8b_8,   "ursra v6.8b,  v27.8b, #8",  6, 27)
   4330 
   // SSHLL / USHLL and SSHLL2 / USHLL2 (signed / unsigned shift left long by
   // immediate).  The destination arrangement has lanes twice as wide as the
   // source; the "2" variants are given the full-width source arrangement
   // (4s, 8h, 16b).  For each source lane size the shift amounts are 0, a
   // mid-range value, and the source lane width in bits minus one.
   // NOTE(review): argument order (mnemonic, dest arrangement, source
   // arrangement, shift amount) is inferred -- GEN_SHIFT_TEST is defined
   // elsewhere.
   4331 GEN_SHIFT_TEST(sshll,  2d, 2s,  0)
   4332 GEN_SHIFT_TEST(sshll,  2d, 2s,  15)
   4333 GEN_SHIFT_TEST(sshll,  2d, 2s,  31)
   4334 GEN_SHIFT_TEST(sshll2, 2d, 4s,  0)
   4335 GEN_SHIFT_TEST(sshll2, 2d, 4s,  15)
   4336 GEN_SHIFT_TEST(sshll2, 2d, 4s,  31)
   4337 GEN_SHIFT_TEST(sshll,  4s, 4h,  0)
   4338 GEN_SHIFT_TEST(sshll,  4s, 4h,  7)
   4339 GEN_SHIFT_TEST(sshll,  4s, 4h,  15)
   4340 GEN_SHIFT_TEST(sshll2, 4s, 8h,  0)
   4341 GEN_SHIFT_TEST(sshll2, 4s, 8h,  7)
   4342 GEN_SHIFT_TEST(sshll2, 4s, 8h,  15)
   4343 GEN_SHIFT_TEST(sshll,  8h, 8b,  0)
   4344 GEN_SHIFT_TEST(sshll,  8h, 8b,  3)
   4345 GEN_SHIFT_TEST(sshll,  8h, 8b,  7)
   4346 GEN_SHIFT_TEST(sshll2, 8h, 16b, 0)
   4347 GEN_SHIFT_TEST(sshll2, 8h, 16b, 3)
   4348 GEN_SHIFT_TEST(sshll2, 8h, 16b, 7)
   4349 GEN_SHIFT_TEST(ushll,  2d, 2s, 0)
   4350 GEN_SHIFT_TEST(ushll,  2d, 2s, 15)
   4351 GEN_SHIFT_TEST(ushll,  2d, 2s, 31)
   4352 GEN_SHIFT_TEST(ushll2, 2d, 4s, 0)
   4353 GEN_SHIFT_TEST(ushll2, 2d, 4s, 15)
   4354 GEN_SHIFT_TEST(ushll2, 2d, 4s, 31)
   4355 GEN_SHIFT_TEST(ushll,  4s, 4h,  0)
   4356 GEN_SHIFT_TEST(ushll,  4s, 4h,  7)
   4357 GEN_SHIFT_TEST(ushll,  4s, 4h,  15)
   4358 GEN_SHIFT_TEST(ushll2, 4s, 8h,  0)
   4359 GEN_SHIFT_TEST(ushll2, 4s, 8h,  7)
   4360 GEN_SHIFT_TEST(ushll2, 4s, 8h,  15)
   4361 GEN_SHIFT_TEST(ushll,  8h, 8b,  0)
   4362 GEN_SHIFT_TEST(ushll,  8h, 8b,  3)
   4363 GEN_SHIFT_TEST(ushll,  8h, 8b,  7)
   4364 GEN_SHIFT_TEST(ushll2, 8h, 16b, 0)
   4365 GEN_SHIFT_TEST(ushll2, 8h, 16b, 3)
   4366 GEN_SHIFT_TEST(ushll2, 8h, 16b, 7)
   4367 
   // SUQADD (signed saturating accumulate of unsigned value) and USQADD
   // (unsigned saturating accumulate of signed value), scalar forms for each
   // element size: d, s, h, b.  Register 22 is the accumulator/destination
   // and register 23 the source.
   4368 GEN_TWOVEC_TEST(suqadd_d_d,  "suqadd d22, d23",   22, 23)
   4369 GEN_TWOVEC_TEST(suqadd_s_s,  "suqadd s22, s23",   22, 23)
   4370 GEN_TWOVEC_TEST(suqadd_h_h,  "suqadd h22, h23",   22, 23)
   4371 GEN_TWOVEC_TEST(suqadd_b_b,  "suqadd b22, b23",   22, 23)
   4372 GEN_TWOVEC_TEST(usqadd_d_d,  "usqadd d22, d23",   22, 23)
   4373 GEN_TWOVEC_TEST(usqadd_s_s,  "usqadd s22, s23",   22, 23)
   4374 GEN_TWOVEC_TEST(usqadd_h_h,  "usqadd h22, h23",   22, 23)
   4375 GEN_TWOVEC_TEST(usqadd_b_b,  "usqadd b22, b23",   22, 23)
   4376 
   // SUQADD / USQADD, vector forms: one test per arrangement
   // (2d, 4s, 2s, 8h, 4h, 16b, 8b), with v6 as accumulator/destination and
   // v27 as source.
   4377 GEN_TWOVEC_TEST(suqadd_2d_2d,   "suqadd v6.2d,  v27.2d",  6, 27)
   4378 GEN_TWOVEC_TEST(suqadd_4s_4s,   "suqadd v6.4s,  v27.4s",  6, 27)
   4379 GEN_TWOVEC_TEST(suqadd_2s_2s,   "suqadd v6.2s,  v27.2s",  6, 27)
   4380 GEN_TWOVEC_TEST(suqadd_8h_8h,   "suqadd v6.8h,  v27.8h",  6, 27)
   4381 GEN_TWOVEC_TEST(suqadd_4h_4h,   "suqadd v6.4h,  v27.4h",  6, 27)
   4382 GEN_TWOVEC_TEST(suqadd_16b_16b, "suqadd v6.16b, v27.16b", 6, 27)
   4383 GEN_TWOVEC_TEST(suqadd_8b_8b,   "suqadd v6.8b,  v27.8b",  6, 27)
   4384 GEN_TWOVEC_TEST(usqadd_2d_2d,   "usqadd v6.2d,  v27.2d",  6, 27)
   4385 GEN_TWOVEC_TEST(usqadd_4s_4s,   "usqadd v6.4s,  v27.4s",  6, 27)
   4386 GEN_TWOVEC_TEST(usqadd_2s_2s,   "usqadd v6.2s,  v27.2s",  6, 27)
   4387 GEN_TWOVEC_TEST(usqadd_8h_8h,   "usqadd v6.8h,  v27.8h",  6, 27)
   4388 GEN_TWOVEC_TEST(usqadd_4h_4h,   "usqadd v6.4h,  v27.4h",  6, 27)
   4389 GEN_TWOVEC_TEST(usqadd_16b_16b, "usqadd v6.16b, v27.16b", 6, 27)
   4390 GEN_TWOVEC_TEST(usqadd_8b_8b,   "usqadd v6.8b,  v27.8b",  6, 27)
   4391 
   // TBL (table lookup) tests with 1 to 4 table registers, for .16b and .8b
   // results.  v21 is the destination and v23 holds the indices.  The table
   // list always uses the consecutively numbered registers v15..v18; the
   // extra table entries are built with EOR from v15, v21 and v23 before
   // the lookup runs.
   // NOTE(review): v16..v18 are written by the instruction text but are not
   // among the register numbers passed to the macro (21, 15, 23) -- confirm
   // that GEN_THREEVEC_TEST's clobber handling covers them.
   4392 // Uses v15 as the first table entry
   4393 GEN_THREEVEC_TEST(
   4394    tbl_16b_1reg, "tbl v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
   4395 // and v15 ^ v21 as the second table entry
   4396 GEN_THREEVEC_TEST(
   4397    tbl_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4398                  "tbl v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
   4399 // and v15 ^ v23 as the third table entry
   4400 GEN_THREEVEC_TEST(
   4401    tbl_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4402                  "eor v17.16b, v15.16b, v23.16b ; "
   4403                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
   4404                  21, 15, 23)
   4405 // and v21 ^ v23 as the fourth table entry
   4406 GEN_THREEVEC_TEST(
   4407    tbl_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4408                  "eor v17.16b, v15.16b, v23.16b ; "
   4409                  "eor v18.16b, v21.16b, v23.16b ; "
   4410                  "tbl v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
   4411                  21, 15, 23)
   4412 // Same register scheme for tbl .8b, tbx .16b, tbx.8b
   4413 GEN_THREEVEC_TEST(
   4414    tbl_8b_1reg, "tbl v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
   4415 GEN_THREEVEC_TEST(
   4416    tbl_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4417                 "tbl v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
   4418 GEN_THREEVEC_TEST(
   4419    tbl_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4420                 "eor v17.16b, v15.16b, v23.16b ; "
   4421                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
   4422                 21, 15, 23)
   4423 GEN_THREEVEC_TEST(
   4424    tbl_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4425                 "eor v17.16b, v15.16b, v23.16b ; "
   4426                 "eor v18.16b, v21.16b, v23.16b ; "
   4427                 "tbl v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
   4428                 21, 15, 23)
   4429 
   // TBX (table lookup extension) tests with 1 to 4 table registers, for
   // .16b and .8b results.  Register usage matches the tbl tests: v21 is the
   // destination, v23 holds the indices, v15 is the first table entry, and
   // v16 = v15 ^ v21, v17 = v15 ^ v23, v18 = v21 ^ v23 are built with EOR
   // as the further table entries.
   // NOTE(review): v16..v18 are written but are not among the register
   // numbers passed to the macro -- confirm GEN_THREEVEC_TEST's clobber
   // handling covers them.
   4430 GEN_THREEVEC_TEST(
   4431    tbx_16b_1reg, "tbx v21.16b, {v15.16b}, v23.16b", 21, 15, 23)
   4432 GEN_THREEVEC_TEST(
   4433    tbx_16b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4434                  "tbx v21.16b, {v15.16b, v16.16b}, v23.16b", 21, 15, 23)
   4435 GEN_THREEVEC_TEST(
   4436    tbx_16b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4437                  "eor v17.16b, v15.16b, v23.16b ; "
   4438                  "tbx v21.16b, {v15.16b, v16.16b, v17.16b}, v23.16b",
   4439                  21, 15, 23)
   4440 GEN_THREEVEC_TEST(
   4441    tbx_16b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4442                  "eor v17.16b, v15.16b, v23.16b ; "
   4443                  "eor v18.16b, v21.16b, v23.16b ; "
   4444                  "tbx v21.16b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.16b",
   4445                  21, 15, 23)
   4446 // Same register scheme again, now for tbx .8b
   4447 GEN_THREEVEC_TEST(
   4448    tbx_8b_1reg, "tbx v21.8b, {v15.16b}, v23.8b", 21, 15, 23)
   4449 GEN_THREEVEC_TEST(
   4450    tbx_8b_2reg, "eor v16.16b, v15.16b, v21.16b ; "
   4451                 "tbx v21.8b, {v15.16b, v16.16b}, v23.8b", 21, 15, 23)
   4452 GEN_THREEVEC_TEST(
   4453    tbx_8b_3reg, "eor v16.16b, v15.16b, v21.16b ; "
   4454                 "eor v17.16b, v15.16b, v23.16b ; "
   4455                 "tbx v21.8b, {v15.16b, v16.16b, v17.16b}, v23.8b",
   4456                 21, 15, 23)
   4457 GEN_THREEVEC_TEST(
   4458    tbx_8b_4reg, "eor v16.16b, v15.16b, v21.16b ; "
   4459                 "eor v17.16b, v15.16b, v23.16b ; "
   4460                 "eor v18.16b, v21.16b, v23.16b ; "
   4461                 "tbx v21.8b, {v15.16b, v16.16b, v17.16b, v18.16b}, v23.8b",
   4462                 21, 15, 23)
   4463 
   // TRN1 / TRN2 (transpose vectors, primary / secondary): one test per
   // arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b), all on registers v1, v2, v4.
   4464 GEN_THREEVEC_TEST(trn1_2d_2d_2d,    "trn1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4465 GEN_THREEVEC_TEST(trn1_4s_4s_4s,    "trn1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4466 GEN_THREEVEC_TEST(trn1_2s_2s_2s,    "trn1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4467 GEN_THREEVEC_TEST(trn1_8h_8h_8h,    "trn1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4468 GEN_THREEVEC_TEST(trn1_4h_4h_4h,    "trn1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4469 GEN_THREEVEC_TEST(trn1_16b_16b_16b, "trn1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4470 GEN_THREEVEC_TEST(trn1_8b_8b_8b,    "trn1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4471 GEN_THREEVEC_TEST(trn2_2d_2d_2d,    "trn2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4472 GEN_THREEVEC_TEST(trn2_4s_4s_4s,    "trn2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4473 GEN_THREEVEC_TEST(trn2_2s_2s_2s,    "trn2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4474 GEN_THREEVEC_TEST(trn2_8h_8h_8h,    "trn2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4475 GEN_THREEVEC_TEST(trn2_4h_4h_4h,    "trn2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4476 GEN_THREEVEC_TEST(trn2_16b_16b_16b, "trn2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4477 GEN_THREEVEC_TEST(trn2_8b_8b_8b,    "trn2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4478 
   // URECPE (unsigned reciprocal estimate) and URSQRTE (unsigned reciprocal
   // square root estimate); only the .4s and .2s arrangements are tested.
   4479 GEN_TWOVEC_TEST(urecpe_4s_4s,   "urecpe v6.4s,  v27.4s",  6, 27)
   4480 GEN_TWOVEC_TEST(urecpe_2s_2s,   "urecpe v6.2s,  v27.2s",  6, 27)
   4481 GEN_TWOVEC_TEST(ursqrte_4s_4s,   "ursqrte v6.4s,  v27.4s",  6, 27)
   4482 GEN_TWOVEC_TEST(ursqrte_2s_2s,   "ursqrte v6.2s,  v27.2s",  6, 27)
   4483 
   // UZP1 / UZP2 (unzip vectors) and ZIP1 / ZIP2 (zip vectors): one test per
   // arrangement (2d, 4s, 2s, 8h, 4h, 16b, 8b) for each of the four
   // instructions, all on registers v1, v2, v4.
   4484 GEN_THREEVEC_TEST(uzp1_2d_2d_2d,    "uzp1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4485 GEN_THREEVEC_TEST(uzp1_4s_4s_4s,    "uzp1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4486 GEN_THREEVEC_TEST(uzp1_2s_2s_2s,    "uzp1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4487 GEN_THREEVEC_TEST(uzp1_8h_8h_8h,    "uzp1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4488 GEN_THREEVEC_TEST(uzp1_4h_4h_4h,    "uzp1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4489 GEN_THREEVEC_TEST(uzp1_16b_16b_16b, "uzp1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4490 GEN_THREEVEC_TEST(uzp1_8b_8b_8b,    "uzp1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4491 GEN_THREEVEC_TEST(uzp2_2d_2d_2d,    "uzp2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4492 GEN_THREEVEC_TEST(uzp2_4s_4s_4s,    "uzp2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4493 GEN_THREEVEC_TEST(uzp2_2s_2s_2s,    "uzp2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4494 GEN_THREEVEC_TEST(uzp2_8h_8h_8h,    "uzp2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4495 GEN_THREEVEC_TEST(uzp2_4h_4h_4h,    "uzp2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4496 GEN_THREEVEC_TEST(uzp2_16b_16b_16b, "uzp2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4497 GEN_THREEVEC_TEST(uzp2_8b_8b_8b,    "uzp2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4498 GEN_THREEVEC_TEST(zip1_2d_2d_2d,    "zip1 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4499 GEN_THREEVEC_TEST(zip1_4s_4s_4s,    "zip1 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4500 GEN_THREEVEC_TEST(zip1_2s_2s_2s,    "zip1 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4501 GEN_THREEVEC_TEST(zip1_8h_8h_8h,    "zip1 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4502 GEN_THREEVEC_TEST(zip1_4h_4h_4h,    "zip1 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4503 GEN_THREEVEC_TEST(zip1_16b_16b_16b, "zip1 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4504 GEN_THREEVEC_TEST(zip1_8b_8b_8b,    "zip1 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4505 GEN_THREEVEC_TEST(zip2_2d_2d_2d,    "zip2 v1.2d,  v2.2d,  v4.2d",  1, 2, 4)
   4506 GEN_THREEVEC_TEST(zip2_4s_4s_4s,    "zip2 v1.4s,  v2.4s,  v4.4s",  1, 2, 4)
   4507 GEN_THREEVEC_TEST(zip2_2s_2s_2s,    "zip2 v1.2s,  v2.2s,  v4.2s",  1, 2, 4)
   4508 GEN_THREEVEC_TEST(zip2_8h_8h_8h,    "zip2 v1.8h,  v2.8h,  v4.8h",  1, 2, 4)
   4509 GEN_THREEVEC_TEST(zip2_4h_4h_4h,    "zip2 v1.4h,  v2.4h,  v4.4h",  1, 2, 4)
   4510 GEN_THREEVEC_TEST(zip2_16b_16b_16b, "zip2 v1.16b, v2.16b, v4.16b", 1, 2, 4)
   4511 GEN_THREEVEC_TEST(zip2_8b_8b_8b,    "zip2 v1.8b,  v2.8b,  v4.8b",  1, 2, 4)
   4512 
   // XTN / XTN2 (extract narrow): each source lane size (2d, 4s, 8h) is
   // narrowed to half width; XTN is given the 64-bit destination arrangement
   // and XTN2 the 128-bit one.
   // NOTE(review): GEN_UNARY_TEST is defined elsewhere; the argument order
   // (mnemonic, dest arrangement, source arrangement) is inferred from the
   // call shape -- confirm against the macro.
   4513 GEN_UNARY_TEST(xtn,  2s, 2d)
   4514 GEN_UNARY_TEST(xtn2, 4s, 2d)
   4515 GEN_UNARY_TEST(xtn,  4h, 4s)
   4516 GEN_UNARY_TEST(xtn2, 8h, 4s)
   4517 GEN_UNARY_TEST(xtn,  8b, 8h)
   4518 GEN_UNARY_TEST(xtn2, 16b, 8h)
   4519 
   4520 // ======================== MEM ========================
   4521 
   4522 // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
   4523 
   4524 // ======================== CRYPTO ========================
   4525 
   // AES instructions: AESD / AESE (single-round decrypt / encrypt) and
   // AESIMC / AESMC (inverse mix columns / mix columns), all on v6 and v27.
   // NOTE(review): these presumably need a host and assembler with the AES
   // crypto extension enabled -- confirm against the build configuration.
   4526 GEN_TWOVEC_TEST(aesd_16b_16b,    "aesd v6.16b,  v27.16b",  6, 27)
   4527 GEN_TWOVEC_TEST(aese_16b_16b,    "aese v6.16b,  v27.16b",  6, 27)
   4528 GEN_TWOVEC_TEST(aesimc_16b_16b,  "aesimc v6.16b,  v27.16b",  6, 27)
   4529 GEN_TWOVEC_TEST(aesmc_16b_16b,   "aesmc v6.16b,  v27.16b",  6, 27)
   4530 
   // SHA1 instructions: the hash-update forms SHA1C / SHA1M / SHA1P
   // (q, s, 4s operands), SHA1H (scalar s operands), and the schedule-update
   // forms SHA1SU0 (three operands) and SHA1SU1 (two operands).
   // NOTE(review): these presumably need the SHA1 crypto extension on the
   // host and assembler -- confirm against the build configuration.
   4531 GEN_THREEVEC_TEST(sha1c_q_s_4s,     "sha1c q29, s28, v27.4s", 29,28,27)
   4532 GEN_TWOVEC_TEST(sha1h_s_s,          "sha1h s6,  s27",  6, 27)
   4533 GEN_THREEVEC_TEST(sha1m_q_s_4s,     "sha1m q29, s28, v27.4s", 29,28,27)
   4534 GEN_THREEVEC_TEST(sha1p_q_s_4s,     "sha1p q29, s28, v27.4s", 29,28,27)
   4535 GEN_THREEVEC_TEST(sha1su0_4s_4s_4s, "sha1su0 v29.4s, v28.4s, v27.4s", 29,28,27)
   4536 GEN_TWOVEC_TEST(sha1su1_4s_4s,      "sha1su1 v6.4s,  v27.4s",  6, 27)
   4537 
   // SHA256 instructions: the hash-update forms SHA256H / SHA256H2
   // (q, q, 4s operands) and the schedule-update forms SHA256SU0 (two
   // operands) and SHA256SU1 (three operands).
   // NOTE(review): these presumably need the SHA2 crypto extension on the
   // host and assembler -- confirm against the build configuration.
   4538 GEN_THREEVEC_TEST(sha256h2_q_q_4s,  "sha256h2 q29, q28, v27.4s", 29,28,27)
   4539 GEN_THREEVEC_TEST(sha256h_q_q_4s,   "sha256h q29, q28, v27.4s", 29,28,27)
   4540 GEN_TWOVEC_TEST(sha256su0_4s_4s,    "sha256su0 v6.4s,  v27.4s",  6, 27)
   4541 GEN_THREEVEC_TEST(sha256su1_4s_4s_4s, "sha256su1 v29.4s, v28.4s, v27.4s",
   4542                                       29,28,27)
   4543 
   4544 
   4545 /* ---------------------------------------------------------------- */
   4546 /* -- main()                                                     -- */
   4547 /* ---------------------------------------------------------------- */
   4548 
   4549 int main ( void )
   4550 {
   4551    assert(sizeof(V128) == 16);
   4552 
   4553    // ======================== FP ========================
   4554 
   4555    // fabs      d,s
   4556    // fabs      2d,4s,2s
   4557    if (1) test_fabs_d_d(TyDF);
   4558    if (1) test_fabs_s_s(TySF);
   4559    if (1) test_fabs_2d_2d(TyDF);
   4560    if (1) test_fabs_4s_4s(TySF);
   4561    if (1) test_fabs_2s_2s(TyDF);
   4562 
   4563    // fneg      d,s
   4564    // fneg      2d,4s,2s
   4565    if (1) test_fneg_d_d(TyDF);
   4566    if (1) test_fneg_s_s(TySF);
   4567    if (1) test_fneg_2d_2d(TySF);
   4568    if (1) test_fneg_4s_4s(TyDF);
   4569    if (1) test_fneg_2s_2s(TySF);
   4570 
   4571    // fsqrt     d,s
   4572    // fsqrt     2d,4s,2s
   4573    if (1) test_fsqrt_d_d(TyDF);
   4574    if (1) test_fsqrt_s_s(TySF);
   4575    if (1) test_fsqrt_2d_2d(TySF);
   4576    if (1) test_fsqrt_4s_4s(TyDF);
   4577    if (1) test_fsqrt_2s_2s(TySF);
   4578 
   4579    // fadd      d,s
   4580    // fsub      d,s
   4581    if (1) test_fadd_d_d_d(TyDF);
   4582    if (1) test_fadd_s_s_s(TySF);
   4583    if (1) test_fsub_d_d_d(TyDF);
   4584    if (1) test_fsub_s_s_s(TySF);
   4585 
   4586    // fadd      2d,4s,2s
   4587    // fsub      2d,4s,2s
   4588    if (1) test_fadd_2d_2d_2d(TyDF);
   4589    if (1) test_fadd_4s_4s_4s(TySF);
   4590    if (1) test_fadd_2s_2s_2s(TySF);
   4591    if (1) test_fsub_2d_2d_2d(TyDF);
   4592    if (1) test_fsub_4s_4s_4s(TySF);
   4593    if (1) test_fsub_2s_2s_2s(TySF);
   4594 
   4595    // fabd      d,s
   4596    // fabd      2d,4s,2s
   4597    if (1) test_fabd_d_d_d(TyDF);
   4598    if (1) test_fabd_s_s_s(TySF);
   4599    if (1) test_fabd_2d_2d_2d(TyDF);
   4600    if (1) test_fabd_4s_4s_4s(TySF);
   4601    if (1) test_fabd_2s_2s_2s(TySF);
   4602 
   4603    // faddp     d,s (floating add pair)
   4604    // faddp     2d,4s,2s
   4605    if (1) test_faddp_d_2d(TyDF);
   4606    if (1) test_faddp_s_2s(TySF);
   4607    if (1) test_faddp_2d_2d_2d(TySF);
   4608    if (1) test_faddp_4s_4s_4s(TyDF);
   4609    if (1) test_faddp_2s_2s_2s(TySF);
   4610 
   4611    // fccmp     d,s (floating point conditional quiet compare)
   4612    // fccmpe    d,s (floating point conditional signaling compare)
   4613    if (1) DO50( test_FCCMP_D_D_0xF_EQ() );
   4614    if (1) DO50( test_FCCMP_D_D_0xF_NE() );
   4615    if (1) DO50( test_FCCMP_D_D_0x0_EQ() );
   4616    if (1) DO50( test_FCCMP_D_D_0x0_NE() );
   4617    if (1) DO50( test_FCCMP_S_S_0xF_EQ() );
   4618    if (1) DO50( test_FCCMP_S_S_0xF_NE() );
   4619    if (1) DO50( test_FCCMP_S_S_0x0_EQ() );
   4620    if (1) DO50( test_FCCMP_S_S_0x0_NE() );
   4621    if (1) DO50( test_FCCMPE_D_D_0xF_EQ() );
   4622    if (1) DO50( test_FCCMPE_D_D_0xF_NE() );
   4623    if (1) DO50( test_FCCMPE_D_D_0x0_EQ() );
   4624    if (1) DO50( test_FCCMPE_D_D_0x0_NE() );
   4625    if (1) DO50( test_FCCMPE_S_S_0xF_EQ() );
   4626    if (1) DO50( test_FCCMPE_S_S_0xF_NE() );
   4627    if (1) DO50( test_FCCMPE_S_S_0x0_EQ() );
   4628    if (1) DO50( test_FCCMPE_S_S_0x0_NE() );
   4629 
   4630    // fcmeq     d,s
   4631    // fcmge     d,s
   4632    // fcmgt     d,s
   4633    // facgt     d,s  (floating abs compare GT)
   4634    // facge     d,s  (floating abs compare GE)
   4635    if (1) DO50( test_FCMEQ_D_D_D() );
   4636    if (1) DO50( test_FCMEQ_S_S_S() );
   4637    if (1) DO50( test_FCMGE_D_D_D() );
   4638    if (1) DO50( test_FCMGE_S_S_S() );
   4639    if (1) DO50( test_FCMGT_D_D_D() );
   4640    if (1) DO50( test_FCMGT_S_S_S() );
   4641    if (1) DO50( test_FACGT_D_D_D() );
   4642    if (1) DO50( test_FACGT_S_S_S() );
   4643    if (1) DO50( test_FACGE_D_D_D() );
   4644    if (1) DO50( test_FACGE_S_S_S() );
   4645 
   4646    // fcmeq     2d,4s,2s
   4647    // fcmge     2d,4s,2s
   4648    // fcmgt     2d,4s,2s
   4649    // facge     2d,4s,2s
   4650    // facgt     2d,4s,2s
   4651    if (1) test_fcmeq_2d_2d_2d(TyDF);
   4652    if (1) test_fcmeq_4s_4s_4s(TySF);
   4653    if (1) test_fcmeq_2s_2s_2s(TySF);
   4654    if (1) test_fcmge_2d_2d_2d(TyDF);
   4655    if (1) test_fcmge_4s_4s_4s(TySF);
   4656    if (1) test_fcmge_2s_2s_2s(TySF);
   4657    if (1) test_fcmgt_2d_2d_2d(TyDF);
   4658    if (1) test_fcmgt_4s_4s_4s(TySF);
   4659    if (1) test_fcmgt_2s_2s_2s(TySF);
   4660    if (1) test_facge_2d_2d_2d(TyDF);
   4661    if (1) test_facge_4s_4s_4s(TySF);
   4662    if (1) test_facge_2s_2s_2s(TySF);
   4663    if (1) test_facgt_2d_2d_2d(TyDF);
   4664    if (1) test_facgt_4s_4s_4s(TySF);
   4665    if (1) test_facgt_2s_2s_2s(TySF);
   4666 
   4667    // fcmeq_z   d,s
   4668    // fcmge_z   d,s
   4669    // fcmgt_z   d,s
   4670    // fcmle_z   d,s
   4671    // fcmlt_z   d,s
   4672    if (1) DO50( test_FCMEQ_D_D_Z() );
   4673    if (1) DO50( test_FCMEQ_S_S_Z() );
   4674    if (1) DO50( test_FCMGE_D_D_Z() );
   4675    if (1) DO50( test_FCMGE_S_S_Z() );
   4676    if (1) DO50( test_FCMGT_D_D_Z() );
   4677    if (1) DO50( test_FCMGT_S_S_Z() );
   4678    if (1) DO50( test_FCMLE_D_D_Z() );
   4679    if (1) DO50( test_FCMLE_S_S_Z() );
   4680    if (1) DO50( test_FCMLT_D_D_Z() );
   4681    if (1) DO50( test_FCMLT_S_S_Z() );
   4682 
   4683    // fcmeq_z   2d,4s,2s
   4684    // fcmge_z   2d,4s,2s
   4685    // fcmgt_z   2d,4s,2s
   4686    // fcmle_z   2d,4s,2s
   4687    // fcmlt_z   2d,4s,2s
   4688    if (1) test_fcmeq_z_2d_2d(TyDF);
   4689    if (1) test_fcmeq_z_4s_4s(TySF);
   4690    if (1) test_fcmeq_z_2s_2s(TySF);
   4691    if (1) test_fcmge_z_2d_2d(TyDF);
   4692    if (1) test_fcmge_z_4s_4s(TySF);
   4693    if (1) test_fcmge_z_2s_2s(TySF);
   4694    if (1) test_fcmgt_z_2d_2d(TyDF);
   4695    if (1) test_fcmgt_z_4s_4s(TySF);
   4696    if (1) test_fcmgt_z_2s_2s(TySF);
   4697    if (1) test_fcmle_z_2d_2d(TyDF);
   4698    if (1) test_fcmle_z_4s_4s(TySF);
   4699    if (1) test_fcmle_z_2s_2s(TySF);
   4700    if (1) test_fcmlt_z_2d_2d(TyDF);
   4701    if (1) test_fcmlt_z_4s_4s(TySF);
   4702    if (1) test_fcmlt_z_2s_2s(TySF);
   4703 
   4704    // fcmp_z    d,s
   4705    // fcmpe_z   d,s
   4706    // fcmp      d,s (floating point quiet, set flags)
   4707    // fcmpe     d,s (floating point signaling, set flags)
   4708    if (1) DO50( test_FCMP_D_Z() );
   4709    if (1) DO50( test_FCMP_S_Z() );
   4710    if (1) DO50( test_FCMPE_D_Z() );
   4711    if (1) DO50( test_FCMPE_S_Z() );
   4712    if (1) DO50( test_FCMP_D_D() );
   4713    if (1) DO50( test_FCMP_S_S() );
   4714    if (1) DO50( test_FCMPE_D_D() );
   4715    if (1) DO50( test_FCMPE_S_S() );
   4716 
   4717    // fcsel     d,s (fp cond select)
   4718    if (1) DO50( test_FCSEL_D_D_D_EQ() );
   4719    if (1) DO50( test_FCSEL_D_D_D_NE() );
   4720    if (1) DO50( test_FCSEL_S_S_S_EQ() );
   4721    if (1) DO50( test_FCSEL_S_S_S_NE() );
   4722 
   4723    // fdiv      d,s
   4724    // fdiv      2d,4s,2s
   4725    if (1) test_fdiv_d_d_d(TyDF);
   4726    if (1) test_fdiv_s_s_s(TySF);
   4727    if (1) test_fdiv_2d_2d_2d(TyDF);
   4728    if (1) test_fdiv_4s_4s_4s(TySF);
   4729    if (1) test_fdiv_2s_2s_2s(TySF);
   4730 
   4731    // fmadd     d,s
   4732    // fnmadd    d,s
   4733    // fmsub     d,s
   4734    // fnmsub    d,s
   4735    if (1) test_fmadd_d_d_d_d(TyDF);
   4736    if (1) test_fmadd_s_s_s_s(TySF);
   4737    if (1) test_fnmadd_d_d_d_d(TyDF);
   4738    if (1) test_fnmadd_s_s_s_s(TySF);
   4739    if (1) test_fmsub_d_d_d_d(TyDF);
   4740    if (1) test_fmsub_s_s_s_s(TySF);
   4741    if (1) test_fnmsub_d_d_d_d(TyDF);
   4742    if (1) test_fnmsub_s_s_s_s(TySF);
   4743 
   4744    // fnmul     d,s
   4745    if (1) test_fnmul_d_d_d(TyDF);
   4746    if (1) test_fnmul_s_s_s(TySF);
   4747 
   4748    // fmax      d,s
   4749    // fmin      d,s
   4750    // fmaxnm    d,s ("max number")
   4751    // fminnm    d,s
   4752    if (1) test_fmax_d_d_d(TyDF);
   4753    if (1) test_fmax_s_s_s(TySF);
   4754    if (1) test_fmin_d_d_d(TyDF);
   4755    if (1) test_fmin_s_s_s(TySF);
   4756    if (1) test_fmaxnm_d_d_d(TyDF);
   4757    if (1) test_fmaxnm_s_s_s(TySF);
   4758    if (1) test_fminnm_d_d_d(TyDF);
   4759    if (1) test_fminnm_s_s_s(TySF);
   4760 
   4761    // fmax      2d,4s,2s
   4762    // fmin      2d,4s,2s
   4763    // fmaxnm    2d,4s,2s
   4764    // fminnm    2d,4s,2s
   4765    if (1) test_fmax_2d_2d_2d(TyDF);
   4766    if (1) test_fmax_4s_4s_4s(TySF);
   4767    if (1) test_fmax_2s_2s_2s(TySF);
   4768    if (1) test_fmin_2d_2d_2d(TyDF);
   4769    if (1) test_fmin_4s_4s_4s(TySF);
   4770    if (1) test_fmin_2s_2s_2s(TySF);
   4771    if (1) test_fmaxnm_2d_2d_2d(TyDF);
   4772    if (1) test_fmaxnm_4s_4s_4s(TySF);
   4773    if (1) test_fmaxnm_2s_2s_2s(TySF);
   4774    if (1) test_fminnm_2d_2d_2d(TyDF);
   4775    if (1) test_fminnm_4s_4s_4s(TySF);
   4776    if (1) test_fminnm_2s_2s_2s(TySF);
   4777 
   4778    // fmaxnmp   d_2d,s_2s ("max number pairwise")
   4779    // fminnmp   d_2d,s_2s
   4780    if (1) test_fmaxnmp_d_2d(TyDF);
   4781    if (1) test_fmaxnmp_s_2s(TySF);
   4782    if (1) test_fminnmp_d_2d(TyDF);
   4783    if (1) test_fminnmp_s_2s(TySF);
   4784 
   4785    // fmaxnmp   2d,4s,2s
   4786    // fminnmp   2d,4s,2s
   4787    if (1) test_fmaxnmp_2d_2d_2d(TyDF);
   4788    if (1) test_fmaxnmp_4s_4s_4s(TySF);
   4789    if (1) test_fmaxnmp_2s_2s_2s(TySF);
   4790    if (1) test_fminnmp_2d_2d_2d(TyDF);
   4791    if (1) test_fminnmp_4s_4s_4s(TySF);
   4792    if (1) test_fminnmp_2s_2s_2s(TySF);
   4793 
   4794    // fmaxnmv   s_4s (maxnum across vector)
   4795    // fminnmv   s_4s
   4796    if (1) test_fmaxnmv_s_4s(TySF);
   4797    if (1) test_fminnmv_s_4s(TySF);
   4798 
   4799    // fmaxp     d_2d,s_2s (max of a pair)
   4800    // fminp     d_2d,s_2s (min of a pair)
   4801    if (1) test_fmaxp_d_2d(TyDF);
   4802    if (1) test_fmaxp_s_2s(TySF);
   4803    if (1) test_fminp_d_2d(TyDF);
   4804    if (1) test_fminp_s_2s(TySF);
   4805 
   4806    // fmaxp     2d,4s,2s  (max pairwise)
   4807    // fminp     2d,4s,2s
   4808    if (1) test_fmaxp_2d_2d_2d(TyDF);
   4809    if (1) test_fmaxp_4s_4s_4s(TySF);
   4810    if (1) test_fmaxp_2s_2s_2s(TySF);
   4811    if (1) test_fminp_2d_2d_2d(TyDF);
   4812    if (1) test_fminp_4s_4s_4s(TySF);
   4813    if (1) test_fminp_2s_2s_2s(TySF);
   4814 
   4815    // fmaxv     s_4s (max across vector)
   4816    // fminv     s_4s
   4817    if (1) test_fmaxv_s_4s(TySF);
   4818    if (1) test_fminv_s_4s(TySF);
   4819 
   4820    // fmla      2d,4s,2s
   4821    // fmls      2d,4s,2s
   4822    if (1) test_fmla_2d_2d_2d(TyDF);
   4823    if (1) test_fmla_4s_4s_4s(TySF);
   4824    if (1) test_fmla_2s_2s_2s(TySF);
   4825    if (1) test_fmls_2d_2d_2d(TyDF);
   4826    if (1) test_fmls_4s_4s_4s(TySF);
   4827    if (1) test_fmls_2s_2s_2s(TySF);
   4828 
   4829    // fmla      d_d_d[],s_s_s[] (by element)
   4830    // fmls      d_d_d[],s_s_s[] (by element)
   4831    if (1) test_fmla_d_d_d0(TyDF);
   4832    if (1) test_fmla_d_d_d1(TyDF);
   4833    if (1) test_fmla_s_s_s0(TySF);
   4834    if (1) test_fmla_s_s_s3(TySF);
   4835    if (1) test_fmls_d_d_d0(TyDF);
   4836    if (1) test_fmls_d_d_d1(TyDF);
   4837    if (1) test_fmls_s_s_s0(TySF);
   4838    if (1) test_fmls_s_s_s3(TySF);
   4839 
   4840    // fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4841    // fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4842    if (1) test_fmla_2d_2d_d0(TyDF);
   4843    if (1) test_fmla_2d_2d_d1(TyDF);
   4844    if (1) test_fmla_4s_4s_s0(TySF);
   4845    if (1) test_fmla_4s_4s_s3(TySF);
   4846    if (1) test_fmla_2s_2s_s0(TySF);
   4847    if (1) test_fmla_2s_2s_s3(TySF);
   4848    if (1) test_fmls_2d_2d_d0(TyDF);
   4849    if (1) test_fmls_2d_2d_d1(TyDF);
   4850    if (1) test_fmls_4s_4s_s0(TySF);
   4851    if (1) test_fmls_4s_4s_s3(TySF);
   4852    if (1) test_fmls_2s_2s_s0(TySF);
   4853    if (1) test_fmls_2s_2s_s3(TySF);
   4854 
   4855    // fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   4856    if (1) test_fmov_2d_imm_01(TyD);
   4857    if (1) test_fmov_2d_imm_02(TyD);
   4858    if (1) test_fmov_2d_imm_03(TyD);
   4859    if (1) test_fmov_4s_imm_01(TyS);
   4860    if (1) test_fmov_4s_imm_02(TyS);
   4861    if (1) test_fmov_4s_imm_03(TyS);
   4862    if (1) test_fmov_2s_imm_01(TyS);
   4863    if (1) test_fmov_2s_imm_02(TyS);
   4864    if (1) test_fmov_2s_imm_03(TyS);
   4865 
   4866    // fmov      d_d,s_s
   4867    if (1) test_fmov_d_d(TyDF);
   4868    if (1) test_fmov_s_s(TySF);
   4869 
   4870    // fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   4871    if (1) test_fmov_s_w(TyS);
   4872    if (1) test_fmov_d_x(TyD);
   4873    if (1) test_fmov_d1_x(TyD);
   4874    if (1) test_fmov_w_s(TyS);
   4875    if (1) test_fmov_x_d(TyD);
   4876    if (1) test_fmov_x_d1(TyD);
   4877 
   4878    // fmov      d,s #imm
   4879    if (1) test_fmov_d_imm_01(TyNONE);
   4880    if (1) test_fmov_d_imm_02(TyNONE);
   4881    if (1) test_fmov_d_imm_03(TyNONE);
   4882    if (1) test_fmov_s_imm_01(TyNONE);
   4883    if (1) test_fmov_s_imm_02(TyNONE);
   4884    if (1) test_fmov_s_imm_03(TyNONE);
   4885 
   4886    // fmul      d_d_d[],s_s_s[]
   4887    if (1) test_fmul_d_d_d0(TyDF);
   4888    if (1) test_fmul_d_d_d1(TyDF);
   4889    if (1) test_fmul_s_s_s0(TySF);
   4890    if (1) test_fmul_s_s_s3(TySF);
   4891 
   4892    // fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4893    if (1) test_fmul_2d_2d_d0(TyDF);
   4894    if (1) test_fmul_2d_2d_d1(TyDF);
   4895    if (1) test_fmul_4s_4s_s0(TySF);
   4896    if (1) test_fmul_4s_4s_s3(TySF);
   4897    if (1) test_fmul_2s_2s_s0(TySF);
   4898    if (1) test_fmul_2s_2s_s3(TySF);
   4899 
   4900    // fmul      d,s
   4901    // fmul      2d,4s,2s
   4902    if (1) test_fmul_d_d_d(TyDF);
   4903    if (1) test_fmul_s_s_s(TySF);
   4904    if (1) test_fmul_2d_2d_2d(TyDF);
   4905    if (1) test_fmul_4s_4s_4s(TySF);
   4906    if (1) test_fmul_2s_2s_2s(TySF);
   4907 
   4908    // fmulx     d_d_d[],s_s_s[]
   4909    // fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   4910    if (1) test_fmulx_d_d_d0(TyDF);
   4911    if (1) test_fmulx_d_d_d1(TyDF);
   4912    if (1) test_fmulx_s_s_s0(TySF);
   4913    if (1) test_fmulx_s_s_s3(TySF);
   4914    if (1) test_fmulx_2d_2d_d0(TyDF);
   4915    if (1) test_fmulx_2d_2d_d1(TyDF);
   4916    if (1) test_fmulx_4s_4s_s0(TySF);
   4917    if (1) test_fmulx_4s_4s_s3(TySF);
   4918    if (1) test_fmulx_2s_2s_s0(TySF);
   4919    if (1) test_fmulx_2s_2s_s3(TySF);
   4920 
   4921    // fmulx     d,s
   4922    // fmulx     2d,4s,2s
   4923    if (1) test_fmulx_d_d_d(TyDF);
   4924    if (1) test_fmulx_s_s_s(TySF);
   4925    if (1) test_fmulx_2d_2d_2d(TyDF);
   4926    if (1) test_fmulx_4s_4s_4s(TySF);
   4927    if (1) test_fmulx_2s_2s_2s(TySF);
   4928 
   4929    // frecpe    d,s (recip estimate)
   4930    // frecpe    2d,4s,2s
   4931    if (1) test_frecpe_d_d(TyDF);
   4932    if (1) test_frecpe_s_s(TySF);
   4933    if (1) test_frecpe_2d_2d(TyDF);
   4934    if (1) test_frecpe_4s_4s(TySF);
   4935    if (1) test_frecpe_2s_2s(TySF);
   4936 
   4937    // frecps    d,s (recip step)
   4938    // frecps    2d,4s,2s
   4939    if (1) test_frecps_d_d_d(TyDF);
   4940    if (1) test_frecps_s_s_s(TySF);
   4941    if (1) test_frecps_2d_2d_2d(TyDF);
   4942    if (1) test_frecps_4s_4s_4s(TySF);
   4943    if (1) test_frecps_2s_2s_2s(TySF);
   4944 
   4945    // frecpx    d,s (recip exponent)
   4946    if (1) test_frecpx_d_d(TyDF);
   4947    if (1) test_frecpx_s_s(TySF);
   4948 
   4949    // frinta    d,s
   4950    // frinti    d,s
   4951    // frintm    d,s
   4952    // frintn    d,s
   4953    // frintp    d,s
   4954    // frintx    d,s
   4955    // frintz    d,s
   4956    if (1) test_frinta_d_d(TyDF);
   4957    if (1) test_frinta_s_s(TySF);
   4958    if (1) test_frinti_d_d(TyDF);
   4959    if (1) test_frinti_s_s(TySF);
   4960    if (1) test_frintm_d_d(TyDF);
   4961    if (1) test_frintm_s_s(TySF);
   4962    if (1) test_frintn_d_d(TyDF);
   4963    if (1) test_frintn_s_s(TySF);
   4964    if (1) test_frintp_d_d(TyDF);
   4965    if (1) test_frintp_s_s(TySF);
   4966    if (1) test_frintx_d_d(TyDF);
   4967    if (1) test_frintx_s_s(TySF);
   4968    if (1) test_frintz_d_d(TyDF);
   4969    if (1) test_frintz_s_s(TySF);
   4970 
   4971    // frinta    2d,4s,2s (round to integral, nearest away)
   4972    // frinti    2d,4s,2s (round to integral, per FPCR)
   4973    // frintm    2d,4s,2s (round to integral, minus inf)
   4974    // frintn    2d,4s,2s (round to integral, nearest, to even)
   4975    // frintp    2d,4s,2s (round to integral, plus inf)
   4976    // frintx    2d,4s,2s (round to integral exact, per FPCR)
   4977    // frintz    2d,4s,2s (round to integral, zero)
   4978    if (1) test_frinta_2d_2d(TyDF);
   4979    if (1) test_frinta_4s_4s(TySF);
   4980    if (1) test_frinta_2s_2s(TySF);
   4981    if (1) test_frinti_2d_2d(TyDF);
   4982    if (1) test_frinti_4s_4s(TySF);
   4983    if (1) test_frinti_2s_2s(TySF);
   4984    if (1) test_frintm_2d_2d(TyDF);
   4985    if (1) test_frintm_4s_4s(TySF);
   4986    if (1) test_frintm_2s_2s(TySF);
   4987    if (1) test_frintn_2d_2d(TyDF);
   4988    if (1) test_frintn_4s_4s(TySF);
   4989    if (1) test_frintn_2s_2s(TySF);
   4990    if (1) test_frintp_2d_2d(TyDF);
   4991    if (1) test_frintp_4s_4s(TySF);
   4992    if (1) test_frintp_2s_2s(TySF);
   4993    if (1) test_frintx_2d_2d(TyDF);
   4994    if (1) test_frintx_4s_4s(TySF);
   4995    if (1) test_frintx_2s_2s(TySF);
   4996    if (1) test_frintz_2d_2d(TyDF);
   4997    if (1) test_frintz_4s_4s(TySF);
   4998    if (1) test_frintz_2s_2s(TySF);
   4999 
   5000    // frsqrte   d,s (est)
   5001    // frsqrte   2d,4s,2s
   5002    if (1) test_frsqrte_d_d(TyDF);
   5003    if (1) test_frsqrte_s_s(TySF);
   5004    if (1) test_frsqrte_2d_2d(TyDF);
   5005    if (1) test_frsqrte_4s_4s(TySF);
   5006    if (1) test_frsqrte_2s_2s(TySF);
   5007 
   5008    // frsqrts   d,s (step)
   5009    // frsqrts   2d,4s,2s
   5010    if (1) test_frsqrts_d_d_d(TyDF);
   5011    if (1) test_frsqrts_s_s_s(TySF);
   5012    if (1) test_frsqrts_2d_2d_2d(TyDF);
   5013    if (1) test_frsqrts_4s_4s_4s(TySF);
   5014    if (1) test_frsqrts_2s_2s_2s(TySF);
   5015 
   5016    // ======================== CONV ========================
   5017 
   5018    // fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   5019    if (1) test_fcvt_s_h(TyHF);
   5020    if (1) test_fcvt_d_h(TyHF);
   5021    if (1) test_fcvt_h_s(TySF);
   5022    if (1) test_fcvt_d_s(TySF);
   5023    if (1) test_fcvt_h_d(TyDF);
   5024    if (1) test_fcvt_s_d(TyDF);
   5025 
   5026    // fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   5027    if (1) test_fcvtl_4s_4h(TyHF);
   5028    if (1) test_fcvtl_4s_8h(TyHF);
   5029    if (1) test_fcvtl_2d_2s(TySF);
   5030    if (1) test_fcvtl_2d_4s(TySF);
   5031 
   5032    // fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   5033    if (1) test_fcvtn_4h_4s(TySF);
   5034    if (1) test_fcvtn_8h_4s(TySF);
   5035    if (1) test_fcvtn_2s_2d(TyDF);
   5036    if (1) test_fcvtn_4s_2d(TyDF);
   5037 
   5038    // fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
   5039    // fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
   5040    // fcvtas    2d,4s,2s
   5041    // fcvtau    2d,4s,2s
   5042    // fcvtas    w_s,x_s,w_d,x_d
   5043    // fcvtau    w_s,x_s,w_d,x_d
   5044    if (1) test_fcvtas_d_d(TyDF);
   5045    if (1) test_fcvtau_d_d(TyDF);
   5046    if (1) test_fcvtas_s_s(TySF);
   5047    if (1) test_fcvtau_s_s(TySF);
   5048    if (1) test_fcvtas_2d_2d(TyDF);
   5049    if (1) test_fcvtau_2d_2d(TyDF);
   5050    if (1) test_fcvtas_4s_4s(TySF);
   5051    if (1) test_fcvtau_4s_4s(TySF);
   5052    if (1) test_fcvtas_2s_2s(TySF);
   5053    if (1) test_fcvtau_2s_2s(TySF);
   5054    if (1) test_fcvtas_w_s(TySF);
   5055    if (1) test_fcvtau_w_s(TySF);
   5056    if (1) test_fcvtas_x_s(TySF);
   5057    if (1) test_fcvtau_x_s(TySF);
   5058    if (1) test_fcvtas_w_d(TyDF);
   5059    if (1) test_fcvtau_w_d(TyDF);
   5060    if (1) test_fcvtas_x_d(TyDF);
   5061    if (1) test_fcvtau_x_d(TyDF);
   5062 
   5063    // fcvtms    d,s  (fcvt to signed int,   minus inf)
   5064    // fcvtmu    d,s  (fcvt to unsigned int, minus inf)
   5065    // fcvtms    2d,4s,2s
   5066    // fcvtmu    2d,4s,2s
   5067    // fcvtms    w_s,x_s,w_d,x_d
   5068    // fcvtmu    w_s,x_s,w_d,x_d
   5069    if (1) test_fcvtms_d_d(TyDF);
   5070    if (1) test_fcvtmu_d_d(TyDF);
   5071    if (1) test_fcvtms_s_s(TySF);
   5072    if (1) test_fcvtmu_s_s(TySF);
   5073    if (1) test_fcvtms_2d_2d(TyDF);
   5074    if (1) test_fcvtmu_2d_2d(TyDF);
   5075    if (1) test_fcvtms_4s_4s(TySF);
   5076    if (1) test_fcvtmu_4s_4s(TySF);
   5077    if (1) test_fcvtms_2s_2s(TySF);
   5078    if (1) test_fcvtmu_2s_2s(TySF);
   5079    if (1) test_fcvtms_w_s(TySF);
   5080    if (1) test_fcvtmu_w_s(TySF);
   5081    if (1) test_fcvtms_x_s(TySF);
   5082    if (1) test_fcvtmu_x_s(TySF);
   5083    if (1) test_fcvtms_w_d(TyDF);
   5084    if (1) test_fcvtmu_w_d(TyDF);
   5085    if (1) test_fcvtms_x_d(TyDF);
   5086    if (1) test_fcvtmu_x_d(TyDF);
   5087 
   5088    // fcvtns    d,s  (fcvt to signed int,   nearest)
   5089    // fcvtnu    d,s  (fcvt to unsigned int, nearest)
   5090    // fcvtns    2d,4s,2s
   5091    // fcvtnu    2d,4s,2s
   5092    // fcvtns    w_s,x_s,w_d,x_d
   5093    // fcvtnu    w_s,x_s,w_d,x_d
   5094    if (1) test_fcvtns_d_d(TyDF);
   5095    if (1) test_fcvtnu_d_d(TyDF);
   5096    if (1) test_fcvtns_s_s(TySF);
   5097    if (1) test_fcvtnu_s_s(TySF);
   5098    if (1) test_fcvtns_2d_2d(TyDF);
   5099    if (1) test_fcvtnu_2d_2d(TyDF);
   5100    if (1) test_fcvtns_4s_4s(TySF);
   5101    if (1) test_fcvtnu_4s_4s(TySF);
   5102    if (1) test_fcvtns_2s_2s(TySF);
   5103    if (1) test_fcvtnu_2s_2s(TySF);
   5104    if (1) test_fcvtns_w_s(TySF);
   5105    if (1) test_fcvtnu_w_s(TySF);
   5106    if (1) test_fcvtns_x_s(TySF);
   5107    if (1) test_fcvtnu_x_s(TySF);
   5108    if (1) test_fcvtns_w_d(TyDF);
   5109    if (1) test_fcvtnu_w_d(TyDF);
   5110    if (1) test_fcvtns_x_d(TyDF);
   5111    if (1) test_fcvtnu_x_d(TyDF);
   5112 
   5113    // fcvtps    d,s  (fcvt to signed int,   plus inf)
   5114    // fcvtpu    d,s  (fcvt to unsigned int, plus inf)
   5115    // fcvtps    2d,4s,2s
   5116    // fcvtpu    2d,4s,2s
   5117    // fcvtps    w_s,x_s,w_d,x_d
   5118    // fcvtpu    w_s,x_s,w_d,x_d
   5119    if (1) test_fcvtps_d_d(TyDF);
   5120    if (1) test_fcvtpu_d_d(TyDF);
   5121    if (1) test_fcvtps_s_s(TySF);
   5122    if (1) test_fcvtpu_s_s(TySF);
   5123    if (1) test_fcvtps_2d_2d(TyDF);
   5124    if (1) test_fcvtpu_2d_2d(TyDF);
   5125    if (1) test_fcvtps_4s_4s(TySF);
   5126    if (1) test_fcvtpu_4s_4s(TySF);
   5127    if (1) test_fcvtps_2s_2s(TySF);
   5128    if (1) test_fcvtpu_2s_2s(TySF);
   5129    if (1) test_fcvtps_w_s(TySF);
   5130    if (1) test_fcvtpu_w_s(TySF);
   5131    if (1) test_fcvtps_x_s(TySF);
   5132    if (1) test_fcvtpu_x_s(TySF);
   5133    if (1) test_fcvtps_w_d(TyDF);
   5134    if (1) test_fcvtpu_w_d(TyDF);
   5135    if (1) test_fcvtps_x_d(TyDF);
   5136    if (1) test_fcvtpu_x_d(TyDF);
   5137 
   5138    // fcvtzs    d,s (fcvt to signed integer,   to zero)
   5139    // fcvtzu    d,s (fcvt to unsigned integer, to zero)
   5140    // fcvtzs    2d,4s,2s
   5141    // fcvtzu    2d,4s,2s
   5142    // fcvtzs    w_s,x_s,w_d,x_d
   5143    // fcvtzu    w_s,x_s,w_d,x_d
   5144    if (1) test_fcvtzs_d_d(TyDF);
   5145    if (1) test_fcvtzu_d_d(TyDF);
   5146    if (1) test_fcvtzs_s_s(TySF);
   5147    if (1) test_fcvtzu_s_s(TySF);
   5148    if (1) test_fcvtzs_2d_2d(TyDF);
   5149    if (1) test_fcvtzu_2d_2d(TyDF);
   5150    if (1) test_fcvtzs_4s_4s(TySF);
   5151    if (1) test_fcvtzu_4s_4s(TySF);
   5152    if (1) test_fcvtzs_2s_2s(TySF);
   5153    if (1) test_fcvtzu_2s_2s(TySF);
   5154    if (1) test_fcvtzs_w_s(TySF);
   5155    if (1) test_fcvtzu_w_s(TySF);
   5156    if (1) test_fcvtzs_x_s(TySF);
   5157    if (1) test_fcvtzu_x_s(TySF);
   5158    if (1) test_fcvtzs_w_d(TyDF);
   5159    if (1) test_fcvtzu_w_d(TyDF);
   5160    if (1) test_fcvtzs_x_d(TyDF);
   5161    if (1) test_fcvtzu_x_d(TyDF);
   5162 
   5163    // fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5164    // fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5165    // fcvtzs    2d,4s,2s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5166    // fcvtzu    2d,4s,2s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5167    // fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   5168    // fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   5169    if (1) test_fcvtzs_d_d_fbits1(TyDF);
   5170    if (1) test_fcvtzs_d_d_fbits32(TyDF);
   5171    if (1) test_fcvtzs_d_d_fbits64(TyDF);
   5172    if (1) test_fcvtzu_d_d_fbits1(TyDF);
   5173    if (1) test_fcvtzu_d_d_fbits32(TyDF);
   5174    if (1) test_fcvtzu_d_d_fbits64(TyDF);
   5175    if (1) test_fcvtzs_s_s_fbits1(TySF);
   5176    if (1) test_fcvtzs_s_s_fbits16(TySF);
   5177    if (1) test_fcvtzs_s_s_fbits32(TySF);
   5178    if (1) test_fcvtzu_s_s_fbits1(TySF);
   5179    if (1) test_fcvtzu_s_s_fbits16(TySF);
   5180    if (1) test_fcvtzu_s_s_fbits32(TySF);
   5181    if (1) test_fcvtzs_2d_2d_fbits1(TyDF);
   5182    if (1) test_fcvtzs_2d_2d_fbits32(TyDF);
   5183    if (1) test_fcvtzs_2d_2d_fbits64(TyDF);
   5184    if (1) test_fcvtzu_2d_2d_fbits1(TyDF);
   5185    if (1) test_fcvtzu_2d_2d_fbits32(TyDF);
   5186    if (1) test_fcvtzu_2d_2d_fbits64(TyDF);
   5187    if (1) test_fcvtzs_4s_4s_fbits1(TySF);
   5188    if (1) test_fcvtzs_4s_4s_fbits16(TySF);
   5189    if (1) test_fcvtzs_4s_4s_fbits32(TySF);
   5190    if (1) test_fcvtzu_4s_4s_fbits1(TySF);
   5191    if (1) test_fcvtzu_4s_4s_fbits16(TySF);
   5192    if (1) test_fcvtzu_4s_4s_fbits32(TySF);
   5193    if (1) test_fcvtzs_2s_2s_fbits1(TySF);
   5194    if (1) test_fcvtzs_2s_2s_fbits16(TySF);
   5195    if (1) test_fcvtzs_2s_2s_fbits32(TySF);
   5196    if (1) test_fcvtzu_2s_2s_fbits1(TySF);
   5197    if (1) test_fcvtzu_2s_2s_fbits16(TySF);
   5198    if (1) test_fcvtzu_2s_2s_fbits32(TySF);
   5199    if (1) test_fcvtzs_w_s_fbits1(TySF);
   5200    if (1) test_fcvtzs_w_s_fbits16(TySF);
   5201    if (1) test_fcvtzs_w_s_fbits32(TySF);
   5202    if (1) test_fcvtzu_w_s_fbits1(TySF);
   5203    if (1) test_fcvtzu_w_s_fbits16(TySF);
   5204    if (1) test_fcvtzu_w_s_fbits32(TySF);
   5205    if (1) test_fcvtzs_x_s_fbits1(TySF);
   5206    if (1) test_fcvtzs_x_s_fbits32(TySF);
   5207    if (1) test_fcvtzs_x_s_fbits64(TySF);
   5208    if (1) test_fcvtzu_x_s_fbits1(TySF);
   5209    if (1) test_fcvtzu_x_s_fbits32(TySF);
   5210    if (1) test_fcvtzu_x_s_fbits64(TySF);
   5211    if (1) test_fcvtzs_w_d_fbits1(TyDF);
   5212    if (1) test_fcvtzs_w_d_fbits16(TyDF);
   5213    if (1) test_fcvtzs_w_d_fbits32(TyDF);
   5214    if (1) test_fcvtzu_w_d_fbits1(TyDF);
   5215    if (1) test_fcvtzu_w_d_fbits16(TyDF);
   5216    if (1) test_fcvtzu_w_d_fbits32(TyDF);
   5217    if (1) test_fcvtzs_x_d_fbits1(TyDF);
   5218    if (1) test_fcvtzs_x_d_fbits32(TyDF);
   5219    if (1) test_fcvtzs_x_d_fbits64(TyDF);
   5220    if (1) test_fcvtzu_x_d_fbits1(TyDF);
   5221    if (1) test_fcvtzu_x_d_fbits32(TyDF);
   5222    if (1) test_fcvtzu_x_d_fbits64(TyDF);
   5223 
   5224    // fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
   5225    // fcvtxn    2s_2d,4s_2d
   5226    if (1) test_fcvtxn_s_d(TyDF);
   5227    if (1) test_fcvtxn_2s_2d(TyDF);
   5228    if (1) test_fcvtxn_4s_2d(TyDF);
   5229 
   5230    // scvtf     d,s        _#fbits
   5231    // ucvtf     d,s        _#fbits
   5232    // scvtf     2d,4s,2s   _#fbits
   5233    // ucvtf     2d,4s,2s   _#fbits
   5234    if (1) test_scvtf_d_d_fbits1(TyD);
   5235    if (1) test_scvtf_d_d_fbits32(TyD);
   5236    if (1) test_scvtf_d_d_fbits64(TyD);
   5237    if (1) test_ucvtf_d_d_fbits1(TyD);
   5238    if (1) test_ucvtf_d_d_fbits32(TyD);
   5239    if (1) test_ucvtf_d_d_fbits64(TyD);
   5240    if (1) test_scvtf_s_s_fbits1(TyS);
   5241    if (1) test_scvtf_s_s_fbits16(TyS);
   5242    if (1) test_scvtf_s_s_fbits32(TyS);
   5243    if (1) test_ucvtf_s_s_fbits1(TyS);
   5244    if (1) test_ucvtf_s_s_fbits16(TyS);
   5245    if (1) test_ucvtf_s_s_fbits32(TyS);
   5246    if (1) test_scvtf_2d_2d_fbits1(TyD);
   5247    if (1) test_scvtf_2d_2d_fbits32(TyD);
   5248    if (1) test_scvtf_2d_2d_fbits64(TyD);
   5249    if (1) test_ucvtf_2d_2d_fbits1(TyD);
   5250    if (1) test_ucvtf_2d_2d_fbits32(TyD);
   5251    if (1) test_ucvtf_2d_2d_fbits64(TyD);
   5252    if (1) test_scvtf_4s_4s_fbits1(TyS);
   5253    if (1) test_scvtf_4s_4s_fbits16(TyS);
   5254    if (1) test_scvtf_4s_4s_fbits32(TyS);
   5255    if (1) test_ucvtf_4s_4s_fbits1(TyS);
   5256    if (1) test_ucvtf_4s_4s_fbits16(TyS);
   5257    if (1) test_ucvtf_4s_4s_fbits32(TyS);
   5258    if (1) test_scvtf_2s_2s_fbits1(TyS);
   5259    if (1) test_scvtf_2s_2s_fbits16(TyS);
   5260    if (1) test_scvtf_2s_2s_fbits32(TyS);
   5261    if (1) test_ucvtf_2s_2s_fbits1(TyS);
   5262    if (1) test_ucvtf_2s_2s_fbits16(TyS);
   5263    if (1) test_ucvtf_2s_2s_fbits32(TyS);
   5264 
   5265    // scvtf     d,s
   5266    // ucvtf     d,s
   5267    // scvtf     2d,4s,2s
   5268    // ucvtf     2d,4s,2s
   5269    if (1) test_scvtf_d_d(TyD);
   5270    if (1) test_ucvtf_d_d(TyD);
   5271    if (1) test_scvtf_s_s(TyS);
   5272    if (1) test_ucvtf_s_s(TyS);
   5273    if (1) test_scvtf_2d_2d(TyD);
   5274    if (1) test_ucvtf_2d_2d(TyD);
   5275    if (1) test_scvtf_4s_4s(TyS);
   5276    if (1) test_ucvtf_4s_4s(TyS);
   5277    if (1) test_scvtf_2s_2s(TyS);
   5278    if (1) test_ucvtf_2s_2s(TyS);
   5279 
   5280    // scvtf     s_w, d_w, s_x, d_x,   _#fbits
   5281    // ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   5282    if (1) test_scvtf_s_w_fbits1(TyS);
   5283    if (1) test_scvtf_s_w_fbits16(TyS);
   5284    if (1) test_scvtf_s_w_fbits32(TyS);
   5285    if (1) test_scvtf_d_w_fbits1(TyS);
   5286    if (1) test_scvtf_d_w_fbits16(TyS);
   5287    if (1) test_scvtf_d_w_fbits32(TyS);
   5288    if (1) test_scvtf_s_x_fbits1(TyD);
   5289    if (1) test_scvtf_s_x_fbits32(TyD);
   5290    if (1) test_scvtf_s_x_fbits64(TyD);
   5291    if (1) test_scvtf_d_x_fbits1(TyD);
   5292    if (1) test_scvtf_d_x_fbits32(TyD);
   5293    if (1) test_scvtf_d_x_fbits64(TyD);
   5294    if (1) test_ucvtf_s_w_fbits1(TyS);
   5295    if (1) test_ucvtf_s_w_fbits16(TyS);
   5296    if (1) test_ucvtf_s_w_fbits32(TyS);
   5297    if (1) test_ucvtf_d_w_fbits1(TyS);
   5298    if (1) test_ucvtf_d_w_fbits16(TyS);
   5299    if (1) test_ucvtf_d_w_fbits32(TyS);
   5300    if (1) test_ucvtf_s_x_fbits1(TyD);
   5301    if (1) test_ucvtf_s_x_fbits32(TyD);
   5302    if (1) test_ucvtf_s_x_fbits64(TyD);
   5303    if (1) test_ucvtf_d_x_fbits1(TyD);
   5304    if (1) test_ucvtf_d_x_fbits32(TyD);
   5305    if (1) test_ucvtf_d_x_fbits64(TyD);
   5306 
   5307    // scvtf     s_w, d_w, s_x, d_x
   5308    // ucvtf     s_w, d_w, s_x, d_x
   5309    if (1) test_scvtf_s_w(TyS);
   5310    if (1) test_scvtf_d_w(TyS);
   5311    if (1) test_scvtf_s_x(TyD);
   5312    if (1) test_scvtf_d_x(TyD);
   5313    if (1) test_ucvtf_s_w(TyS);
   5314    if (1) test_ucvtf_d_w(TyS);
   5315    if (1) test_ucvtf_s_x(TyD);
   5316    if (1) test_ucvtf_d_x(TyD);
   5317 
   5318    // ======================== INT ========================
   5319 
   5320    // abs       d
   5321    // neg       d
   5322    if (1) test_abs_d_d(TyD);
   5323    if (1) test_neg_d_d(TyD);
   5324 
   5325    // abs       2d,4s,2s,8h,4h,16b,8b
   5326    // neg       2d,4s,2s,8h,4h,16b,8b
   5327    if (1) test_abs_2d_2d(TyD);
   5328    if (1) test_abs_4s_4s(TyS);
   5329    if (1) test_abs_2s_2s(TyS);
   5330    if (1) test_abs_8h_8h(TyH);
   5331    if (1) test_abs_4h_4h(TyH);
   5332    if (1) test_abs_16b_16b(TyB);
   5333    if (1) test_abs_8b_8b(TyB);
   5334    if (1) test_neg_2d_2d(TyD);
   5335    if (1) test_neg_4s_4s(TyS);
   5336    if (1) test_neg_2s_2s(TyS);
   5337    if (1) test_neg_8h_8h(TyH);
   5338    if (1) test_neg_4h_4h(TyH);
   5339    if (1) test_neg_16b_16b(TyB);
   5340    if (1) test_neg_8b_8b(TyB);
   5341 
   5342    // add       d
   5343    // sub       d
   5344    if (1) test_add_d_d_d(TyD);
   5345    if (1) test_sub_d_d_d(TyD);
   5346 
   5347    // add       2d,4s,2s,8h,4h,16b,8b
   5348    // sub       2d,4s,2s,8h,4h,16b,8b
   5349    if (1) test_add_2d_2d_2d(TyD);
   5350    if (1) test_add_4s_4s_4s(TyS);
   5351    if (1) test_add_2s_2s_2s(TyS);
   5352    if (1) test_add_8h_8h_8h(TyH);
   5353    if (1) test_add_4h_4h_4h(TyH);
   5354    if (1) test_add_16b_16b_16b(TyB);
   5355    if (1) test_add_8b_8b_8b(TyB);
   5356    if (1) test_sub_2d_2d_2d(TyD);
   5357    if (1) test_sub_4s_4s_4s(TyS);
   5358    if (1) test_sub_2s_2s_2s(TyS);
   5359    if (1) test_sub_8h_8h_8h(TyH);
   5360    if (1) test_sub_4h_4h_4h(TyH);
   5361    if (1) test_sub_16b_16b_16b(TyB);
   5362    if (1) test_sub_8b_8b_8b(TyB);
   5363 
   5364    // addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5365    // subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5366    // raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5367    // rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   5368    if (1) test_addhn_2s_2d_2d(TyD);
   5369    if (1) test_addhn2_4s_2d_2d(TyD);
   5370    if (1) test_addhn_4h_4s_4s(TyS);
   5371    if (1) test_addhn2_8h_4s_4s(TyS);
   5372    if (1) test_addhn_8b_8h_8h(TyH);
   5373    if (1) test_addhn2_16b_8h_8h(TyH);
   5374    if (1) test_subhn_2s_2d_2d(TyD);
   5375    if (1) test_subhn2_4s_2d_2d(TyD);
   5376    if (1) test_subhn_4h_4s_4s(TyS);
   5377    if (1) test_subhn2_8h_4s_4s(TyS);
   5378    if (1) test_subhn_8b_8h_8h(TyH);
   5379    if (1) test_subhn2_16b_8h_8h(TyH);
   5380    if (1) test_raddhn_2s_2d_2d(TyD);
   5381    if (1) test_raddhn2_4s_2d_2d(TyD);
   5382    if (1) test_raddhn_4h_4s_4s(TyS);
   5383    if (1) test_raddhn2_8h_4s_4s(TyS);
   5384    if (1) test_raddhn_8b_8h_8h(TyH);
   5385    if (1) test_raddhn2_16b_8h_8h(TyH);
   5386    if (1) test_rsubhn_2s_2d_2d(TyD);
   5387    if (1) test_rsubhn2_4s_2d_2d(TyD);
   5388    if (1) test_rsubhn_4h_4s_4s(TyS);
   5389    if (1) test_rsubhn2_8h_4s_4s(TyS);
   5390    if (1) test_rsubhn_8b_8h_8h(TyH);
   5391    if (1) test_rsubhn2_16b_8h_8h(TyH);
   5392 
   5393    // addp     d (add pairs, across)
   5394    if (1) test_addp_d_2d(TyD);
   5395 
   5396    // addp     2d,4s,2s,8h,4h,16b,8b
   5397    if (1) test_addp_2d_2d_2d(TyD);
   5398    if (1) test_addp_4s_4s_4s(TyS);
   5399    if (1) test_addp_2s_2s_2s(TyS);
   5400    if (1) test_addp_8h_8h_8h(TyH);
   5401    if (1) test_addp_4h_4h_4h(TyH);
   5402    if (1) test_addp_16b_16b_16b(TyB);
   5403    if (1) test_addp_8b_8b_8b(TyB);
   5404 
   5405    // addv     4s,8h,4h,16b,8b (reduce across vector)
   5406    if (1) test_addv_s_4s(TyS);
   5407    if (1) test_addv_h_8h(TyH);
   5408    if (1) test_addv_h_4h(TyH);
   5409    if (1) test_addv_b_16b(TyB);
   5410    if (1) test_addv_b_8b(TyB);
   5411 
   5412    // and      16b,8b
   5413    // bic      16b,8b
   5414    // orn      16b,8b
   5415    // orr      16b,8b
   5416    if (1) test_and_16b_16b_16b(TyB);
   5417    if (1) test_and_8b_8b_8b(TyB);
   5418    if (1) test_bic_16b_16b_16b(TyB);
   5419    if (1) test_bic_8b_8b_8b(TyB);
   5420    if (1) test_orr_16b_16b_16b(TyB);
   5421    if (1) test_orr_8b_8b_8b(TyB);
   5422    if (1) test_orn_16b_16b_16b(TyB);
   5423    if (1) test_orn_8b_8b_8b(TyB);
   5424 
   5425    // orr      8h,4h   #imm8, LSL #0 or 8
   5426    // orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
   5427    // bic      8h,4h   #imm8, LSL #0 or 8
   5428    // bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
   5429    // movi and mvni are very similar, a superset of these.
   5430    // Cases are below.
   5431    if (1) test_orr_8h_0x5A_lsl0(TyH);
   5432    if (1) test_orr_8h_0xA5_lsl8(TyH);
   5433    if (1) test_orr_4h_0x5A_lsl0(TyH);
   5434    if (1) test_orr_4h_0xA5_lsl8(TyH);
   5435    if (1) test_orr_4s_0x5A_lsl0(TyS);
   5436    if (1) test_orr_4s_0x6B_lsl8(TyS);
   5437    if (1) test_orr_4s_0x49_lsl16(TyS);
   5438    if (1) test_orr_4s_0x3D_lsl24(TyS);
   5439    if (1) test_orr_2s_0x5A_lsl0(TyS);
   5440    if (1) test_orr_2s_0x6B_lsl8(TyS);
   5441    if (1) test_orr_2s_0x49_lsl16(TyS);
   5442    if (1) test_orr_2s_0x3D_lsl24(TyS);
   5443    if (1) test_bic_8h_0x5A_lsl0(TyH);
   5444    if (1) test_bic_8h_0xA5_lsl8(TyH);
   5445    if (1) test_bic_4h_0x5A_lsl0(TyH);
   5446    if (1) test_bic_4h_0xA5_lsl8(TyH);
   5447    if (1) test_bic_4s_0x5A_lsl0(TyS);
   5448    if (1) test_bic_4s_0x6B_lsl8(TyS);
   5449    if (1) test_bic_4s_0x49_lsl16(TyS);
   5450    if (1) test_bic_4s_0x3D_lsl24(TyS);
   5451    if (1) test_bic_2s_0x5A_lsl0(TyS);
   5452    if (1) test_bic_2s_0x6B_lsl8(TyS);
   5453    if (1) test_bic_2s_0x49_lsl16(TyS);
   5454    if (1) test_bic_2s_0x3D_lsl24(TyS);
   5455 
   5456    // bif      16b,8b (vector) (bit insert if false)
   5457    // bit      16b,8b (vector) (bit insert if true)
   5458    // bsl      16b,8b (vector) (bit select)
   5459    // eor      16b,8b (vector)
   5460    if (1) test_bif_16b_16b_16b(TyB);
   5461    if (1) test_bif_8b_8b_8b(TyB);
   5462    if (1) test_bit_16b_16b_16b(TyB);
   5463    if (1) test_bit_8b_8b_8b(TyB);
   5464    if (1) test_bsl_16b_16b_16b(TyB);
   5465    if (1) test_bsl_8b_8b_8b(TyB);
   5466    if (1) test_eor_16b_16b_16b(TyB);
   5467    if (1) test_eor_8b_8b_8b(TyB);
   5468 
   5469    // cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   5470    // clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   5471    if (1) test_cls_4s_4s(TyS);
   5472    if (1) test_cls_2s_2s(TyS);
   5473    if (1) test_cls_8h_8h(TyH);
   5474    if (1) test_cls_4h_4h(TyH);
   5475    if (1) test_cls_16b_16b(TyB);
   5476    if (1) test_cls_8b_8b(TyB);
   5477    if (1) test_clz_4s_4s(TyS);
   5478    if (1) test_clz_2s_2s(TyS);
   5479    if (1) test_clz_8h_8h(TyH);
   5480    if (1) test_clz_4h_4h(TyH);
   5481    if (1) test_clz_16b_16b(TyB);
   5482    if (1) test_clz_8b_8b(TyB);
   5483 
   5484    // cmeq     d
   5485    // cmge     d
   5486    // cmgt     d
   5487    // cmhi     d
   5488    // cmhs     d
   5489    // cmtst    d
   5490    if (1) test_cmeq_d_d_d(TyD);
   5491    if (1) test_cmge_d_d_d(TyD);
   5492    if (1) test_cmgt_d_d_d(TyD);
   5493    if (1) test_cmhi_d_d_d(TyD);
   5494    if (1) test_cmhs_d_d_d(TyD);
   5495    if (1) test_cmtst_d_d_d(TyD);
   5496 
   5497    // cmeq     2d,4s,2s,8h,4h,16b,8b
   5498    // cmge     2d,4s,2s,8h,4h,16b,8b
   5499    // cmgt     2d,4s,2s,8h,4h,16b,8b
   5500    // cmhi     2d,4s,2s,8h,4h,16b,8b
   5501    // cmhs     2d,4s,2s,8h,4h,16b,8b
   5502    // cmtst    2d,4s,2s,8h,4h,16b,8b
   5503    if (1) test_cmeq_2d_2d_2d(TyD);
   5504    if (1) test_cmeq_4s_4s_4s(TyS);
   5505    if (1) test_cmeq_2s_2s_2s(TyS);
   5506    if (1) test_cmeq_8h_8h_8h(TyH);
   5507    if (1) test_cmeq_4h_4h_4h(TyH);
   5508    if (1) test_cmeq_16b_16b_16b(TyB);
   5509    if (1) test_cmeq_8b_8b_8b(TyB);
   5510    if (1) test_cmge_2d_2d_2d(TyD);
   5511    if (1) test_cmge_4s_4s_4s(TyS);
   5512    if (1) test_cmge_2s_2s_2s(TyS);
   5513    if (1) test_cmge_8h_8h_8h(TyH);
   5514    if (1) test_cmge_4h_4h_4h(TyH);
   5515    if (1) test_cmge_16b_16b_16b(TyB);
   5516    if (1) test_cmge_8b_8b_8b(TyB);
   5517    if (1) test_cmgt_2d_2d_2d(TyD);
   5518    if (1) test_cmgt_4s_4s_4s(TyS);
   5519    if (1) test_cmgt_2s_2s_2s(TyS);
   5520    if (1) test_cmgt_8h_8h_8h(TyH);
   5521    if (1) test_cmgt_4h_4h_4h(TyH);
   5522    if (1) test_cmgt_16b_16b_16b(TyB);
   5523    if (1) test_cmgt_8b_8b_8b(TyB);
   5524    if (1) test_cmhi_2d_2d_2d(TyD);
   5525    if (1) test_cmhi_4s_4s_4s(TyS);
   5526    if (1) test_cmhi_2s_2s_2s(TyS);
   5527    if (1) test_cmhi_8h_8h_8h(TyH);
   5528    if (1) test_cmhi_4h_4h_4h(TyH);
   5529    if (1) test_cmhi_16b_16b_16b(TyB);
   5530    if (1) test_cmhi_8b_8b_8b(TyB);
   5531    if (1) test_cmhs_2d_2d_2d(TyD);
   5532    if (1) test_cmhs_4s_4s_4s(TyS);
   5533    if (1) test_cmhs_2s_2s_2s(TyS);
   5534    if (1) test_cmhs_8h_8h_8h(TyH);
   5535    if (1) test_cmhs_4h_4h_4h(TyH);
   5536    if (1) test_cmhs_16b_16b_16b(TyB);
   5537    if (1) test_cmhs_8b_8b_8b(TyB);
   5538    if (1) test_cmtst_2d_2d_2d(TyD);
   5539    if (1) test_cmtst_4s_4s_4s(TyS);
   5540    if (1) test_cmtst_2s_2s_2s(TyS);
   5541    if (1) test_cmtst_8h_8h_8h(TyH);
   5542    if (1) test_cmtst_4h_4h_4h(TyH);
   5543    if (1) test_cmtst_16b_16b_16b(TyB);
   5544    if (1) test_cmtst_8b_8b_8b(TyB);
   5545 
   5546    // cmeq_z   d
   5547    // cmge_z   d
   5548    // cmgt_z   d
   5549    // cmle_z   d
   5550    // cmlt_z   d
   5551    if (1) test_cmeq_zero_d_d(TyD);
   5552    if (1) test_cmge_zero_d_d(TyD);
   5553    if (1) test_cmgt_zero_d_d(TyD);
   5554    if (1) test_cmle_zero_d_d(TyD);
   5555    if (1) test_cmlt_zero_d_d(TyD);
   5556 
   5557    // cmeq_z   2d,4s,2s,8h,4h,16b,8b
   5558    // cmge_z   2d,4s,2s,8h,4h,16b,8b
   5559    // cmgt_z   2d,4s,2s,8h,4h,16b,8b
   5560    // cmle_z   2d,4s,2s,8h,4h,16b,8b
   5561    // cmlt_z   2d,4s,2s,8h,4h,16b,8b
   5562    if (1) test_cmeq_zero_2d_2d(TyD);
   5563    if (1) test_cmeq_zero_4s_4s(TyS);
   5564    if (1) test_cmeq_zero_2s_2s(TyS);
   5565    if (1) test_cmeq_zero_8h_8h(TyH);
   5566    if (1) test_cmeq_zero_4h_4h(TyH);
   5567    if (1) test_cmeq_zero_16b_16b(TyB);
   5568    if (1) test_cmeq_zero_8b_8b(TyB);
   5569    if (1) test_cmge_zero_2d_2d(TyD);
   5570    if (1) test_cmge_zero_4s_4s(TyS);
   5571    if (1) test_cmge_zero_2s_2s(TyS);
   5572    if (1) test_cmge_zero_8h_8h(TyH);
   5573    if (1) test_cmge_zero_4h_4h(TyH);
   5574    if (1) test_cmge_zero_16b_16b(TyB);
   5575    if (1) test_cmge_zero_8b_8b(TyB);
   5576    if (1) test_cmgt_zero_2d_2d(TyD);
   5577    if (1) test_cmgt_zero_4s_4s(TyS);
   5578    if (1) test_cmgt_zero_2s_2s(TyS);
   5579    if (1) test_cmgt_zero_8h_8h(TyH);
   5580    if (1) test_cmgt_zero_4h_4h(TyH);
   5581    if (1) test_cmgt_zero_16b_16b(TyB);
   5582    if (1) test_cmgt_zero_8b_8b(TyB);
   5583    if (1) test_cmle_zero_2d_2d(TyD);
   5584    if (1) test_cmle_zero_4s_4s(TyS);
   5585    if (1) test_cmle_zero_2s_2s(TyS);
   5586    if (1) test_cmle_zero_8h_8h(TyH);
   5587    if (1) test_cmle_zero_4h_4h(TyH);
   5588    if (1) test_cmle_zero_16b_16b(TyB);
   5589    if (1) test_cmle_zero_8b_8b(TyB);
   5590    if (1) test_cmlt_zero_2d_2d(TyD);
   5591    if (1) test_cmlt_zero_4s_4s(TyS);
   5592    if (1) test_cmlt_zero_2s_2s(TyS);
   5593    if (1) test_cmlt_zero_8h_8h(TyH);
   5594    if (1) test_cmlt_zero_4h_4h(TyH);
   5595    if (1) test_cmlt_zero_16b_16b(TyB);
   5596    if (1) test_cmlt_zero_8b_8b(TyB);
   5597 
   5598    // cnt      16b,8b (population count per byte)
   5599    if (1) test_cnt_16b_16b(TyB);
   5600    if (1) test_cnt_8b_8b(TyB);
   5601 
   5602    // dup      d,s,h,b (vec elem to scalar)
   5603    if (1) test_dup_d_d0(TyD);
   5604    if (1) test_dup_d_d1(TyD);
   5605    if (1) test_dup_s_s0(TyS);
   5606    if (1) test_dup_s_s3(TyS);
   5607    if (1) test_dup_h_h0(TyH);
   5608    if (1) test_dup_h_h6(TyH);
   5609    if (1) test_dup_b_b0(TyB);
   5610    if (1) test_dup_b_b13(TyB);
   5611 
   5612    // dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   5613    if (1) test_dup_2d_d0(TyD);
   5614    if (1) test_dup_2d_d1(TyD);
   5615    if (1) test_dup_4s_s0(TyS);
   5616    if (1) test_dup_4s_s3(TyS);
   5617    if (1) test_dup_2s_s0(TyS);
   5618    if (1) test_dup_2s_s2(TyS);
   5619    if (1) test_dup_8h_h0(TyH);
   5620    if (1) test_dup_8h_h6(TyH);
   5621    if (1) test_dup_4h_h1(TyH);
   5622    if (1) test_dup_4h_h5(TyH);
   5623    if (1) test_dup_16b_b2(TyB);
   5624    if (1) test_dup_16b_b12(TyB);
   5625    if (1) test_dup_8b_b3(TyB);
   5626    if (1) test_dup_8b_b13(TyB);
   5627 
   5628    // dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   5629    if (1) test_dup_2d_x(TyD);
   5630    if (1) test_dup_4s_w(TyS);
   5631    if (1) test_dup_2s_w(TyS);
   5632    if (1) test_dup_8h_w(TyH);
   5633    if (1) test_dup_4h_w(TyH);
   5634    if (1) test_dup_16b_w(TyB);
   5635    if (1) test_dup_8b_w(TyB);
   5636 
   5637    // ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   5638    if (1) test_ext_16b_16b_16b_0x0(TyB);
   5639    if (1) test_ext_16b_16b_16b_0x1(TyB);
   5640    if (1) test_ext_16b_16b_16b_0x2(TyB);
   5641    if (1) test_ext_16b_16b_16b_0x3(TyB);
   5642    if (1) test_ext_16b_16b_16b_0x4(TyB);
   5643    if (1) test_ext_16b_16b_16b_0x5(TyB);
   5644    if (1) test_ext_16b_16b_16b_0x6(TyB);
   5645    if (1) test_ext_16b_16b_16b_0x7(TyB);
   5646    if (1) test_ext_16b_16b_16b_0x8(TyB);
   5647    if (1) test_ext_16b_16b_16b_0x9(TyB);
   5648    if (1) test_ext_16b_16b_16b_0xA(TyB);
   5649    if (1) test_ext_16b_16b_16b_0xB(TyB);
   5650    if (1) test_ext_16b_16b_16b_0xC(TyB);
   5651    if (1) test_ext_16b_16b_16b_0xD(TyB);
   5652    if (1) test_ext_16b_16b_16b_0xE(TyB);
   5653    if (1) test_ext_16b_16b_16b_0xF(TyB);
   5654    if (1) test_ext_8b_8b_8b_0x0(TyB);
   5655    if (1) test_ext_8b_8b_8b_0x1(TyB);
   5656    if (1) test_ext_8b_8b_8b_0x2(TyB);
   5657    if (1) test_ext_8b_8b_8b_0x3(TyB);
   5658    if (1) test_ext_8b_8b_8b_0x4(TyB);
   5659    if (1) test_ext_8b_8b_8b_0x5(TyB);
   5660    if (1) test_ext_8b_8b_8b_0x6(TyB);
   5661    if (1) test_ext_8b_8b_8b_0x7(TyB);
   5662 
   5663    // ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   5664    if (1) test_ins_d0_d0(TyD);
   5665    if (1) test_ins_d0_d1(TyD);
   5666    if (1) test_ins_d1_d0(TyD);
   5667    if (1) test_ins_d1_d1(TyD);
   5668    if (1) test_ins_s0_s2(TyS);
   5669    if (1) test_ins_s3_s0(TyS);
   5670    if (1) test_ins_s2_s1(TyS);
   5671    if (1) test_ins_s1_s3(TyS);
   5672    if (1) test_ins_h0_h6(TyH);
   5673    if (1) test_ins_h7_h0(TyH);
   5674    if (1) test_ins_h6_h1(TyH);
   5675    if (1) test_ins_h1_h7(TyH);
   5676    if (1) test_ins_b0_b14(TyB);
   5677    if (1) test_ins_b15_b8(TyB);
   5678    if (1) test_ins_b13_b9(TyB);
   5679    if (1) test_ins_b5_b12(TyB);
   5680 
   5681    // ins      d[]_x, s[]_w, h[]_w, b[]_w
   5682    if (1) test_INS_general();
   5683 
   5684    // mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5685    // mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5686    // mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   5687    if (1) test_mla_4s_4s_s0(TyS);
   5688    if (1) test_mla_4s_4s_s3(TyS);
   5689    if (1) test_mla_2s_2s_s0(TyS);
   5690    if (1) test_mla_2s_2s_s3(TyS);
   5691    if (1) test_mla_8h_8h_h1(TyH);
   5692    if (1) test_mla_8h_8h_h5(TyH);
   5693    if (1) test_mla_4h_4h_h2(TyH);
   5694    if (1) test_mla_4h_4h_h7(TyH);
   5695    if (1) test_mls_4s_4s_s0(TyS);
   5696    if (1) test_mls_4s_4s_s3(TyS);
   5697    if (1) test_mls_2s_2s_s0(TyS);
   5698    if (1) test_mls_2s_2s_s3(TyS);
   5699    if (1) test_mls_8h_8h_h1(TyH);
   5700    if (1) test_mls_8h_8h_h5(TyH);
   5701    if (1) test_mls_4h_4h_h2(TyH);
   5702    if (1) test_mls_4h_4h_h7(TyH);
   5703    if (1) test_mul_4s_4s_s0(TyS);
   5704    if (1) test_mul_4s_4s_s3(TyS);
   5705    if (1) test_mul_2s_2s_s0(TyS);
   5706    if (1) test_mul_2s_2s_s3(TyS);
   5707    if (1) test_mul_8h_8h_h1(TyH);
   5708    if (1) test_mul_8h_8h_h5(TyH);
   5709    if (1) test_mul_4h_4h_h2(TyH);
   5710    if (1) test_mul_4h_4h_h7(TyH);
   5711 
   5712    // mla   4s,2s,8h,4h,16b,8b
   5713    // mls   4s,2s,8h,4h,16b,8b
   5714    // mul   4s,2s,8h,4h,16b,8b
   5715    if (1) test_mla_4s_4s_4s(TyS);
   5716    if (1) test_mla_2s_2s_2s(TyS);
   5717    if (1) test_mla_8h_8h_8h(TyH);
   5718    if (1) test_mla_4h_4h_4h(TyH);
   5719    if (1) test_mla_16b_16b_16b(TyB);
   5720    if (1) test_mla_8b_8b_8b(TyB);
   5721    if (1) test_mls_4s_4s_4s(TyS);
   5722    if (1) test_mls_2s_2s_2s(TyS);
   5723    if (1) test_mls_8h_8h_8h(TyH);
   5724    if (1) test_mls_4h_4h_4h(TyH);
   5725    if (1) test_mls_16b_16b_16b(TyB);
   5726    if (1) test_mls_8b_8b_8b(TyB);
   5727    if (1) test_mul_4s_4s_4s(TyS);
   5728    if (1) test_mul_2s_2s_2s(TyS);
   5729    if (1) test_mul_8h_8h_8h(TyH);
   5730    if (1) test_mul_4h_4h_4h(TyH);
   5731    if (1) test_mul_16b_16b_16b(TyB);
   5732    if (1) test_mul_8b_8b_8b(TyB);
   5733 
   5734    // Some of these movi and mvni cases are similar to orr and bic
   5735    // cases with immediates.  Maybe they should be moved together.
   5736    // movi  16b,8b   #imm8, LSL #0
   5737    if (1) test_movi_16b_0x9C_lsl0(TyB);
   5738    if (1) test_movi_8b_0x8B_lsl0(TyB);
   5739 
   5740    // movi  8h,4h    #imm8, LSL #0 or 8
   5741    // mvni  8h,4h    #imm8, LSL #0 or 8
   5742    if (1) test_movi_8h_0x5A_lsl0(TyH);
   5743    if (1) test_movi_8h_0xA5_lsl8(TyH);
   5744    if (1) test_movi_4h_0x5A_lsl0(TyH);
   5745    if (1) test_movi_4h_0xA5_lsl8(TyH);
   5746    if (1) test_mvni_8h_0x5A_lsl0(TyH);
   5747    if (1) test_mvni_8h_0xA5_lsl8(TyH);
   5748    if (1) test_mvni_4h_0x5A_lsl0(TyH);
   5749    if (1) test_mvni_4h_0xA5_lsl8(TyH);
   5750 
   5751    // movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   5752    // mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   5753    if (1) test_movi_4s_0x5A_lsl0(TyS);
   5754    if (1) test_movi_4s_0x6B_lsl8(TyS);
   5755    if (1) test_movi_4s_0x49_lsl16(TyS);
   5756    if (1) test_movi_4s_0x3D_lsl24(TyS);
   5757    if (1) test_movi_2s_0x5A_lsl0(TyS);
   5758    if (1) test_movi_2s_0x6B_lsl8(TyS);
   5759    if (1) test_movi_2s_0x49_lsl16(TyS);
   5760    if (1) test_movi_2s_0x3D_lsl24(TyS);
   5761    if (1) test_mvni_4s_0x5A_lsl0(TyS);
   5762    if (1) test_mvni_4s_0x6B_lsl8(TyS);
   5763    if (1) test_mvni_4s_0x49_lsl16(TyS);
   5764    if (1) test_mvni_4s_0x3D_lsl24(TyS);
   5765    if (1) test_mvni_2s_0x5A_lsl0(TyS);
   5766    if (1) test_mvni_2s_0x6B_lsl8(TyS);
   5767    if (1) test_mvni_2s_0x49_lsl16(TyS);
   5768    if (1) test_mvni_2s_0x3D_lsl24(TyS);
   5769 
   5770    // movi  4s,2s    #imm8, MSL #8 or 16
   5771    // mvni  4s,2s    #imm8, MSL #8 or 16
   5772    if (1) test_movi_4s_0x6B_msl8(TyS);
   5773    if (1) test_movi_4s_0x94_msl16(TyS);
   5774    if (1) test_movi_2s_0x7A_msl8(TyS);
   5775    if (1) test_movi_2s_0xA5_msl16(TyS);
   5776    if (1) test_mvni_4s_0x6B_msl8(TyS);
   5777    if (1) test_mvni_4s_0x94_msl16(TyS);
   5778    if (1) test_mvni_2s_0x7A_msl8(TyS);
   5779    if (1) test_mvni_2s_0xA5_msl16(TyS);
   5780 
   5781    // movi  d,       #imm64
   5782    // movi  2d,      #imm64
   5783    if (1) test_movi_d_0xA5(TyD);
   5784    if (1) test_movi_2d_0xB4(TyD);
   5785 
   5786    // not   16b,8b
   5787    if (1) test_not_16b_16b(TyB);
   5788    if (1) test_not_8b_8b(TyB);
   5789 
   5790    // pmul  16b,8b
   5791    if (1) test_pmul_16b_16b_16b(TyB);
   5792    if (1) test_pmul_8b_8b_8b(TyB);
   5793 
   5794    // pmull{2}  8h_8b_8b,8h_16b_16b
   5795    // pmull{2} 1q_1d_1d,1q_2d_2d is in the crypto section below
   5796    if (1) test_pmull_8h_8b_8b(TyB);
   5797    if (1) test_pmull2_8h_16b_16b(TyB);
   5798 
   5799    // rbit    16b,8b
   5800    // rev16   16b,8b
   5801    // rev32   16b,8b,8h,4h
   5802    // rev64   16b,8b,8h,4h,4s,2s
   5803    if (1) test_rbit_16b_16b(TyB);
   5804    if (1) test_rbit_8b_8b(TyB);
   5805    if (1) test_rev16_16b_16b(TyB);
   5806    if (1) test_rev16_8b_8b(TyB);
   5807    if (1) test_rev32_16b_16b(TyB);
   5808    if (1) test_rev32_8b_8b(TyB);
   5809    if (1) test_rev32_8h_8h(TyH);
   5810    if (1) test_rev32_4h_4h(TyH);
   5811    if (1) test_rev64_16b_16b(TyB);
   5812    if (1) test_rev64_8b_8b(TyB);
   5813    if (1) test_rev64_8h_8h(TyH);
   5814    if (1) test_rev64_4h_4h(TyH);
   5815    if (1) test_rev64_4s_4s(TyS);
   5816    if (1) test_rev64_2s_2s(TyS);
   5817 
   5818    // saba      16b,8b,8h,4h,4s,2s
   5819    // uaba      16b,8b,8h,4h,4s,2s
   5820    if (1) test_saba_4s_4s_4s(TyS);
   5821    if (1) test_saba_2s_2s_2s(TyS);
   5822    if (1) test_saba_8h_8h_8h(TyH);
   5823    if (1) test_saba_4h_4h_4h(TyH);
   5824    if (1) test_saba_16b_16b_16b(TyB);
   5825    if (1) test_saba_8b_8b_8b(TyB);
   5826    if (1) test_uaba_4s_4s_4s(TyS);
   5827    if (1) test_uaba_2s_2s_2s(TyS);
   5828    if (1) test_uaba_8h_8h_8h(TyH);
   5829    if (1) test_uaba_4h_4h_4h(TyH);
   5830    if (1) test_uaba_16b_16b_16b(TyB);
   5831    if (1) test_uaba_8b_8b_8b(TyB);
   5832 
   5833    // sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5834    // uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5835    if (1) test_sabal_2d_2s_2s(TyS);
   5836    if (1) test_sabal2_2d_4s_4s(TyS);
   5837    if (1) test_sabal_4s_4h_4h(TyH);
   5838    if (1) test_sabal2_4s_8h_8h(TyH);
   5839    if (1) test_sabal_8h_8b_8b(TyB);
   5840    if (1) test_sabal2_8h_16b_16b(TyB);
   5841    if (1) test_uabal_2d_2s_2s(TyS);
   5842    if (1) test_uabal2_2d_4s_4s(TyS);
   5843    if (1) test_uabal_4s_4h_4h(TyH);
   5844    if (1) test_uabal2_4s_8h_8h(TyH);
   5845    if (1) test_uabal_8h_8b_8b(TyB);
   5846    if (1) test_uabal2_8h_16b_16b(TyB);
   5847 
   5848    // sabd      16b,8b,8h,4h,4s,2s
   5849    // uabd      16b,8b,8h,4h,4s,2s
   5850    if (1) test_sabd_4s_4s_4s(TyS);
   5851    if (1) test_sabd_2s_2s_2s(TyS);
   5852    if (1) test_sabd_8h_8h_8h(TyH);
   5853    if (1) test_sabd_4h_4h_4h(TyH);
   5854    if (1) test_sabd_16b_16b_16b(TyB);
   5855    if (1) test_sabd_8b_8b_8b(TyB);
   5856    if (1) test_uabd_4s_4s_4s(TyS);
   5857    if (1) test_uabd_2s_2s_2s(TyS);
   5858    if (1) test_uabd_8h_8h_8h(TyH);
   5859    if (1) test_uabd_4h_4h_4h(TyH);
   5860    if (1) test_uabd_16b_16b_16b(TyB);
   5861    if (1) test_uabd_8b_8b_8b(TyB);
   5862 
   5863    // sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5864    // uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5865    if (1) test_sabdl_2d_2s_2s(TyS);
   5866    if (1) test_sabdl2_2d_4s_4s(TyS);
   5867    if (1) test_sabdl_4s_4h_4h(TyH);
   5868    if (1) test_sabdl2_4s_8h_8h(TyH);
   5869    if (1) test_sabdl_8h_8b_8b(TyB);
   5870    if (1) test_sabdl2_8h_16b_16b(TyB);
   5871    if (1) test_uabdl_2d_2s_2s(TyS);
   5872    if (1) test_uabdl2_2d_4s_4s(TyS);
   5873    if (1) test_uabdl_4s_4h_4h(TyH);
   5874    if (1) test_uabdl2_4s_8h_8h(TyH);
   5875    if (1) test_uabdl_8h_8b_8b(TyB);
   5876    if (1) test_uabdl2_8h_16b_16b(TyB);
   5877 
   5878    // sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5879    // uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5880    if (1) test_sadalp_1d_2s(TyS);
   5881    if (1) test_sadalp_2d_4s(TyS);
   5882    if (1) test_sadalp_2s_4h(TyH);
   5883    if (1) test_sadalp_4s_8h(TyH);
   5884    if (1) test_sadalp_4h_8b(TyB);
   5885    if (1) test_sadalp_8h_16b(TyB);
   5886    if (1) test_uadalp_1d_2s(TyS);
   5887    if (1) test_uadalp_2d_4s(TyS);
   5888    if (1) test_uadalp_2s_4h(TyH);
   5889    if (1) test_uadalp_4s_8h(TyH);
   5890    if (1) test_uadalp_4h_8b(TyB);
   5891    if (1) test_uadalp_8h_16b(TyB);
   5892 
   5893    // saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5894    // uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5895    // ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5896    // usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   5897    if (1) test_saddl_2d_2s_2s(TyS);
   5898    if (1) test_saddl2_2d_4s_4s(TyS);
   5899    if (1) test_saddl_4s_4h_4h(TyH);
   5900    if (1) test_saddl2_4s_8h_8h(TyH);
   5901    if (1) test_saddl_8h_8b_8b(TyB);
   5902    if (1) test_saddl2_8h_16b_16b(TyB);
   5903    if (1) test_uaddl_2d_2s_2s(TyS);
   5904    if (1) test_uaddl2_2d_4s_4s(TyS);
   5905    if (1) test_uaddl_4s_4h_4h(TyH);
   5906    if (1) test_uaddl2_4s_8h_8h(TyH);
   5907    if (1) test_uaddl_8h_8b_8b(TyB);
   5908    if (1) test_uaddl2_8h_16b_16b(TyB);
   5909    if (1) test_ssubl_2d_2s_2s(TyS);
   5910    if (1) test_ssubl2_2d_4s_4s(TyS);
   5911    if (1) test_ssubl_4s_4h_4h(TyH);
   5912    if (1) test_ssubl2_4s_8h_8h(TyH);
   5913    if (1) test_ssubl_8h_8b_8b(TyB);
   5914    if (1) test_ssubl2_8h_16b_16b(TyB);
   5915    if (1) test_usubl_2d_2s_2s(TyS);
   5916    if (1) test_usubl2_2d_4s_4s(TyS);
   5917    if (1) test_usubl_4s_4h_4h(TyH);
   5918    if (1) test_usubl2_4s_8h_8h(TyH);
   5919    if (1) test_usubl_8h_8b_8b(TyB);
   5920    if (1) test_usubl2_8h_16b_16b(TyB);
   5921 
   5922    // saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5923    // uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   5924    if (1) test_saddlp_1d_2s(TyS);
   5925    if (1) test_saddlp_2d_4s(TyS);
   5926    if (1) test_saddlp_2s_4h(TyH);
   5927    if (1) test_saddlp_4s_8h(TyH);
   5928    if (1) test_saddlp_4h_8b(TyB);
   5929    if (1) test_saddlp_8h_16b(TyB);
   5930    if (1) test_uaddlp_1d_2s(TyS);
   5931    if (1) test_uaddlp_2d_4s(TyS);
   5932    if (1) test_uaddlp_2s_4h(TyH);
   5933    if (1) test_uaddlp_4s_8h(TyH);
   5934    if (1) test_uaddlp_4h_8b(TyB);
   5935    if (1) test_uaddlp_8h_16b(TyB);
   5936 
   5937    // saddlv    h_16b/8b, s_8h/4h, d_4s
   5938    // uaddlv    h_16b/8b, s_8h/4h, d_4s
   5939    if (1) test_saddlv_h_16b(TyB);
   5940    if (1) test_saddlv_h_8b(TyB);
   5941    if (1) test_saddlv_s_8h(TyH);
   5942    if (1) test_saddlv_s_4h(TyH);
   5943    if (1) test_saddlv_d_4s(TyH);
   5944    if (1) test_uaddlv_h_16b(TyB);
   5945    if (1) test_uaddlv_h_8b(TyB);
   5946    if (1) test_uaddlv_s_8h(TyH);
   5947    if (1) test_uaddlv_s_4h(TyH);
   5948    if (1) test_uaddlv_d_4s(TyH);
   5949 
   5950    // saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5951    // uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5952    // ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5953    // usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_4s/2s
   5954    if (1) test_saddw2_8h_8h_16b(TyB);
   5955    if (1) test_saddw_8h_8h_8b(TyB);
   5956    if (1) test_saddw2_4s_4s_8h(TyH);
   5957    if (1) test_saddw_4s_4s_4h(TyH);
   5958    if (1) test_saddw2_2d_2d_4s(TyS);
   5959    if (1) test_saddw_2d_2d_2s(TyS);
   5960    if (1) test_uaddw2_8h_8h_16b(TyB);
   5961    if (1) test_uaddw_8h_8h_8b(TyB);
   5962    if (1) test_uaddw2_4s_4s_8h(TyH);
   5963    if (1) test_uaddw_4s_4s_4h(TyH);
   5964    if (1) test_uaddw2_2d_2d_4s(TyS);
   5965    if (1) test_uaddw_2d_2d_2s(TyS);
   5966    if (1) test_ssubw2_8h_8h_16b(TyB);
   5967    if (1) test_ssubw_8h_8h_8b(TyB);
   5968    if (1) test_ssubw2_4s_4s_8h(TyH);
   5969    if (1) test_ssubw_4s_4s_4h(TyH);
   5970    if (1) test_ssubw2_2d_2d_4s(TyS);
   5971    if (1) test_ssubw_2d_2d_2s(TyS);
   5972    if (1) test_usubw2_8h_8h_16b(TyB);
   5973    if (1) test_usubw_8h_8h_8b(TyB);
   5974    if (1) test_usubw2_4s_4s_8h(TyH);
   5975    if (1) test_usubw_4s_4s_4h(TyH);
   5976    if (1) test_usubw2_2d_2d_4s(TyS);
   5977    if (1) test_usubw_2d_2d_2s(TyS);
   5978 
   5979    // shadd        16b,8b,8h,4h,4s,2s
   5980    // uhadd        16b,8b,8h,4h,4s,2s
   5981    // shsub        16b,8b,8h,4h,4s,2s
   5982    // uhsub        16b,8b,8h,4h,4s,2s
   5983    if (1) test_shadd_4s_4s_4s(TyS);
   5984    if (1) test_shadd_2s_2s_2s(TyS);
   5985    if (1) test_shadd_8h_8h_8h(TyH);
   5986    if (1) test_shadd_4h_4h_4h(TyH);
   5987    if (1) test_shadd_16b_16b_16b(TyB);
   5988    if (1) test_shadd_8b_8b_8b(TyB);
   5989    if (1) test_uhadd_4s_4s_4s(TyS);
   5990    if (1) test_uhadd_2s_2s_2s(TyS);
   5991    if (1) test_uhadd_8h_8h_8h(TyH);
   5992    if (1) test_uhadd_4h_4h_4h(TyH);
   5993    if (1) test_uhadd_16b_16b_16b(TyB);
   5994    if (1) test_uhadd_8b_8b_8b(TyB);
   5995    if (1) test_shsub_4s_4s_4s(TyS);
   5996    if (1) test_shsub_2s_2s_2s(TyS);
   5997    if (1) test_shsub_8h_8h_8h(TyH);
   5998    if (1) test_shsub_4h_4h_4h(TyH);
   5999    if (1) test_shsub_16b_16b_16b(TyB);
   6000    if (1) test_shsub_8b_8b_8b(TyB);
   6001    if (1) test_uhsub_4s_4s_4s(TyS);
   6002    if (1) test_uhsub_2s_2s_2s(TyS);
   6003    if (1) test_uhsub_8h_8h_8h(TyH);
   6004    if (1) test_uhsub_4h_4h_4h(TyH);
   6005    if (1) test_uhsub_16b_16b_16b(TyB);
   6006    if (1) test_uhsub_8b_8b_8b(TyB);
   6007 
   6008    // shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   6009    if (1) test_shll_8h_8b_8(TyB);
   6010    if (1) test_shll2_8h_16b_8(TyB);
   6011    if (1) test_shll_4s_4h_16(TyH);
   6012    if (1) test_shll2_4s_8h_16(TyH);
   6013    if (1) test_shll_2d_2s_32(TyS);
   6014    if (1) test_shll2_2d_4s_32(TyS);
   6015 
   6016    // shrn{2}      2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
   6017    // rshrn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h,   #imm in 1 .. elem_bits
   6018    if (1) test_shrn_2s_2d_1(TyD);
   6019    if (1) test_shrn_2s_2d_32(TyD);
   6020    if (1) test_shrn2_4s_2d_1(TyD);
   6021    if (1) test_shrn2_4s_2d_32(TyD);
   6022    if (1) test_shrn_4h_4s_1(TyS);
   6023    if (1) test_shrn_4h_4s_16(TyS);
   6024    if (1) test_shrn2_8h_4s_1(TyS);
   6025    if (1) test_shrn2_8h_4s_16(TyS);
   6026    if (1) test_shrn_8b_8h_1(TyH);
   6027    if (1) test_shrn_8b_8h_8(TyH);
   6028    if (1) test_shrn2_16b_8h_1(TyH);
   6029    if (1) test_shrn2_16b_8h_8(TyH);
   6030    if (1) test_rshrn_2s_2d_1(TyD);
   6031    if (1) test_rshrn_2s_2d_32(TyD);
   6032    if (1) test_rshrn2_4s_2d_1(TyD);
   6033    if (1) test_rshrn2_4s_2d_32(TyD);
   6034    if (1) test_rshrn_4h_4s_1(TyS);
   6035    if (1) test_rshrn_4h_4s_16(TyS);
   6036    if (1) test_rshrn2_8h_4s_1(TyS);
   6037    if (1) test_rshrn2_8h_4s_16(TyS);
   6038    if (1) test_rshrn_8b_8h_1(TyH);
   6039    if (1) test_rshrn_8b_8h_8(TyH);
   6040    if (1) test_rshrn2_16b_8h_1(TyH);
   6041    if (1) test_rshrn2_16b_8h_8(TyH);
   6042 
   6043    // sli          d_#imm
   6044    // sri          d_#imm
   6045    if (1) test_sli_d_d_0(TyD);
   6046    if (1) test_sli_d_d_32(TyD);
   6047    if (1) test_sli_d_d_63(TyD);
   6048    if (1) test_sri_d_d_1(TyD);
   6049    if (1) test_sri_d_d_33(TyD);
   6050    if (1) test_sri_d_d_64(TyD);
   6051 
   6052    // sli          2d,4s,2s,8h,4h,16b,8b  _#imm
   6053    // sri          2d,4s,2s,8h,4h,16b,8b  _#imm
   6054    if (1) test_sli_2d_2d_0(TyD);
   6055    if (1) test_sli_2d_2d_32(TyD);
   6056    if (1) test_sli_2d_2d_63(TyD);
   6057    if (1) test_sli_4s_4s_0(TyS);
   6058    if (1) test_sli_4s_4s_16(TyS);
   6059    if (1) test_sli_4s_4s_31(TyS);
   6060    if (1) test_sli_2s_2s_0(TyS);
   6061    if (1) test_sli_2s_2s_16(TyS);
   6062    if (1) test_sli_2s_2s_31(TyS);
   6063    if (1) test_sli_8h_8h_0(TyH);
   6064    if (1) test_sli_8h_8h_8(TyH);
   6065    if (1) test_sli_8h_8h_15(TyH);
   6066    if (1) test_sli_4h_4h_0(TyH);
   6067    if (1) test_sli_4h_4h_8(TyH);
   6068    if (1) test_sli_4h_4h_15(TyH);
   6069    if (1) test_sli_16b_16b_0(TyB);
   6070    if (1) test_sli_16b_16b_3(TyB);
   6071    if (1) test_sli_16b_16b_7(TyB);
   6072    if (1) test_sli_8b_8b_0(TyB);
   6073    if (1) test_sli_8b_8b_3(TyB);
   6074    if (1) test_sli_8b_8b_7(TyB);
   6075    if (1) test_sri_2d_2d_1(TyD);
   6076    if (1) test_sri_2d_2d_33(TyD);
   6077    if (1) test_sri_2d_2d_64(TyD);
   6078    if (1) test_sri_4s_4s_1(TyS);
   6079    if (1) test_sri_4s_4s_17(TyS);
   6080    if (1) test_sri_4s_4s_32(TyS);
   6081    if (1) test_sri_2s_2s_1(TyS);
   6082    if (1) test_sri_2s_2s_17(TyS);
   6083    if (1) test_sri_2s_2s_32(TyS);
   6084    if (1) test_sri_8h_8h_1(TyH);
   6085    if (1) test_sri_8h_8h_8(TyH);
   6086    if (1) test_sri_8h_8h_16(TyH);
   6087    if (1) test_sri_4h_4h_1(TyH);
   6088    if (1) test_sri_4h_4h_8(TyH);
   6089    if (1) test_sri_4h_4h_16(TyH);
   6090    if (1) test_sri_16b_16b_1(TyB);
   6091    if (1) test_sri_16b_16b_4(TyB);
   6092    if (1) test_sri_16b_16b_8(TyB);
   6093    if (1) test_sri_8b_8b_1(TyB);
   6094    if (1) test_sri_8b_8b_4(TyB);
   6095    if (1) test_sri_8b_8b_8(TyB);
   6096 
   6097    // smax         4s,2s,8h,4h,16b,8b
   6098    // umax         4s,2s,8h,4h,16b,8b
   6099    // smin         4s,2s,8h,4h,16b,8b
   6100    // umin         4s,2s,8h,4h,16b,8b
   6101    if (1) test_smax_4s_4s_4s(TyS);
   6102    if (1) test_smax_2s_2s_2s(TyS);
   6103    if (1) test_smax_8h_8h_8h(TyH);
   6104    if (1) test_smax_4h_4h_4h(TyH);
   6105    if (1) test_smax_16b_16b_16b(TyB);
   6106    if (1) test_smax_8b_8b_8b(TyB);
   6107    if (1) test_umax_4s_4s_4s(TyS);
   6108    if (1) test_umax_2s_2s_2s(TyS);
   6109    if (1) test_umax_8h_8h_8h(TyH);
   6110    if (1) test_umax_4h_4h_4h(TyH);
   6111    if (1) test_umax_16b_16b_16b(TyB);
   6112    if (1) test_umax_8b_8b_8b(TyB);
   6113    if (1) test_smin_4s_4s_4s(TyS);
   6114    if (1) test_smin_2s_2s_2s(TyS);
   6115    if (1) test_smin_8h_8h_8h(TyH);
   6116    if (1) test_smin_4h_4h_4h(TyH);
   6117    if (1) test_smin_16b_16b_16b(TyB);
   6118    if (1) test_smin_8b_8b_8b(TyB);
   6119    if (1) test_umin_4s_4s_4s(TyS);
   6120    if (1) test_umin_2s_2s_2s(TyS);
   6121    if (1) test_umin_8h_8h_8h(TyH);
   6122    if (1) test_umin_4h_4h_4h(TyH);
   6123    if (1) test_umin_16b_16b_16b(TyB);
   6124    if (1) test_umin_8b_8b_8b(TyB);
   6125 
   6126    // smaxp        4s,2s,8h,4h,16b,8b
   6127    // umaxp        4s,2s,8h,4h,16b,8b
   6128    // sminp        4s,2s,8h,4h,16b,8b
   6129    // uminp        4s,2s,8h,4h,16b,8b
   6130    if (1) test_smaxp_4s_4s_4s(TyS);
   6131    if (1) test_smaxp_2s_2s_2s(TyS);
   6132    if (1) test_smaxp_8h_8h_8h(TyH);
   6133    if (1) test_smaxp_4h_4h_4h(TyH);
   6134    if (1) test_smaxp_16b_16b_16b(TyB);
   6135    if (1) test_smaxp_8b_8b_8b(TyB);
   6136    if (1) test_umaxp_4s_4s_4s(TyS);
   6137    if (1) test_umaxp_2s_2s_2s(TyS);
   6138    if (1) test_umaxp_8h_8h_8h(TyH);
   6139    if (1) test_umaxp_4h_4h_4h(TyH);
   6140    if (1) test_umaxp_16b_16b_16b(TyB);
   6141    if (1) test_umaxp_8b_8b_8b(TyB);
   6142    if (1) test_sminp_4s_4s_4s(TyS);
   6143    if (1) test_sminp_2s_2s_2s(TyS);
   6144    if (1) test_sminp_8h_8h_8h(TyH);
   6145    if (1) test_sminp_4h_4h_4h(TyH);
   6146    if (1) test_sminp_16b_16b_16b(TyB);
   6147    if (1) test_sminp_8b_8b_8b(TyB);
   6148    if (1) test_uminp_4s_4s_4s(TyS);
   6149    if (1) test_uminp_2s_2s_2s(TyS);
   6150    if (1) test_uminp_8h_8h_8h(TyH);
   6151    if (1) test_uminp_4h_4h_4h(TyH);
   6152    if (1) test_uminp_16b_16b_16b(TyB);
   6153    if (1) test_uminp_8b_8b_8b(TyB);
   6154 
   6155    // smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   6156    // umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   6157    // sminv        s_4s,h_8h,h_4h,b_16b,b_8b
   6158    // uminv        s_4s,h_8h,h_4h,b_16b,b_8b
   6159    if (1) test_SMAXV();
   6160    if (1) test_UMAXV();
   6161    if (1) test_SMINV();
   6162    if (1) test_UMINV();
   6163 
   6164    // smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6165    // umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6166    // smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6167    // umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6168    // smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6169    // umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   6170    if (1) test_smlal_2d_2s_s0(TyS);
   6171    if (1) test_smlal_2d_2s_s3(TyS);
   6172    if (1) test_smlal2_2d_4s_s1(TyS);
   6173    if (1) test_smlal2_2d_4s_s2(TyS);
   6174    if (1) test_smlal_4s_4h_h0(TyH);
   6175    if (1) test_smlal_4s_4h_h7(TyH);
   6176    if (1) test_smlal2_4s_8h_h1(TyH);
   6177    if (1) test_smlal2_4s_8h_h4(TyH);
   6178    if (1) test_umlal_2d_2s_s0(TyS);
   6179    if (1) test_umlal_2d_2s_s3(TyS);
   6180    if (1) test_umlal2_2d_4s_s1(TyS);
   6181    if (1) test_umlal2_2d_4s_s2(TyS);
   6182    if (1) test_umlal_4s_4h_h0(TyH);
   6183    if (1) test_umlal_4s_4h_h7(TyH);
   6184    if (1) test_umlal2_4s_8h_h1(TyH);
   6185    if (1) test_umlal2_4s_8h_h4(TyH);
   6186    if (1) test_smlsl_2d_2s_s0(TyS);
   6187    if (1) test_smlsl_2d_2s_s3(TyS);
   6188    if (1) test_smlsl2_2d_4s_s1(TyS);
   6189    if (1) test_smlsl2_2d_4s_s2(TyS);
   6190    if (1) test_smlsl_4s_4h_h0(TyH);
   6191    if (1) test_smlsl_4s_4h_h7(TyH);
   6192    if (1) test_smlsl2_4s_8h_h1(TyH);
   6193    if (1) test_smlsl2_4s_8h_h4(TyH);
   6194    if (1) test_umlsl_2d_2s_s0(TyS);
   6195    if (1) test_umlsl_2d_2s_s3(TyS);
   6196    if (1) test_umlsl2_2d_4s_s1(TyS);
   6197    if (1) test_umlsl2_2d_4s_s2(TyS);
   6198    if (1) test_umlsl_4s_4h_h0(TyH);
   6199    if (1) test_umlsl_4s_4h_h7(TyH);
   6200    if (1) test_umlsl2_4s_8h_h1(TyH);
   6201    if (1) test_umlsl2_4s_8h_h4(TyH);
   6202    if (1) test_smull_2d_2s_s0(TyS);
   6203    if (1) test_smull_2d_2s_s3(TyS);
   6204    if (1) test_smull2_2d_4s_s1(TyS);
   6205    if (1) test_smull2_2d_4s_s2(TyS);
   6206    if (1) test_smull_4s_4h_h0(TyH);
   6207    if (1) test_smull_4s_4h_h7(TyH);
   6208    if (1) test_smull2_4s_8h_h1(TyH);
   6209    if (1) test_smull2_4s_8h_h4(TyH);
   6210    if (1) test_umull_2d_2s_s0(TyS);
   6211    if (1) test_umull_2d_2s_s3(TyS);
   6212    if (1) test_umull2_2d_4s_s1(TyS);
   6213    if (1) test_umull2_2d_4s_s2(TyS);
   6214    if (1) test_umull_4s_4h_h0(TyH);
   6215    if (1) test_umull_4s_4h_h7(TyH);
   6216    if (1) test_umull2_4s_8h_h1(TyH);
   6217    if (1) test_umull2_4s_8h_h4(TyH);
   6218 
   6219    // smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6220    // umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6221    // smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6222    // umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6223    // smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6224    // umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   6225    if (1) test_smlal_2d_2s_2s(TyS);
   6226    if (1) test_smlal2_2d_4s_4s(TyS);
   6227    if (1) test_smlal_4s_4h_4h(TyH);
   6228    if (1) test_smlal2_4s_8h_8h(TyH);
   6229    if (1) test_smlal_8h_8b_8b(TyB);
   6230    if (1) test_smlal2_8h_16b_16b(TyB);
   6231    if (1) test_umlal_2d_2s_2s(TyS);
   6232    if (1) test_umlal2_2d_4s_4s(TyS);
   6233    if (1) test_umlal_4s_4h_4h(TyH);
   6234    if (1) test_umlal2_4s_8h_8h(TyH);
   6235    if (1) test_umlal_8h_8b_8b(TyB);
   6236    if (1) test_umlal2_8h_16b_16b(TyB);
   6237    if (1) test_smlsl_2d_2s_2s(TyS);
   6238    if (1) test_smlsl2_2d_4s_4s(TyS);
   6239    if (1) test_smlsl_4s_4h_4h(TyH);
   6240    if (1) test_smlsl2_4s_8h_8h(TyH);
   6241    if (1) test_smlsl_8h_8b_8b(TyB);
   6242    if (1) test_smlsl2_8h_16b_16b(TyB);
   6243    if (1) test_umlsl_2d_2s_2s(TyS);
   6244    if (1) test_umlsl2_2d_4s_4s(TyS);
   6245    if (1) test_umlsl_4s_4h_4h(TyH);
   6246    if (1) test_umlsl2_4s_8h_8h(TyH);
   6247    if (1) test_umlsl_8h_8b_8b(TyB);
   6248    if (1) test_umlsl2_8h_16b_16b(TyB);
   6249    if (1) test_smull_2d_2s_2s(TyS);
   6250    if (1) test_smull2_2d_4s_4s(TyS);
   6251    if (1) test_smull_4s_4h_4h(TyH);
   6252    if (1) test_smull2_4s_8h_8h(TyH);
   6253    if (1) test_smull_8h_8b_8b(TyB);
   6254    if (1) test_smull2_8h_16b_16b(TyB);
   6255    if (1) test_umull_2d_2s_2s(TyS);
   6256    if (1) test_umull2_2d_4s_4s(TyS);
   6257    if (1) test_umull_4s_4h_4h(TyH);
   6258    if (1) test_umull2_4s_8h_8h(TyH);
   6259    if (1) test_umull_8h_8b_8b(TyB);
   6260    if (1) test_umull2_8h_16b_16b(TyB);
   6261 
   6262    // smov         w_b[], w_h[], x_b[], x_h[], x_s[]
   6263    // umov         w_b[], w_h[],               w_s[], x_d[]
   6264    if (1) test_umov_x_d0(TyD);
   6265    if (1) test_umov_x_d1(TyD);
   6266    if (1) test_umov_w_s0(TyS);
   6267    if (1) test_umov_w_s3(TyS);
   6268    if (1) test_umov_w_h0(TyH);
   6269    if (1) test_umov_w_h7(TyH);
   6270    if (1) test_umov_w_b0(TyB);
   6271    if (1) test_umov_w_b15(TyB);
   6272    if (1) test_smov_x_s0(TyS);
   6273    if (1) test_smov_x_s3(TyS);
   6274    if (1) test_smov_x_h0(TyH);
   6275    if (1) test_smov_x_h7(TyH);
   6276    if (1) test_smov_w_h0(TyH);
   6277    if (1) test_smov_w_h7(TyH);
   6278    if (1) test_smov_x_b0(TyB);
   6279    if (1) test_smov_x_b15(TyB);
   6280    if (1) test_smov_w_b0(TyB);
   6281    if (1) test_smov_w_b15(TyB);
   6282 
   6283    // sqabs        d,s,h,b
   6284    // sqneg        d,s,h,b
   6285    if (1) test_sqabs_d_d(TyD);
   6286    if (1) test_sqabs_s_s(TyS);
   6287    if (1) test_sqabs_h_h(TyH);
   6288    if (1) test_sqabs_b_b(TyB);
   6289    if (1) test_sqneg_d_d(TyD);
   6290    if (1) test_sqneg_s_s(TyS);
   6291    if (1) test_sqneg_h_h(TyH);
   6292    if (1) test_sqneg_b_b(TyB);
   6293 
   6294    // sqabs        2d,4s,2s,8h,4h,16b,8b
   6295    // sqneg        2d,4s,2s,8h,4h,16b,8b
   6296    if (1) test_sqabs_2d_2d(TyD);
   6297    if (1) test_sqabs_4s_4s(TyS);
   6298    if (1) test_sqabs_2s_2s(TyS);
   6299    if (1) test_sqabs_8h_8h(TyH);
   6300    if (1) test_sqabs_4h_4h(TyH);
   6301    if (1) test_sqabs_16b_16b(TyB);
   6302    if (1) test_sqabs_8b_8b(TyB);
   6303    if (1) test_sqneg_2d_2d(TyD);
   6304    if (1) test_sqneg_4s_4s(TyS);
   6305    if (1) test_sqneg_2s_2s(TyS);
   6306    if (1) test_sqneg_8h_8h(TyH);
   6307    if (1) test_sqneg_4h_4h(TyH);
   6308    if (1) test_sqneg_16b_16b(TyB);
   6309    if (1) test_sqneg_8b_8b(TyB);
   6310 
   6311    // sqadd        d,s,h,b
   6312    // uqadd        d,s,h,b
   6313    // sqsub        d,s,h,b
   6314    // uqsub        d,s,h,b
   6315    if (1) test_sqadd_d_d_d(TyD);
   6316    if (1) test_sqadd_s_s_s(TyS);
   6317    if (1) test_sqadd_h_h_h(TyH);
   6318    if (1) test_sqadd_b_b_b(TyB);
   6319    if (1) test_uqadd_d_d_d(TyD);
   6320    if (1) test_uqadd_s_s_s(TyS);
   6321    if (1) test_uqadd_h_h_h(TyH);
   6322    if (1) test_uqadd_b_b_b(TyB);
   6323    if (1) test_sqsub_d_d_d(TyD);
   6324    if (1) test_sqsub_s_s_s(TyS);
   6325    if (1) test_sqsub_h_h_h(TyH);
   6326    if (1) test_sqsub_b_b_b(TyB);
   6327    if (1) test_uqsub_d_d_d(TyD);
   6328    if (1) test_uqsub_s_s_s(TyS);
   6329    if (1) test_uqsub_h_h_h(TyH);
   6330    if (1) test_uqsub_b_b_b(TyB);
   6331 
   6332    // sqadd        2d,4s,2s,8h,4h,16b,8b
   6333    // uqadd        2d,4s,2s,8h,4h,16b,8b
   6334    // sqsub        2d,4s,2s,8h,4h,16b,8b
   6335    // uqsub        2d,4s,2s,8h,4h,16b,8b
   6336    if (1) test_sqadd_2d_2d_2d(TyD);
   6337    if (1) test_sqadd_4s_4s_4s(TyS);
   6338    if (1) test_sqadd_2s_2s_2s(TyS);
   6339    if (1) test_sqadd_8h_8h_8h(TyH);
   6340    if (1) test_sqadd_4h_4h_4h(TyH);
   6341    if (1) test_sqadd_16b_16b_16b(TyB);
   6342    if (1) test_sqadd_8b_8b_8b(TyB);
   6343    if (1) test_uqadd_2d_2d_2d(TyD);
   6344    if (1) test_uqadd_4s_4s_4s(TyS);
   6345    if (1) test_uqadd_2s_2s_2s(TyS);
   6346    if (1) test_uqadd_8h_8h_8h(TyH);
   6347    if (1) test_uqadd_4h_4h_4h(TyH);
   6348    if (1) test_uqadd_16b_16b_16b(TyB);
   6349    if (1) test_uqadd_8b_8b_8b(TyB);
   6350    if (1) test_sqsub_2d_2d_2d(TyD);
   6351    if (1) test_sqsub_4s_4s_4s(TyS);
   6352    if (1) test_sqsub_2s_2s_2s(TyS);
   6353    if (1) test_sqsub_8h_8h_8h(TyH);
   6354    if (1) test_sqsub_4h_4h_4h(TyH);
   6355    if (1) test_sqsub_16b_16b_16b(TyB);
   6356    if (1) test_sqsub_8b_8b_8b(TyB);
   6357    if (1) test_uqsub_2d_2d_2d(TyD);
   6358    if (1) test_uqsub_4s_4s_4s(TyS);
   6359    if (1) test_uqsub_2s_2s_2s(TyS);
   6360    if (1) test_uqsub_8h_8h_8h(TyH);
   6361    if (1) test_uqsub_4h_4h_4h(TyH);
   6362    if (1) test_uqsub_16b_16b_16b(TyB);
   6363    if (1) test_uqsub_8b_8b_8b(TyB);
   6364 
   6365    // sqdmlal      d_s_s[], s_h_h[]
   6366    // sqdmlsl      d_s_s[], s_h_h[]
   6367    // sqdmull      d_s_s[], s_h_h[]
   6368    if (1) test_sqdmlal_d_s_s0(TyS);
   6369    if (1) test_sqdmlal_d_s_s3(TyS);
   6370    if (1) test_sqdmlal_s_h_h1(TyH);
   6371    if (1) test_sqdmlal_s_h_h5(TyH);
   6372    if (1) test_sqdmlsl_d_s_s0(TyS);
   6373    if (1) test_sqdmlsl_d_s_s3(TyS);
   6374    if (1) test_sqdmlsl_s_h_h1(TyH);
   6375    if (1) test_sqdmlsl_s_h_h5(TyH);
   6376    if (1) test_sqdmull_d_s_s0(TyS);
   6377    if (1) test_sqdmull_d_s_s3(TyS);
   6378    if (1) test_sqdmull_s_h_h1(TyH);
   6379    if (1) test_sqdmull_s_h_h5(TyH);
   6380 
   6381    // sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6382    // sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6383    // sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   6384    if (1) test_sqdmlal_2d_2s_s0(TyS);
   6385    if (1) test_sqdmlal_2d_2s_s3(TyS);
   6386    if (1) test_sqdmlal2_2d_4s_s1(TyS);
   6387    if (1) test_sqdmlal2_2d_4s_s2(TyS);
   6388    if (1) test_sqdmlal_4s_4h_h0(TyH);
   6389    if (1) test_sqdmlal_4s_4h_h7(TyH);
   6390    if (1) test_sqdmlal2_4s_8h_h1(TyH);
   6391    if (1) test_sqdmlal2_4s_8h_h4(TyH);
   6392    if (1) test_sqdmlsl_2d_2s_s0(TyS);
   6393    if (1) test_sqdmlsl_2d_2s_s3(TyS);
   6394    if (1) test_sqdmlsl2_2d_4s_s1(TyS);
   6395    if (1) test_sqdmlsl2_2d_4s_s2(TyS);
   6396    if (1) test_sqdmlsl_4s_4h_h0(TyH);
   6397    if (1) test_sqdmlsl_4s_4h_h7(TyH);
   6398    if (1) test_sqdmlsl2_4s_8h_h1(TyH);
   6399    if (1) test_sqdmlsl2_4s_8h_h4(TyH);
   6400    if (1) test_sqdmull_2d_2s_s0(TyS);
   6401    if (1) test_sqdmull_2d_2s_s3(TyS);
   6402    if (1) test_sqdmull2_2d_4s_s1(TyS);
   6403    if (1) test_sqdmull2_2d_4s_s2(TyS);
   6404    if (1) test_sqdmull_4s_4h_h0(TyH);
   6405    if (1) test_sqdmull_4s_4h_h7(TyH);
   6406    if (1) test_sqdmull2_4s_8h_h1(TyH);
   6407    if (1) test_sqdmull2_4s_8h_h4(TyH);
   6408 
   6409    // sqdmlal      d_s_s, s_h_h
   6410    // sqdmlsl      d_s_s, s_h_h
   6411    // sqdmull      d_s_s, s_h_h
   6412    if (1) test_sqdmlal_d_s_s(TyS);
   6413    if (1) test_sqdmlal_s_h_h(TyH);
   6414    if (1) test_sqdmlsl_d_s_s(TyS);
   6415    if (1) test_sqdmlsl_s_h_h(TyH);
   6416    if (1) test_sqdmull_d_s_s(TyS);
   6417    if (1) test_sqdmull_s_h_h(TyH);
   6418 
   6419    // sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6420    // sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6421    // sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   6422    if (1) test_sqdmlal_2d_2s_2s(TyS);
   6423    if (1) test_sqdmlal2_2d_4s_4s(TyS);
   6424    if (1) test_sqdmlal_4s_4h_4h(TyH);
   6425    if (1) test_sqdmlal2_4s_8h_8h(TyH);
   6426    if (1) test_sqdmlsl_2d_2s_2s(TyS);
   6427    if (1) test_sqdmlsl2_2d_4s_4s(TyS);
   6428    if (1) test_sqdmlsl_4s_4h_4h(TyH);
   6429    if (1) test_sqdmlsl2_4s_8h_8h(TyH);
   6430    if (1) test_sqdmull_2d_2s_2s(TyS);
   6431    if (1) test_sqdmull2_2d_4s_4s(TyS);
   6432    if (1) test_sqdmull_4s_4h_4h(TyH);
   6433    if (1) test_sqdmull2_4s_8h_8h(TyH);
   6434 
   6435    // sqdmulh      s_s_s[], h_h_h[]
   6436    // sqrdmulh     s_s_s[], h_h_h[]
   6437    if (1) test_sqdmulh_s_s_s1(TyS);
   6438    if (1) test_sqdmulh_s_s_s3(TyS);
   6439    if (1) test_sqdmulh_h_h_h2(TyH);
   6440    if (1) test_sqdmulh_h_h_h7(TyH);
   6441    if (1) test_sqrdmulh_s_s_s1(TyS);
   6442    if (1) test_sqrdmulh_s_s_s3(TyS);
   6443    if (1) test_sqrdmulh_h_h_h2(TyH);
   6444    if (1) test_sqrdmulh_h_h_h7(TyH);
   6445 
   6446    // sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   6447    // sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   6448    if (1) test_sqdmulh_4s_4s_s1(TyS);
   6449    if (1) test_sqdmulh_4s_4s_s3(TyS);
   6450    if (1) test_sqdmulh_2s_2s_s1(TyS);
   6451    if (1) test_sqdmulh_2s_2s_s3(TyS);
   6452    if (1) test_sqdmulh_8h_8h_h2(TyH);
   6453    if (1) test_sqdmulh_8h_8h_h7(TyH);
   6454    if (1) test_sqdmulh_4h_4h_h2(TyH);
   6455    if (1) test_sqdmulh_4h_4h_h7(TyH);
   6456    if (1) test_sqrdmulh_4s_4s_s1(TyS);
   6457    if (1) test_sqrdmulh_4s_4s_s3(TyS);
   6458    if (1) test_sqrdmulh_2s_2s_s1(TyS);
   6459    if (1) test_sqrdmulh_2s_2s_s3(TyS);
   6460    if (1) test_sqrdmulh_8h_8h_h2(TyH);
   6461    if (1) test_sqrdmulh_8h_8h_h7(TyH);
   6462    if (1) test_sqrdmulh_4h_4h_h2(TyH);
   6463    if (1) test_sqrdmulh_4h_4h_h7(TyH);
   6464 
   6465    // sqdmulh      h,s
   6466    // sqrdmulh     h,s
   6467    if (1) test_sqdmulh_s_s_s(TyS);
   6468    if (1) test_sqdmulh_h_h_h(TyH);
   6469    if (1) test_sqrdmulh_s_s_s(TyS);
   6470    if (1) test_sqrdmulh_h_h_h(TyH);
   6471 
   6472    // sqdmulh      4s,2s,8h,4h
   6473    // sqrdmulh     4s,2s,8h,4h
   6474    if (1) test_sqdmulh_4s_4s_4s(TyS);
   6475    if (1) test_sqdmulh_2s_2s_2s(TyS);
   6476    if (1) test_sqdmulh_8h_8h_8h(TyH);
   6477    if (1) test_sqdmulh_4h_4h_4h(TyH);
   6478    if (1) test_sqrdmulh_4s_4s_4s(TyS);
   6479    if (1) test_sqrdmulh_2s_2s_2s(TyS);
   6480    if (1) test_sqrdmulh_8h_8h_8h(TyH);
   6481    if (1) test_sqrdmulh_4h_4h_4h(TyH);
   6482 
   6483    // sqshl (reg)  d,s,h,b
   6484    // uqshl (reg)  d,s,h,b
   6485    // sqrshl (reg) d,s,h,b
   6486    // uqrshl (reg) d,s,h,b
   6487    if (1) test_sqshl_d_d_d(TyD);
   6488    if (1) test_sqshl_s_s_s(TyS);
   6489    if (1) test_sqshl_h_h_h(TyH);
   6490    if (1) test_sqshl_b_b_b(TyB);
   6491    if (1) test_uqshl_d_d_d(TyD);
   6492    if (1) test_uqshl_s_s_s(TyS);
   6493    if (1) test_uqshl_h_h_h(TyH);
   6494    if (1) test_uqshl_b_b_b(TyB);
   6495    if (1) test_sqrshl_d_d_d(TyD);
   6496    if (1) test_sqrshl_s_s_s(TyS);
   6497    if (1) test_sqrshl_h_h_h(TyH);
   6498    if (1) test_sqrshl_b_b_b(TyB);
   6499    if (1) test_uqrshl_d_d_d(TyD);
   6500    if (1) test_uqrshl_s_s_s(TyS);
   6501    if (1) test_uqrshl_h_h_h(TyH);
   6502    if (1) test_uqrshl_b_b_b(TyB);
   6503 
   6504    // sqshl (reg)  2d,4s,2s,8h,4h,16b,8b
   6505    // uqshl (reg)  2d,4s,2s,8h,4h,16b,8b
   6506    // sqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
   6507    // uqrshl (reg) 2d,4s,2s,8h,4h,16b,8b
   6508    if (1) test_sqshl_2d_2d_2d(TyD);
   6509    if (1) test_sqshl_4s_4s_4s(TyS);
   6510    if (1) test_sqshl_2s_2s_2s(TyS);
   6511    if (1) test_sqshl_8h_8h_8h(TyH);
   6512    if (1) test_sqshl_4h_4h_4h(TyH);
   6513    if (1) test_sqshl_16b_16b_16b(TyB);
   6514    if (1) test_sqshl_8b_8b_8b(TyB);
   6515    if (1) test_uqshl_2d_2d_2d(TyD);
   6516    if (1) test_uqshl_4s_4s_4s(TyS);
   6517    if (1) test_uqshl_2s_2s_2s(TyS);
   6518    if (1) test_uqshl_8h_8h_8h(TyH);
   6519    if (1) test_uqshl_4h_4h_4h(TyH);
   6520    if (1) test_uqshl_16b_16b_16b(TyB);
   6521    if (1) test_uqshl_8b_8b_8b(TyB);
   6522    if (1) test_sqrshl_2d_2d_2d(TyD);
   6523    if (1) test_sqrshl_4s_4s_4s(TyS);
   6524    if (1) test_sqrshl_2s_2s_2s(TyS);
   6525    if (1) test_sqrshl_8h_8h_8h(TyH);
   6526    if (1) test_sqrshl_4h_4h_4h(TyH);
   6527    if (1) test_sqrshl_16b_16b_16b(TyB);
   6528    if (1) test_sqrshl_8b_8b_8b(TyB);
   6529    if (1) test_uqrshl_2d_2d_2d(TyD);
   6530    if (1) test_uqrshl_4s_4s_4s(TyS);
   6531    if (1) test_uqrshl_2s_2s_2s(TyS);
   6532    if (1) test_uqrshl_8h_8h_8h(TyH);
   6533    if (1) test_uqrshl_4h_4h_4h(TyH);
   6534    if (1) test_uqrshl_16b_16b_16b(TyB);
   6535    if (1) test_uqrshl_8b_8b_8b(TyB);
   6536 
   6537    // sqrshrn      s_d, h_s, b_h   #imm
   6538    // uqrshrn      s_d, h_s, b_h   #imm
   6539    // sqshrn       s_d, h_s, b_h   #imm
   6540    // uqshrn       s_d, h_s, b_h   #imm
   6541    // sqrshrun     s_d, h_s, b_h   #imm
   6542    // sqshrun      s_d, h_s, b_h   #imm
   6543    if (1) test_sqrshrn_s_d_1(TyD);
   6544    if (1) test_sqrshrn_s_d_17(TyD);
   6545    if (1) test_sqrshrn_s_d_32(TyD);
   6546    if (1) test_sqrshrn_h_s_1(TyS);
   6547    if (1) test_sqrshrn_h_s_9(TyS);
   6548    if (1) test_sqrshrn_h_s_16(TyS);
   6549    if (1) test_sqrshrn_b_h_1(TyH);
   6550    if (1) test_sqrshrn_b_h_4(TyH);
   6551    if (1) test_sqrshrn_b_h_8(TyH);
   6552    if (1) test_uqrshrn_s_d_1(TyD);
   6553    if (1) test_uqrshrn_s_d_17(TyD);
   6554    if (1) test_uqrshrn_s_d_32(TyD);
   6555    if (1) test_uqrshrn_h_s_1(TyS);
   6556    if (1) test_uqrshrn_h_s_9(TyS);
   6557    if (1) test_uqrshrn_h_s_16(TyS);
   6558    if (1) test_uqrshrn_b_h_1(TyH);
   6559    if (1) test_uqrshrn_b_h_4(TyH);
   6560    if (1) test_uqrshrn_b_h_8(TyH);
   6561    if (1) test_sqshrn_s_d_1(TyD);
   6562    if (1) test_sqshrn_s_d_17(TyD);
   6563    if (1) test_sqshrn_s_d_32(TyD);
   6564    if (1) test_sqshrn_h_s_1(TyS);
   6565    if (1) test_sqshrn_h_s_9(TyS);
   6566    if (1) test_sqshrn_h_s_16(TyS);
   6567    if (1) test_sqshrn_b_h_1(TyH);
   6568    if (1) test_sqshrn_b_h_4(TyH);
   6569    if (1) test_sqshrn_b_h_8(TyH);
   6570    if (1) test_uqshrn_s_d_1(TyD);
   6571    if (1) test_uqshrn_s_d_17(TyD);
   6572    if (1) test_uqshrn_s_d_32(TyD);
   6573    if (1) test_uqshrn_h_s_1(TyS);
   6574    if (1) test_uqshrn_h_s_9(TyS);
   6575    if (1) test_uqshrn_h_s_16(TyS);
   6576    if (1) test_uqshrn_b_h_1(TyH);
   6577    if (1) test_uqshrn_b_h_4(TyH);
   6578    if (1) test_uqshrn_b_h_8(TyH);
   6579    if (1) test_sqrshrun_s_d_1(TyD);
   6580    if (1) test_sqrshrun_s_d_17(TyD);
   6581    if (1) test_sqrshrun_s_d_32(TyD);
   6582    if (1) test_sqrshrun_h_s_1(TyS);
   6583    if (1) test_sqrshrun_h_s_9(TyS);
   6584    if (1) test_sqrshrun_h_s_16(TyS);
   6585    if (1) test_sqrshrun_b_h_1(TyH);
   6586    if (1) test_sqrshrun_b_h_4(TyH);
   6587    if (1) test_sqrshrun_b_h_8(TyH);
   6588    if (1) test_sqshrun_s_d_1(TyD);
   6589    if (1) test_sqshrun_s_d_17(TyD);
   6590    if (1) test_sqshrun_s_d_32(TyD);
   6591    if (1) test_sqshrun_h_s_1(TyS);
   6592    if (1) test_sqshrun_h_s_9(TyS);
   6593    if (1) test_sqshrun_h_s_16(TyS);
   6594    if (1) test_sqshrun_b_h_1(TyH);
   6595    if (1) test_sqshrun_b_h_4(TyH);
   6596    if (1) test_sqshrun_b_h_8(TyH);
   6597 
   6598    // sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6599    // uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6600    // sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6601    // uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6602    // sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6603    // sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   6604    if (1) test_sqrshrn_2s_2d_1(TyD);
   6605    if (1) test_sqrshrn_2s_2d_17(TyD);
   6606    if (1) test_sqrshrn_2s_2d_32(TyD);
   6607    if (1) test_sqrshrn2_4s_2d_1(TyD);
   6608    if (1) test_sqrshrn2_4s_2d_17(TyD);
   6609    if (1) test_sqrshrn2_4s_2d_32(TyD);
   6610    if (1) test_sqrshrn_4h_4s_1(TyS);
   6611    if (1) test_sqrshrn_4h_4s_9(TyS);
   6612    if (1) test_sqrshrn_4h_4s_16(TyS);
   6613    if (1) test_sqrshrn2_8h_4s_1(TyS);
   6614    if (1) test_sqrshrn2_8h_4s_9(TyS);
   6615    if (1) test_sqrshrn2_8h_4s_16(TyS);
   6616    if (1) test_sqrshrn_8b_8h_1(TyH);
   6617    if (1) test_sqrshrn_8b_8h_4(TyH);
   6618    if (1) test_sqrshrn_8b_8h_8(TyH);
   6619    if (1) test_sqrshrn2_16b_8h_1(TyH);
   6620    if (1) test_sqrshrn2_16b_8h_4(TyH);
   6621    if (1) test_sqrshrn2_16b_8h_8(TyH);
   6622    if (1) test_uqrshrn_2s_2d_1(TyD);
   6623    if (1) test_uqrshrn_2s_2d_17(TyD);
   6624    if (1) test_uqrshrn_2s_2d_32(TyD);
   6625    if (1) test_uqrshrn2_4s_2d_1(TyD);
   6626    if (1) test_uqrshrn2_4s_2d_17(TyD);
   6627    if (1) test_uqrshrn2_4s_2d_32(TyD);
   6628    if (1) test_uqrshrn_4h_4s_1(TyS);
   6629    if (1) test_uqrshrn_4h_4s_9(TyS);
   6630    if (1) test_uqrshrn_4h_4s_16(TyS);
   6631    if (1) test_uqrshrn2_8h_4s_1(TyS);
   6632    if (1) test_uqrshrn2_8h_4s_9(TyS);
   6633    if (1) test_uqrshrn2_8h_4s_16(TyS);
   6634    if (1) test_uqrshrn_8b_8h_1(TyH);
   6635    if (1) test_uqrshrn_8b_8h_4(TyH);
   6636    if (1) test_uqrshrn_8b_8h_8(TyH);
   6637    if (1) test_uqrshrn2_16b_8h_1(TyH);
   6638    if (1) test_uqrshrn2_16b_8h_4(TyH);
   6639    if (1) test_uqrshrn2_16b_8h_8(TyH);
   6640    if (1) test_sqshrn_2s_2d_1(TyD);
   6641    if (1) test_sqshrn_2s_2d_17(TyD);
   6642    if (1) test_sqshrn_2s_2d_32(TyD);
   6643    if (1) test_sqshrn2_4s_2d_1(TyD);
   6644    if (1) test_sqshrn2_4s_2d_17(TyD);
   6645    if (1) test_sqshrn2_4s_2d_32(TyD);
   6646    if (1) test_sqshrn_4h_4s_1(TyS);
   6647    if (1) test_sqshrn_4h_4s_9(TyS);
   6648    if (1) test_sqshrn_4h_4s_16(TyS);
   6649    if (1) test_sqshrn2_8h_4s_1(TyS);
   6650    if (1) test_sqshrn2_8h_4s_9(TyS);
   6651    if (1) test_sqshrn2_8h_4s_16(TyS);
   6652    if (1) test_sqshrn_8b_8h_1(TyH);
   6653    if (1) test_sqshrn_8b_8h_4(TyH);
   6654    if (1) test_sqshrn_8b_8h_8(TyH);
   6655    if (1) test_sqshrn2_16b_8h_1(TyH);
   6656    if (1) test_sqshrn2_16b_8h_4(TyH);
   6657    if (1) test_sqshrn2_16b_8h_8(TyH);
   6658    if (1) test_uqshrn_2s_2d_1(TyD);
   6659    if (1) test_uqshrn_2s_2d_17(TyD);
   6660    if (1) test_uqshrn_2s_2d_32(TyD);
   6661    if (1) test_uqshrn2_4s_2d_1(TyD);
   6662    if (1) test_uqshrn2_4s_2d_17(TyD);
   6663    if (1) test_uqshrn2_4s_2d_32(TyD);
   6664    if (1) test_uqshrn_4h_4s_1(TyS);
   6665    if (1) test_uqshrn_4h_4s_9(TyS);
   6666    if (1) test_uqshrn_4h_4s_16(TyS);
   6667    if (1) test_uqshrn2_8h_4s_1(TyS);
   6668    if (1) test_uqshrn2_8h_4s_9(TyS);
   6669    if (1) test_uqshrn2_8h_4s_16(TyS);
   6670    if (1) test_uqshrn_8b_8h_1(TyH);
   6671    if (1) test_uqshrn_8b_8h_4(TyH);
   6672    if (1) test_uqshrn_8b_8h_8(TyH);
   6673    if (1) test_uqshrn2_16b_8h_1(TyH);
   6674    if (1) test_uqshrn2_16b_8h_4(TyH);
   6675    if (1) test_uqshrn2_16b_8h_8(TyH);
   6676    if (1) test_sqrshrun_2s_2d_1(TyD);
   6677    if (1) test_sqrshrun_2s_2d_17(TyD);
   6678    if (1) test_sqrshrun_2s_2d_32(TyD);
   6679    if (1) test_sqrshrun2_4s_2d_1(TyD);
   6680    if (1) test_sqrshrun2_4s_2d_17(TyD);
   6681    if (1) test_sqrshrun2_4s_2d_32(TyD);
   6682    if (1) test_sqrshrun_4h_4s_1(TyS);
   6683    if (1) test_sqrshrun_4h_4s_9(TyS);
   6684    if (1) test_sqrshrun_4h_4s_16(TyS);
   6685    if (1) test_sqrshrun2_8h_4s_1(TyS);
   6686    if (1) test_sqrshrun2_8h_4s_9(TyS);
   6687    if (1) test_sqrshrun2_8h_4s_16(TyS);
   6688    if (1) test_sqrshrun_8b_8h_1(TyH);
   6689    if (1) test_sqrshrun_8b_8h_4(TyH);
   6690    if (1) test_sqrshrun_8b_8h_8(TyH);
   6691    if (1) test_sqrshrun2_16b_8h_1(TyH);
   6692    if (1) test_sqrshrun2_16b_8h_4(TyH);
   6693    if (1) test_sqrshrun2_16b_8h_8(TyH);
   6694    if (1) test_sqshrun_2s_2d_1(TyD);
   6695    if (1) test_sqshrun_2s_2d_17(TyD);
   6696    if (1) test_sqshrun_2s_2d_32(TyD);
   6697    if (1) test_sqshrun2_4s_2d_1(TyD);
   6698    if (1) test_sqshrun2_4s_2d_17(TyD);
   6699    if (1) test_sqshrun2_4s_2d_32(TyD);
   6700    if (1) test_sqshrun_4h_4s_1(TyS);
   6701    if (1) test_sqshrun_4h_4s_9(TyS);
   6702    if (1) test_sqshrun_4h_4s_16(TyS);
   6703    if (1) test_sqshrun2_8h_4s_1(TyS);
   6704    if (1) test_sqshrun2_8h_4s_9(TyS);
   6705    if (1) test_sqshrun2_8h_4s_16(TyS);
   6706    if (1) test_sqshrun_8b_8h_1(TyH);
   6707    if (1) test_sqshrun_8b_8h_4(TyH);
   6708    if (1) test_sqshrun_8b_8h_8(TyH);
   6709    if (1) test_sqshrun2_16b_8h_1(TyH);
   6710    if (1) test_sqshrun2_16b_8h_4(TyH);
   6711    if (1) test_sqshrun2_16b_8h_8(TyH);
   6712 
   6713    // sqshl (imm)  d,s,h,b   _#imm
   6714    // uqshl (imm)  d,s,h,b   _#imm
   6715    // sqshlu (imm) d,s,h,b   _#imm
   6716    if (1) test_sqshl_d_d_0(TyD);
   6717    if (1) test_sqshl_d_d_32(TyD);
   6718    if (1) test_sqshl_d_d_63(TyD);
   6719    if (1) test_sqshl_s_s_0(TyS);
   6720    if (1) test_sqshl_s_s_16(TyS);
   6721    if (1) test_sqshl_s_s_31(TyS);
   6722    if (1) test_sqshl_h_h_0(TyH);
   6723    if (1) test_sqshl_h_h_8(TyH);
   6724    if (1) test_sqshl_h_h_15(TyH);
   6725    if (1) test_sqshl_b_b_0(TyB);
   6726    if (1) test_sqshl_b_b_1(TyB);
   6727    if (1) test_sqshl_b_b_4(TyB);
   6728    if (1) test_sqshl_b_b_6(TyB);
   6729    if (1) test_sqshl_b_b_7(TyB);
   6730    if (1) test_uqshl_d_d_0(TyD);
   6731    if (1) test_uqshl_d_d_32(TyD);
   6732    if (1) test_uqshl_d_d_63(TyD);
   6733    if (1) test_uqshl_s_s_0(TyS);
   6734    if (1) test_uqshl_s_s_16(TyS);
   6735    if (1) test_uqshl_s_s_31(TyS);
   6736    if (1) test_uqshl_h_h_0(TyH);
   6737    if (1) test_uqshl_h_h_8(TyH);
   6738    if (1) test_uqshl_h_h_15(TyH);
   6739    if (1) test_uqshl_b_b_0(TyB);
   6740    if (1) test_uqshl_b_b_1(TyB);
   6741    if (1) test_uqshl_b_b_4(TyB);
   6742    if (1) test_uqshl_b_b_6(TyB);
   6743    if (1) test_uqshl_b_b_7(TyB);
   6744    if (1) test_sqshlu_d_d_0(TyD);
   6745    if (1) test_sqshlu_d_d_32(TyD);
   6746    if (1) test_sqshlu_d_d_63(TyD);
   6747    if (1) test_sqshlu_s_s_0(TyS);
   6748    if (1) test_sqshlu_s_s_16(TyS);
   6749    if (1) test_sqshlu_s_s_31(TyS);
   6750    if (1) test_sqshlu_h_h_0(TyH);
   6751    if (1) test_sqshlu_h_h_8(TyH);
   6752    if (1) test_sqshlu_h_h_15(TyH);
   6753    if (1) test_sqshlu_b_b_0(TyB);
   6754    if (1) test_sqshlu_b_b_1(TyB);
   6755    if (1) test_sqshlu_b_b_2(TyB);
   6756    if (1) test_sqshlu_b_b_3(TyB);
   6757    if (1) test_sqshlu_b_b_4(TyB);
   6758    if (1) test_sqshlu_b_b_5(TyB);
   6759    if (1) test_sqshlu_b_b_6(TyB);
   6760    if (1) test_sqshlu_b_b_7(TyB);
   6761 
   6762    // sqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
   6763    // uqshl (imm)  2d,4s,2s,8h,4h,16b,8b   _#imm
   6764    // sqshlu (imm) 2d,4s,2s,8h,4h,16b,8b   _#imm
   6765    if (1) test_sqshl_2d_2d_0(TyD);
   6766    if (1) test_sqshl_2d_2d_32(TyD);
   6767    if (1) test_sqshl_2d_2d_63(TyD);
   6768    if (1) test_sqshl_4s_4s_0(TyS);
   6769    if (1) test_sqshl_4s_4s_16(TyS);
   6770    if (1) test_sqshl_4s_4s_31(TyS);
   6771    if (1) test_sqshl_2s_2s_0(TyS);
   6772    if (1) test_sqshl_2s_2s_16(TyS);
   6773    if (1) test_sqshl_2s_2s_31(TyS);
   6774    if (1) test_sqshl_8h_8h_0(TyH);
   6775    if (1) test_sqshl_8h_8h_8(TyH);
   6776    if (1) test_sqshl_8h_8h_15(TyH);
   6777    if (1) test_sqshl_4h_4h_0(TyH);
   6778    if (1) test_sqshl_4h_4h_8(TyH);
   6779    if (1) test_sqshl_4h_4h_15(TyH);
   6780    if (1) test_sqshl_16b_16b_0(TyB);
   6781    if (1) test_sqshl_16b_16b_3(TyB);
   6782    if (1) test_sqshl_16b_16b_7(TyB);
   6783    if (1) test_sqshl_8b_8b_0(TyB);
   6784    if (1) test_sqshl_8b_8b_3(TyB);
   6785    if (1) test_sqshl_8b_8b_7(TyB);
   6786    if (1) test_uqshl_2d_2d_0(TyD);
   6787    if (1) test_uqshl_2d_2d_32(TyD);
   6788    if (1) test_uqshl_2d_2d_63(TyD);
   6789    if (1) test_uqshl_4s_4s_0(TyS);
   6790    if (1) test_uqshl_4s_4s_16(TyS);
   6791    if (1) test_uqshl_4s_4s_31(TyS);
   6792    if (1) test_uqshl_2s_2s_0(TyS);
   6793    if (1) test_uqshl_2s_2s_16(TyS);
   6794    if (1) test_uqshl_2s_2s_31(TyS);
   6795    if (1) test_uqshl_8h_8h_0(TyH);
   6796    if (1) test_uqshl_8h_8h_8(TyH);
   6797    if (1) test_uqshl_8h_8h_15(TyH);
   6798    if (1) test_uqshl_4h_4h_0(TyH);
   6799    if (1) test_uqshl_4h_4h_8(TyH);
   6800    if (1) test_uqshl_4h_4h_15(TyH);
   6801    if (1) test_uqshl_16b_16b_0(TyB);
   6802    if (1) test_uqshl_16b_16b_3(TyB);
   6803    if (1) test_uqshl_16b_16b_7(TyB);
   6804    if (1) test_uqshl_8b_8b_0(TyB);
   6805    if (1) test_uqshl_8b_8b_3(TyB);
   6806    if (1) test_uqshl_8b_8b_7(TyB);
   6807    if (1) test_sqshlu_2d_2d_0(TyD);
   6808    if (1) test_sqshlu_2d_2d_32(TyD);
   6809    if (1) test_sqshlu_2d_2d_63(TyD);
   6810    if (1) test_sqshlu_4s_4s_0(TyS);
   6811    if (1) test_sqshlu_4s_4s_16(TyS);
   6812    if (1) test_sqshlu_4s_4s_31(TyS);
   6813    if (1) test_sqshlu_2s_2s_0(TyS);
   6814    if (1) test_sqshlu_2s_2s_16(TyS);
   6815    if (1) test_sqshlu_2s_2s_31(TyS);
   6816    if (1) test_sqshlu_8h_8h_0(TyH);
   6817    if (1) test_sqshlu_8h_8h_8(TyH);
   6818    if (1) test_sqshlu_8h_8h_15(TyH);
   6819    if (1) test_sqshlu_4h_4h_0(TyH);
   6820    if (1) test_sqshlu_4h_4h_8(TyH);
   6821    if (1) test_sqshlu_4h_4h_15(TyH);
   6822    if (1) test_sqshlu_16b_16b_0(TyB);
   6823    if (1) test_sqshlu_16b_16b_3(TyB);
   6824    if (1) test_sqshlu_16b_16b_7(TyB);
   6825    if (1) test_sqshlu_8b_8b_0(TyB);
   6826    if (1) test_sqshlu_8b_8b_3(TyB);
   6827    if (1) test_sqshlu_8b_8b_7(TyB);
   6828 
   6829    // sqxtn        s_d,h_s,b_h
   6830    // uqxtn        s_d,h_s,b_h
   6831    // sqxtun       s_d,h_s,b_h
   6832    if (1) test_sqxtn_s_d(TyD);
   6833    if (1) test_sqxtn_h_s(TyS);
   6834    if (1) test_sqxtn_b_h(TyH);
   6835    if (1) test_uqxtn_s_d(TyD);
   6836    if (1) test_uqxtn_h_s(TyS);
   6837    if (1) test_uqxtn_b_h(TyH);
   6838    if (1) test_sqxtun_s_d(TyD);
   6839    if (1) test_sqxtun_h_s(TyS);
   6840    if (1) test_sqxtun_b_h(TyH);
   6841 
   6842    // sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6843    // uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6844    // sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   6845    if (1) test_sqxtn_2s_2d(TyD);
   6846    if (1) test_sqxtn2_4s_2d(TyD);
   6847    if (1) test_sqxtn_4h_4s(TyS);
   6848    if (1) test_sqxtn2_8h_4s(TyS);
   6849    if (1) test_sqxtn_8b_8h(TyH);
   6850    if (1) test_sqxtn2_16b_8h(TyH);
   6851    if (1) test_uqxtn_2s_2d(TyD);
   6852    if (1) test_uqxtn2_4s_2d(TyD);
   6853    if (1) test_uqxtn_4h_4s(TyS);
   6854    if (1) test_uqxtn2_8h_4s(TyS);
   6855    if (1) test_uqxtn_8b_8h(TyH);
   6856    if (1) test_uqxtn2_16b_8h(TyH);
   6857    if (1) test_sqxtun_2s_2d(TyD);
   6858    if (1) test_sqxtun2_4s_2d(TyD);
   6859    if (1) test_sqxtun_4h_4s(TyS);
   6860    if (1) test_sqxtun2_8h_4s(TyS);
   6861    if (1) test_sqxtun_8b_8h(TyH);
   6862    if (1) test_sqxtun2_16b_8h(TyH);
   6863 
   6864    // srhadd       4s,2s,8h,4h,16b,8b
   6865    // urhadd       4s,2s,8h,4h,16b,8b
   6866    if (1) test_srhadd_4s_4s_4s(TyS);
   6867    if (1) test_srhadd_2s_2s_2s(TyS);
   6868    if (1) test_srhadd_8h_8h_8h(TyH);
   6869    if (1) test_srhadd_4h_4h_4h(TyH);
   6870    if (1) test_srhadd_16b_16b_16b(TyB);
   6871    if (1) test_srhadd_8b_8b_8b(TyB);
   6872    if (1) test_urhadd_4s_4s_4s(TyS);
   6873    if (1) test_urhadd_2s_2s_2s(TyS);
   6874    if (1) test_urhadd_8h_8h_8h(TyH);
   6875    if (1) test_urhadd_4h_4h_4h(TyH);
   6876    if (1) test_urhadd_16b_16b_16b(TyB);
   6877    if (1) test_urhadd_8b_8b_8b(TyB);
   6878 
   6879    // sshl (reg)   d
   6880    // ushl (reg)   d
   6881    if (1) test_sshl_d_d_d(TyD);
   6882    if (1) test_ushl_d_d_d(TyD);
   6883 
   6884    // sshl (reg)   2d,4s,2s,8h,4h,16b,8b
   6885    // ushl (reg)   2d,4s,2s,8h,4h,16b,8b
   6886    if (1) test_sshl_2d_2d_2d(TyD);
   6887    if (1) test_sshl_4s_4s_4s(TyS);
   6888    if (1) test_sshl_2s_2s_2s(TyS);
   6889    if (1) test_sshl_8h_8h_8h(TyH);
   6890    if (1) test_sshl_4h_4h_4h(TyH);
   6891    if (1) test_sshl_16b_16b_16b(TyB);
   6892    if (1) test_sshl_8b_8b_8b(TyB);
   6893    if (1) test_ushl_2d_2d_2d(TyD);
   6894    if (1) test_ushl_4s_4s_4s(TyS);
   6895    if (1) test_ushl_2s_2s_2s(TyS);
   6896    if (1) test_ushl_8h_8h_8h(TyH);
   6897    if (1) test_ushl_4h_4h_4h(TyH);
   6898    if (1) test_ushl_16b_16b_16b(TyB);
   6899    if (1) test_ushl_8b_8b_8b(TyB);
   6900 
   6901    // shl  (imm)   d
   6902    // sshr (imm)   d
   6903    // ushr (imm)   d
   6904    if (1) test_shl_d_d_0(TyD);
   6905    if (1) test_shl_d_d_32(TyD);
   6906    if (1) test_shl_d_d_63(TyD);
   6907    if (1) test_sshr_d_d_1(TyD);
   6908    if (1) test_sshr_d_d_32(TyD);
   6909    if (1) test_sshr_d_d_64(TyD);
   6910    if (1) test_ushr_d_d_1(TyD);
   6911    if (1) test_ushr_d_d_32(TyD);
   6912    if (1) test_ushr_d_d_64(TyD);
   6913 
   6914    // shl  (imm)   16b,8b,8h,4h,4s,2s,2d
   6915    // sshr (imm)   2d,4s,2s,8h,4h,16b,8b
   6916    // ushr (imm)   2d,4s,2s,8h,4h,16b,8b
   6917    if (1) test_shl_2d_2d_0(TyD);
   6918    if (1) test_shl_2d_2d_13(TyD);
   6919    if (1) test_shl_2d_2d_63(TyD);
   6920    if (1) test_shl_4s_4s_0(TyS);
   6921    if (1) test_shl_4s_4s_13(TyS);
   6922    if (1) test_shl_4s_4s_31(TyS);
   6923    if (1) test_shl_2s_2s_0(TyS);
   6924    if (1) test_shl_2s_2s_13(TyS);
   6925    if (1) test_shl_2s_2s_31(TyS);
   6926    if (1) test_shl_8h_8h_0(TyH);
   6927    if (1) test_shl_8h_8h_13(TyH);
   6928    if (1) test_shl_8h_8h_15(TyH);
   6929    if (1) test_shl_4h_4h_0(TyH);
   6930    if (1) test_shl_4h_4h_13(TyH);
   6931    if (1) test_shl_4h_4h_15(TyH);
   6932    if (1) test_shl_16b_16b_0(TyB);
   6933    if (1) test_shl_16b_16b_7(TyB);
   6934    if (1) test_shl_8b_8b_0(TyB);
   6935    if (1) test_shl_8b_8b_7(TyB);
   6936    if (1) test_sshr_2d_2d_1(TyD);
   6937    if (1) test_sshr_2d_2d_13(TyD);
   6938    if (1) test_sshr_2d_2d_64(TyD);
   6939    if (1) test_sshr_4s_4s_1(TyS);
   6940    if (1) test_sshr_4s_4s_13(TyS);
   6941    if (1) test_sshr_4s_4s_32(TyS);
   6942    if (1) test_sshr_2s_2s_1(TyS);
   6943    if (1) test_sshr_2s_2s_13(TyS);
   6944    if (1) test_sshr_2s_2s_32(TyS);
   6945    if (1) test_sshr_8h_8h_1(TyH);
   6946    if (1) test_sshr_8h_8h_13(TyH);
   6947    if (1) test_sshr_8h_8h_16(TyH);
   6948    if (1) test_sshr_4h_4h_1(TyH);
   6949    if (1) test_sshr_4h_4h_13(TyH);
   6950    if (1) test_sshr_4h_4h_16(TyH);
   6951    if (1) test_sshr_16b_16b_1(TyB);
   6952    if (1) test_sshr_16b_16b_8(TyB);
   6953    if (1) test_sshr_8b_8b_1(TyB);
   6954    if (1) test_sshr_8b_8b_8(TyB);
   6955    if (1) test_ushr_2d_2d_1(TyD);
   6956    if (1) test_ushr_2d_2d_13(TyD);
   6957    if (1) test_ushr_2d_2d_64(TyD);
   6958    if (1) test_ushr_4s_4s_1(TyS);
   6959    if (1) test_ushr_4s_4s_13(TyS);
   6960    if (1) test_ushr_4s_4s_32(TyS);
   6961    if (1) test_ushr_2s_2s_1(TyS);
   6962    if (1) test_ushr_2s_2s_13(TyS);
   6963    if (1) test_ushr_2s_2s_32(TyS);
   6964    if (1) test_ushr_8h_8h_1(TyH);
   6965    if (1) test_ushr_8h_8h_13(TyH);
   6966    if (1) test_ushr_8h_8h_16(TyH);
   6967    if (1) test_ushr_4h_4h_1(TyH);
   6968    if (1) test_ushr_4h_4h_13(TyH);
   6969    if (1) test_ushr_4h_4h_16(TyH);
   6970    if (1) test_ushr_16b_16b_1(TyB);
   6971    if (1) test_ushr_16b_16b_8(TyB);
   6972    if (1) test_ushr_8b_8b_1(TyB);
   6973    if (1) test_ushr_8b_8b_8(TyB);
   6974 
   6975    // ssra (imm)   d
   6976    // usra (imm)   d
   6977    if (1) test_ssra_d_d_1(TyD);
   6978    if (1) test_ssra_d_d_32(TyD);
   6979    if (1) test_ssra_d_d_64(TyD);
   6980    if (1) test_usra_d_d_1(TyD);
   6981    if (1) test_usra_d_d_32(TyD);
   6982    if (1) test_usra_d_d_64(TyD);
   6983 
   6984    // ssra (imm)   2d,4s,2s,8h,4h,16b,8b
   6985    // usra (imm)   2d,4s,2s,8h,4h,16b,8b
   6986    if (1) test_ssra_2d_2d_1(TyD);
   6987    if (1) test_ssra_2d_2d_32(TyD);
   6988    if (1) test_ssra_2d_2d_64(TyD);
   6989    if (1) test_ssra_4s_4s_1(TyS);
   6990    if (1) test_ssra_4s_4s_16(TyS);
   6991    if (1) test_ssra_4s_4s_32(TyS);
   6992    if (1) test_ssra_2s_2s_1(TyS);
   6993    if (1) test_ssra_2s_2s_16(TyS);
   6994    if (1) test_ssra_2s_2s_32(TyS);
   6995    if (1) test_ssra_8h_8h_1(TyH);
   6996    if (1) test_ssra_8h_8h_8(TyH);
   6997    if (1) test_ssra_8h_8h_16(TyH);
   6998    if (1) test_ssra_4h_4h_1(TyH);
   6999    if (1) test_ssra_4h_4h_8(TyH);
   7000    if (1) test_ssra_4h_4h_16(TyH);
   7001    if (1) test_ssra_16b_16b_1(TyB);
   7002    if (1) test_ssra_16b_16b_3(TyB);
   7003    if (1) test_ssra_16b_16b_8(TyB);
   7004    if (1) test_ssra_8b_8b_1(TyB);
   7005    if (1) test_ssra_8b_8b_3(TyB);
   7006    if (1) test_ssra_8b_8b_8(TyB);
   7007    if (1) test_usra_2d_2d_1(TyD);
   7008    if (1) test_usra_2d_2d_32(TyD);
   7009    if (1) test_usra_2d_2d_64(TyD);
   7010    if (1) test_usra_4s_4s_1(TyS);
   7011    if (1) test_usra_4s_4s_16(TyS);
   7012    if (1) test_usra_4s_4s_32(TyS);
   7013    if (1) test_usra_2s_2s_1(TyS);
   7014    if (1) test_usra_2s_2s_16(TyS);
   7015    if (1) test_usra_2s_2s_32(TyS);
   7016    if (1) test_usra_8h_8h_1(TyH);
   7017    if (1) test_usra_8h_8h_8(TyH);
   7018    if (1) test_usra_8h_8h_16(TyH);
   7019    if (1) test_usra_4h_4h_1(TyH);
   7020    if (1) test_usra_4h_4h_8(TyH);
   7021    if (1) test_usra_4h_4h_16(TyH);
   7022    if (1) test_usra_16b_16b_1(TyB);
   7023    if (1) test_usra_16b_16b_3(TyB);
   7024    if (1) test_usra_16b_16b_8(TyB);
   7025    if (1) test_usra_8b_8b_1(TyB);
   7026    if (1) test_usra_8b_8b_3(TyB);
   7027    if (1) test_usra_8b_8b_8(TyB);
   7028 
   7029    // srshl (reg)  d
   7030    // urshl (reg)  d
   7031    if (1) test_srshl_d_d_d(TyD);
   7032    if (1) test_urshl_d_d_d(TyD);
   7033 
   7034    // srshl (reg)  2d,4s,2s,8h,4h,16b,8b
   7035    // urshl (reg)  2d,4s,2s,8h,4h,16b,8b
   7036    if (1) test_srshl_2d_2d_2d(TyD);
   7037    if (1) test_srshl_4s_4s_4s(TyS);
   7038    if (1) test_srshl_2s_2s_2s(TyS);
   7039    if (1) test_srshl_8h_8h_8h(TyH);
   7040    if (1) test_srshl_4h_4h_4h(TyH);
   7041    if (1) test_srshl_16b_16b_16b(TyB);
   7042    if (1) test_srshl_8b_8b_8b(TyB);
   7043    if (1) test_urshl_2d_2d_2d(TyD);
   7044    if (1) test_urshl_4s_4s_4s(TyS);
   7045    if (1) test_urshl_2s_2s_2s(TyS);
   7046    if (1) test_urshl_8h_8h_8h(TyH);
   7047    if (1) test_urshl_4h_4h_4h(TyH);
   7048    if (1) test_urshl_16b_16b_16b(TyB);
   7049    if (1) test_urshl_8b_8b_8b(TyB);
   7050 
   7051    // srshr (imm)  d
   7052    // urshr (imm)  d
   7053    if (1) test_srshr_d_d_1(TyD);
   7054    if (1) test_srshr_d_d_32(TyD);
   7055    if (1) test_srshr_d_d_64(TyD);
   7056    if (1) test_urshr_d_d_1(TyD);
   7057    if (1) test_urshr_d_d_32(TyD);
   7058    if (1) test_urshr_d_d_64(TyD);
   7059 
   7060    // srshr (imm)  2d,4s,2s,8h,4h,16b,8b
   7061    // urshr (imm)  2d,4s,2s,8h,4h,16b,8b
   7062    if (1) test_srshr_2d_2d_1(TyD);
   7063    if (1) test_srshr_2d_2d_32(TyD);
   7064    if (1) test_srshr_2d_2d_64(TyD);
   7065    if (1) test_srshr_4s_4s_1(TyS);
   7066    if (1) test_srshr_4s_4s_16(TyS);
   7067    if (1) test_srshr_4s_4s_32(TyS);
   7068    if (1) test_srshr_2s_2s_1(TyS);
   7069    if (1) test_srshr_2s_2s_16(TyS);
   7070    if (1) test_srshr_2s_2s_32(TyS);
   7071    if (1) test_srshr_8h_8h_1(TyH);
   7072    if (1) test_srshr_8h_8h_8(TyH);
   7073    if (1) test_srshr_8h_8h_16(TyH);
   7074    if (1) test_srshr_4h_4h_1(TyH);
   7075    if (1) test_srshr_4h_4h_8(TyH);
   7076    if (1) test_srshr_4h_4h_16(TyH);
   7077    if (1) test_srshr_16b_16b_1(TyB);
   7078    if (1) test_srshr_16b_16b_3(TyB);
   7079    if (1) test_srshr_16b_16b_8(TyB);
   7080    if (1) test_srshr_8b_8b_1(TyB);
   7081    if (1) test_srshr_8b_8b_3(TyB);
   7082    if (1) test_srshr_8b_8b_8(TyB);
   7083    if (1) test_urshr_2d_2d_1(TyD);
   7084    if (1) test_urshr_2d_2d_32(TyD);
   7085    if (1) test_urshr_2d_2d_64(TyD);
   7086    if (1) test_urshr_4s_4s_1(TyS);
   7087    if (1) test_urshr_4s_4s_16(TyS);
   7088    if (1) test_urshr_4s_4s_32(TyS);
   7089    if (1) test_urshr_2s_2s_1(TyS);
   7090    if (1) test_urshr_2s_2s_16(TyS);
   7091    if (1) test_urshr_2s_2s_32(TyS);
   7092    if (1) test_urshr_8h_8h_1(TyH);
   7093    if (1) test_urshr_8h_8h_8(TyH);
   7094    if (1) test_urshr_8h_8h_16(TyH);
   7095    if (1) test_urshr_4h_4h_1(TyH);
   7096    if (1) test_urshr_4h_4h_8(TyH);
   7097    if (1) test_urshr_4h_4h_16(TyH);
   7098    if (1) test_urshr_16b_16b_1(TyB);
   7099    if (1) test_urshr_16b_16b_3(TyB);
   7100    if (1) test_urshr_16b_16b_8(TyB);
   7101    if (1) test_urshr_8b_8b_1(TyB);
   7102    if (1) test_urshr_8b_8b_3(TyB);
   7103    if (1) test_urshr_8b_8b_8(TyB);
   7104 
   7105    // srsra (imm)  d
   7106    // ursra (imm)  d
   7107    if (1) test_srsra_d_d_1(TyD);
   7108    if (1) test_srsra_d_d_32(TyD);
   7109    if (1) test_srsra_d_d_64(TyD);
   7110    if (1) test_ursra_d_d_1(TyD);
   7111    if (1) test_ursra_d_d_32(TyD);
   7112    if (1) test_ursra_d_d_64(TyD);
   7113 
   7114    // srsra (imm)  2d,4s,2s,8h,4h,16b,8b
   7115    // ursra (imm)  2d,4s,2s,8h,4h,16b,8b
   7116    if (1) test_srsra_2d_2d_1(TyD);
   7117    if (1) test_srsra_2d_2d_32(TyD);
   7118    if (1) test_srsra_2d_2d_64(TyD);
   7119    if (1) test_srsra_4s_4s_1(TyS);
   7120    if (1) test_srsra_4s_4s_16(TyS);
   7121    if (1) test_srsra_4s_4s_32(TyS);
   7122    if (1) test_srsra_2s_2s_1(TyS);
   7123    if (1) test_srsra_2s_2s_16(TyS);
   7124    if (1) test_srsra_2s_2s_32(TyS);
   7125    if (1) test_srsra_8h_8h_1(TyH);
   7126    if (1) test_srsra_8h_8h_8(TyH);
   7127    if (1) test_srsra_8h_8h_16(TyH);
   7128    if (1) test_srsra_4h_4h_1(TyH);
   7129    if (1) test_srsra_4h_4h_8(TyH);
   7130    if (1) test_srsra_4h_4h_16(TyH);
   7131    if (1) test_srsra_16b_16b_1(TyB);
   7132    if (1) test_srsra_16b_16b_3(TyB);
   7133    if (1) test_srsra_16b_16b_8(TyB);
   7134    if (1) test_srsra_8b_8b_1(TyB);
   7135    if (1) test_srsra_8b_8b_3(TyB);
   7136    if (1) test_srsra_8b_8b_8(TyB);
   7137    if (1) test_ursra_2d_2d_1(TyD);
   7138    if (1) test_ursra_2d_2d_32(TyD);
   7139    if (1) test_ursra_2d_2d_64(TyD);
   7140    if (1) test_ursra_4s_4s_1(TyS);
   7141    if (1) test_ursra_4s_4s_16(TyS);
   7142    if (1) test_ursra_4s_4s_32(TyS);
   7143    if (1) test_ursra_2s_2s_1(TyS);
   7144    if (1) test_ursra_2s_2s_16(TyS);
   7145    if (1) test_ursra_2s_2s_32(TyS);
   7146    if (1) test_ursra_8h_8h_1(TyH);
   7147    if (1) test_ursra_8h_8h_8(TyH);
   7148    if (1) test_ursra_8h_8h_16(TyH);
   7149    if (1) test_ursra_4h_4h_1(TyH);
   7150    if (1) test_ursra_4h_4h_8(TyH);
   7151    if (1) test_ursra_4h_4h_16(TyH);
   7152    if (1) test_ursra_16b_16b_1(TyB);
   7153    if (1) test_ursra_16b_16b_3(TyB);
   7154    if (1) test_ursra_16b_16b_8(TyB);
   7155    if (1) test_ursra_8b_8b_1(TyB);
   7156    if (1) test_ursra_8b_8b_3(TyB);
   7157    if (1) test_ursra_8b_8b_8(TyB);
   7158 
   7159    // sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7160    // ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7161    if (1) test_sshll_2d_2s_0(TyS);
   7162    if (1) test_sshll_2d_2s_15(TyS);
   7163    if (1) test_sshll_2d_2s_31(TyS);
   7164    if (1) test_sshll2_2d_4s_0(TyS);
   7165    if (1) test_sshll2_2d_4s_15(TyS);
   7166    if (1) test_sshll2_2d_4s_31(TyS);
   7167    if (1) test_sshll_4s_4h_0(TyH);
   7168    if (1) test_sshll_4s_4h_7(TyH);
   7169    if (1) test_sshll_4s_4h_15(TyH);
   7170    if (1) test_sshll2_4s_8h_0(TyH);
   7171    if (1) test_sshll2_4s_8h_7(TyH);
   7172    if (1) test_sshll2_4s_8h_15(TyH);
   7173    if (1) test_sshll_8h_8b_0(TyB);
   7174    if (1) test_sshll_8h_8b_3(TyB);
   7175    if (1) test_sshll_8h_8b_7(TyB);
   7176    if (1) test_sshll2_8h_16b_0(TyB);
   7177    if (1) test_sshll2_8h_16b_3(TyB);
   7178    if (1) test_sshll2_8h_16b_7(TyB);
   7179    if (1) test_ushll_2d_2s_0(TyS);
   7180    if (1) test_ushll_2d_2s_15(TyS);
   7181    if (1) test_ushll_2d_2s_31(TyS);
   7182    if (1) test_ushll2_2d_4s_0(TyS);
   7183    if (1) test_ushll2_2d_4s_15(TyS);
   7184    if (1) test_ushll2_2d_4s_31(TyS);
   7185    if (1) test_ushll_4s_4h_0(TyH);
   7186    if (1) test_ushll_4s_4h_7(TyH);
   7187    if (1) test_ushll_4s_4h_15(TyH);
   7188    if (1) test_ushll2_4s_8h_0(TyH);
   7189    if (1) test_ushll2_4s_8h_7(TyH);
   7190    if (1) test_ushll2_4s_8h_15(TyH);
   7191    if (1) test_ushll_8h_8b_0(TyB);
   7192    if (1) test_ushll_8h_8b_3(TyB);
   7193    if (1) test_ushll_8h_8b_7(TyB);
   7194    if (1) test_ushll2_8h_16b_0(TyB);
   7195    if (1) test_ushll2_8h_16b_3(TyB);
   7196    if (1) test_ushll2_8h_16b_7(TyB);
   7197 
   7198    // suqadd  d,s,h,b
   7199    // usqadd  d,s,h,b
   7200    if (1) test_suqadd_d_d(TyD);
   7201    if (1) test_suqadd_s_s(TyS);
   7202    if (1) test_suqadd_h_h(TyH);
   7203    if (1) test_suqadd_b_b(TyB);
   7204    if (1) test_usqadd_d_d(TyD);
   7205    if (1) test_usqadd_s_s(TyS);
   7206    if (1) test_usqadd_h_h(TyH);
   7207    if (1) test_usqadd_b_b(TyB);
   7208 
   7209    // suqadd  2d,4s,2s,8h,4h,16b,8b
   7210    // usqadd  2d,4s,2s,8h,4h,16b,8b
   7211    if (1) test_suqadd_2d_2d(TyD);
   7212    if (1) test_suqadd_4s_4s(TyS);
   7213    if (1) test_suqadd_2s_2s(TyS);
   7214    if (1) test_suqadd_8h_8h(TyH);
   7215    if (1) test_suqadd_4h_4h(TyH);
   7216    if (1) test_suqadd_16b_16b(TyB);
   7217    if (1) test_suqadd_8b_8b(TyB);
   7218    if (1) test_usqadd_2d_2d(TyD);
   7219    if (1) test_usqadd_4s_4s(TyS);
   7220    if (1) test_usqadd_2s_2s(TyS);
   7221    if (1) test_usqadd_8h_8h(TyH);
   7222    if (1) test_usqadd_4h_4h(TyH);
   7223    if (1) test_usqadd_16b_16b(TyB);
   7224    if (1) test_usqadd_8b_8b(TyB);
   7225 
   7226    // tbl     8b_{16b}_8b, 16b_{16b}_16b
   7227    // tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   7228    // tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   7229    // tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   7230    if (1) test_tbl_16b_1reg(TyB);
   7231    if (1) test_tbl_16b_2reg(TyB);
   7232    if (1) test_tbl_16b_3reg(TyB);
   7233    if (1) test_tbl_16b_4reg(TyB);
   7234    if (1) test_tbl_8b_1reg(TyB);
   7235    if (1) test_tbl_8b_2reg(TyB);
   7236    if (1) test_tbl_8b_3reg(TyB);
   7237    if (1) test_tbl_8b_4reg(TyB);
   7238 
   7239    // tbx     8b_{16b}_8b, 16b_{16b}_16b
   7240    // tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   7241    // tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   7242    // tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   7243    if (1) test_tbx_16b_1reg(TyB);
   7244    if (1) test_tbx_16b_2reg(TyB);
   7245    if (1) test_tbx_16b_3reg(TyB);
   7246    if (1) test_tbx_16b_4reg(TyB);
   7247    if (1) test_tbx_8b_1reg(TyB);
   7248    if (1) test_tbx_8b_2reg(TyB);
   7249    if (1) test_tbx_8b_3reg(TyB);
   7250    if (1) test_tbx_8b_4reg(TyB);
   7251 
   7252    // trn1    2d,4s,2s,8h,4h,16b,8b
   7253    // trn2    2d,4s,2s,8h,4h,16b,8b
   7254    if (1) test_trn1_2d_2d_2d(TyD);
   7255    if (1) test_trn1_4s_4s_4s(TyS);
   7256    if (1) test_trn1_2s_2s_2s(TyS);
   7257    if (1) test_trn1_8h_8h_8h(TyH);
   7258    if (1) test_trn1_4h_4h_4h(TyH);
   7259    if (1) test_trn1_16b_16b_16b(TyB);
   7260    if (1) test_trn1_8b_8b_8b(TyB);
   7261    if (1) test_trn2_2d_2d_2d(TyD);
   7262    if (1) test_trn2_4s_4s_4s(TyS);
   7263    if (1) test_trn2_2s_2s_2s(TyS);
   7264    if (1) test_trn2_8h_8h_8h(TyH);
   7265    if (1) test_trn2_4h_4h_4h(TyH);
   7266    if (1) test_trn2_16b_16b_16b(TyB);
   7267    if (1) test_trn2_8b_8b_8b(TyB);
   7268 
   7269    // urecpe      4s,2s
   7270    // ursqrte     4s,2s
   7271    if (1) test_urecpe_4s_4s(TyS);
   7272    if (1) test_urecpe_2s_2s(TyS);
   7273    if (1) test_ursqrte_4s_4s(TyS);
   7274    if (1) test_ursqrte_2s_2s(TyS);
   7275 
   7276    // uzp1      2d,4s,2s,8h,4h,16b,8b
   7277    // uzp2      2d,4s,2s,8h,4h,16b,8b
   7278    // zip1      2d,4s,2s,8h,4h,16b,8b
   7279    // zip2      2d,4s,2s,8h,4h,16b,8b
   7280    if (1) test_uzp1_2d_2d_2d(TyD);
   7281    if (1) test_uzp1_4s_4s_4s(TyS);
   7282    if (1) test_uzp1_2s_2s_2s(TyS);
   7283    if (1) test_uzp1_8h_8h_8h(TyH);
   7284    if (1) test_uzp1_4h_4h_4h(TyH);
   7285    if (1) test_uzp1_16b_16b_16b(TyB);
   7286    if (1) test_uzp1_8b_8b_8b(TyB);
   7287    if (1) test_uzp2_2d_2d_2d(TyD);
   7288    if (1) test_uzp2_4s_4s_4s(TyS);
   7289    if (1) test_uzp2_2s_2s_2s(TyS);
   7290    if (1) test_uzp2_8h_8h_8h(TyH);
   7291    if (1) test_uzp2_4h_4h_4h(TyH);
   7292    if (1) test_uzp2_16b_16b_16b(TyB);
   7293    if (1) test_uzp2_8b_8b_8b(TyB);
   7294    if (1) test_zip1_2d_2d_2d(TyD);
   7295    if (1) test_zip1_4s_4s_4s(TyS);
   7296    if (1) test_zip1_2s_2s_2s(TyS);
   7297    if (1) test_zip1_8h_8h_8h(TyH);
   7298    if (1) test_zip1_4h_4h_4h(TyH);
   7299    if (1) test_zip1_16b_16b_16b(TyB);
   7300    if (1) test_zip1_8b_8b_8b(TyB);
   7301    if (1) test_zip2_2d_2d_2d(TyD);
   7302    if (1) test_zip2_4s_4s_4s(TyS);
   7303    if (1) test_zip2_2s_2s_2s(TyS);
   7304    if (1) test_zip2_8h_8h_8h(TyH);
   7305    if (1) test_zip2_4h_4h_4h(TyH);
   7306    if (1) test_zip2_16b_16b_16b(TyB);
   7307    if (1) test_zip2_8b_8b_8b(TyB);
   7308 
   7309    // xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7310    if (1) test_xtn_2s_2d(TyD);
   7311    if (1) test_xtn2_4s_2d(TyD);
   7312    if (1) test_xtn_4h_4s(TyS);
   7313    if (1) test_xtn2_8h_4s(TyS);
   7314    if (1) test_xtn_8b_8h(TyH);
   7315    if (1) test_xtn2_16b_8h(TyH);
   7316 
   7317    // ======================== MEM ========================
   7318 
   7319    // All the SIMD and FP memory tests are in none/tests/arm64/memory.c.
   7320 
   7321    // ld1  (multiple 1-element structures to 1/2/3/4 regs)
   7322    // ld1  (single 1-element structure to one lane of 1 reg)
   7323    // ld1r (single 1-element structure and rep to all lanes of 1 reg)
   7324 
   7325    // ld2  (multiple 2-element structures to 2 regs)
   7326    // ld2  (single 2-element structure to one lane of 2 regs)
   7327    // ld2r (single 2-element structure and rep to all lanes of 2 regs)
   7328 
   7329    // ld3  (multiple 3-element structures to 3 regs)
   7330    // ld3  (single 3-element structure to one lane of 3 regs)
   7331    // ld3r (single 3-element structure and rep to all lanes of 3 regs)
   7332 
   7333    // ld4  (multiple 4-element structures to 4 regs)
   7334    // ld4  (single 4-element structure to one lane of 4 regs)
   7335    // ld4r (single 4-element structure and rep to all lanes of 4 regs)
   7336 
   7337    // ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   7338    //       addr = reg + uimm7 * reg_size
   7339 
   7340    // ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   7341    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7342 
   7343    // ldr   q,d,s,h,b from addr
   7344    //       addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7345 
   7346    // ldr   q,d,s from  pc+#imm19
   7347 
   7348    // ldr   q,d,s,h,b from addr
   7349    //       addr = [Xn|SP, R <extend> <shift>]
   7350 
   7351    // ldur  q,d,s,h,b from addr
   7352    //       addr = [Xn|SP,#imm] (unscaled offset)
   7353 
   7354    // st1 (multiple 1-element structures from 1/2/3/4 regs)
   7355    // st1 (single 1-element structure for 1 lane of 1 reg)
   7356 
   7357    // st2 (multiple 2-element structures from 2 regs)
   7358    // st2 (single 2-element structure from 1 lane of 2 regs)
   7359 
   7360    // st3 (multiple 3-element structures from 3 regs)
   7361    // st3 (single 3-element structure from 1 lane of 3 regs)
   7362 
   7363    // st4 (multiple 4-element structures from 4 regs)
   7364    // st4 (single 4-element structure from one lane of 4 regs)
   7365 
   7366    // stnp q_q_addr, d_d_addr, s_s_addr
   7367    //      addr = [Xn|SP, #imm]
   7368 
   7369    // stp  q_q_addr, d_d_addr, s_s_addr
   7370    //      addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   7371 
   7372    // str  q,d,s,h,b_addr
   7373    //      addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   7374 
   7375    // str   q,d,s,h,b_addr
   7376    //       addr = [Xn|SP, R <extend> <shift>]
   7377 
   7378    // stur  q,d,s,h,b_addr
   7379    //       addr = [Xn|SP,#imm] (unscaled offset)
   7380 
   7381    // ======================== CRYPTO ========================
   7382 
   7383    // aesd       16b (aes single round decryption)
   7384    // aese       16b (aes single round encryption)
   7385    // aesimc     16b (aes inverse mix columns)
   7386    // aesmc      16b (aes mix columns)
   7387    if (1) DO50( test_aesd_16b_16b(TyNONE) );
   7388    if (1) DO50( test_aese_16b_16b(TyNONE) );
   7389    if (1) DO50( test_aesimc_16b_16b(TyNONE) );
   7390    if (1) DO50( test_aesmc_16b_16b(TyNONE) );
   7391 
   7392    // sha1c      q_s_4s
   7393    // sha1h      s_s
   7394    // sha1m      q_s_4s
   7395    // sha1p      q_s_4s
   7396    // sha1su0    4s_4s_4s
   7397    // sha1su1    4s_4s
   7398    if (1) DO50( test_sha1c_q_s_4s(TyNONE) );
   7399    if (1) DO50( test_sha1h_s_s(TyNONE) );
   7400    if (1) DO50( test_sha1m_q_s_4s(TyNONE) );
   7401    if (1) DO50( test_sha1p_q_s_4s(TyNONE) );
   7402    if (1) DO50( test_sha1su0_4s_4s_4s(TyNONE) );
   7403    if (1) DO50( test_sha1su1_4s_4s(TyNONE) );
   7404 
   7405    // sha256h2   q_q_4s
   7406    // sha256h    q_q_4s
   7407    // sha256su0  4s_4s
   7408    // sha256su1  4s_4s_4s
   7409    if (1) DO50( test_sha256h2_q_q_4s(TyNONE) );
   7410    if (1) DO50( test_sha256h_q_q_4s(TyNONE) );
   7411    if (1) DO50( test_sha256su0_4s_4s(TyNONE) );
   7412    if (1) DO50( test_sha256su1_4s_4s_4s(TyNONE) );
   7413 
   7414    // pmull{2} 1q_1d_1d,1q_2d_2d
   7415    if (1) test_pmull_1q_1d_1d(TyD);
   7416    if (1) test_pmull2_1q_2d_2d(TyD);
   7417 
   7418 return 0;
   7419 }
   7420 
   7421 
   7422 /* ---------------------------------------------------------------- */
   7423 /* -- Alphabetical list of insns                                 -- */
   7424 /* ---------------------------------------------------------------- */
   7425 /*
   7426    abs      d
   7427    abs      2d,4s,2s,8h,4h,16b,8b
   7428    add      d
   7429    add      2d,4s,2s,8h,4h,16b,8b
   7430    addhn    2s.2d.2d, 4s.2d.2d, h_from_s and b_from_h (add and get high half)
   7431    addp     d (add pairs, across)
   7432    addp     2d,4s,2s,8h,4h,16b,8b
   7433    addv     4s,8h,4h,16b,8b (reduce across vector)
   7434    aesd     16b (aes single round decryption)
   7435    aese     16b (aes single round encryption)
   7436    aesimc   16b (aes inverse mix columns)
   7437    aesmc    16b (aes mix columns)
   7438    and      16b,8b
   7439 
   7440    bic      4s,2s,8h,4h (vector, imm)
   7441    also movi, mvni, orr
   7442 
   7443    bic      16b,8b (vector,reg) (bit clear)
   7444    bif      16b,8b (vector) (bit insert if false)
   7445    bit      16b,8b (vector) (bit insert if true)
   7446    bsl      16b,8b (vector) (bit select)
   7447 
   7448    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   7449    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   7450 
   7451    cmeq     d
   7452    cmeq     2d,4s,2s,8h,4h,16b,8b
   7453    cmeq_z   d
   7454    cmeq_z   2d,4s,2s,8h,4h,16b,8b
   7455 
   7456    cmge     d
   7457    cmge     2d,4s,2s,8h,4h,16b,8b
   7458    cmge_z   d
   7459    cmge_z   2d,4s,2s,8h,4h,16b,8b
   7460 
   7461    cmgt     d
   7462    cmgt     2d,4s,2s,8h,4h,16b,8b
   7463    cmgt_z   d
   7464    cmgt_z   2d,4s,2s,8h,4h,16b,8b
   7465 
   7466    cmhi     d
   7467    cmhi     2d,4s,2s,8h,4h,16b,8b
   7468 
   7469    cmhs     d
   7470    cmhs     2d,4s,2s,8h,4h,16b,8b
   7471 
   7472    cmle_z   d
   7473    cmle_z   2d,4s,2s,8h,4h,16b,8b
   7474 
   7475    cmlt_z   d
   7476    cmlt_z   2d,4s,2s,8h,4h,16b,8b
   7477 
   7478    cmtst    d
   7479    cmtst    2d,4s,2s,8h,4h,16b,8b
   7480 
   7481    cnt      16b,8b (population count per byte)
   7482 
   7483    dup      d,s,h,b (vec elem to scalar)
   7484    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   7485    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   7486 
   7487    eor      16b,8b (vector)
   7488    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   7489 
   7490    fabd     d,s
   7491    fabd     2d,4s,2s
   7492 
   7493    fabs     d,s
   7494    fabs     2d,4s,2s
   7495 
   7496    facge    s,d  (floating abs compare GE)
   7497    facge    2d,4s,2s
   7498 
   7499    facgt    s,d  (floating abs compare GT)
   7500    facgt    2d,4s,2s
   7501 
   7502    fadd     d,s
   7503    fadd     2d,4s,2s
   7504 
   7505    faddp    d,s (floating add pair)
   7506    faddp    2d,4s,2s
   7507 
   7508    fccmp    d,s (floating point conditional quiet compare)
   7509    fccmpe   d,s (floating point conditional signaling compare)
   7510 
   7511    fcmeq    d,s
   7512    fcmeq    2d,4s,2s
   7513    fcmeq_z  d,s
   7514    fcmeq_z  2d,4s,2s
   7515 
   7516    fcmge    d,s
   7517    fcmge    2d,4s,2s
   7518    fcmge_z  d,s
   7519    fcmge_z  2d,4s,2s
   7520 
   7521    fcmgt    d,s
   7522    fcmgt    2d,4s,2s
   7523    fcmgt_z  d,s
   7524    fcmgt_z  2d,4s,2s
   7525 
   7526    fcmle_z  d,s
   7527    fcmle_z  2d,4s,2s
   7528 
   7529    fcmlt_z  d,s
   7530    fcmlt_z  2d,4s,2s
   7531 
   7532    fcmp     d,s (floating point quiet, set flags)
   7533    fcmp_z   d,s
   7534    fcmpe    d,s (floating point signaling, set flags)
   7535    fcmpe_z  d,s
   7536 
   7537    fcsel    d,s (fp cond select)
   7538 
   7539    fcvt     s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   7540 
   7541    fcvtas   d,s  (fcvt to signed int, nearest, ties away)
   7542    fcvtas   2d,4s,2s
   7543    fcvtas   w_s,x_s,w_d,x_d
   7544 
   7545    fcvtau   d,s  (fcvt to unsigned int, nearest, ties away)
   7546    fcvtau   2d,4s,2s
   7547    fcvtau   w_s,x_s,w_d,x_d
   7548 
   7549    fcvtl{2} 4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   7550 
   7551    fcvtms   d,s  (fcvt to signed int, minus inf)
   7552    fcvtms   2d,4s,2s
   7553    fcvtms   w_s,x_s,w_d,x_d
   7554 
   7555    fcvtmu   d,s  (fcvt to unsigned int, minus inf)
   7556    fcvtmu   2d,4s,2s
   7557    fcvtmu   w_s,x_s,w_d,x_d
   7558 
   7559    fcvtn{2} 4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   7560 
   7561    fcvtns   d,s  (fcvt to signed int, nearest)
   7562    fcvtns   2d,4s,2s
   7563    fcvtns   w_s,x_s,w_d,x_d
   7564 
   7565    fcvtnu   d,s  (fcvt to unsigned int, nearest)
   7566    fcvtnu   2d,4s,2s
   7567    fcvtnu   w_s,x_s,w_d,x_d
   7568 
   7569    fcvtps   d,s  (fcvt to signed int, plus inf)
   7570    fcvtps   2d,4s,2s
   7571    fcvtps   w_s,x_s,w_d,x_d
   7572 
   7573    fcvtpu   d,s  (fcvt to unsigned int, plus inf)
   7574    fcvtpu   2d,4s,2s
   7575    fcvtpu   w_s,x_s,w_d,x_d
   7576 
   7577    fcvtxn   s_d (fcvt to lower prec narrow, rounding to odd)
   7578    fcvtxn   2s_2d,4s_2d
   7579 
   7580    fcvtzs   s,d (fcvt to signed fixedpt, to zero) (w/ #fbits)
   7581    fcvtzs   2d,4s,2s
   7582 
   7583    fcvtzs   s,d (fcvt to signed integer, to zero)
   7584    fcvtzs   2d,4s,2s
   7585 
   7586    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed fixedpt, to zero) (w/ #fbits)
   7587 
   7588    fcvtzs   w_s,x_s,w_d,x_d (fcvt to signed integer, to zero)
   7589 
   7590    fcvtzu   s,d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   7591    fcvtzu   2d,4s,2s
   7592 
   7593    fcvtzu   s,d (fcvt to unsigned integer, to zero)
   7594    fcvtzu   2d,4s,2s
   7595 
   7596    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   7597 
   7598    fcvtzu   w_s,x_s,w_d,x_d (fcvt to unsigned integer, to zero)
   7599 
   7600    fdiv     d,s
   7601    fdiv     2d,4s,2s
   7602 
   7603    fmadd    d,s
   7604    fnmadd   d,s
   7605    fnmsub   d,s
   7606    fnmul    d,s
   7607 
   7608    fmax     d,s
   7609    fmin     d,s
   7610 
   7611    fmax     2d,4s,2s
   7612    fmin     2d,4s,2s
   7613 
   7614    fmaxnm   d,s ("max number")
   7615    fminnm   d,s
   7616 
   7617    fmaxnm   2d,4s,2s
   7618    fminnm   2d,4s,2s
   7619 
   7620    fmaxnmp  d_2d,s_2s ("max number pairwise")
   7621    fminnmp  d_2d,s_2s
   7622 
   7623    fmaxnmp  2d,4s,2s
   7624    fminnmp  2d,4s,2s
   7625 
   7626    fmaxnmv  s_4s (maxnum across vector)
   7627    fminnmv  s_4s
   7628 
   7629    fmaxp    d_2d,s_2s (max of a pair)
   7630    fminp    d_2d,s_2s (min of a pair)
   7631 
   7632    fmaxp    2d,4s,2s  (max pairwise)
   7633    fminp    2d,4s,2s
   7634 
   7635    fmaxv    s_4s (max across vector)
   7636    fminv    s_4s
   7637 
   7638    fmla     d_d_d[],s_s_s[] (by element)
   7639    fmla     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7640 
   7641    fmla     2d,4s,2s
   7642 
   7643    fmls     d_d_d[],s_s_s[] (by element)
   7644    fmls     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7645 
   7646    fmls     2d,4s,2s
   7647 
   7648    fmov     2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   7649 
   7650    fmov     d_d,s_s
   7651 
   7652    fmov     s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   7653 
   7654    fmov     d,s #imm
   7655 
   7656    fmsub    d,s
   7657 
   7658    fmul     d_d_d[],s_s_s[]
   7659    fmul     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7660 
   7661    fmul     2d,4s,2s
   7662    fmul     d,s
   7663 
   7664    fmulx    d_d_d[],s_s_s[]
   7665    fmulx    2d_2d_d[],4s_4s_s[],2s_2s_s[]
   7666 
   7667    fmulx    d,s
   7668    fmulx    2d,4s,2s
   7669 
   7670    fneg     d,s
   7671    fneg     2d,4s,2s
   7672 
   7673    frecpe   d,s (recip estimate)
   7674    frecpe   2d,4s,2s
   7675 
   7676    frecps   d,s (recip step)
   7677    frecps   2d,4s,2s
   7678 
   7679    frecpx   d,s (recip exponent)
   7680 
   7681    frinta   2d,4s,2s (round to integral, nearest away)
   7682    frinta   d,s
   7683 
   7684    frinti   2d,4s,2s (round to integral, per FPCR)
   7685    frinti   d,s
   7686 
   7687    frintm   2d,4s,2s (round to integral, minus inf)
   7688    frintm   d,s
   7689 
   7690    frintn   2d,4s,2s (round to integral, nearest, to even)
   7691    frintn   d,s
   7692 
   7693    frintp   2d,4s,2s (round to integral, plus inf)
   7694    frintp   d,s
   7695 
   7696    frintx   2d,4s,2s (round to integral exact, per FPCR)
   7697    frintx   d,s
   7698 
   7699    frintz   2d,4s,2s (round to integral, zero)
   7700    frintz   d,s
   7701 
   7702    frsqrte  d,s (est)
   7703    frsqrte  2d,4s,2s
   7704 
   7705    frsqrts  d,s (step)
   7706    frsqrts  2d,4s,2s
   7707 
   7708    fsqrt    d,s
   7709    fsqrt    2d,4s,2s
   7710 
   7711    fsub     d,s
   7712    fsub     2d,4s,2s
   7713 
   7714    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   7715 
   7716    ins      d[]_x, s[]_w, h[]_w, b[]_w
   7717 
   7718    ld1  (multiple 1-element structures to 1/2/3/4 regs)
   7719    ld1  (single 1-element structure to one lane of 1 reg)
   7720    ld1r (single 1-element structure and rep to all lanes of 1 reg)
   7721 
   7722    ld2  (multiple 2-element structures to 2 regs)
   7723    ld2  (single 2-element structure to one lane of 2 regs)
   7724    ld2r (single 2-element structure and rep to all lanes of 2 regs)
   7725 
   7726    ld3  (multiple 3-element structures to 3 regs)
   7727    ld3  (single 3-element structure to one lane of 3 regs)
   7728    ld3r (single 3-element structure and rep to all lanes of 3 regs)
   7729 
   7730    ld4  (multiple 4-element structures to 4 regs)
   7731    ld4  (single 4-element structure to one lane of 4 regs)
   7732    ld4r (single 4-element structure and rep to all lanes of 4 regs)
   7733 
   7734    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   7735          addr = reg + uimm7 * reg_size
   7736 
   7737    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   7738          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7739 
   7740    ldr   q,d,s,h,b from addr
   7741          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   7742 
   7743    ldr   q,d,s from  pc+#imm19
   7744 
   7745    ldr   q,d,s,h,b from addr
   7746          addr = [Xn|SP, R <extend> <shift>]
   7747 
   7748    ldur  q,d,s,h,b from addr
   7749          addr = [Xn|SP,#imm] (unscaled offset)
   7750 
   7751    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7752    mla   4s,2s,8h,4h,16b,8b
   7753 
   7754    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7755    mls   4s,2s,8h,4h,16b,8b
   7756 
   7757    movi  16b,8b   #imm8, LSL #0
   7758    movi  8h,4h    #imm8, LSL #0 or 8
   7759    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   7760    movi  4s,2s    #imm8, MSL #8 or 16
   7761    movi  d,       #imm64
   7762    movi  2d,      #imm64
   7763 
   7764    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   7765    mul   4s,2s,8h,4h,16b,8b
   7766 
   7767    mvni  8h,4h    #imm8, LSL #0 or 8
   7768    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   7769    mvni  4s,2s    #imm8, MSL #8 or 16
   7770 
   7771    neg   d
   7772    neg   2d,4s,2s,8h,4h,16b,8b
   7773 
   7774    not   16b,8b
   7775 
   7776    orn   16b,8b
   7777 
   7778    orr   8h,4h   #imm8, LSL #0 or 8
   7779    orr   4s,2s   #imm8, LSL #0, 8, 16 or 24
   7780 
   7781    orr   16b,8b
   7782 
   7783    pmul  16b,8b
   7784 
   7785    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
   7786 
   7787    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   7788 
   7789    rbit    16b,8b
   7790    rev16   16b,8b
   7791    rev32   16b,8b,8h,4h
   7792    rev64   16b,8b,8h,4h,4s,2s
   7793 
   7794    rshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   7795 
   7796    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   7797 
   7798    saba      16b,8b,8h,4h,4s,2s
   7799    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7800 
   7801    sabd      16b,8b,8h,4h,4s,2s
   7802    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7803 
   7804    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   7805 
   7806    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7807 
   7808    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   7809 
   7810    saddlv    h_16b/8b, s_8h/4h, d_4s
   7811 
   7812    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   7813 
   7814    scvtf     d,s        _#fbits
   7815    scvtf     2d,4s,2s   _#fbits
   7816 
   7817    scvtf     d,s
   7818    scvtf     2d,4s,2s
   7819 
   7820    scvtf     s_w, d_w, s_x, d_x,   _#fbits
   7821    scvtf     s_w, d_w, s_x, d_x
   7822 
   7823    sha1c       q_s_4s
   7824    sha1h       s_s
   7825    sha1m       q_s_4s
   7826    sha1p       q_s_4s
   7827    sha1su0     4s_4s_4s
   7828    sha1su1     4s_4s
   7829    sha256h2    q_q_4s
   7830    sha256h     q_q_4s
   7831    sha256su0   4s_4s
   7832    sha256su1   4s_4s_4s
   7833 
   7834    shadd       16b,8b,8h,4h,4s,2s
   7835 
   7836    shl         d_#imm
   7837    shl         16b,8b,8h,4h,4s,2s,2d  _#imm
   7838 
   7839    shll{2}   8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   7840 
   7841    shrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   7842 
   7843    shsub       16b,8b,8h,4h,4s,2s
   7844 
   7845    sli         d_#imm
   7846    sli         2d,4s,2s,8h,4h,16b,8b  _#imm
   7847 
   7848    smax        4s,2s,8h,4h,16b,8b
   7849 
   7850    smaxp       4s,2s,8h,4h,16b,8b
   7851 
   7852    smaxv       s_4s,h_8h,h_4h,b_16b,b_8b
   7853 
   7854    smin        4s,2s,8h,4h,16b,8b
   7855 
   7856    sminp       4s,2s,8h,4h,16b,8b
   7857 
   7858    sminv       s_4s,h_8h,h_4h,b_16b,b_8b
   7859 
   7860    smlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7861    smlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7862 
   7863    smlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7864    smlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7865 
   7866    smov        w_b[], w_h[], x_b[], x_h[], x_s[]
   7867 
   7868    smull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   7869    smull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7870 
   7871    sqabs       d,s,h,b
   7872    sqabs       2d,4s,2s,8h,4h,16b,8b
   7873 
   7874    sqadd       d,s,h,b
   7875    sqadd       2d,4s,2s,8h,4h,16b,8b
   7876 
   7877    sqdmlal     d_s_s[], s_h_h[]
   7878    sqdmlal{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7879 
   7880    sqdmlal     d_s_s, s_h_h
   7881    sqdmlal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7882 
   7883    sqdmlsl     d_s_s[], s_h_h[]
   7884    sqdmlsl{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7885 
   7886    sqdmlsl     d_s_s, s_h_h
   7887    sqdmlsl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7888 
   7889    sqdmulh     s_s_s[], h_h_h[]
   7890    sqdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   7891 
   7892    sqdmulh     h,s
   7893    sqdmulh     4s,2s,8h,4h
   7894 
   7895    sqdmull     d_s_s[], s_h_h[]
   7896    sqdmull{2}  2d_2s/4s_s[], 4s_4h/8h_h[]
   7897 
   7898    sqdmull     d_s_s,s_h_h
   7899    sqdmull{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   7900 
   7901    sqneg       d,s,h,b
   7902    sqneg       2d,4s,2s,8h,4h,16b,8b
   7903 
   7904    sqrdmulh    s_s_s[], h_h_h[]
   7905    sqrdmulh    4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   7906 
   7907    sqrdmulh    h,s
   7908    sqrdmulh    4s,2s,8h,4h
   7909 
   7910    sqrshl      d,s,h,b
   7911    sqrshl      2d,4s,2s,8h,4h,16b,8b
   7912 
   7913    sqrshrn     s_d, h_s, b_h   #imm
   7914    sqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7915 
   7916    sqrshrun     s_d, h_s, b_h   #imm
   7917    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7918 
   7919    sqshl        d,s,h,b   _#imm
   7920    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   7921 
   7922    sqshl        d,s,h,b
   7923    sqshl        2d,4s,2s,8h,4h,16b,8b
   7924 
   7925    sqshlu       d,s,h,b  _#imm
   7926    sqshlu       2d,4s,2s,8h,4h,16b,8b  _#imm
   7927 
   7928    sqshrn       s_d, h_s, b_h   #imm
   7929    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7930 
   7931    sqshrun      s_d, h_s, b_h   #imm
   7932    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   7933 
   7934    sqsub       d,s,h,b
   7935    sqsub       2d,4s,2s,8h,4h,16b,8b
   7936 
   7937    sqxtn       s_d,h_s,b_h
   7938    sqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7939 
   7940    sqxtun      s_d,h_s,b_h
   7941    sqxtun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   7942 
   7943    srhadd      4s,2s,8h,4h,16b,8b
   7944 
   7945    sri         d_#imm
   7946    sri         2d,4s,2s,8h,4h,16b,8b  _#imm
   7947 
   7948    srshl (reg) d
   7949    srshl       2d,4s,2s,8h,4h,16b,8b
   7950 
   7951    srshr (imm) d
   7952    srshr       2d,4s,2s,8h,4h,16b,8b
   7953 
   7954    srsra (imm) d
   7955    srsra       2d,4s,2s,8h,4h,16b,8b
   7956 
   7957    sshl (reg)  d
   7958    sshl        2d,4s,2s,8h,4h,16b,8b
   7959 
   7960    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   7961 
   7962    sshr (imm)  d
   7963    sshr        2d,4s,2s,8h,4h,16b,8b
   7964 
   7965    ssra (imm)  d
   7966    ssra        2d,4s,2s,8h,4h,16b,8b
   7967 
   7968    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   7969 
   7970    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   7971 
   7972    st1 (multiple 1-element structures from 1/2/3/4 regs)
   7973    st1 (single 1-element structure for 1 lane of 1 reg)
   7974 
   7975    st2 (multiple 2-element structures from 2 regs)
   7976    st2 (single 2-element structure from 1 lane of 2 regs)
   7977 
   7978    st3 (multiple 3-element structures from 3 regs)
   7979    st3 (single 3-element structure from 1 lane of 3 regs)
   7980 
   7981    st4 (multiple 4-element structures from 4 regs)
   7982    st4 (single 4-element structure from one lane of 4 regs)
   7983 
   7984    stnp q_q_addr, d_d_addr, s_s_addr
   7985         addr = [Xn|SP, #imm]
   7986 
   7987    stp  q_q_addr, d_d_addr, s_s_addr
   7988         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   7989 
   7990    str  q,d,s,h,b_addr
   7991         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   7992 
   7993    str   q,d,s,h,b_addr
   7994          addr = [Xn|SP, R <extend> <shift>]
   7995 
   7996    stur  q,d,s,h,b_addr
   7997          addr = [Xn|SP,#imm] (unscaled offset)
   7998 
   7999    sub   d
   8000    sub   2d,4s,2s,8h,4h,16b,8b
   8001 
   8002    subhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8003 
   8004    suqadd  d,s,h,b
   8005    suqadd  2d,4s,2s,8h,4h,16b,8b
   8006 
   8007    tbl     8b_{16b}_8b, 16b_{16b}_16b
   8008    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8009    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8010    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8011 
   8012    tbx     8b_{16b}_8b, 16b_{16b}_16b
   8013    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8014    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8015    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8016 
   8017    trn1    2d,4s,2s,8h,4h,16b,8b
   8018    trn2    2d,4s,2s,8h,4h,16b,8b
   8019 
   8020    uaba      16b,8b,8h,4h,4s,2s
   8021    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8022 
   8023    uabd      16b,8b,8h,4h,4s,2s
   8024    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8025 
   8026    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8027 
   8028    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8029 
   8030    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8031 
   8032    uaddlv    h_16b/8b, s_8h/4h, d_4s
   8033 
   8034    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8035 
   8036    ucvtf     d,s        _#fbits
   8037    ucvtf     2d,4s,2s   _#fbits
   8038 
   8039    ucvtf     d,s
   8040    ucvtf     2d,4s,2s
   8041 
   8042    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   8043    ucvtf     s_w, d_w, s_x, d_x
   8044 
   8045    uhadd       16b,8b,8h,4h,4s,2s
   8046 
   8047    uhsub       16b,8b,8h,4h,4s,2s
   8048 
   8049    umax        4s,2s,8h,4h,16b,8b
   8050 
   8051    umaxp       4s,2s,8h,4h,16b,8b
   8052 
   8053    umaxv       s_4s,h_8h,h_4h,b_16b,b_8b
   8054 
   8055    umin        4s,2s,8h,4h,16b,8b
   8056 
   8057    uminp       4s,2s,8h,4h,16b,8b
   8058 
   8059    uminv       s_4s,h_8h,h_4h,b_16b,b_8b
   8060 
   8061    umlal{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8062    umlal{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8063 
   8064    umlsl{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8065    umlsl{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8066 
   8067    umov        w_b[], w_h[], x_b[], x_h[], x_s[]
   8068 
   8069    umull{2}    2d_2s/4s_s[], 4s_4h/8h_h[]
   8070    umull{2}    2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8071 
   8072    uqadd       d,s,h,b
   8073    uqadd       2d,4s,2s,8h,4h,16b,8b
   8074 
   8075    uqrshl      d,s,h,b
   8076    uqrshl      2d,4s,2s,8h,4h,16b,8b
   8077 
   8078    uqrshrn     s_d, h_s, b_h   #imm
   8079    uqrshrn{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8080 
   8081    uqshl        d,s,h,b   _#imm
   8082    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8083 
   8084    uqshl        d,s,h,b
   8085    uqshl        2d,4s,2s,8h,4h,16b,8b
   8086 
   8087    uqshrn       s_d, h_s, b_h   #imm
   8088    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8089 
   8090    uqsub       d,s,h,b
   8091    uqsub       2d,4s,2s,8h,4h,16b,8b
   8092 
   8093    uqxtn       s_d,h_s,b_h
   8094    uqxtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8095 
   8096    urecpe      4s,2s
   8097 
   8098    urhadd      4s,2s,8h,4h,16b,8b
   8099 
   8100    urshl (reg) d
   8101    urshl       2d,4s,2s,8h,4h,16b,8b
   8102 
   8103    urshr (imm) d
   8104    urshr       2d,4s,2s,8h,4h,16b,8b
   8105 
   8106    ursqrte     4s,2s
   8107 
   8108    ursra (imm) d
   8109    ursra       2d,4s,2s,8h,4h,16b,8b
   8110 
   8111    ushl (reg)  d
   8112    ushl        2d,4s,2s,8h,4h,16b,8b
   8113 
   8114    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8115 
   8116    ushr (imm)  d
   8117    ushr        2d,4s,2s,8h,4h,16b,8b
   8118 
   8119    usqadd      d,s,h,b
   8120    usqadd      2d,4s,2s,8h,4h,16b,8b
   8121 
   8122    usra (imm)  d
   8123    usra        2d,4s,2s,8h,4h,16b,8b
   8124 
   8125    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8126 
   8127    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8128 
   8129    uzp1      2d,4s,2s,8h,4h,16b,8b
   8130    uzp2      2d,4s,2s,8h,4h,16b,8b
   8131 
   8132    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8133 
   8134    zip1      2d,4s,2s,8h,4h,16b,8b
   8135    zip2      2d,4s,2s,8h,4h,16b,8b
   8136 */
   8137 
   8138 
   8139 /* ---------------------------------------------------------------- */
   8140 /* -- List of insns, grouped somewhat by laneage configuration   -- */
   8141 /* ---------------------------------------------------------------- */
   8142 /*
   8143    ======================== FP ========================
   8144 
   8145    fabs      d,s
   8146    fabs      2d,4s,2s
   8147 
   8148    fneg      d,s
   8149    fneg      2d,4s,2s
   8150 
   8151    fsqrt     d,s
   8152    fsqrt     2d,4s,2s
   8153 
   8154    fadd      d,s
   8155    fsub      d,s
   8156 
   8157    fadd      2d,4s,2s
   8158    fsub      2d,4s,2s
   8159 
   8160    fabd      d,s
   8161    fabd      2d,4s,2s
   8162 
   8163    faddp     d,s (floating add pair)
   8164    faddp     2d,4s,2s
   8165 
   8166    fccmp     d,s (floating point conditional quiet compare)
   8167    fccmpe    d,s (floating point conditional signaling compare)
   8168 
   8169    fcmeq     d,s
   8170    fcmge     d,s
   8171    fcmgt     d,s
   8172    facgt     d,s  (floating abs compare GT)
   8173    facge     d,s  (floating abs compare GE)
   8174 
   8175    fcmeq     2d,4s,2s
   8176    fcmge     2d,4s,2s
   8177    fcmgt     2d,4s,2s
   8178    facge     2d,4s,2s
   8179    facgt     2d,4s,2s
   8180 
   8181    fcmeq_z   d,s
   8182    fcmge_z   d,s
   8183    fcmgt_z   d,s
   8184    fcmle_z   d,s
   8185    fcmlt_z   d,s
   8186 
   8187    fcmeq_z   2d,4s,2s
   8188    fcmge_z   2d,4s,2s
   8189    fcmgt_z   2d,4s,2s
   8190    fcmle_z   2d,4s,2s
   8191    fcmlt_z   2d,4s,2s
   8192 
   8193    fcmp_z    d,s
   8194    fcmpe_z   d,s
   8195    fcmp      d,s (floating point quiet, set flags)
   8196    fcmpe     d,s (floating point signaling, set flags)
   8197 
   8198    fcsel     d,s (fp cond select)
   8199 
   8200    fdiv      d,s
   8201    fdiv      2d,4s,2s
   8202 
   8203    fmadd     d,s
   8204    fnmadd    d,s
   8205    fmsub     d,s
   8206    fnmsub    d,s
   8207 
   8208    fnmul     d,s
   8209 
   8210    fmax      d,s
   8211    fmin      d,s
   8212    fmaxnm    d,s ("max number")
   8213    fminnm    d,s
   8214 
   8215    fmax      2d,4s,2s
   8216    fmin      2d,4s,2s
   8217    fmaxnm    2d,4s,2s
   8218    fminnm    2d,4s,2s
   8219 
   8220    fmaxnmp   d_2d,s_2s ("max number pairwise")
   8221    fminnmp   d_2d,s_2s
   8222 
   8223    fmaxnmp   2d,4s,2s
   8224    fminnmp   2d,4s,2s
   8225 
   8226    fmaxnmv   s_4s (maxnum across vector)
   8227    fminnmv   s_4s
   8228 
   8229    fmaxp     d_2d,s_2s (max of a pair)
   8230    fminp     d_2d,s_2s (min of a pair)
   8231 
   8232    fmaxp     2d,4s,2s  (max pairwise)
   8233    fminp     2d,4s,2s
   8234 
   8235    fmaxv     s_4s (max across vector)
   8236    fminv     s_4s
   8237 
   8238    fmla      2d,4s,2s
   8239    fmls      2d,4s,2s
   8240 
   8241    fmla      d_d_d[],s_s_s[] (by element)
   8242    fmls      d_d_d[],s_s_s[] (by element)
   8243 
   8244    fmla      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8245    fmls      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8246 
   8247    fmov      2d,4s,2s #imm (part of the MOVI/MVNI/ORR/BIC imm group)
   8248 
   8249    fmov      d_d,s_s
   8250 
   8251    fmov      s_w,w_s,d_x,d[1]_x,x_d,x_d[1]
   8252 
   8253    fmov      d,s #imm
   8254 
   8255    fmul      d_d_d[],s_s_s[]
   8256    fmul      2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8257 
   8258    fmul      2d,4s,2s
   8259    fmul      d,s
   8260 
   8261    fmulx     d_d_d[],s_s_s[]
   8262    fmulx     2d_2d_d[],4s_4s_s[],2s_2s_s[]
   8263 
   8264    fmulx     d,s
   8265    fmulx     2d,4s,2s
   8266 
   8267    frecpe    d,s (recip estimate)
   8268    frecpe    2d,4s,2s
   8269 
   8270    frecps    d,s (recip step)
   8271    frecps    2d,4s,2s
   8272 
   8273    frecpx    d,s (recip exponent)
   8274 
   8275    frinta    d,s
   8276    frinti    d,s
   8277    frintm    d,s
   8278    frintn    d,s
   8279    frintp    d,s
   8280    frintx    d,s
   8281    frintz    d,s
   8282 
   8283    frinta    2d,4s,2s (round to integral, nearest away)
   8284    frinti    2d,4s,2s (round to integral, per FPCR)
   8285    frintm    2d,4s,2s (round to integral, minus inf)
   8286    frintn    2d,4s,2s (round to integral, nearest, to even)
   8287    frintp    2d,4s,2s (round to integral, plus inf)
   8288    frintx    2d,4s,2s (round to integral exact, per FPCR)
   8289    frintz    2d,4s,2s (round to integral, zero)
   8290 
   8291    frsqrte   d,s (est)
   8292    frsqrte   2d,4s,2s
   8293 
   8294    frsqrts   d,s (step)
   8295    frsqrts   2d,4s,2s
   8296 
   8297    ======================== CONV ========================
   8298 
   8299    fcvt      s_h,d_h,h_s,d_s,h_d,s_d (fp convert, scalar)
   8300 
   8301    fcvtl{2}  4s/4h, 4s/8h, 2d/2s, 2d/4s (float convert to longer form)
   8302 
   8303    fcvtn{2}  4h/4s, 8h/4s, 2s/2d, 4s/2d (float convert to narrower form)
   8304 
   8305    fcvtas    d,s  (fcvt to signed int,   nearest, ties away)
   8306    fcvtau    d,s  (fcvt to unsigned int, nearest, ties away)
   8307    fcvtas    2d,4s,2s
   8308    fcvtau    2d,4s,2s
   8309    fcvtas    w_s,x_s,w_d,x_d
   8310    fcvtau    w_s,x_s,w_d,x_d
   8311 
   8312    fcvtms    d,s  (fcvt to signed int,   minus inf)
   8313    fcvtmu    d,s  (fcvt to unsigned int, minus inf)
   8314    fcvtms    2d,4s,2s
   8315    fcvtmu    2d,4s,2s
   8316    fcvtms    w_s,x_s,w_d,x_d
   8317    fcvtmu    w_s,x_s,w_d,x_d
   8318 
   8319    fcvtns    d,s  (fcvt to signed int,   nearest)
   8320    fcvtnu    d,s  (fcvt to unsigned int, nearest)
   8321    fcvtns    2d,4s,2s
   8322    fcvtnu    2d,4s,2s
   8323    fcvtns    w_s,x_s,w_d,x_d
   8324    fcvtnu    w_s,x_s,w_d,x_d
   8325 
   8326    fcvtps    d,s  (fcvt to signed int,   plus inf)
   8327    fcvtpu    d,s  (fcvt to unsigned int, plus inf)
   8328    fcvtps    2d,4s,2s
   8329    fcvtpu    2d,4s,2s
   8330    fcvtps    w_s,x_s,w_d,x_d
   8331    fcvtpu    w_s,x_s,w_d,x_d
   8332 
   8333    fcvtzs    d,s (fcvt to signed integer,   to zero)
   8334    fcvtzu    d,s (fcvt to unsigned integer, to zero)
   8335    fcvtzs    2d,4s,2s
   8336    fcvtzu    2d,4s,2s
   8337    fcvtzs    w_s,x_s,w_d,x_d
   8338    fcvtzu    w_s,x_s,w_d,x_d
   8339 
   8340    fcvtzs    d,s (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   8341    fcvtzu    d,s (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   8342    fcvtzs    2d,4s,2s
   8343    fcvtzu    2d,4s,2s
   8344    fcvtzs    w_s,x_s,w_d,x_d (fcvt to signed fixedpt,   to zero) (w/ #fbits)
   8345    fcvtzu    w_s,x_s,w_d,x_d (fcvt to unsigned fixedpt, to zero) (w/ #fbits)
   8346 
   8347    fcvtxn    s_d (fcvt to lower prec narrow, rounding to odd)
   8348    fcvtxn    2s_2d,4s_2d
   8349 
   8350    scvtf     d,s        _#fbits
   8351    ucvtf     d,s        _#fbits
   8352 
   8353    scvtf     2d,4s,2s   _#fbits
   8354    ucvtf     2d,4s,2s   _#fbits
   8355 
   8356    scvtf     d,s
   8357    ucvtf     d,s
   8358 
   8359    scvtf     2d,4s,2s
   8360    ucvtf     2d,4s,2s
   8361 
   8362    scvtf     s_w, d_w, s_x, d_x,   _#fbits
   8363    ucvtf     s_w, d_w, s_x, d_x,   _#fbits
   8364 
   8365    scvtf     s_w, d_w, s_x, d_x
   8366    ucvtf     s_w, d_w, s_x, d_x
   8367 
   8368    ======================== INT ========================
   8369 
   8370    abs       d
   8371    neg       d
   8372 
   8373    abs       2d,4s,2s,8h,4h,16b,8b
   8374    neg       2d,4s,2s,8h,4h,16b,8b
   8375 
   8376    add       d
   8377    sub       d
   8378 
   8379    add       2d,4s,2s,8h,4h,16b,8b
   8380    sub       2d,4s,2s,8h,4h,16b,8b
   8381 
   8382    addhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8383    subhn{2}   2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8384    raddhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8385    rsubhn{2}  2s/4s_2d_2d, 4h/8h_4s_4s, 8b/16b_8h_8h
   8386 
   8387    addp     d (add pairs, across)
   8388    addp     2d,4s,2s,8h,4h,16b,8b
   8389    addv     4s,8h,4h,16b,8b (reduce across vector)
   8390 
   8391    and      16b,8b
   8392 
   8393    orr      8h,4h   #imm8, LSL #0 or 8
   8394    orr      4s,2s   #imm8, LSL #0, 8, 16 or 24
   8395    bic      8h,4h   #imm8, LSL #0 or 8
   8396    bic      4s,2s   #imm8, LSL #0, 8, 16 or 24
   8397    also movi, mvni
   8398 
   8399    bic      16b,8b (vector,reg) (bit clear)
   8400    bif      16b,8b (vector) (bit insert if false)
   8401    bit      16b,8b (vector) (bit insert if true)
   8402    bsl      16b,8b (vector) (bit select)
   8403 
   8404    cls      4s,2s,8h,4h,16b,8b (count leading sign bits)
   8405    clz      4s,2s,8h,4h,16b,8b (count leading zero bits)
   8406 
   8407    cmeq     d
   8408    cmge     d
   8409    cmgt     d
   8410    cmhi     d
   8411    cmhs     d
   8412    cmtst    d
   8413 
   8414    cmeq     2d,4s,2s,8h,4h,16b,8b
   8415    cmge     2d,4s,2s,8h,4h,16b,8b
   8416    cmgt     2d,4s,2s,8h,4h,16b,8b
   8417    cmhi     2d,4s,2s,8h,4h,16b,8b
   8418    cmhs     2d,4s,2s,8h,4h,16b,8b
   8419    cmtst    2d,4s,2s,8h,4h,16b,8b
   8420 
   8421    cmeq_z   d
   8422    cmge_z   d
   8423    cmgt_z   d
   8424    cmle_z   d
   8425    cmlt_z   d
   8426 
   8427    cmeq_z   2d,4s,2s,8h,4h,16b,8b
   8428    cmge_z   2d,4s,2s,8h,4h,16b,8b
   8429    cmgt_z   2d,4s,2s,8h,4h,16b,8b
   8430    cmle_z   2d,4s,2s,8h,4h,16b,8b
   8431    cmlt_z   2d,4s,2s,8h,4h,16b,8b
   8432 
   8433    cnt      16b,8b (population count per byte)
   8434 
   8435    dup      d,s,h,b (vec elem to scalar)
   8436    dup      2d,4s,2s,8h,4h,16b,8b (vec elem to vector)
   8437    dup      2d,4s,2s,8h,4h,16b,8b (general reg to vector)
   8438 
   8439    eor      16b,8b (vector)
   8440    ext      16b,8b,#imm4 (concat 2 vectors, then slice)
   8441 
   8442    ins      d[]_d[],s[]_s[],h[]_h[],b[]_b[]
   8443 
   8444    ins      d[]_x, s[]_w, h[]_w, b[]_w
   8445 
   8446    mla   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8447    mla   4s,2s,8h,4h,16b,8b
   8448 
   8449    mls   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8450    mls   4s,2s,8h,4h,16b,8b
   8451 
   8452    movi  16b,8b   #imm8, LSL #0
   8453    movi  8h,4h    #imm8, LSL #0 or 8
   8454    movi  4s,2s    #imm8, LSL #0, 8, 16, 24
   8455    movi  4s,2s    #imm8, MSL #8 or 16
   8456    movi  d,       #imm64
   8457    movi  2d,      #imm64
   8458 
   8459    mul   4s_4s_s[],2s_2s_s[],8h_8h_h[],4h_4h_h[]
   8460    mul   4s,2s,8h,4h,16b,8b
   8461 
   8462    mvni  8h,4h    #imm8, LSL #0 or 8
   8463    mvni  4s,2s    #imm8, LSL #0, 8, 16, 24
   8464    mvni  4s,2s    #imm8, MSL #8 or 16
   8465 
   8466    not   16b,8b
   8467 
   8468    orn   16b,8b
   8469    orr   16b,8b
   8470 
   8471    pmul  16b,8b
   8472 
   8473    pmull{2}  8h_8b_8b,8h_16b_16b,1q_1d_1d,1q_2d_2d
   8474 
   8475    rbit    16b,8b
   8476    rev16   16b,8b
   8477    rev32   16b,8b,8h,4h
   8478    rev64   16b,8b,8h,4h,4s,2s
   8479 
   8480    saba      16b,8b,8h,4h,4s,2s
   8481    uaba      16b,8b,8h,4h,4s,2s
   8482 
   8483    sabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8484    uabal{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8485 
   8486    sabd      16b,8b,8h,4h,4s,2s
   8487    uabd      16b,8b,8h,4h,4s,2s
   8488 
   8489    sabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8490    uabdl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8491 
   8492    sadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8493    uadalp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8494 
   8495    saddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8496    uaddl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8497    ssubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8498    usubl{2}  2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8499 
   8500    saddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8501    uaddlp    4h_8b,8h_16b,2s_4h,4s_8h,1d_2s,2d_4s
   8502 
   8503    saddlv    h_16b/8b, s_8h/4h, d_4s
   8504    uaddlv    h_16b/8b, s_8h/4h, d_4s
   8505 
   8506    saddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8507    uaddw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8508    ssubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8509    usubw{2}  8h_8h_16b/8b, 4s_4s_8h/4h, 2d_2d_2s/4s
   8510 
   8511    shadd        16b,8b,8h,4h,4s,2s
   8512    uhadd        16b,8b,8h,4h,4s,2s
   8513    shsub        16b,8b,8h,4h,4s,2s
   8514    uhsub        16b,8b,8h,4h,4s,2s
   8515 
   8516    shl          d_#imm
   8517    shl          16b,8b,8h,4h,4s,2s,2d  _#imm
   8518 
   8519    shll{2}      8h_8b/16b_#8, 4s_4h/8h_#16, 2d_2s/4s_#32
   8520 
   8521    shrn{2}      2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   8522    rshrn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm in 1 .. elem_bits
   8523 
   8524    sli          d_#imm
   8525    sri          d_#imm
   8526 
   8527    sli          2d,4s,2s,8h,4h,16b,8b  _#imm
   8528    sri          2d,4s,2s,8h,4h,16b,8b  _#imm
   8529 
   8530    smax         4s,2s,8h,4h,16b,8b
   8531    umax         4s,2s,8h,4h,16b,8b
   8532    smin         4s,2s,8h,4h,16b,8b
   8533    umin         4s,2s,8h,4h,16b,8b
   8534 
   8535    smaxp        4s,2s,8h,4h,16b,8b
   8536    umaxp        4s,2s,8h,4h,16b,8b
   8537    sminp        4s,2s,8h,4h,16b,8b
   8538    uminp        4s,2s,8h,4h,16b,8b
   8539 
   8540    smaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   8541    umaxv        s_4s,h_8h,h_4h,b_16b,b_8b
   8542    sminv        s_4s,h_8h,h_4h,b_16b,b_8b
   8543    uminv        s_4s,h_8h,h_4h,b_16b,b_8b
   8544 
   8545    smlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8546    umlal{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8547    smlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8548    umlsl{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8549    smull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8550    umull{2}     2d_2s/4s_s[], 4s_4h/8h_h[]
   8551 
   8552    smlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8553    umlal{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8554    smlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8555    umlsl{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8556    smull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8557    umull{2}     2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h), 8h_(8b_8b)/(16b_16b)
   8558 
   8559    smov         w_b[], w_h[], x_b[], x_h[], x_s[]
   8560    umov         w_b[], w_h[], x_b[], x_h[], x_s[]
   8561 
   8562    sqabs        d,s,h,b
   8563    sqneg        d,s,h,b
   8564 
   8565    sqabs        2d,4s,2s,8h,4h,16b,8b
   8566    sqneg        2d,4s,2s,8h,4h,16b,8b
   8567 
   8568    sqadd        d,s,h,b
   8569    uqadd        d,s,h,b
   8570    sqsub        d,s,h,b
   8571    uqsub        d,s,h,b
   8572 
   8573    sqadd        2d,4s,2s,8h,4h,16b,8b
   8574    uqadd        2d,4s,2s,8h,4h,16b,8b
   8575    sqsub        2d,4s,2s,8h,4h,16b,8b
   8576    uqsub        2d,4s,2s,8h,4h,16b,8b
   8577 
   8578    sqdmlal      d_s_s[], s_h_h[]
   8579    sqdmlsl      d_s_s[], s_h_h[]
   8580    sqdmull      d_s_s[], s_h_h[]
   8581 
   8582    sqdmlal{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8583    sqdmlsl{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8584    sqdmull{2}   2d_2s/4s_s[], 4s_4h/8h_h[]
   8585 
   8586    sqdmlal      d_s_s, s_h_h
   8587    sqdmlsl      d_s_s, s_h_h
   8588    sqdmull      d_s_s, s_h_h
   8589 
   8590    sqdmlal{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8591    sqdmlsl{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8592    sqdmull{2}   2d_(2s_2s)/(4s_4s), 4s_(4h_4h)/(8h_8h)
   8593 
   8594    sqdmulh      s_s_s[], h_h_h[]
   8595    sqrdmulh     s_s_s[], h_h_h[]
   8596 
   8597    sqdmulh      4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   8598    sqrdmulh     4s_4s_s[], 2s_2s_s[], 8h_8h_h[], 4h_4h_h[]
   8599 
   8600    sqdmulh      h,s
   8601    sqrdmulh     h,s
   8602 
   8603    sqdmulh      4s,2s,8h,4h
   8604    sqrdmulh     4s,2s,8h,4h
   8605 
   8606    sqshl        d,s,h,b
   8607    uqshl        d,s,h,b
   8608    sqrshl       d,s,h,b
   8609    uqrshl       d,s,h,b
   8610 
   8611    sqshl        2d,4s,2s,8h,4h,16b,8b
   8612    uqshl        2d,4s,2s,8h,4h,16b,8b
   8613    sqrshl       2d,4s,2s,8h,4h,16b,8b
   8614    uqrshl       2d,4s,2s,8h,4h,16b,8b
   8615 
   8616    sqrshrn      s_d, h_s, b_h   #imm
   8617    uqrshrn      s_d, h_s, b_h   #imm
   8618    sqshrn       s_d, h_s, b_h   #imm
   8619    uqshrn       s_d, h_s, b_h   #imm
   8620 
   8621    sqrshrun     s_d, h_s, b_h   #imm
   8622    sqshrun      s_d, h_s, b_h   #imm
   8623 
   8624    sqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8625    uqrshrn{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8626    sqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8627    uqshrn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8628 
   8629    sqrshrun{2}  2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8630    sqshrun{2}   2s/4s_2d, 4h/8h_4s, 8b/16b_8h,  #imm
   8631 
   8632    sqshl        d,s,h,b   _#imm
   8633    uqshl        d,s,h,b   _#imm
   8634    sqshlu       d,s,h,b   _#imm
   8635 
   8636    sqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8637    uqshl        2d,4s,2s,8h,4h,16b,8b   _#imm
   8638    sqshlu       2d,4s,2s,8h,4h,16b,8b   _#imm
   8639 
   8640    sqxtn        s_d,h_s,b_h
   8641    uqxtn        s_d,h_s,b_h
   8642    sqxtun       s_d,h_s,b_h
   8643 
   8644    sqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8645    uqxtn{2}     2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8646    sqxtun{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8647 
   8648    srhadd       4s,2s,8h,4h,16b,8b
   8649    urhadd       4s,2s,8h,4h,16b,8b
   8650 
   8651    sshl (reg)   d
   8652    ushl (reg)   d
   8653    sshr (imm)   d
   8654    ushr (imm)   d
   8655    ssra (imm)   d
   8656    usra (imm)   d
   8657 
   8658    srshl (reg)  d
   8659    urshl (reg)  d
   8660    srshr (imm)  d
   8661    urshr (imm)  d
   8662    srsra (imm)  d
   8663    ursra (imm)  d
   8664 
   8665    sshl         2d,4s,2s,8h,4h,16b,8b
   8666    ushl         2d,4s,2s,8h,4h,16b,8b
   8667    sshr         2d,4s,2s,8h,4h,16b,8b
   8668    ushr         2d,4s,2s,8h,4h,16b,8b
   8669    ssra         2d,4s,2s,8h,4h,16b,8b
   8670    usra         2d,4s,2s,8h,4h,16b,8b
   8671 
   8672    srshl        2d,4s,2s,8h,4h,16b,8b
   8673    urshl        2d,4s,2s,8h,4h,16b,8b
   8674    srshr        2d,4s,2s,8h,4h,16b,8b
   8675    urshr        2d,4s,2s,8h,4h,16b,8b
   8676    srsra        2d,4s,2s,8h,4h,16b,8b
   8677    ursra        2d,4s,2s,8h,4h,16b,8b
   8678 
   8679    sshll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8680    ushll{2} (imm)  2d_2s/4s, 4s_4h/8h, 8h_8b/16b
   8681 
   8682    suqadd  d,s,h,b
   8683    suqadd  2d,4s,2s,8h,4h,16b,8b
   8684 
   8685    tbl     8b_{16b}_8b, 16b_{16b}_16b
   8686    tbl     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8687    tbl     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8688    tbl     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8689 
   8690    tbx     8b_{16b}_8b, 16b_{16b}_16b
   8691    tbx     8b_{16b,16b}_8b, 16b_{16b,16b}_16b
   8692    tbx     8b_{16b,16b,16b}_8b, 16b_{16b,16b,16b}_16b
   8693    tbx     8b_{16b,16b,16b,16b}_8b, 16b_{16b,16b,16b,16b}_16b
   8694 
   8695    trn1    2d,4s,2s,8h,4h,16b,8b
   8696    trn2    2d,4s,2s,8h,4h,16b,8b
   8697 
   8698    urecpe      4s,2s
   8699 
   8700    ursqrte     4s,2s
   8701 
   8702    usqadd      d,s,h,b
   8703    usqadd      2d,4s,2s,8h,4h,16b,8b
   8704 
   8705    uzp1      2d,4s,2s,8h,4h,16b,8b
   8706    uzp2      2d,4s,2s,8h,4h,16b,8b
   8707 
   8708    xtn{2}    2s/4s_2d, 4h/8h_4s, 8b/16b_8h
   8709 
   8710    zip1      2d,4s,2s,8h,4h,16b,8b
   8711    zip2      2d,4s,2s,8h,4h,16b,8b
   8712 
   8713    ======================== MEM ========================
   8714 
   8715    ld1  (multiple 1-element structures to 1/2/3/4 regs)
   8716    ld1  (single 1-element structure to one lane of 1 reg)
   8717    ld1r (single 1-element structure and rep to all lanes of 1 reg)
   8718 
   8719    ld2  (multiple 2-element structures to 2 regs)
   8720    ld2  (single 2-element structure to one lane of 2 regs)
   8721    ld2r (single 2-element structure and rep to all lanes of 2 regs)
   8722 
   8723    ld3  (multiple 3-element structures to 3 regs)
   8724    ld3  (single 3-element structure to one lane of 3 regs)
   8725    ld3r (single 3-element structure and rep to all lanes of 3 regs)
   8726 
   8727    ld4  (multiple 4-element structures to 4 regs)
   8728    ld4  (single 4-element structure to one lane of 4 regs)
   8729    ld4r (single 4-element structure and rep to all lanes of 4 regs)
   8730 
   8731    ldnp  q_q_addr,d_d_addr,s_s_addr  (load pair w/ non-temporal hint)
   8732          addr = reg + uimm7 * reg_size
   8733 
   8734    ldp   q_q_addr,d_d_addr,s_s_addr  (load pair)
   8735          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   8736 
   8737    ldr   q,d,s,h,b from addr
   8738          addr = [Xn|SP],#imm   or [Xn|SP,#imm]!  or [Xn|SP,#imm]
   8739 
   8740    ldr   q,d,s from  pc+#imm19
   8741 
   8742    ldr   q,d,s,h,b from addr
   8743          addr = [Xn|SP, R <extend> <shift>]
   8744 
   8745    ldur  q,d,s,h,b from addr
   8746          addr = [Xn|SP,#imm] (unscaled offset)
   8747 
   8748    st1 (multiple 1-element structures from 1/2/3/4 regs)
   8749    st1 (single 1-element structure for 1 lane of 1 reg)
   8750 
   8751    st2 (multiple 2-element structures from 2 regs)
   8752    st2 (single 2-element structure from 1 lane of 2 regs)
   8753 
   8754    st3 (multiple 3-element structures from 3 regs)
   8755    st3 (single 3-element structure from 1 lane of 3 regs)
   8756 
   8757    st4 (multiple 4-element structures from 4 regs)
   8758    st4 (single 4-element structure from one lane of 4 regs)
   8759 
   8760    stnp q_q_addr, d_d_addr, s_s_addr
   8761         addr = [Xn|SP, #imm]
   8762 
   8763    stp  q_q_addr, d_d_addr, s_s_addr
   8764         addr = [Xn|SP], #imm  or [Xn|SP, #imm]!  or [Xn|SP, #imm]
   8765 
   8766    str  q,d,s,h,b_addr
   8767         addr = [Xn|SP], #simm  or [Xn|SP, #simm]!  or [Xn|SP, #pimm]
   8768 
   8769    str   q,d,s,h,b_addr
   8770          addr = [Xn|SP, R <extend> <shift>]
   8771 
   8772    stur  q,d,s,h,b_addr
   8773          addr = [Xn|SP,#imm] (unscaled offset)
   8774 
   8775    ======================== CRYPTO ========================
   8776 
   8777    aesd       16b (aes single round decryption)
   8778    aese       16b (aes single round encryption)
   8779    aesimc     16b (aes inverse mix columns)
   8780    aesmc      16b (aes mix columns)
   8781 
   8782    sha1c      q_s_4s
   8783    sha1h      s_s
   8784    sha1m      q_s_4s
   8785    sha1p      q_s_4s
   8786    sha1su0    4s_4s_4s
   8787    sha1su1    4s_4s
   8788 
   8789    sha256h2   q_q_4s
   8790    sha256h    q_q_4s
   8791    sha256su0  4s_4s
   8792    sha256su1  4s_4s_4s
   8793 */
   8794