; Regression tests for MIPS MSA lowering of shufflevector masks (vshf, shf, splati, ilvev, ilvod, ilvr).
      1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
      2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
      3 
      4 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
      5   ; CHECK-LABEL: vshf_v16i8_0:
          ; Byte-reverses a single input (the second shuffle operand is undef).
          ; Expected: a vshf.b whose control register R3 is loaded through a
          ; %lo-relative address (presumably the constant-pool copy of the mask) and
          ; whose two source operands are both the vector loaded from %a.
      6 
      7   %1 = load <16 x i8>, <16 x i8>* %a
      8   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
      9   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     10   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     11   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
     12   ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
     13   store <16 x i8> %2, <16 x i8>* %c
     14   ; CHECK-DAG: st.b [[R3]], 0($4)
     15 
     16   ret void
     17 }
     18 
     19 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
     20   ; CHECK-LABEL: vshf_v16i8_1:
          ; Broadcasts element 1 of a single input to all sixteen lanes. Expected to
          ; select splati.b with index 1 rather than a general vshf.b.
     21 
     22   %1 = load <16 x i8>, <16 x i8>* %a
     23   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     24   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
     25   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
     26   store <16 x i8> %2, <16 x i8>* %c
     27   ; CHECK-DAG: st.b [[R3]], 0($4)
     28 
     29   ret void
     30 }
     31 
     32 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
     33   ; CHECK-LABEL: vshf_v16i8_2:
          ; Every mask index (16..30, then 16 again) names an element of the second
          ; shuffle operand, so only the load of %b is checked and vshf.b is expected
          ; to read R2 for both source operands. The load of %a is unused by the mask.
     34 
     35   %1 = load <16 x i8>, <16 x i8>* %a
     36   %2 = load <16 x i8>, <16 x i8>* %b
     37   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     38   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
     39   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     40   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
     41   ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
     42   store <16 x i8> %3, <16 x i8>* %c
     43   ; CHECK-DAG: st.b [[R3]], 0($4)
     44 
     45   ret void
     46 }
     47 
     48 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
     49   ; CHECK-LABEL: vshf_v16i8_3:
          ; The mask draws elements from both inputs (indices below 16 from %1, 16 and
          ; above from %2), so a genuine two-source vshf.b is expected. Both loads are
          ; checked, and the control vector R3 is loaded via a %lo-relative address.
     50 
     51   %1 = load <16 x i8>, <16 x i8>* %a
     52   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     53   %2 = load <16 x i8>, <16 x i8>* %b
     54   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     55   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
     56   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     57   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
     58   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
     59   ; the operands to get the right answer.
     60   ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
     61   store <16 x i8> %3, <16 x i8>* %c
     62   ; CHECK-DAG: st.b [[R3]], 0($4)
     63 
     64   ret void
     65 }
     66 
     67 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
     68   ; CHECK-LABEL: vshf_v16i8_4:
          ; Both shuffle operands are the same value %1, so mask indices 1 and 17
          ; name the same element. The shuffle is therefore expected to fold to
          ; splati.b with index 1.
     69 
     70   %1 = load <16 x i8>, <16 x i8>* %a
     71   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     72   %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
     73   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
     74   store <16 x i8> %2, <16 x i8>* %c
     75   ; CHECK-DAG: st.b [[R3]], 0($4)
     76 
     77   ret void
     78 }
     79 
     80 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
     81   ; CHECK-LABEL: vshf_v8i16_0:
          ; Reverses all eight halfwords of a single input (the second shuffle operand
          ; is undef). Expected: a vshf.h whose control register R3 is loaded through a
          ; %lo-relative address (presumably the constant-pool copy of the mask) and
          ; whose two source operands are both the vector loaded from %a.
     82 
     83   %1 = load <8 x i16>, <8 x i16>* %a
     84   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     85   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     86   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     87   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
     88   ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
     89   store <8 x i16> %2, <8 x i16>* %c
     90   ; CHECK-DAG: st.h [[R3]], 0($4)
     91 
     92   ret void
     93 }
     94 
     95 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
     96   ; CHECK-LABEL: vshf_v8i16_1:
          ; Broadcasts element 1 of a single input to all eight lanes. Expected to
          ; select splati.h with index 1 rather than a general vshf.h.
     97 
     98   %1 = load <8 x i16>, <8 x i16>* %a
     99   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    100   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    101   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
    102   store <8 x i16> %2, <8 x i16>* %c
    103   ; CHECK-DAG: st.h [[R3]], 0($4)
    104 
    105   ret void
    106 }
    107 
    108 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    109   ; CHECK-LABEL: vshf_v8i16_2:
          ; Every mask index (8..14, then 8 again) names an element of the second
          ; shuffle operand, so only the load of %b is checked and vshf.h is expected
          ; to read R2 for both source operands. The load of %a is unused by the mask.
    110 
    111   %1 = load <8 x i16>, <8 x i16>* %a
    112   %2 = load <8 x i16>, <8 x i16>* %b
    113   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    114   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
    115   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    116   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
    117   ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
    118   store <8 x i16> %3, <8 x i16>* %c
    119   ; CHECK-DAG: st.h [[R3]], 0($4)
    120 
    121   ret void
    122 }
    123 
    124 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    125   ; CHECK-LABEL: vshf_v8i16_3:
          ; The mask draws elements from both inputs (indices below 8 from %1, 8 and
          ; above from %2), so a genuine two-source vshf.h is expected. Both loads are
          ; checked, and the control vector R3 is loaded via a %lo-relative address.
    126 
    127   %1 = load <8 x i16>, <8 x i16>* %a
    128   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    129   %2 = load <8 x i16>, <8 x i16>* %b
    130   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    131   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
    132   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    133   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
    134   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
    135   ; the operands to get the right answer.
    136   ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
    137   store <8 x i16> %3, <8 x i16>* %c
    138   ; CHECK-DAG: st.h [[R3]], 0($4)
    139 
    140   ret void
    141 }
    142 
    143 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    144   ; CHECK-LABEL: vshf_v8i16_4:
          ; Both shuffle operands are the same value %1, so mask indices 1 and 9 name
          ; the same element. The shuffle is therefore expected to fold to splati.h
          ; with index 1.
    145 
    146   %1 = load <8 x i16>, <8 x i16>* %a
    147   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    148   %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
    149   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
    150   store <8 x i16> %2, <8 x i16>* %c
    151   ; CHECK-DAG: st.h [[R3]], 0($4)
    152 
    153   ret void
    154 }
    155 
    156 ; Note: v4i32 only has one 4-element set so it's impossible to get a vshf.w
    157 ; instruction when using a single vector.
    158 
    159 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    160   ; CHECK-LABEL: vshf_v4i32_0:
          ; Reverses the four words of a single input. Expected to select the
          ; immediate form shf.w instead of vshf.w. The immediate 27 is 0b00011011,
          ; i.e. the 2-bit selectors 3,2,1,0 for result elements 0..3, matching the
          ; mask below.
    161 
    162   %1 = load <4 x i32>, <4 x i32>* %a
    163   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    164   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    165   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
    166   store <4 x i32> %2, <4 x i32>* %c
    167   ; CHECK-DAG: st.w [[R3]], 0($4)
    168 
    169   ret void
    170 }
    171 
    172 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    173   ; CHECK-LABEL: vshf_v4i32_1:
          ; Broadcasts element 1 of a single input to all four lanes. Expected to
          ; select splati.w with index 1.
    174 
    175   %1 = load <4 x i32>, <4 x i32>* %a
    176   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    177   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    178   ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
    179   store <4 x i32> %2, <4 x i32>* %c
    180   ; CHECK-DAG: st.w [[R3]], 0($4)
    181 
    182   ret void
    183 }
    184 
    185 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    186   ; CHECK-LABEL: vshf_v4i32_2:
          ; Every mask index (4,5,6,4) names an element of the second shuffle operand,
          ; so only the load of %b is checked and shf.w is expected to operate on R2.
          ; The immediate 36 is 0b00100100, i.e. the 2-bit selectors 0,1,2,0 for
          ; result elements 0..3.
    187 
    188   %1 = load <4 x i32>, <4 x i32>* %a
    189   %2 = load <4 x i32>, <4 x i32>* %b
    190   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    191   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
    192   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
    193   store <4 x i32> %3, <4 x i32>* %c
    194   ; CHECK-DAG: st.w [[R3]], 0($4)
    195 
    196   ret void
    197 }
    198 
    199 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    200   ; CHECK-LABEL: vshf_v4i32_3:
          ; The mask takes element 1 from %1 and elements 5,6,4 from %2, so a
          ; two-source vshf.w is expected. Both loads are checked, and the control
          ; vector R3 is loaded via a %lo-relative address.
    201 
    202   %1 = load <4 x i32>, <4 x i32>* %a
    203   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    204   %2 = load <4 x i32>, <4 x i32>* %b
    205   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    206   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
    207   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    208   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
    209   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
    210   ; the operands to get the right answer.
    211   ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
    212   store <4 x i32> %3, <4 x i32>* %c
    213   ; CHECK-DAG: st.w [[R3]], 0($4)
    214 
    215   ret void
    216 }
    217 
    218 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    219   ; CHECK-LABEL: vshf_v4i32_4:
          ; Shuffles %1 with itself using indices 1 and 5 only; the result is expected
          ; to fold to splati.w with index 1.
    220 
    221   %1 = load <4 x i32>, <4 x i32>* %a
    222   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    223   %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
    224   ; The two operand vectors are the same so element 1 and 5 are equivalent.
    225   ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][1]
    226   store <4 x i32> %2, <4 x i32>* %c
    227   ; CHECK-DAG: st.w [[R3]], 0($4)
    228 
    229   ret void
    230 }
    231 
    232 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    233   ; CHECK-LABEL: vshf_v2i64_0:
          ; Swaps the two doublewords of a single input (the second shuffle operand is
          ; undef). Expected: a vshf.d whose control register R3 is loaded through a
          ; %lo-relative address (presumably the constant-pool copy of the mask) and
          ; whose two source operands are both the vector loaded from %a.
    234 
    235   %1 = load <2 x i64>, <2 x i64>* %a
    236   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    237   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
    238   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    239   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
    240   ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
    241   store <2 x i64> %2, <2 x i64>* %c
    242   ; CHECK-DAG: st.d [[R3]], 0($4)
    243 
    244   ret void
    245 }
    246 
    247 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    248   ; CHECK-LABEL: vshf_v2i64_1:
          ; Broadcasts element 1 of a single input to both lanes. Expected to select
          ; splati.d with index 1.
    249 
    250   %1 = load <2 x i64>, <2 x i64>* %a
    251   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    252   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    253   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
    254   store <2 x i64> %2, <2 x i64>* %c
    255   ; CHECK-DAG: st.d [[R3]], 0($4)
    256 
    257   ret void
    258 }
    259 
    260 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    261   ; CHECK-LABEL: vshf_v2i64_2:
          ; Both mask indices (3,2) name elements of the second shuffle operand, so
          ; only the load of %b is checked and vshf.d is expected to read R2 for both
          ; source operands. The load of %a is unused by the mask.
    262 
    263   %1 = load <2 x i64>, <2 x i64>* %a
    264   %2 = load <2 x i64>, <2 x i64>* %b
    265   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    266   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
    267   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    268   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
    269   ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
    270   store <2 x i64> %3, <2 x i64>* %c
    271   ; CHECK-DAG: st.d [[R3]], 0($4)
    272 
    273   ret void
    274 }
    275 
    276 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    277   ; CHECK-LABEL: vshf_v2i64_3:
          ; The mask takes element 1 from %1 and element 2 (the first element of %2),
          ; so a two-source vshf.d is expected. Both loads are checked, and the
          ; control vector R3 is loaded via a %lo-relative address.
    278 
    279   %1 = load <2 x i64>, <2 x i64>* %a
    280   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    281   %2 = load <2 x i64>, <2 x i64>* %b
    282   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    283   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
    284   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    285   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
    286   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
    287   ; the operands to get the right answer.
    288   ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
    289   store <2 x i64> %3, <2 x i64>* %c
    290   ; CHECK-DAG: st.d [[R3]], 0($4)
    291 
    292   ret void
    293 }
    294 
    295 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    296   ; CHECK-LABEL: vshf_v2i64_4:
          ; Both shuffle operands are the same value %1, so mask indices 1 and 3 name
          ; the same element. The shuffle is therefore expected to fold to splati.d
          ; with index 1.
    297 
    298   %1 = load <2 x i64>, <2 x i64>* %a
    299   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    300   %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
    301   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
    302   store <2 x i64> %2, <2 x i64>* %c
    303   ; CHECK-DAG: st.d [[R3]], 0($4)
    304 
    305   ret void
    306 }
    307 
    308 define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    309   ; CHECK-LABEL: shf_v16i8_0:
          ; Applies the same permutation (1,3,2,0) inside each group of four bytes of
          ; a single input. Expected to select shf.b with immediate 45, which is
          ; 0b00101101, i.e. the 2-bit selectors 1,3,2,0 for positions 0..3 of a group.
    310 
    311   %1 = load <16 x i8>, <16 x i8>* %a
    312   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    313   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
    314   ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
    315   store <16 x i8> %2, <16 x i8>* %c
    316   ; CHECK-DAG: st.b [[R3]], 0($4)
    317 
    318   ret void
    319 }
    320 
    321 define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    322   ; CHECK-LABEL: shf_v8i16_0:
          ; Reverses each group of four halfwords of a single input. Expected to
          ; select shf.h with immediate 27, which is 0b00011011, i.e. the 2-bit
          ; selectors 3,2,1,0 for positions 0..3 of a group.
    323 
    324   %1 = load <8 x i16>, <8 x i16>* %a
    325   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    326   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    327   ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
    328   store <8 x i16> %2, <8 x i16>* %c
    329   ; CHECK-DAG: st.h [[R3]], 0($4)
    330 
    331   ret void
    332 }
    333 
    334 define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    335   ; CHECK-LABEL: shf_v4i32_0:
          ; Reverses the four words of a single input; expected to select shf.w with
          ; immediate 27 (0b00011011, selectors 3,2,1,0). The IR and expectations are
          ; the same as in vshf_v4i32_0.
    336 
    337   %1 = load <4 x i32>, <4 x i32>* %a
    338   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    339   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    340   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
    341   store <4 x i32> %2, <4 x i32>* %c
    342   ; CHECK-DAG: st.w [[R3]], 0($4)
    343 
    344   ret void
    345 }
    346 
    347 ; shf.d does not exist
    348 
    349 define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    350   ; CHECK-LABEL: ilvev_v16i8_0:
          ; Interleaves the even-indexed bytes of %1 and %2 (0,16,2,18,...). Expected
          ; to select ilvev.b with the vector loaded from %b as the first source
          ; operand and the vector loaded from %a as the second.
    351 
    352   %1 = load <16 x i8>, <16 x i8>* %a
    353   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    354   %2 = load <16 x i8>, <16 x i8>* %b
    355   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    356   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    357                      <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
    358   ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    359   store <16 x i8> %3, <16 x i8>* %c
    360   ; CHECK-DAG: st.b [[R3]], 0($4)
    361 
    362   ret void
    363 }
    364 
    365 define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    366   ; CHECK-LABEL: ilvev_v8i16_0:
          ; Interleaves the even-indexed halfwords of %1 and %2 (0,8,2,10,...).
          ; Expected to select ilvev.h with the vector loaded from %b as the first
          ; source operand and the vector loaded from %a as the second.
    367 
    368   %1 = load <8 x i16>, <8 x i16>* %a
    369   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    370   %2 = load <8 x i16>, <8 x i16>* %b
    371   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    372   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    373   ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    374   store <8 x i16> %3, <8 x i16>* %c
    375   ; CHECK-DAG: st.h [[R3]], 0($4)
    376 
    377   ret void
    378 }
    379 
    380 define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    381   ; CHECK-LABEL: ilvev_v4i32_0:
          ; Interleaves the even-indexed words of %1 and %2 (0,4,2,6). Expected to
          ; select ilvev.w with the vector loaded from %b as the first source operand
          ; and the vector loaded from %a as the second.
    382 
    383   %1 = load <4 x i32>, <4 x i32>* %a
    384   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    385   %2 = load <4 x i32>, <4 x i32>* %b
    386   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    387   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    388   ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    389   store <4 x i32> %3, <4 x i32>* %c
    390   ; CHECK-DAG: st.w [[R3]], 0($4)
    391 
    392   ret void
    393 }
    394 
    395 define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    396   ; CHECK-LABEL: ilvev_v2i64_0:
          ; Takes element 0 of %1 and element 0 of %2 (mask 0,2). Expected to select
          ; ilvev.d with the vector loaded from %b as the first source operand and the
          ; vector loaded from %a as the second.
    397 
    398   %1 = load <2 x i64>, <2 x i64>* %a
    399   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    400   %2 = load <2 x i64>, <2 x i64>* %b
    401   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    402   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
    403   ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    404   store <2 x i64> %3, <2 x i64>* %c
    405   ; CHECK-DAG: st.d [[R3]], 0($4)
    406 
    407   ret void
    408 }
    409 
    410 ; Interleaving one operand with itself.
    411 define void @ilvev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    412   ; CHECK-LABEL: ilvev_v16i8_1:
          ; Duplicates each even-indexed byte of %1 into the adjacent lane
          ; (0,0,2,2,...). %2 is loaded but never referenced by the mask, so ilvev.b
          ; is expected to use R1 for both source operands.
    413 
    414   %1 = load <16 x i8>, <16 x i8>* %a
    415   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    416   %2 = load <16 x i8>, <16 x i8>* %b
    417   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    418                      <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
    419   ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    420   store <16 x i8> %3, <16 x i8>* %c
    421   ; CHECK-DAG: st.b [[R3]], 0($4)
    422 
    423   ret void
    424 }
    425 
    426 define void @ilvev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    427   ; CHECK-LABEL: ilvev_v8i16_1:
          ; Duplicates each even-indexed halfword of %1 into the adjacent lane
          ; (0,0,2,2,...). %2 is loaded but never referenced by the mask, so ilvev.h
          ; is expected to use R1 for both source operands.
    428 
    429   %1 = load <8 x i16>, <8 x i16>* %a
    430   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    431   %2 = load <8 x i16>, <8 x i16>* %b
    432   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
    433   ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    434   store <8 x i16> %3, <8 x i16>* %c
    435   ; CHECK-DAG: st.h [[R3]], 0($4)
    436 
    437   ret void
    438 }
    439 
    440 define void @ilvev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    441   ; CHECK-LABEL: ilvev_v4i32_1:
          ; Duplicates each even-indexed word of %1 into the adjacent lane (0,0,2,2).
          ; %2 is loaded but never referenced by the mask, so ilvev.w is expected to
          ; use R1 for both source operands.
    442 
    443   %1 = load <4 x i32>, <4 x i32>* %a
    444   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    445   %2 = load <4 x i32>, <4 x i32>* %b
    446   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
    447   ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    448   store <4 x i32> %3, <4 x i32>* %c
    449   ; CHECK-DAG: st.w [[R3]], 0($4)
    450 
    451   ret void
    452 }
    453 
    454 define void @ilvev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    455   ; CHECK-LABEL: ilvev_v2i64_1:
          ; Copies element 0 of %1 into both lanes (mask 0,0). %2 is loaded but never
          ; referenced by the mask. The expected instruction is splati.d with index 0.
    456 
    457   %1 = load <2 x i64>, <2 x i64>* %a
    458   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    459   %2 = load <2 x i64>, <2 x i64>* %b
    460   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
    461   ; ilvev.d with two identical operands is equivalent to splati.d
    462   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
    463   store <2 x i64> %3, <2 x i64>* %c
    464   ; CHECK-DAG: st.d [[R3]], 0($4)
    465 
    466   ret void
    467 }
    468 
    469 define void @ilvev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    470   ; CHECK-LABEL: ilvev_v16i8_2:
          ; Duplicates each even-indexed byte of the second operand into the adjacent
          ; lane (16,16,18,18,...). %1 is loaded but never referenced by the mask, so
          ; only the load of %b is checked and ilvev.b is expected to use R2 twice.
    471 
    472   %1 = load <16 x i8>, <16 x i8>* %a
    473   %2 = load <16 x i8>, <16 x i8>* %b
    474   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    475   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    476                      <16 x i32> <i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
    477   ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    478   store <16 x i8> %3, <16 x i8>* %c
    479   ; CHECK-DAG: st.b [[R3]], 0($4)
    480 
    481   ret void
    482 }
    483 
    484 define void @ilvev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    485   ; CHECK-LABEL: ilvev_v8i16_2:
          ; Duplicates each even-indexed halfword of the second operand into the
          ; adjacent lane (8,8,10,10,...). %1 is loaded but never referenced by the
          ; mask, so only the load of %b is checked and ilvev.h uses R2 twice.
    486 
    487   %1 = load <8 x i16>, <8 x i16>* %a
    488   %2 = load <8 x i16>, <8 x i16>* %b
    489   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    490   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
    491   ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    492   store <8 x i16> %3, <8 x i16>* %c
    493   ; CHECK-DAG: st.h [[R3]], 0($4)
    494 
    495   ret void
    496 }
    497 
    498 define void @ilvev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    499   ; CHECK-LABEL: ilvev_v4i32_2:
          ; Duplicates each even-indexed word of the second operand into the adjacent
          ; lane (4,4,6,6). %1 is loaded but never referenced by the mask, so only the
          ; load of %b is checked and ilvev.w is expected to use R2 twice.
    500 
    501   %1 = load <4 x i32>, <4 x i32>* %a
    502   %2 = load <4 x i32>, <4 x i32>* %b
    503   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    504   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 6, i32 6>
    505   ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    506   store <4 x i32> %3, <4 x i32>* %c
    507   ; CHECK-DAG: st.w [[R3]], 0($4)
    508 
    509   ret void
    510 }
    511 
    512 define void @ilvev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    513   ; CHECK-LABEL: ilvev_v2i64_2:
          ; Copies element 0 of the second operand into both lanes (mask 2,2). %1 is
          ; loaded but never referenced by the mask. The expected instruction is
          ; splati.d on R2 with index 0.
    514 
    515   %1 = load <2 x i64>, <2 x i64>* %a
    516   %2 = load <2 x i64>, <2 x i64>* %b
    517   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    518   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
    519   ; ilvev.d with two identical operands is equivalent to splati.d
    520   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
    521   store <2 x i64> %3, <2 x i64>* %c
    522   ; CHECK-DAG: st.d [[R3]], 0($4)
    523 
    524   ret void
    525 }
    526 
    527 define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    528   ; CHECK-LABEL: ilvod_v16i8_0:
          ; Interleaves the odd-indexed bytes of %1 and %2 (1,17,3,19,...). Expected
          ; to select ilvod.b.
          ; NOTE(review): the expected source order here is R1, R2, whereas
          ; ilvev_v16i8_0 expects R2, R1 for the same operand layout - confirm the
          ; asymmetry is intended and not a lowering bug baked into the test.
    529 
    530   %1 = load <16 x i8>, <16 x i8>* %a
    531   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    532   %2 = load <16 x i8>, <16 x i8>* %b
    533   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    534   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    535                      <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
    536   ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    537   store <16 x i8> %3, <16 x i8>* %c
    538   ; CHECK-DAG: st.b [[R3]], 0($4)
    539 
    540   ret void
    541 }
    542 
    543 define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    544   ; CHECK-LABEL: ilvod_v8i16_0:
          ; Interleaves the odd-indexed halfwords of %1 and %2 (1,9,3,11,...).
          ; Expected to select ilvod.h.
          ; NOTE(review): the expected source order here is R1, R2, whereas
          ; ilvev_v8i16_0 expects R2, R1 for the same operand layout - confirm the
          ; asymmetry is intended and not a lowering bug baked into the test.
    545 
    546   %1 = load <8 x i16>, <8 x i16>* %a
    547   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    548   %2 = load <8 x i16>, <8 x i16>* %b
    549   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    550   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    551   ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    552   store <8 x i16> %3, <8 x i16>* %c
    553   ; CHECK-DAG: st.h [[R3]], 0($4)
    554 
    555   ret void
    556 }
    557 
    558 define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    559   ; CHECK-LABEL: ilvod_v4i32_0:
          ; Interleaves the odd-indexed words of %1 and %2 (1,5,3,7). Expected to
          ; select ilvod.w.
          ; NOTE(review): the expected source order here is R1, R2, whereas
          ; ilvev_v4i32_0 expects R2, R1 for the same operand layout - confirm the
          ; asymmetry is intended and not a lowering bug baked into the test.
    560 
    561   %1 = load <4 x i32>, <4 x i32>* %a
    562   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    563   %2 = load <4 x i32>, <4 x i32>* %b
    564   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    565   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    566   ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    567   store <4 x i32> %3, <4 x i32>* %c
    568   ; CHECK-DAG: st.w [[R3]], 0($4)
    569 
    570   ret void
    571 }
    572 
    573 define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    574   ; CHECK-LABEL: ilvod_v2i64_0:
          ; Takes element 1 of %1 and element 1 of %2 (mask 1,3). Expected to select
          ; ilvod.d.
          ; NOTE(review): the expected source order here is R1, R2, whereas
          ; ilvev_v2i64_0 expects R2, R1 for the same operand layout - confirm the
          ; asymmetry is intended and not a lowering bug baked into the test.
    575 
    576   %1 = load <2 x i64>, <2 x i64>* %a
    577   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    578   %2 = load <2 x i64>, <2 x i64>* %b
    579   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    580   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
    581   ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    582   store <2 x i64> %3, <2 x i64>* %c
    583   ; CHECK-DAG: st.d [[R3]], 0($4)
    584 
    585   ret void
    586 }
    587 
    588 define void @ilvod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    589   ; CHECK-LABEL: ilvod_v16i8_1:
          ; Duplicates each odd-indexed byte of %1 into the adjacent lane
          ; (1,1,3,3,...). %2 is loaded but never referenced by the mask, so ilvod.b
          ; is expected to use R1 for both source operands.
    590 
    591   %1 = load <16 x i8>, <16 x i8>* %a
    592   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    593   %2 = load <16 x i8>, <16 x i8>* %b
    594   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    595                      <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
    596   ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    597   store <16 x i8> %3, <16 x i8>* %c
    598   ; CHECK-DAG: st.b [[R3]], 0($4)
    599 
    600   ret void
    601 }
    602 
    603 define void @ilvod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    604   ; CHECK-LABEL: ilvod_v8i16_1:
          ; Duplicates each odd-indexed halfword of %1 into the adjacent lane
          ; (1,1,3,3,...). %2 is loaded but never referenced by the mask, so ilvod.h
          ; is expected to use R1 for both source operands.
    605 
    606   %1 = load <8 x i16>, <8 x i16>* %a
    607   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    608   %2 = load <8 x i16>, <8 x i16>* %b
    609   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
    610   ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    611   store <8 x i16> %3, <8 x i16>* %c
    612   ; CHECK-DAG: st.h [[R3]], 0($4)
    613 
    614   ret void
    615 }
    616 
    617 define void @ilvod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    618   ; CHECK-LABEL: ilvod_v4i32_1:
          ; Duplicates each odd-indexed word of %1 into the adjacent lane (1,1,3,3).
          ; %2 is loaded but never referenced by the mask, so ilvod.w is expected to
          ; use R1 for both source operands.
    619 
    620   %1 = load <4 x i32>, <4 x i32>* %a
    621   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    622   %2 = load <4 x i32>, <4 x i32>* %b
    623   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
    624   ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    625   store <4 x i32> %3, <4 x i32>* %c
    626   ; CHECK-DAG: st.w [[R3]], 0($4)
    627 
    628   ret void
    629 }
    630 
    631 define void @ilvod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    632   ; CHECK-LABEL: ilvod_v2i64_1:
          ; Copies element 1 of %1 into both lanes (mask 1,1). %2 is loaded but never
          ; referenced by the mask. The expected instruction is splati.d with index 1.
    633 
    634   %1 = load <2 x i64>, <2 x i64>* %a
    635   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    636   %2 = load <2 x i64>, <2 x i64>* %b
    637   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
    638   ; ilvod.d with two identical operands is equivalent to splati.d
    639   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
    640   store <2 x i64> %3, <2 x i64>* %c
    641   ; CHECK-DAG: st.d [[R3]], 0($4)
    642 
    643   ret void
    644 }
    645 
    646 define void @ilvod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    647   ; CHECK-LABEL: ilvod_v16i8_2:
          ; Duplicates each odd-indexed byte of the second operand into the adjacent
          ; lane (17,17,19,19,...). %1 is loaded but never referenced by the mask, so
          ; only the load of %b is checked and ilvod.b is expected to use R2 twice.
    648 
    649   %1 = load <16 x i8>, <16 x i8>* %a
    650   %2 = load <16 x i8>, <16 x i8>* %b
    651   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    652   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    653                      <16 x i32> <i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
    654   ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    655   store <16 x i8> %3, <16 x i8>* %c
    656   ; CHECK-DAG: st.b [[R3]], 0($4)
    657 
    658   ret void
    659 }
    660 
    661 define void @ilvod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    662   ; CHECK-LABEL: ilvod_v8i16_2:
          ; Duplicates each odd-indexed halfword of the second operand into the
          ; adjacent lane (9,9,11,11,...). %1 is loaded but never referenced by the
          ; mask, so only the load of %b is checked and ilvod.h uses R2 twice.
    663 
    664   %1 = load <8 x i16>, <8 x i16>* %a
    665   %2 = load <8 x i16>, <8 x i16>* %b
    666   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    667   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
    668   ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    669   store <8 x i16> %3, <8 x i16>* %c
    670   ; CHECK-DAG: st.h [[R3]], 0($4)
    671 
    672   ret void
    673 }
    674 
    675 define void @ilvod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
    676   ; CHECK-LABEL: ilvod_v4i32_2:
          ; Duplicates each odd-indexed word of the second operand into the adjacent
          ; lane (5,5,7,7). %1 is loaded but never referenced by the mask, so only the
          ; load of %b is checked and ilvod.w is expected to use R2 twice.
    677 
    678   %1 = load <4 x i32>, <4 x i32>* %a
    679   %2 = load <4 x i32>, <4 x i32>* %b
    680   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    681   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 5, i32 7, i32 7>
    682   ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    683   store <4 x i32> %3, <4 x i32>* %c
    684   ; CHECK-DAG: st.w [[R3]], 0($4)
    685 
    686   ret void
    687 }
    688 
    689 define void @ilvod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
    690   ; CHECK-LABEL: ilvod_v2i64_2:
          ; Copies element 1 of the second operand into both lanes (mask 3,3). %1 is
          ; loaded but never referenced by the mask. The expected instruction is
          ; splati.d on R2 with index 1.
    691 
    692   %1 = load <2 x i64>, <2 x i64>* %a
    693   %2 = load <2 x i64>, <2 x i64>* %b
    694   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    695   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
    696   ; ilvod.d with two identical operands is equivalent to splati.d
    697   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
    698   store <2 x i64> %3, <2 x i64>* %c
    699   ; CHECK-DAG: st.d [[R3]], 0($4)
    700 
    701   ret void
    702 }
    703 
    704 define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
    705   ; CHECK-LABEL: ilvr_v16i8_0:
          ; Interleaves the eight lowest-indexed bytes of %1 and %2
          ; (0,16,1,17,...,7,23). Expected to select ilvr.b with the vector loaded
          ; from %b as the first source operand and the one from %a as the second.
    706 
    707   %1 = load <16 x i8>, <16 x i8>* %a
    708   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    709   %2 = load <16 x i8>, <16 x i8>* %b
    710   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    711   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    712                      <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    713   ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    714   store <16 x i8> %3, <16 x i8>* %c
    715   ; CHECK-DAG: st.b [[R3]], 0($4)
    716 
    717   ret void
    718 }
    719 
    720 define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
    721   ; CHECK-LABEL: ilvr_v8i16_0:
          ; Interleaves the four lowest-indexed halfwords of %1 and %2
          ; (0,8,1,9,2,10,3,11). Expected to select ilvr.h with the vector loaded
          ; from %b as the first source operand and the one from %a as the second.
    722 
    723   %1 = load <8 x i16>, <8 x i16>* %a
    724   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    725   %2 = load <8 x i16>, <8 x i16>* %b
    726   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    727   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    728   ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    729   store <8 x i16> %3, <8 x i16>* %c
    730   ; CHECK-DAG: st.h [[R3]], 0($4)
    731 
    732   ret void
    733 }
    734 
    735 define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask alternates elements 0..1 of %1 with elements 0..1 of %2
          ; (indices 4..5); the instruction expected for it is ilvr.w.
    736   ; CHECK-LABEL: ilvr_v4i32_0:
    737 
    738   %1 = load <4 x i32>, <4 x i32>* %a
    739   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    740   %2 = load <4 x i32>, <4 x i32>* %b
    741   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    742   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    743   ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    744   store <4 x i32> %3, <4 x i32>* %c
    745   ; CHECK-DAG: st.w [[R3]], 0($4)
    746 
    747   ret void
    748 }
    749 
    750 define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <0, 2> takes element 0 of %1 followed by element 0 of %2;
          ; the instruction expected for it is ilvev.d rather than ilvr.d.
    751   ; CHECK-LABEL: ilvr_v2i64_0:
    752 
    753   %1 = load <2 x i64>, <2 x i64>* %a
    754   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    755   %2 = load <2 x i64>, <2 x i64>* %b
    756   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    757   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
    758   ; ilvr.d and ilvev.d are equivalent for v2i64
    759   ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    760   store <2 x i64> %3, <2 x i64>* %c
    761   ; CHECK-DAG: st.d [[R3]], 0($4)
    762 
    763   ret void
    764 }
    765 
    766 define void @ilvr_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask repeats each of elements 0..7 of %2 (indices 16..23) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
    767   ; CHECK-LABEL: ilvr_v16i8_1:
    768 
    769   %1 = load <16 x i8>, <16 x i8>* %a
    770   %2 = load <16 x i8>, <16 x i8>* %b
    771   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    772   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    773                      <16 x i32> <i32 16, i32 16, i32 17, i32 17, i32 18, i32 18, i32 19, i32 19, i32 20, i32 20, i32 21, i32 21, i32 22, i32 22, i32 23, i32 23>
    774   ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    775   store <16 x i8> %3, <16 x i8>* %c
    776   ; CHECK-DAG: st.b [[R3]], 0($4)
    777 
    778   ret void
    779 }
    780 
    781 define void @ilvr_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask repeats each of elements 0..3 of %2 (indices 8..11) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
    782   ; CHECK-LABEL: ilvr_v8i16_1:
    783 
    784   %1 = load <8 x i16>, <8 x i16>* %a
    785   %2 = load <8 x i16>, <8 x i16>* %b
    786   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    787   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11>
    788   ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    789   store <8 x i16> %3, <8 x i16>* %c
    790   ; CHECK-DAG: st.h [[R3]], 0($4)
    791 
    792   ret void
    793 }
    794 
    795 define void @ilvr_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask repeats each of elements 0..1 of %2 (indices 4..5) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
    796   ; CHECK-LABEL: ilvr_v4i32_1:
    797 
    798   %1 = load <4 x i32>, <4 x i32>* %a
    799   %2 = load <4 x i32>, <4 x i32>* %b
    800   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    801   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 4, i32 5, i32 5>
    802   ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    803   store <4 x i32> %3, <4 x i32>* %c
    804   ; CHECK-DAG: st.w [[R3]], 0($4)
    805 
    806   ret void
    807 }
    808 
    809 define void @ilvr_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <2, 2> fills both result lanes with element 0 of %2. No element
          ; of %1 is selected, and only the load of %b is matched below.
    810   ; CHECK-LABEL: ilvr_v2i64_1:
    811 
    812   %1 = load <2 x i64>, <2 x i64>* %a
    813   %2 = load <2 x i64>, <2 x i64>* %b
    814   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    815   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
    816   ; ilvr.d and splati.d are equivalent for v2i64
    817   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
    818   store <2 x i64> %3, <2 x i64>* %c
    819   ; CHECK-DAG: st.d [[R3]], 0($4)
    820 
    821   ret void
    822 }
    823 
    824 define void @ilvr_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask repeats each of elements 0..7 of %1 twice. No element of %2 is
          ; selected, and only the load of %a is matched below.
    825   ; CHECK-LABEL: ilvr_v16i8_2:
    826 
    827   %1 = load <16 x i8>, <16 x i8>* %a
    828   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    829   %2 = load <16 x i8>, <16 x i8>* %b
    830   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    831                      <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
    832   ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    833   store <16 x i8> %3, <16 x i8>* %c
    834   ; CHECK-DAG: st.b [[R3]], 0($4)
    835 
    836   ret void
    837 }
    838 
    839 define void @ilvr_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask repeats each of elements 0..3 of %1 twice. No element of %2 is
          ; selected, and only the load of %a is matched below.
    840   ; CHECK-LABEL: ilvr_v8i16_2:
    841 
    842   %1 = load <8 x i16>, <8 x i16>* %a
    843   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    844   %2 = load <8 x i16>, <8 x i16>* %b
    845   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    846   ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    847   store <8 x i16> %3, <8 x i16>* %c
    848   ; CHECK-DAG: st.h [[R3]], 0($4)
    849 
    850   ret void
    851 }
    852 
    853 define void @ilvr_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask repeats each of elements 0..1 of %1 twice. No element of %2 is
          ; selected, and only the load of %a is matched below.
    854   ; CHECK-LABEL: ilvr_v4i32_2:
    855 
    856   %1 = load <4 x i32>, <4 x i32>* %a
    857   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    858   %2 = load <4 x i32>, <4 x i32>* %b
    859   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
    860   ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
    861   store <4 x i32> %3, <4 x i32>* %c
    862   ; CHECK-DAG: st.w [[R3]], 0($4)
    863 
    864   ret void
    865 }
    866 
    867 define void @ilvr_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <0, 0> fills both result lanes with element 0 of %1. No element
          ; of %2 is selected, and only the load of %a is matched below.
    868   ; CHECK-LABEL: ilvr_v2i64_2:
    869 
    870   %1 = load <2 x i64>, <2 x i64>* %a
    871   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    872   %2 = load <2 x i64>, <2 x i64>* %b
    873   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
    874   ; ilvr.d and splati.d are equivalent for v2i64
    875   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
    876   store <2 x i64> %3, <2 x i64>* %c
    877   ; CHECK-DAG: st.d [[R3]], 0($4)
    878 
    879   ret void
    880 }
    881 
    882 define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask alternates elements 8..15 of %1 with elements 8..15 of %2
          ; (indices 24..31); the instruction expected for it is ilvl.b.
    883   ; CHECK-LABEL: ilvl_v16i8_0:
    884 
    885   %1 = load <16 x i8>, <16 x i8>* %a
    886   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    887   %2 = load <16 x i8>, <16 x i8>* %b
    888   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    889   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    890                      <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    891   ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    892   store <16 x i8> %3, <16 x i8>* %c
    893   ; CHECK-DAG: st.b [[R3]], 0($4)
    894 
    895   ret void
    896 }
    897 
    898 define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask alternates elements 4..7 of %1 with elements 4..7 of %2
          ; (indices 12..15); the instruction expected for it is ilvl.h.
    899   ; CHECK-LABEL: ilvl_v8i16_0:
    900 
    901   %1 = load <8 x i16>, <8 x i16>* %a
    902   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    903   %2 = load <8 x i16>, <8 x i16>* %b
    904   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    905   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    906   ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    907   store <8 x i16> %3, <8 x i16>* %c
    908   ; CHECK-DAG: st.h [[R3]], 0($4)
    909 
    910   ret void
    911 }
    912 
    913 define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask alternates elements 2..3 of %1 with elements 2..3 of %2
          ; (indices 6..7); the instruction expected for it is ilvl.w.
    914   ; CHECK-LABEL: ilvl_v4i32_0:
    915 
    916   %1 = load <4 x i32>, <4 x i32>* %a
    917   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    918   %2 = load <4 x i32>, <4 x i32>* %b
    919   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    920   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    921   ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
    922   store <4 x i32> %3, <4 x i32>* %c
    923   ; CHECK-DAG: st.w [[R3]], 0($4)
    924 
    925   ret void
    926 }
    927 
    928 define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <1, 3> takes element 1 of %1 followed by element 1 of %2;
          ; the instruction expected for it is ilvod.d rather than ilvl.d.
          ; NOTE(review): the sources are matched as [[R1]], [[R2]] here, whereas
          ; the other two-input ilvl patterns use [[R2]], [[R1]] - confirm the
          ; order is intended for ilvod.d.
    929   ; CHECK-LABEL: ilvl_v2i64_0:
    930 
    931   %1 = load <2 x i64>, <2 x i64>* %a
    932   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    933   %2 = load <2 x i64>, <2 x i64>* %b
    934   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    935   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
    936   ; ilvl.d and ilvod.d are equivalent for v2i64
    937   ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    938   store <2 x i64> %3, <2 x i64>* %c
    939   ; CHECK-DAG: st.d [[R3]], 0($4)
    940 
    941   ret void
    942 }
    943 
    944 define void @ilvl_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask repeats each of elements 8..15 of %2 (indices 24..31) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
    945   ; CHECK-LABEL: ilvl_v16i8_1:
    946 
    947   %1 = load <16 x i8>, <16 x i8>* %a
    948   %2 = load <16 x i8>, <16 x i8>* %b
    949   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    950   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    951                      <16 x i32> <i32 24, i32 24, i32 25, i32 25, i32 26, i32 26, i32 27, i32 27, i32 28, i32 28, i32 29, i32 29, i32 30, i32 30, i32 31, i32 31>
    952   ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    953   store <16 x i8> %3, <16 x i8>* %c
    954   ; CHECK-DAG: st.b [[R3]], 0($4)
    955 
    956   ret void
    957 }
    958 
    959 define void @ilvl_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask repeats each of elements 4..7 of %2 (indices 12..15) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
    960   ; CHECK-LABEL: ilvl_v8i16_1:
    961 
    962   %1 = load <8 x i16>, <8 x i16>* %a
    963   %2 = load <8 x i16>, <8 x i16>* %b
    964   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    965   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
    966   ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    967   store <8 x i16> %3, <8 x i16>* %c
    968   ; CHECK-DAG: st.h [[R3]], 0($4)
    969 
    970   ret void
    971 }
    972 
    973 define void @ilvl_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask repeats each of elements 2..3 of %2 (indices 6..7) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
    974   ; CHECK-LABEL: ilvl_v4i32_1:
    975 
    976   %1 = load <4 x i32>, <4 x i32>* %a
    977   %2 = load <4 x i32>, <4 x i32>* %b
    978   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    979   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 6, i32 6, i32 7, i32 7>
    980   ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
    981   store <4 x i32> %3, <4 x i32>* %c
    982   ; CHECK-DAG: st.w [[R3]], 0($4)
    983 
    984   ret void
    985 }
    986 
    987 define void @ilvl_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <3, 3> fills both result lanes with element 1 of %2. No element
          ; of %1 is selected, and only the load of %b is matched below.
    988   ; CHECK-LABEL: ilvl_v2i64_1:
    989 
    990   %1 = load <2 x i64>, <2 x i64>* %a
    991   %2 = load <2 x i64>, <2 x i64>* %b
    992   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    993   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
    994   ; ilvl.d and splati.d are equivalent for v2i64
    995   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
    996   store <2 x i64> %3, <2 x i64>* %c
    997   ; CHECK-DAG: st.d [[R3]], 0($4)
    998 
    999   ret void
   1000 }
   1001 
   1002 define void @ilvl_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask repeats each of elements 8..15 of %1 twice. No element of %2 is
          ; selected, and only the load of %a is matched below.
   1003   ; CHECK-LABEL: ilvl_v16i8_2:
   1004 
   1005   %1 = load <16 x i8>, <16 x i8>* %a
   1006   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1007   %2 = load <16 x i8>, <16 x i8>* %b
   1008   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
   1009                      <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
   1010   ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1011   store <16 x i8> %3, <16 x i8>* %c
   1012   ; CHECK-DAG: st.b [[R3]], 0($4)
   1013 
   1014   ret void
   1015 }
   1016 
   1017 define void @ilvl_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask repeats each of elements 4..7 of %1 twice. No element of %2 is
          ; selected, and only the load of %a is matched below.
   1018   ; CHECK-LABEL: ilvl_v8i16_2:
   1019 
   1020   %1 = load <8 x i16>, <8 x i16>* %a
   1021   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1022   %2 = load <8 x i16>, <8 x i16>* %b
   1023   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
   1024   ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1025   store <8 x i16> %3, <8 x i16>* %c
   1026   ; CHECK-DAG: st.h [[R3]], 0($4)
   1027 
   1028   ret void
   1029 }
   1030 
   1031 define void @ilvl_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask repeats each of elements 2..3 of %1 twice. No element of %2 is
          ; selected, and only the load of %a is matched below.
   1032   ; CHECK-LABEL: ilvl_v4i32_2:
   1033 
   1034   %1 = load <4 x i32>, <4 x i32>* %a
   1035   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1036   %2 = load <4 x i32>, <4 x i32>* %b
   1037   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
   1038   ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1039   store <4 x i32> %3, <4 x i32>* %c
   1040   ; CHECK-DAG: st.w [[R3]], 0($4)
   1041 
   1042   ret void
   1043 }
   1044 
   1045 define void @ilvl_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <1, 1> fills both result lanes with element 1 of %1. No element
          ; of %2 is selected, and only the load of %a is matched below.
   1046   ; CHECK-LABEL: ilvl_v2i64_2:
   1047 
   1048   %1 = load <2 x i64>, <2 x i64>* %a
   1049   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1050   %2 = load <2 x i64>, <2 x i64>* %b
   1051   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
   1052   ; ilvl.d and splati.d are equivalent for v2i64
   1053   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
   1054   store <2 x i64> %3, <2 x i64>* %c
   1055   ; CHECK-DAG: st.d [[R3]], 0($4)
   1056 
   1057   ret void
   1058 }
   1059 
   1060 define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask selects the even-indexed elements of %1 (0, 2, .., 14) followed
          ; by the even-indexed elements of %2 (indices 16, 18, .., 30); the
          ; instruction expected for it is pckev.b.
   1061   ; CHECK-LABEL: pckev_v16i8_0:
   1062 
   1063   %1 = load <16 x i8>, <16 x i8>* %a
   1064   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1065   %2 = load <16 x i8>, <16 x i8>* %b
   1066   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1067   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
   1068                      <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1069   ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   1070   store <16 x i8> %3, <16 x i8>* %c
   1071   ; CHECK-DAG: st.b [[R3]], 0($4)
   1072 
   1073   ret void
   1074 }
   1075 
   1076 define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask selects the even-indexed elements of %1 (0, 2, 4, 6) followed by
          ; the even-indexed elements of %2 (indices 8, 10, 12, 14); the
          ; instruction expected for it is pckev.h.
   1077   ; CHECK-LABEL: pckev_v8i16_0:
   1078 
   1079   %1 = load <8 x i16>, <8 x i16>* %a
   1080   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1081   %2 = load <8 x i16>, <8 x i16>* %b
   1082   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1083   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   1084   ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   1085   store <8 x i16> %3, <8 x i16>* %c
   1086   ; CHECK-DAG: st.h [[R3]], 0($4)
   1087 
   1088   ret void
   1089 }
   1090 
   1091 define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask selects the even-indexed elements of %1 (0, 2) followed by the
          ; even-indexed elements of %2 (indices 4, 6); the instruction expected
          ; for it is pckev.w.
   1092   ; CHECK-LABEL: pckev_v4i32_0:
   1093 
   1094   %1 = load <4 x i32>, <4 x i32>* %a
   1095   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1096   %2 = load <4 x i32>, <4 x i32>* %b
   1097   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1098   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   1099   ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   1100   store <4 x i32> %3, <4 x i32>* %c
   1101   ; CHECK-DAG: st.w [[R3]], 0($4)
   1102 
   1103   ret void
   1104 }
   1105 
   1106 define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <0, 2> takes element 0 of %1 followed by element 0 of %2;
          ; the instruction expected for it is ilvev.d rather than pckev.d.
   1107   ; CHECK-LABEL: pckev_v2i64_0:
   1108 
   1109   %1 = load <2 x i64>, <2 x i64>* %a
   1110   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1111   %2 = load <2 x i64>, <2 x i64>* %b
   1112   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1113   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
   1114   ; pckev.d and ilvev.d are equivalent for v2i64
   1115   ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   1116   store <2 x i64> %3, <2 x i64>* %c
   1117   ; CHECK-DAG: st.d [[R3]], 0($4)
   1118 
   1119   ret void
   1120 }
   1121 
   1122 define void @pckev_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask lists the even-indexed elements of %2 (indices 16, 18, .., 30)
          ; twice. No element of %1 is selected, and only the load of %b is
          ; matched below.
   1123   ; CHECK-LABEL: pckev_v16i8_1:
   1124 
   1125   %1 = load <16 x i8>, <16 x i8>* %a
   1126   %2 = load <16 x i8>, <16 x i8>* %b
   1127   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1128   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
   1129                      <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
   1130   ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
   1131   store <16 x i8> %3, <16 x i8>* %c
   1132   ; CHECK-DAG: st.b [[R3]], 0($4)
   1133 
   1134   ret void
   1135 }
   1136 
   1137 define void @pckev_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask lists the even-indexed elements of %2 (indices 8, 10, 12, 14)
          ; twice. No element of %1 is selected, and only the load of %b is
          ; matched below.
   1138   ; CHECK-LABEL: pckev_v8i16_1:
   1139 
   1140   %1 = load <8 x i16>, <8 x i16>* %a
   1141   %2 = load <8 x i16>, <8 x i16>* %b
   1142   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1143   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 8, i32 10, i32 12, i32 14>
   1144   ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
   1145   store <8 x i16> %3, <8 x i16>* %c
   1146   ; CHECK-DAG: st.h [[R3]], 0($4)
   1147 
   1148   ret void
   1149 }
   1150 
   1151 define void @pckev_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask lists the even-indexed elements of %2 (indices 4, 6) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
   1152   ; CHECK-LABEL: pckev_v4i32_1:
   1153 
   1154   %1 = load <4 x i32>, <4 x i32>* %a
   1155   %2 = load <4 x i32>, <4 x i32>* %b
   1156   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1157   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 6, i32 4, i32 6>
   1158   ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
   1159   store <4 x i32> %3, <4 x i32>* %c
   1160   ; CHECK-DAG: st.w [[R3]], 0($4)
   1161 
   1162   ret void
   1163 }
   1164 
   1165 define void @pckev_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <2, 2> fills both result lanes with element 0 of %2. No element
          ; of %1 is selected, and only the load of %b is matched below.
   1166   ; CHECK-LABEL: pckev_v2i64_1:
   1167 
   1168   %1 = load <2 x i64>, <2 x i64>* %a
   1169   %2 = load <2 x i64>, <2 x i64>* %b
   1170   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1171   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 2, i32 2>
   1172   ; pckev.d and splati.d are equivalent for v2i64
   1173   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][0]
   1174   store <2 x i64> %3, <2 x i64>* %c
   1175   ; CHECK-DAG: st.d [[R3]], 0($4)
   1176 
   1177   ret void
   1178 }
   1179 
   1180 define void @pckev_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask lists the even-indexed elements of %1 (0, 2, .., 14) twice. No
          ; element of %2 is selected, and only the load of %a is matched below.
   1181   ; CHECK-LABEL: pckev_v16i8_2:
   1182 
   1183   %1 = load <16 x i8>, <16 x i8>* %a
   1184   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1185   %2 = load <16 x i8>, <16 x i8>* %b
   1186   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
   1187                      <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   1188   ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1189   store <16 x i8> %3, <16 x i8>* %c
   1190   ; CHECK-DAG: st.b [[R3]], 0($4)
   1191 
   1192   ret void
   1193 }
   1194 
   1195 define void @pckev_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask lists the even-indexed elements of %1 (0, 2, 4, 6) twice. No
          ; element of %2 is selected, and only the load of %a is matched below.
   1196   ; CHECK-LABEL: pckev_v8i16_2:
   1197 
   1198   %1 = load <8 x i16>, <8 x i16>* %a
   1199   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1200   %2 = load <8 x i16>, <8 x i16>* %b
   1201   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
   1202   ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1203   store <8 x i16> %3, <8 x i16>* %c
   1204   ; CHECK-DAG: st.h [[R3]], 0($4)
   1205 
   1206   ret void
   1207 }
   1208 
   1209 define void @pckev_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask lists the even-indexed elements of %1 (0, 2) twice. No element
          ; of %2 is selected, and only the load of %a is matched below.
   1210   ; CHECK-LABEL: pckev_v4i32_2:
   1211 
   1212   %1 = load <4 x i32>, <4 x i32>* %a
   1213   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1214   %2 = load <4 x i32>, <4 x i32>* %b
   1215   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
   1216   ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1217   store <4 x i32> %3, <4 x i32>* %c
   1218   ; CHECK-DAG: st.w [[R3]], 0($4)
   1219 
   1220   ret void
   1221 }
   1222 
   1223 define void @pckev_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <0, 0> fills both result lanes with element 0 of %1. No element
          ; of %2 is selected, and only the load of %a is matched below.
   1224   ; CHECK-LABEL: pckev_v2i64_2:
   1225 
   1226   %1 = load <2 x i64>, <2 x i64>* %a
   1227   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1228   %2 = load <2 x i64>, <2 x i64>* %b
   1229   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 0>
   1230   ; pckev.d and splati.d are equivalent for v2i64
   1231   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][0]
   1232   store <2 x i64> %3, <2 x i64>* %c
   1233   ; CHECK-DAG: st.d [[R3]], 0($4)
   1234 
   1235   ret void
   1236 }
   1237 
   1238 define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask selects the odd-indexed elements of %1 (1, 3, .., 15) followed
          ; by the odd-indexed elements of %2 (indices 17, 19, .., 31); the
          ; instruction expected for it is pckod.b.
   1239   ; CHECK-LABEL: pckod_v16i8_0:
   1240 
   1241   %1 = load <16 x i8>, <16 x i8>* %a
   1242   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1243   %2 = load <16 x i8>, <16 x i8>* %b
   1244   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1245   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
   1246                      <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
   1247   ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   1248   store <16 x i8> %3, <16 x i8>* %c
   1249   ; CHECK-DAG: st.b [[R3]], 0($4)
   1250 
   1251   ret void
   1252 }
   1253 
   1254 define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask selects the odd-indexed elements of %1 (1, 3, 5, 7) followed by
          ; the odd-indexed elements of %2 (indices 9, 11, 13, 15); the
          ; instruction expected for it is pckod.h.
   1255   ; CHECK-LABEL: pckod_v8i16_0:
   1256 
   1257   %1 = load <8 x i16>, <8 x i16>* %a
   1258   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1259   %2 = load <8 x i16>, <8 x i16>* %b
   1260   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1261   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   1262   ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   1263   store <8 x i16> %3, <8 x i16>* %c
   1264   ; CHECK-DAG: st.h [[R3]], 0($4)
   1265 
   1266   ret void
   1267 }
   1268 
   1269 define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask selects the odd-indexed elements of %1 (1, 3) followed by the
          ; odd-indexed elements of %2 (indices 5, 7); the instruction expected
          ; for it is pckod.w.
   1270   ; CHECK-LABEL: pckod_v4i32_0:
   1271 
   1272   %1 = load <4 x i32>, <4 x i32>* %a
   1273   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1274   %2 = load <4 x i32>, <4 x i32>* %b
   1275   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1276   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   1277   ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R1]]
   1278   store <4 x i32> %3, <4 x i32>* %c
   1279   ; CHECK-DAG: st.w [[R3]], 0($4)
   1280 
   1281   ret void
   1282 }
   1283 
   1284 define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <1, 3> takes element 1 of %1 followed by element 1 of %2;
          ; the instruction expected for it is ilvod.d rather than pckod.d.
          ; NOTE(review): the sources are matched as [[R1]], [[R2]] here, whereas
          ; the other two-input pckod patterns use [[R2]], [[R1]] - confirm the
          ; order is intended for ilvod.d.
   1285   ; CHECK-LABEL: pckod_v2i64_0:
   1286 
   1287   %1 = load <2 x i64>, <2 x i64>* %a
   1288   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1289   %2 = load <2 x i64>, <2 x i64>* %b
   1290   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1291   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
   1292   ; pckod.d and ilvod.d are equivalent for v2i64
   1293   ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
   1294   store <2 x i64> %3, <2 x i64>* %c
   1295   ; CHECK-DAG: st.d [[R3]], 0($4)
   1296 
   1297   ret void
   1298 }
   1299 
   1300 define void @pckod_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask lists the odd-indexed elements of %2 (indices 17, 19, .., 31)
          ; twice. No element of %1 is selected, and only the load of %b is
          ; matched below.
   1301   ; CHECK-LABEL: pckod_v16i8_1:
   1302 
   1303   %1 = load <16 x i8>, <16 x i8>* %a
   1304   %2 = load <16 x i8>, <16 x i8>* %b
   1305   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   1306   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
   1307                      <16 x i32> <i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
   1308   ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R2]], [[R2]]
   1309   store <16 x i8> %3, <16 x i8>* %c
   1310   ; CHECK-DAG: st.b [[R3]], 0($4)
   1311 
   1312   ret void
   1313 }
   1314 
   1315 define void @pckod_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask lists the odd-indexed elements of %2 (indices 9, 11, 13, 15)
          ; twice. No element of %1 is selected, and only the load of %b is
          ; matched below.
   1316   ; CHECK-LABEL: pckod_v8i16_1:
   1317 
   1318   %1 = load <8 x i16>, <8 x i16>* %a
   1319   %2 = load <8 x i16>, <8 x i16>* %b
   1320   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   1321   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 9, i32 11, i32 13, i32 15>
   1322   ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R2]], [[R2]]
   1323   store <8 x i16> %3, <8 x i16>* %c
   1324   ; CHECK-DAG: st.h [[R3]], 0($4)
   1325 
   1326   ret void
   1327 }
   1328 
   1329 define void @pckod_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask lists the odd-indexed elements of %2 (indices 5, 7) twice. No
          ; element of %1 is selected, and only the load of %b is matched below.
   1330   ; CHECK-LABEL: pckod_v4i32_1:
   1331 
   1332   %1 = load <4 x i32>, <4 x i32>* %a
   1333   %2 = load <4 x i32>, <4 x i32>* %b
   1334   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   1335   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 5, i32 7, i32 5, i32 7>
   1336   ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R2]], [[R2]]
   1337   store <4 x i32> %3, <4 x i32>* %c
   1338   ; CHECK-DAG: st.w [[R3]], 0($4)
   1339 
   1340   ret void
   1341 }
   1342 
   1343 define void @pckod_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <3, 3> fills both result lanes with element 1 of %2. No element
          ; of %1 is selected, and only the load of %b is matched below.
   1344   ; CHECK-LABEL: pckod_v2i64_1:
   1345 
   1346   %1 = load <2 x i64>, <2 x i64>* %a
   1347   %2 = load <2 x i64>, <2 x i64>* %b
   1348   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   1349   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 3>
   1350   ; pckod.d and splati.d are equivalent for v2i64
   1351   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R2]][1]
   1352   store <2 x i64> %3, <2 x i64>* %c
   1353   ; CHECK-DAG: st.d [[R3]], 0($4)
   1354 
   1355   ret void
   1356 }
   1357 
   1358 define void @pckod_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Mask lists the odd-indexed elements of %1 (1, 3, .., 15) twice. No
          ; element of %2 is selected, and only the load of %a is matched below.
   1359   ; CHECK-LABEL: pckod_v16i8_2:
   1360 
   1361   %1 = load <16 x i8>, <16 x i8>* %a
   1362   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1363   %2 = load <16 x i8>, <16 x i8>* %b
   1364   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
   1365                      <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   1366   ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1367   store <16 x i8> %3, <16 x i8>* %c
   1368   ; CHECK-DAG: st.b [[R3]], 0($4)
   1369 
   1370   ret void
   1371 }
   1372 
   1373 define void @pckod_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Mask lists the odd-indexed elements of %1 (1, 3, 5, 7) twice. No
          ; element of %2 is selected, and only the load of %a is matched below.
   1374   ; CHECK-LABEL: pckod_v8i16_2:
   1375 
   1376   %1 = load <8 x i16>, <8 x i16>* %a
   1377   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1378   %2 = load <8 x i16>, <8 x i16>* %b
   1379   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 1, i32 3, i32 5, i32 7>
   1380   ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1381   store <8 x i16> %3, <8 x i16>* %c
   1382   ; CHECK-DAG: st.h [[R3]], 0($4)
   1383 
   1384   ret void
   1385 }
   1386 
   1387 define void @pckod_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Mask lists the odd-indexed elements of %1 (1, 3) twice. No element
          ; of %2 is selected, and only the load of %a is matched below.
   1388   ; CHECK-LABEL: pckod_v4i32_2:
   1389 
   1390   %1 = load <4 x i32>, <4 x i32>* %a
   1391   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1392   %2 = load <4 x i32>, <4 x i32>* %b
   1393   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
   1394   ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
   1395   store <4 x i32> %3, <4 x i32>* %c
   1396   ; CHECK-DAG: st.w [[R3]], 0($4)
   1397 
   1398   ret void
   1399 }
   1400 
   1401 define void @pckod_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Mask <1, 1> fills both result lanes with element 1 of %1. No element
          ; of %2 is selected, and only the load of %a is matched below.
   1402   ; CHECK-LABEL: pckod_v2i64_2:
   1403 
   1404   %1 = load <2 x i64>, <2 x i64>* %a
   1405   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1406   %2 = load <2 x i64>, <2 x i64>* %b
   1407   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 1>
   1408   ; pckod.d and splati.d are equivalent for v2i64
   1409   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
   1410   store <2 x i64> %3, <2 x i64>* %c
   1411   ; CHECK-DAG: st.d [[R3]], 0($4)
   1412 
   1413   ret void
   1414 }
   1415 
   1416 define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Mask selects element 4 of %1 for every result lane (the second
          ; shuffle operand is undef); splati.b with index 4 is expected.
   1417   ; CHECK-LABEL: splati_v16i8_0:
   1418 
   1419   %1 = load <16 x i8>, <16 x i8>* %a
   1420   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   1421   %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
   1422                      <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   1423   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
   1424   store <16 x i8> %2, <16 x i8>* %c
   1425   ; CHECK-DAG: st.b [[R3]], 0($4)
   1426 
   1427   ret void
   1428 }
   1429 
   1430 define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Mask selects element 4 of %1 for every result lane (the second
          ; shuffle operand is undef); splati.h with index 4 is expected.
   1431   ; CHECK-LABEL: splati_v8i16_0:
   1432 
   1433   %1 = load <8 x i16>, <8 x i16>* %a
   1434   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   1435   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   1436   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
   1437   store <8 x i16> %2, <8 x i16>* %c
   1438   ; CHECK-DAG: st.h [[R3]], 0($4)
   1439 
   1440   ret void
   1441 }
   1442 
   1443 define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Mask selects element 3 of %1 for every result lane (the second
          ; shuffle operand is undef); splati.w with index 3 is expected.
   1444   ; CHECK-LABEL: splati_v4i32_0:
   1445 
   1446   %1 = load <4 x i32>, <4 x i32>* %a
   1447   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   1448   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   1449   ; CHECK-DAG: splati.w [[R3:\$w[0-9]+]], [[R1]][3]
   1450   store <4 x i32> %2, <4 x i32>* %c
   1451   ; CHECK-DAG: st.w [[R3]], 0($4)
   1452 
   1453   ret void
   1454 }
   1455 
   1456 define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Mask selects element 1 of %1 for both result lanes (the second
          ; shuffle operand is undef); splati.d with index 1 is expected.
   1457   ; CHECK-LABEL: splati_v2i64_0:
   1458 
   1459   %1 = load <2 x i64>, <2 x i64>* %a
   1460   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   1461   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
   1462   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
   1463   store <2 x i64> %2, <2 x i64>* %c
   1464   ; CHECK-DAG: st.d [[R3]], 0($4)
   1465 
   1466   ret void
   1467 }
   1468