Home | History | Annotate | Download | only in msa
      1 ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s
      2 ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s
      3 
      4 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Single-input v16i8 shuffle that reverses the element order of %a (mask
          ; 15..0) and stores the result to %c; %b is unused.
          ; Expected lowering: vshf.b with %a as both source operands. R3 is loaded
          ; with the control vector through a %lo($...) address (presumably the
          ; constant-pool copy of the mask) and then receives the result.
      5   ; CHECK-LABEL: vshf_v16i8_0:
      6 
      7   %1 = load <16 x i8>* %a
      8   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
      9   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     10   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     11   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
     12   ; CHECK-DAG: vshf.b [[R3]], [[R1]], [[R1]]
     13   store <16 x i8> %2, <16 x i8>* %c
     14   ; CHECK-DAG: st.b [[R3]], 0($4)
     15 
     16   ret void
     17   ; CHECK: .size vshf_v16i8_0
     18 }
     19 
     20 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Single-input v16i8 shuffle whose mask selects element 1 of %a for every
          ; lane; %b is unused.
          ; Expected lowering: splati.b of element 1 instead of a general vshf.b.
     21   ; CHECK-LABEL: vshf_v16i8_1:
     22 
     23   %1 = load <16 x i8>* %a
     24   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     25   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
     26   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
     27   store <16 x i8> %2, <16 x i8>* %c
     28   ; CHECK-DAG: st.b [[R3]], 0($4)
     29 
     30   ret void
     31   ; CHECK: .size vshf_v16i8_1
     32 }
     33 
     34 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask only uses indices 16..30, i.e. only
          ; elements of %b. %a is loaded but never selected, so no load of %a is
          ; checked.
          ; Expected lowering: vshf.b with %b as both source operands and the control
          ; vector loaded into R3 through a %lo($...) address.
     35   ; CHECK-LABEL: vshf_v16i8_2:
     36 
     37   %1 = load <16 x i8>* %a
     38   %2 = load <16 x i8>* %b
     39   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     40   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
     41   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     42   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
     43   ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R2]]
     44   store <16 x i8> %3, <16 x i8>* %c
     45   ; CHECK-DAG: st.b [[R3]], 0($4)
     46 
     47   ret void
     48   ; CHECK: .size vshf_v16i8_2
     49 }
     50 
     51 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask mixes elements of %a (indices below
          ; 16) with elements of %b (indices 16 and above).
          ; Expected lowering: a genuine two-source vshf.b with the control vector
          ; loaded into R3 through a %lo($...) address.
     52   ; CHECK-LABEL: vshf_v16i8_3:
     53 
     54   %1 = load <16 x i8>* %a
     55   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     56   %2 = load <16 x i8>* %b
     57   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
     58   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
     59   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     60   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[PTR_A]])
     61   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
     62   ; the operands to get the right answer.
     63   ; CHECK-DAG: vshf.b [[R3]], [[R2]], [[R1]]
     64   store <16 x i8> %3, <16 x i8>* %c
     65   ; CHECK-DAG: st.b [[R3]], 0($4)
     66 
     67   ret void
     68   ; CHECK: .size vshf_v16i8_3
     69 }
     70 
     71 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Shuffle with %1 passed as both operands. The mask alternates indices 1
          ; and 17, which therefore both name element 1 of %a.
          ; Expected lowering: the shuffle is recognised as a splat and becomes
          ; splati.b of element 1.
     72   ; CHECK-LABEL: vshf_v16i8_4:
     73 
     74   %1 = load <16 x i8>* %a
     75   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
     76   %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
     77   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
     78   store <16 x i8> %2, <16 x i8>* %c
     79   ; CHECK-DAG: st.b [[R3]], 0($4)
     80 
     81   ret void
     82   ; CHECK: .size vshf_v16i8_4
     83 }
     84 
     85 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Single-input v8i16 shuffle that reverses the element order of %a (mask
          ; 7..0) and stores the result to %c; %b is unused.
          ; Expected lowering: vshf.h with %a as both source operands. R3 is loaded
          ; with the control vector through a %lo($...) address (presumably the
          ; constant-pool copy of the mask) and then receives the result.
     86   ; CHECK-LABEL: vshf_v8i16_0:
     87 
     88   %1 = load <8 x i16>* %a
     89   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
     90   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
     91   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
     92   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
     93   ; CHECK-DAG: vshf.h [[R3]], [[R1]], [[R1]]
     94   store <8 x i16> %2, <8 x i16>* %c
     95   ; CHECK-DAG: st.h [[R3]], 0($4)
     96 
     97   ret void
     98   ; CHECK: .size vshf_v8i16_0
     99 }
    100 
    101 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Single-input v8i16 shuffle whose mask selects element 1 of %a for every
          ; lane; %b is unused.
          ; Expected lowering: splati.h of element 1 instead of a general vshf.h.
    102   ; CHECK-LABEL: vshf_v8i16_1:
    103 
    104   %1 = load <8 x i16>* %a
    105   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    106   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
    107   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
    108   store <8 x i16> %2, <8 x i16>* %c
    109   ; CHECK-DAG: st.h [[R3]], 0($4)
    110 
    111   ret void
    112   ; CHECK: .size vshf_v8i16_1
    113 }
    114 
    115 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask only uses indices 8..14, i.e. only
          ; elements of %b. %a is loaded but never selected, so no load of %a is
          ; checked.
          ; Expected lowering: vshf.h with %b as both source operands and the control
          ; vector loaded into R3 through a %lo($...) address.
    116   ; CHECK-LABEL: vshf_v8i16_2:
    117 
    118   %1 = load <8 x i16>* %a
    119   %2 = load <8 x i16>* %b
    120   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    121   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
    122   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    123   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
    124   ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R2]]
    125   store <8 x i16> %3, <8 x i16>* %c
    126   ; CHECK-DAG: st.h [[R3]], 0($4)
    127 
    128   ret void
    129   ; CHECK: .size vshf_v8i16_2
    130 }
    131 
    132 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask mixes elements of %a (indices below
          ; 8) with elements of %b (indices 8 and above).
          ; Expected lowering: a genuine two-source vshf.h with the control vector
          ; loaded into R3 through a %lo($...) address.
    133   ; CHECK-LABEL: vshf_v8i16_3:
    134 
    135   %1 = load <8 x i16>* %a
    136   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    137   %2 = load <8 x i16>* %b
    138   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    139   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
    140   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    141   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[PTR_A]])
    142   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
    143   ; the operands to get the right answer.
    144   ; CHECK-DAG: vshf.h [[R3]], [[R2]], [[R1]]
    145   store <8 x i16> %3, <8 x i16>* %c
    146   ; CHECK-DAG: st.h [[R3]], 0($4)
    147 
    148   ret void
    149   ; CHECK: .size vshf_v8i16_3
    150 }
    151 
    152 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Shuffle with %1 passed as both operands. The mask alternates indices 1
          ; and 9, which therefore both name element 1 of %a.
          ; Expected lowering: the shuffle is recognised as a splat and becomes
          ; splati.h of element 1.
    153   ; CHECK-LABEL: vshf_v8i16_4:
    154 
    155   %1 = load <8 x i16>* %a
    156   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    157   %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
    158   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
    159   store <8 x i16> %2, <8 x i16>* %c
    160   ; CHECK-DAG: st.h [[R3]], 0($4)
    161 
    162   ret void
    163   ; CHECK: .size vshf_v8i16_4
    164 }
    165 
    166 ; Note: shf.w permutes within groups of four elements and a v4i32 holds exactly
    167 ; one such group, so single-vector v4i32 shuffles lower to shf.w, never vshf.w.
    168 
    169 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Single-input v4i32 shuffle that reverses the element order of %a (mask
          ; 3,2,1,0); %b is unused.
          ; Expected lowering: shf.w with immediate 27, which equals the mask packed
          ; two bits per lane with lane 0 lowest: 3 | 2<<2 | 1<<4 | 0<<6.
    170   ; CHECK-LABEL: vshf_v4i32_0:
    171 
    172   %1 = load <4 x i32>* %a
    173   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    174   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    175   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
    176   store <4 x i32> %2, <4 x i32>* %c
    177   ; CHECK-DAG: st.w [[R3]], 0($4)
    178 
    179   ret void
    180   ; CHECK: .size vshf_v4i32_0
    181 }
    182 
    183 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Single-input v4i32 shuffle whose mask selects element 1 of %a for every
          ; lane; %b is unused.
          ; Expected lowering: shf.w with immediate 85, which equals the mask packed
          ; two bits per lane with lane 0 lowest: 1 | 1<<2 | 1<<4 | 1<<6.
    184   ; CHECK-LABEL: vshf_v4i32_1:
    185 
    186   %1 = load <4 x i32>* %a
    187   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    188   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    189   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
    190   store <4 x i32> %2, <4 x i32>* %c
    191   ; CHECK-DAG: st.w [[R3]], 0($4)
    192 
    193   ret void
    194   ; CHECK: .size vshf_v4i32_1
    195 }
    196 
    197 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask (4,5,6,4) only selects elements of
          ; %b, namely its elements 0,1,2,0. %a is loaded but never selected, so no
          ; load of %a is checked.
          ; Expected lowering: shf.w on %b with immediate 36, which equals
          ; 0 | 1<<2 | 2<<4 | 0<<6.
    198   ; CHECK-LABEL: vshf_v4i32_2:
    199 
    200   %1 = load <4 x i32>* %a
    201   %2 = load <4 x i32>* %b
    202   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    203   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
    204   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
    205   store <4 x i32> %3, <4 x i32>* %c
    206   ; CHECK-DAG: st.w [[R3]], 0($4)
    207 
    208   ret void
    209   ; CHECK: .size vshf_v4i32_2
    210 }
    211 
    212 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask (1,5,6,4) takes element 1 of %a and
          ; elements 1,2,0 of %b.
          ; Expected lowering: a genuine two-source vshf.w with the control vector
          ; loaded into R3 through a %lo($...) address.
    213   ; CHECK-LABEL: vshf_v4i32_3:
    214 
    215   %1 = load <4 x i32>* %a
    216   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    217   %2 = load <4 x i32>* %b
    218   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    219   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
    220   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    221   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[PTR_A]])
    222   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
    223   ; the operands to get the right answer.
    224   ; CHECK-DAG: vshf.w [[R3]], [[R2]], [[R1]]
    225   store <4 x i32> %3, <4 x i32>* %c
    226   ; CHECK-DAG: st.w [[R3]], 0($4)
    227 
    228   ret void
    229   ; CHECK: .size vshf_v4i32_3
    230 }
    231 
    232 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Shuffle with %1 passed as both operands. The mask (1,5,5,1) uses indices
          ; 1 and 5, which therefore both name element 1 of %a.
          ; Expected lowering: shf.w with immediate 85 (1 | 1<<2 | 1<<4 | 1<<6), the
          ; same result as an element-1 splat.
    233   ; CHECK-LABEL: vshf_v4i32_4:
    234 
    235   %1 = load <4 x i32>* %a
    236   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    237   %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
    238   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
    239   store <4 x i32> %2, <4 x i32>* %c
    240   ; CHECK-DAG: st.w [[R3]], 0($4)
    241 
    242   ret void
    243   ; CHECK: .size vshf_v4i32_4
    244 }
    245 
    246 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Single-input v2i64 shuffle that swaps the two elements of %a (mask 1,0);
          ; %b is unused.
          ; Expected lowering: vshf.d with %a as both source operands. R3 is loaded
          ; with the control vector through a %lo($...) address (presumably the
          ; constant-pool copy of the mask) and then receives the result.
    247   ; CHECK-LABEL: vshf_v2i64_0:
    248 
    249   %1 = load <2 x i64>* %a
    250   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    251   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
    252   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    253   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
    254   ; CHECK-DAG: vshf.d [[R3]], [[R1]], [[R1]]
    255   store <2 x i64> %2, <2 x i64>* %c
    256   ; CHECK-DAG: st.d [[R3]], 0($4)
    257 
    258   ret void
    259   ; CHECK: .size vshf_v2i64_0
    260 }
    261 
    262 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Single-input v2i64 shuffle whose mask selects element 1 of %a for both
          ; lanes; %b is unused.
          ; Expected lowering: splati.d of element 1 instead of a general vshf.d.
    263   ; CHECK-LABEL: vshf_v2i64_1:
    264 
    265   %1 = load <2 x i64>* %a
    266   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    267   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    268   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
    269   store <2 x i64> %2, <2 x i64>* %c
    270   ; CHECK-DAG: st.d [[R3]], 0($4)
    271 
    272   ret void
    273   ; CHECK: .size vshf_v2i64_1
    274 }
    275 
    276 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (3,2) only selects elements of %b,
          ; in swapped order. %a is loaded but never selected, so no load of %a is
          ; checked.
          ; Expected lowering: vshf.d with %b as both source operands and the control
          ; vector loaded into R3 through a %lo($...) address.
    277   ; CHECK-LABEL: vshf_v2i64_2:
    278 
    279   %1 = load <2 x i64>* %a
    280   %2 = load <2 x i64>* %b
    281   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    282   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
    283   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    284   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
    285   ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R2]]
    286   store <2 x i64> %3, <2 x i64>* %c
    287   ; CHECK-DAG: st.d [[R3]], 0($4)
    288 
    289   ret void
    290   ; CHECK: .size vshf_v2i64_2
    291 }
    292 
    293 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (1,2) takes element 1 of %a followed
          ; by element 0 of %b.
          ; Expected lowering: a genuine two-source vshf.d with the control vector
          ; loaded into R3 through a %lo($...) address.
    294   ; CHECK-LABEL: vshf_v2i64_3:
    295 
    296   %1 = load <2 x i64>* %a
    297   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    298   %2 = load <2 x i64>* %b
    299   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    300   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
    301   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
    302   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[PTR_A]])
    303   ; The concatenation step of vshf is bitwise not vectorwise so we must reverse
    304   ; the operands to get the right answer.
    305   ; CHECK-DAG: vshf.d [[R3]], [[R2]], [[R1]]
    306   store <2 x i64> %3, <2 x i64>* %c
    307   ; CHECK-DAG: st.d [[R3]], 0($4)
    308 
    309   ret void
    310   ; CHECK: .size vshf_v2i64_3
    311 }
    312 
    313 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Shuffle with %1 passed as both operands. The mask (1,3) uses indices 1
          ; and 3, which therefore both name element 1 of %a.
          ; Expected lowering: the shuffle is recognised as a splat and becomes
          ; splati.d of element 1.
    314   ; CHECK-LABEL: vshf_v2i64_4:
    315 
    316   %1 = load <2 x i64>* %a
    317   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    318   %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
    319   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
    320   store <2 x i64> %2, <2 x i64>* %c
    321   ; CHECK-DAG: st.d [[R3]], 0($4)
    322 
    323   ret void
    324   ; CHECK: .size vshf_v2i64_4
    325 }
    326 
    327 define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Single-input v16i8 shuffle that applies the permutation 1,3,2,0 inside
          ; each group of four bytes of %a; %b is unused.
          ; Expected lowering: shf.b with immediate 45, which equals the per-group
          ; mask packed two bits per lane with lane 0 lowest: 1 | 3<<2 | 2<<4 | 0<<6.
          ; The label is anchored with {{^}} because the unanchored text also occurs
          ; inside the earlier "vshf_v16i8_0:" label line.
    328   ; CHECK-LABEL: {{^}}shf_v16i8_0:
    329 
    330   %1 = load <16 x i8>* %a
    331   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    332   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
    333   ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
    334   store <16 x i8> %2, <16 x i8>* %c
    335   ; CHECK-DAG: st.b [[R3]], 0($4)
    336 
    337   ret void
    338   ; CHECK: .size shf_v16i8_0
    339 }
    340 
    341 define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Single-input v8i16 shuffle that reverses the elements inside each group
          ; of four halfwords of %a (mask 3,2,1,0,7,6,5,4); %b is unused.
          ; Expected lowering: shf.h with immediate 27, which equals the per-group
          ; mask packed two bits per lane with lane 0 lowest: 3 | 2<<2 | 1<<4 | 0<<6.
          ; The label is anchored with {{^}} because the unanchored text also occurs
          ; inside the earlier "vshf_v8i16_0:" label line.
    342   ; CHECK-LABEL: {{^}}shf_v8i16_0:
    343 
    344   %1 = load <8 x i16>* %a
    345   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    346   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    347   ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
    348   store <8 x i16> %2, <8 x i16>* %c
    349   ; CHECK-DAG: st.h [[R3]], 0($4)
    350 
    351   ret void
    352   ; CHECK: .size shf_v8i16_0
    353 }
    354 
    355 define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Single-input v4i32 shuffle that reverses the four words of %a (mask
          ; 3,2,1,0); %b is unused.
          ; Expected lowering: shf.w with immediate 27, which equals the mask packed
          ; two bits per lane with lane 0 lowest: 3 | 2<<2 | 1<<4 | 0<<6.
          ; The label is anchored with {{^}} because the unanchored text also occurs
          ; inside the earlier "vshf_v4i32_0:" label line.
    356   ; CHECK-LABEL: {{^}}shf_v4i32_0:
    357 
    358   %1 = load <4 x i32>* %a
    359   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    360   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    361   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
    362   store <4 x i32> %2, <4 x i32>* %c
    363   ; CHECK-DAG: st.w [[R3]], 0($4)
    364 
    365   ret void
    366   ; CHECK: .size shf_v4i32_0
    367 }
    368 
    369 ; shf.d does not exist
    370 
    371 define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask alternates the even-indexed elements
          ; of %a and %b (0,16,2,18,...,14,30).
          ; Expected lowering: a single ilvev.b taking the %a and %b registers, with
          ; no control vector load.
    372   ; CHECK-LABEL: ilvev_v16i8_0:
    373 
    374   %1 = load <16 x i8>* %a
    375   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    376   %2 = load <16 x i8>* %b
    377   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    378   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    379                      <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
    380   ; CHECK-DAG: ilvev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    381   store <16 x i8> %3, <16 x i8>* %c
    382   ; CHECK-DAG: st.b [[R3]], 0($4)
    383 
    384   ret void
    385   ; CHECK: .size ilvev_v16i8_0
    386 }
    387 
    388 define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask alternates the even-indexed elements
          ; of %a and %b (0,8,2,10,4,12,6,14).
          ; Expected lowering: a single ilvev.h taking the %a and %b registers, with
          ; no control vector load.
    389   ; CHECK-LABEL: ilvev_v8i16_0:
    390 
    391   %1 = load <8 x i16>* %a
    392   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    393   %2 = load <8 x i16>* %b
    394   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    395   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
    396   ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    397   store <8 x i16> %3, <8 x i16>* %c
    398   ; CHECK-DAG: st.h [[R3]], 0($4)
    399 
    400   ret void
    401   ; CHECK: .size ilvev_v8i16_0
    402 }
    403 
    404 define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask alternates the even-indexed elements
          ; of %a and %b (0,4,2,6).
          ; Expected lowering: a single ilvev.w taking the %a and %b registers, with
          ; no control vector load.
    405   ; CHECK-LABEL: ilvev_v4i32_0:
    406 
    407   %1 = load <4 x i32>* %a
    408   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    409   %2 = load <4 x i32>* %b
    410   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    411   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    412   ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    413   store <4 x i32> %3, <4 x i32>* %c
    414   ; CHECK-DAG: st.w [[R3]], 0($4)
    415 
    416   ret void
    417   ; CHECK: .size ilvev_v4i32_0
    418 }
    419 
    420 define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (0,2) takes element 0 of %a followed
          ; by element 0 of %b.
          ; Expected lowering: a single ilvev.d taking the %a and %b registers, with
          ; no control vector load.
    421   ; CHECK-LABEL: ilvev_v2i64_0:
    422 
    423   %1 = load <2 x i64>* %a
    424   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    425   %2 = load <2 x i64>* %b
    426   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    427   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
    428   ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    429   store <2 x i64> %3, <2 x i64>* %c
    430   ; CHECK-DAG: st.d [[R3]], 0($4)
    431 
    432   ret void
    433   ; CHECK: .size ilvev_v2i64_0
    434 }
    435 
    436 define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask alternates the odd-indexed elements
          ; of %a and %b (1,17,3,19,...,15,31).
          ; Expected lowering: a single ilvod.b taking the %a and %b registers, with
          ; no control vector load.
    437   ; CHECK-LABEL: ilvod_v16i8_0:
    438 
    439   %1 = load <16 x i8>* %a
    440   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    441   %2 = load <16 x i8>* %b
    442   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    443   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    444                      <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
    445   ; CHECK-DAG: ilvod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    446   store <16 x i8> %3, <16 x i8>* %c
    447   ; CHECK-DAG: st.b [[R3]], 0($4)
    448 
    449   ret void
    450   ; CHECK: .size ilvod_v16i8_0
    451 }
    452 
    453 define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask alternates the odd-indexed elements
          ; of %a and %b (1,9,3,11,5,13,7,15).
          ; Expected lowering: a single ilvod.h taking the %a and %b registers, with
          ; no control vector load.
    454   ; CHECK-LABEL: ilvod_v8i16_0:
    455 
    456   %1 = load <8 x i16>* %a
    457   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    458   %2 = load <8 x i16>* %b
    459   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    460   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
    461   ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    462   store <8 x i16> %3, <8 x i16>* %c
    463   ; CHECK-DAG: st.h [[R3]], 0($4)
    464 
    465   ret void
    466   ; CHECK: .size ilvod_v8i16_0
    467 }
    468 
    469 define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask alternates the odd-indexed elements
          ; of %a and %b (1,5,3,7).
          ; Expected lowering: a single ilvod.w taking the %a and %b registers, with
          ; no control vector load.
    470   ; CHECK-LABEL: ilvod_v4i32_0:
    471 
    472   %1 = load <4 x i32>* %a
    473   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    474   %2 = load <4 x i32>* %b
    475   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    476   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    477   ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    478   store <4 x i32> %3, <4 x i32>* %c
    479   ; CHECK-DAG: st.w [[R3]], 0($4)
    480 
    481   ret void
    482   ; CHECK: .size ilvod_v4i32_0
    483 }
    484 
    485 define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (1,3) takes element 1 of %a followed
          ; by element 1 of %b.
          ; Expected lowering: a single ilvod.d taking the %a and %b registers, with
          ; no control vector load.
    486   ; CHECK-LABEL: ilvod_v2i64_0:
    487 
    488   %1 = load <2 x i64>* %a
    489   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    490   %2 = load <2 x i64>* %b
    491   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    492   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
    493   ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    494   store <2 x i64> %3, <2 x i64>* %c
    495   ; CHECK-DAG: st.d [[R3]], 0($4)
    496 
    497   ret void
    498   ; CHECK: .size ilvod_v2i64_0
    499 }
    500 
    501 define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask interleaves elements 0..7 of %a with
          ; elements 0..7 of %b (0,16,1,17,...,7,23).
          ; Expected lowering: a single ilvl.b taking the %a and %b registers.
          ; NOTE(review): the MSA manual appears to define ILVL as interleaving the
          ; left (upper-indexed) halves, whereas this mask selects the lower-indexed
          ; half; confirm the expected mnemonic and operand order against the ISA
          ; manual and not only against current llc output -- TODO confirm.
    502   ; CHECK-LABEL: ilvl_v16i8_0:
    503 
    504   %1 = load <16 x i8>* %a
    505   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    506   %2 = load <16 x i8>* %b
    507   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    508   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    509                      <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
    510   ; CHECK-DAG: ilvl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    511   store <16 x i8> %3, <16 x i8>* %c
    512   ; CHECK-DAG: st.b [[R3]], 0($4)
    513 
    514   ret void
    515   ; CHECK: .size ilvl_v16i8_0
    516 }
    517 
    518 define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask interleaves elements 0..3 of %a with
          ; elements 0..3 of %b (0,8,1,9,2,10,3,11).
          ; Expected lowering: a single ilvl.h taking the %a and %b registers.
          ; NOTE(review): the MSA manual appears to define ILVL as interleaving the
          ; left (upper-indexed) halves, whereas this mask selects the lower-indexed
          ; half; confirm the expected mnemonic and operand order against the ISA
          ; manual and not only against current llc output -- TODO confirm.
    519   ; CHECK-LABEL: ilvl_v8i16_0:
    520 
    521   %1 = load <8 x i16>* %a
    522   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    523   %2 = load <8 x i16>* %b
    524   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    525   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    526   ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    527   store <8 x i16> %3, <8 x i16>* %c
    528   ; CHECK-DAG: st.h [[R3]], 0($4)
    529 
    530   ret void
    531   ; CHECK: .size ilvl_v8i16_0
    532 }
    533 
    534 define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask interleaves elements 0..1 of %a with
          ; elements 0..1 of %b (0,4,1,5).
          ; Expected lowering: a single ilvl.w taking the %a and %b registers.
          ; NOTE(review): the MSA manual appears to define ILVL as interleaving the
          ; left (upper-indexed) halves, whereas this mask selects the lower-indexed
          ; half; confirm the expected mnemonic and operand order against the ISA
          ; manual and not only against current llc output -- TODO confirm.
    535   ; CHECK-LABEL: ilvl_v4i32_0:
    536 
    537   %1 = load <4 x i32>* %a
    538   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    539   %2 = load <4 x i32>* %b
    540   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    541   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    542   ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    543   store <4 x i32> %3, <4 x i32>* %c
    544   ; CHECK-DAG: st.w [[R3]], 0($4)
    545 
    546   ret void
    547   ; CHECK: .size ilvl_v4i32_0
    548 }
    549 
    550 define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (0,2) takes element 0 of %a followed
          ; by element 0 of %b. This is the same mask as in ilvev_v2i64_0, so the
          ; expected instruction is ilvev.d rather than ilvl.d.
    551   ; CHECK-LABEL: ilvl_v2i64_0:
    552 
    553   %1 = load <2 x i64>* %a
    554   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    555   %2 = load <2 x i64>* %b
    556   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    557   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
    558   ; ilvl.d and ilvev.d are equivalent for v2i64
    559   ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    560   store <2 x i64> %3, <2 x i64>* %c
    561   ; CHECK-DAG: st.d [[R3]], 0($4)
    562 
    563   ret void
    564   ; CHECK: .size ilvl_v2i64_0
    565 }
    566 
    567 define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask interleaves elements 8..15 of %a
          ; with elements 8..15 of %b (8,24,9,25,...,15,31).
          ; Expected lowering: a single ilvr.b taking the %a and %b registers.
          ; NOTE(review): the MSA manual appears to define ILVR as interleaving the
          ; right (lower-indexed) halves, whereas this mask selects the upper-indexed
          ; half; confirm the expected mnemonic and operand order against the ISA
          ; manual and not only against current llc output -- TODO confirm.
    568   ; CHECK-LABEL: ilvr_v16i8_0:
    569 
    570   %1 = load <16 x i8>* %a
    571   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    572   %2 = load <16 x i8>* %b
    573   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    574   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    575                      <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
    576   ; CHECK-DAG: ilvr.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    577   store <16 x i8> %3, <16 x i8>* %c
    578   ; CHECK-DAG: st.b [[R3]], 0($4)
    579 
    580   ret void
    581   ; CHECK: .size ilvr_v16i8_0
    582 }
    583 
    584 define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask interleaves elements 4..7 of %a with
          ; elements 4..7 of %b (4,12,5,13,6,14,7,15).
          ; Expected lowering: a single ilvr.h taking the %a and %b registers.
          ; NOTE(review): the MSA manual appears to define ILVR as interleaving the
          ; right (lower-indexed) halves, whereas this mask selects the upper-indexed
          ; half; confirm the expected mnemonic and operand order against the ISA
          ; manual and not only against current llc output -- TODO confirm.
    585   ; CHECK-LABEL: ilvr_v8i16_0:
    586 
    587   %1 = load <8 x i16>* %a
    588   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    589   %2 = load <8 x i16>* %b
    590   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    591   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    592   ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    593   store <8 x i16> %3, <8 x i16>* %c
    594   ; CHECK-DAG: st.h [[R3]], 0($4)
    595 
    596   ret void
    597   ; CHECK: .size ilvr_v8i16_0
    598 }
    599 
    600 define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask interleaves elements 2..3 of %a with
          ; elements 2..3 of %b (2,6,3,7).
          ; Expected lowering: a single ilvr.w taking the %a and %b registers.
          ; NOTE(review): the MSA manual appears to define ILVR as interleaving the
          ; right (lower-indexed) halves, whereas this mask selects the upper-indexed
          ; half; confirm the expected mnemonic and operand order against the ISA
          ; manual and not only against current llc output -- TODO confirm.
    601   ; CHECK-LABEL: ilvr_v4i32_0:
    602 
    603   %1 = load <4 x i32>* %a
    604   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    605   %2 = load <4 x i32>* %b
    606   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    607   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
    608   ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    609   store <4 x i32> %3, <4 x i32>* %c
    610   ; CHECK-DAG: st.w [[R3]], 0($4)
    611 
    612   ret void
    613   ; CHECK: .size ilvr_v4i32_0
    614 }
    615 
    616 define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (1,3) takes element 1 of %a followed
          ; by element 1 of %b. This is the same mask as in ilvod_v2i64_0, so the
          ; expected instruction is ilvod.d rather than ilvr.d.
    617   ; CHECK-LABEL: ilvr_v2i64_0:
    618 
    619   %1 = load <2 x i64>* %a
    620   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    621   %2 = load <2 x i64>* %b
    622   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    623   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
    624   ; ilvr.d and ilvod.d are equivalent for v2i64
    625   ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    626   store <2 x i64> %3, <2 x i64>* %c
    627   ; CHECK-DAG: st.d [[R3]], 0($4)
    628 
    629   ret void
    630   ; CHECK: .size ilvr_v2i64_0
    631 }
    632 
    633 define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask takes all even-indexed elements of
          ; %a (0,2,...,14) followed by all even-indexed elements of %b (16,...,30).
          ; Expected lowering: a single pckev.b taking the %a and %b registers, with
          ; no control vector load.
    634   ; CHECK-LABEL: pckev_v16i8_0:
    635 
    636   %1 = load <16 x i8>* %a
    637   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    638   %2 = load <16 x i8>* %b
    639   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    640   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    641                      <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
    642   ; CHECK-DAG: pckev.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    643   store <16 x i8> %3, <16 x i8>* %c
    644   ; CHECK-DAG: st.b [[R3]], 0($4)
    645 
    646   ret void
    647   ; CHECK: .size pckev_v16i8_0
    648 }
    649 
    650 define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask takes all even-indexed elements of
          ; %a (0,2,4,6) followed by all even-indexed elements of %b (8,10,12,14).
          ; Expected lowering: a single pckev.h taking the %a and %b registers, with
          ; no control vector load.
    651   ; CHECK-LABEL: pckev_v8i16_0:
    652 
    653   %1 = load <8 x i16>* %a
    654   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    655   %2 = load <8 x i16>* %b
    656   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    657   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
    658   ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    659   store <8 x i16> %3, <8 x i16>* %c
    660   ; CHECK-DAG: st.h [[R3]], 0($4)
    661 
    662   ret void
    663   ; CHECK: .size pckev_v8i16_0
    664 }
    665 
    666 define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask takes the even-indexed elements of
          ; %a (0,2) followed by the even-indexed elements of %b (4,6).
          ; Expected lowering: a single pckev.w taking the %a and %b registers, with
          ; no control vector load.
    667   ; CHECK-LABEL: pckev_v4i32_0:
    668 
    669   %1 = load <4 x i32>* %a
    670   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    671   %2 = load <4 x i32>* %b
    672   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    673   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    674   ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    675   store <4 x i32> %3, <4 x i32>* %c
    676   ; CHECK-DAG: st.w [[R3]], 0($4)
    677 
    678   ret void
    679   ; CHECK: .size pckev_v4i32_0
    680 }
    681 
    682 define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (0,2) takes element 0 of %a followed
          ; by element 0 of %b. This is the same mask as in ilvev_v2i64_0, so the
          ; expected instruction is ilvev.d rather than pckev.d.
    683   ; CHECK-LABEL: pckev_v2i64_0:
    684 
    685   %1 = load <2 x i64>* %a
    686   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    687   %2 = load <2 x i64>* %b
    688   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    689   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
    690   ; pckev.d and ilvev.d are equivalent for v2i64
    691   ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    692   store <2 x i64> %3, <2 x i64>* %c
    693   ; CHECK-DAG: st.d [[R3]], 0($4)
    694 
    695   ret void
    696   ; CHECK: .size pckev_v2i64_0
    697 }
    698 
    699 define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
          ; Two-operand v16i8 shuffle whose mask takes all odd-indexed elements of %a
          ; (1,3,...,15) followed by all odd-indexed elements of %b (17,...,31).
          ; Expected lowering: a single pckod.b taking the %a and %b registers, with
          ; no control vector load.
    700   ; CHECK-LABEL: pckod_v16i8_0:
    701 
    702   %1 = load <16 x i8>* %a
    703   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    704   %2 = load <16 x i8>* %b
    705   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
    706   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
    707                      <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
    708   ; CHECK-DAG: pckod.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    709   store <16 x i8> %3, <16 x i8>* %c
    710   ; CHECK-DAG: st.b [[R3]], 0($4)
    711 
    712   ret void
    713   ; CHECK: .size pckod_v16i8_0
    714 }
    715 
    716 define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
          ; Two-operand v8i16 shuffle whose mask takes all odd-indexed elements of %a
          ; (1,3,5,7) followed by all odd-indexed elements of %b (9,11,13,15).
          ; Expected lowering: a single pckod.h taking the %a and %b registers, with
          ; no control vector load.
    717   ; CHECK-LABEL: pckod_v8i16_0:
    718 
    719   %1 = load <8 x i16>* %a
    720   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    721   %2 = load <8 x i16>* %b
    722   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
    723   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    724   ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    725   store <8 x i16> %3, <8 x i16>* %c
    726   ; CHECK-DAG: st.h [[R3]], 0($4)
    727 
    728   ret void
    729   ; CHECK: .size pckod_v8i16_0
    730 }
    731 
    732 define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
          ; Two-operand v4i32 shuffle whose mask takes the odd-indexed elements of %a
          ; (1,3) followed by the odd-indexed elements of %b (5,7).
          ; Expected lowering: a single pckod.w taking the %a and %b registers, with
          ; no control vector load.
    733   ; CHECK-LABEL: pckod_v4i32_0:
    734 
    735   %1 = load <4 x i32>* %a
    736   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    737   %2 = load <4 x i32>* %b
    738   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
    739   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    740   ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    741   store <4 x i32> %3, <4 x i32>* %c
    742   ; CHECK-DAG: st.w [[R3]], 0($4)
    743 
    744   ret void
    745   ; CHECK: .size pckod_v4i32_0
    746 }
    747 
    748 define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
          ; Two-operand v2i64 shuffle whose mask (1,3) takes element 1 of %a followed
          ; by element 1 of %b. This is the same mask as in ilvod_v2i64_0, so the
          ; expected instruction is ilvod.d rather than pckod.d.
    749   ; CHECK-LABEL: pckod_v2i64_0:
    750 
    751   %1 = load <2 x i64>* %a
    752   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    753   %2 = load <2 x i64>* %b
    754   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
    755   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
    756   ; pckod.d and ilvod.d are equivalent for v2i64
    757   ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
    758   store <2 x i64> %3, <2 x i64>* %c
    759   ; CHECK-DAG: st.d [[R3]], 0($4)
    760 
    761   ret void
    762   ; CHECK: .size pckod_v2i64_0
    763 }
    764 
    765 define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
          ; Single-input v16i8 shuffle whose mask selects element 4 of %a for every
          ; lane.
          ; Expected lowering: splati.b of element 4.
    766   ; CHECK-LABEL: splati_v16i8_0:
    767 
    768   %1 = load <16 x i8>* %a
    769   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
    770   %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
    771                      <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    772   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][4]
    773   store <16 x i8> %2, <16 x i8>* %c
    774   ; CHECK-DAG: st.b [[R3]], 0($4)
    775 
    776   ret void
    777   ; CHECK: .size splati_v16i8_0
    778 }
    779 
    780 define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
          ; Single-input v8i16 shuffle whose mask selects element 4 of %a for every
          ; lane.
          ; Expected lowering: splati.h of element 4.
    781   ; CHECK-LABEL: splati_v8i16_0:
    782 
    783   %1 = load <8 x i16>* %a
    784   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
    785   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    786   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
    787   store <8 x i16> %2, <8 x i16>* %c
    788   ; CHECK-DAG: st.h [[R3]], 0($4)
    789 
    790   ret void
    791   ; CHECK: .size splati_v8i16_0
    792 }
    793 
    794 define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
          ; Single-input v4i32 shuffle whose mask selects element 3 of %a for every
          ; lane.
          ; Expected lowering: shf.w with immediate 255 (3 | 3<<2 | 3<<4 | 3<<6)
          ; rather than splati.w.
    795   ; CHECK-LABEL: splati_v4i32_0:
    796 
    797   %1 = load <4 x i32>* %a
    798   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
    799   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
    800   ; shf.w and splati.w are equivalent
    801   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 255
    802   store <4 x i32> %2, <4 x i32>* %c
    803   ; CHECK-DAG: st.w [[R3]], 0($4)
    804 
    805   ret void
    806   ; CHECK: .size splati_v4i32_0
    807 }
    808 
    809 define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
          ; Single-input v2i64 shuffle whose mask selects element 1 of %a for both
          ; lanes.
          ; Expected lowering: splati.d of element 1.
    810   ; CHECK-LABEL: splati_v2i64_0:
    811 
    812   %1 = load <2 x i64>* %a
    813   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
    814   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
    815   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
    816   store <2 x i64> %2, <2 x i64>* %c
    817   ; CHECK-DAG: st.d [[R3]], 0($4)
    818 
    819   ret void
    820   ; CHECK: .size splati_v2i64_0
    821 }
    822