; AArch64 NEON codegen tests: lane insert/extract, lane copy, dup and bitcast lowering.
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
      2 
      3 
      4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
      5 ; CHECK-LABEL: ins16bw:
      6 ; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
      7   %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
      8   ret <16 x i8> %tmp3
      9 }
     10 
     11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
     12 ; CHECK-LABEL: ins8hw:
     13 ; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
     14   %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
     15   ret <8 x i16> %tmp3
     16 }
     17 
     18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
     19 ; CHECK-LABEL: ins4sw:
     20 ; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
     21   %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
     22   ret <4 x i32> %tmp3
     23 }
     24 
     25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
     26 ; CHECK-LABEL: ins2dw:
     27 ; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
     28   %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
     29   ret <2 x i64> %tmp3
     30 }
     31 
     32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
     33 ; CHECK-LABEL: ins8bw:
     34 ; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
     35   %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
     36   ret <8 x i8> %tmp3
     37 }
     38 
     39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
     40 ; CHECK-LABEL: ins4hw:
     41 ; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
     42   %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
     43   ret <4 x i16> %tmp3
     44 }
     45 
     46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
     47 ; CHECK-LABEL: ins2sw:
     48 ; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
     49   %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
     50   ret <2 x i32> %tmp3
     51 }
     52 
     53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
     54 ; CHECK-LABEL: ins16b16:
     55 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
     56   %tmp3 = extractelement <16 x i8> %tmp1, i32 2
     57   %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
     58   ret <16 x i8> %tmp4
     59 }
     60 
     61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
     62 ; CHECK-LABEL: ins8h8:
     63 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
     64   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
     65   %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
     66   ret <8 x i16> %tmp4
     67 }
     68 
     69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
     70 ; CHECK-LABEL: ins4s4:
     71 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     72   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
     73   %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
     74   ret <4 x i32> %tmp4
     75 }
     76 
     77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
     78 ; CHECK-LABEL: ins2d2:
     79 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     80   %tmp3 = extractelement <2 x i64> %tmp1, i32 0
     81   %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
     82   ret <2 x i64> %tmp4
     83 }
     84 
     85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
     86 ; CHECK-LABEL: ins4f4:
     87 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     88   %tmp3 = extractelement <4 x float> %tmp1, i32 2
     89   %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
     90   ret <4 x float> %tmp4
     91 }
     92 
     93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
     94 ; CHECK-LABEL: ins2df2:
     95 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     96   %tmp3 = extractelement <2 x double> %tmp1, i32 0
     97   %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
     98   ret <2 x double> %tmp4
     99 }
    100 
    101 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
    102 ; CHECK-LABEL: ins8b16:
    103 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
    104   %tmp3 = extractelement <8 x i8> %tmp1, i32 2
    105   %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
    106   ret <16 x i8> %tmp4
    107 }
    108 
    109 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
    110 ; CHECK-LABEL: ins4h8:
    111 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
    112   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    113   %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
    114   ret <8 x i16> %tmp4
    115 }
    116 
    117 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
    118 ; CHECK-LABEL: ins2s4:
    119 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    120   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
    121   %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
    122   ret <4 x i32> %tmp4
    123 }
    124 
    125 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
    126 ; CHECK-LABEL: ins1d2:
    127 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    128   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
    129   %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
    130   ret <2 x i64> %tmp4
    131 }
    132 
    133 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
    134 ; CHECK-LABEL: ins2f4:
    135 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    136   %tmp3 = extractelement <2 x float> %tmp1, i32 1
    137   %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
    138   ret <4 x float> %tmp4
    139 }
    140 
    141 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
    142 ; CHECK-LABEL: ins1f2:
    143 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    144   %tmp3 = extractelement <1 x double> %tmp1, i32 0
    145   %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
    146   ret <2 x double> %tmp4
    147 }
    148 
    149 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
    150 ; CHECK-LABEL: ins16b8:
    151 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
    152   %tmp3 = extractelement <16 x i8> %tmp1, i32 2
    153   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
    154   ret <8 x i8> %tmp4
    155 }
    156 
    157 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
    158 ; CHECK-LABEL: ins8h4:
    159 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    160   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    161   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
    162   ret <4 x i16> %tmp4
    163 }
    164 
    165 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
    166 ; CHECK-LABEL: ins4s2:
    167 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    168   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
    169   %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
    170   ret <2 x i32> %tmp4
    171 }
    172 
    173 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
    174 ; CHECK-LABEL: ins2d1:
    175 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    176   %tmp3 = extractelement <2 x i64> %tmp1, i32 0
    177   %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
    178   ret <1 x i64> %tmp4
    179 }
    180 
    181 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
    182 ; CHECK-LABEL: ins4f2:
    183 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    184   %tmp3 = extractelement <4 x float> %tmp1, i32 2
    185   %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
    186   ret <2 x float> %tmp4
    187 }
    188 
    189 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
    190 ; CHECK-LABEL: ins2f1:
    191 ; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
    192   %tmp3 = extractelement <2 x double> %tmp1, i32 1
    193   %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
    194   ret <1 x double> %tmp4
    195 }
    196 
    197 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
    198 ; CHECK-LABEL: ins8b8:
    199 ; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
    200   %tmp3 = extractelement <8 x i8> %tmp1, i32 2
    201   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
    202   ret <8 x i8> %tmp4
    203 }
    204 
    205 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
    206 ; CHECK-LABEL: ins4h4:
    207 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    208   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    209   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
    210   ret <4 x i16> %tmp4
    211 }
    212 
    213 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
    214 ; CHECK-LABEL: ins2s2:
    215 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    216   %tmp3 = extractelement <2 x i32> %tmp1, i32 0
    217   %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
    218   ret <2 x i32> %tmp4
    219 }
    220 
    221 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
    222 ; CHECK-LABEL: ins1d1:
    223 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    224   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
    225   %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
    226   ret <1 x i64> %tmp4
    227 }
    228 
    229 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
    230 ; CHECK-LABEL: ins2f2:
    231 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    232   %tmp3 = extractelement <2 x float> %tmp1, i32 0
    233   %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
    234   ret <2 x float> %tmp4
    235 }
    236 
    237 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
    238 ; CHECK-LABEL: ins1df1:
    239 ; CHECK-NOT: ins {{v[0-9]+}}
    240   %tmp3 = extractelement <1 x double> %tmp1, i32 0
    241   %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
    242   ret <1 x double> %tmp4
    243 }
    244 
    245 define i32 @umovw16b(<16 x i8> %tmp1) {
    246 ; CHECK-LABEL: umovw16b:
    247 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    248   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
    249   %tmp4 = zext i8 %tmp3 to i32
    250   ret i32 %tmp4
    251 }
    252 
    253 define i32 @umovw8h(<8 x i16> %tmp1) {
    254 ; CHECK-LABEL: umovw8h:
    255 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    256   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    257   %tmp4 = zext i16 %tmp3 to i32
    258   ret i32 %tmp4
    259 }
    260 
    261 define i32 @umovw4s(<4 x i32> %tmp1) {
    262 ; CHECK-LABEL: umovw4s:
    263 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
    264   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
    265   ret i32 %tmp3
    266 }
    267 
    268 define i64 @umovx2d(<2 x i64> %tmp1) {
    269 ; CHECK-LABEL: umovx2d:
    270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
    271   %tmp3 = extractelement <2 x i64> %tmp1, i32 1
    272   ret i64 %tmp3
    273 }
    274 
    275 define i32 @umovw8b(<8 x i8> %tmp1) {
    276 ; CHECK-LABEL: umovw8b:
    277 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
    278   %tmp3 = extractelement <8 x i8> %tmp1, i32 7
    279   %tmp4 = zext i8 %tmp3 to i32
    280   ret i32 %tmp4
    281 }
    282 
    283 define i32 @umovw4h(<4 x i16> %tmp1) {
    284 ; CHECK-LABEL: umovw4h:
    285 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    286   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    287   %tmp4 = zext i16 %tmp3 to i32
    288   ret i32 %tmp4
    289 }
    290 
    291 define i32 @umovw2s(<2 x i32> %tmp1) {
    292 ; CHECK-LABEL: umovw2s:
    293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
    294   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
    295   ret i32 %tmp3
    296 }
    297 
    298 define i64 @umovx1d(<1 x i64> %tmp1) {
    299 ; CHECK-LABEL: umovx1d:
    300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    301   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
    302   ret i64 %tmp3
    303 }
    304 
    305 define i32 @smovw16b(<16 x i8> %tmp1) {
    306 ; CHECK-LABEL: smovw16b:
    307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    308   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
    309   %tmp4 = sext i8 %tmp3 to i32
    310   %tmp5 = add i32 %tmp4, %tmp4
    311   ret i32 %tmp5
    312 }
    313 
    314 define i32 @smovw8h(<8 x i16> %tmp1) {
    315 ; CHECK-LABEL: smovw8h:
    316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    317   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    318   %tmp4 = sext i16 %tmp3 to i32
    319   %tmp5 = add i32 %tmp4, %tmp4
    320   ret i32 %tmp5
    321 }
    322 
    323 define i64 @smovx16b(<16 x i8> %tmp1) {
    324 ; CHECK-LABEL: smovx16b:
    325 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
    326   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
    327   %tmp4 = sext i8 %tmp3 to i64
    328   ret i64 %tmp4
    329 }
    330 
    331 define i64 @smovx8h(<8 x i16> %tmp1) {
    332 ; CHECK-LABEL: smovx8h:
    333 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
    334   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    335   %tmp4 = sext i16 %tmp3 to i64
    336   ret i64 %tmp4
    337 }
    338 
    339 define i64 @smovx4s(<4 x i32> %tmp1) {
    340 ; CHECK-LABEL: smovx4s:
    341 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
    342   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
    343   %tmp4 = sext i32 %tmp3 to i64
    344   ret i64 %tmp4
    345 }
    346 
    347 define i32 @smovw8b(<8 x i8> %tmp1) {
    348 ; CHECK-LABEL: smovw8b:
    349 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
    350   %tmp3 = extractelement <8 x i8> %tmp1, i32 4
    351   %tmp4 = sext i8 %tmp3 to i32
    352   %tmp5 = add i32 %tmp4, %tmp4
    353   ret i32 %tmp5
    354 }
    355 
    356 define i32 @smovw4h(<4 x i16> %tmp1) {
    357 ; CHECK-LABEL: smovw4h:
    358 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    359   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    360   %tmp4 = sext i16 %tmp3 to i32
    361   %tmp5 = add i32 %tmp4, %tmp4
    362   ret i32 %tmp5
    363 }
    364 
    365 define i32 @smovx8b(<8 x i8> %tmp1) {
    366 ; CHECK-LABEL: smovx8b:
    367 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
    368   %tmp3 = extractelement <8 x i8> %tmp1, i32 6
    369   %tmp4 = sext i8 %tmp3 to i32
    370   ret i32 %tmp4
    371 }
    372 
    373 define i32 @smovx4h(<4 x i16> %tmp1) {
    374 ; CHECK-LABEL: smovx4h:
    375 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
    376   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    377   %tmp4 = sext i16 %tmp3 to i32
    378   ret i32 %tmp4
    379 }
    380 
    381 define i64 @smovx2s(<2 x i32> %tmp1) {
    382 ; CHECK-LABEL: smovx2s:
    383 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
    384   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
    385   %tmp4 = sext i32 %tmp3 to i64
    386   ret i64 %tmp4
    387 }
    388 
    389 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
    390 ; CHECK-LABEL: test_vcopy_lane_s8:
    391 ; CHECK: ins  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
    392   %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
    393   ret <8 x i8> %vset_lane
    394 }
    395 
    396 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
    397 ; CHECK-LABEL: test_vcopyq_laneq_s8:
    398 ; CHECK: ins  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
    399   %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
    400   ret <16 x i8> %vset_lane
    401 }
    402 
    403 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
    404 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
    405 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
    406   %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
    407   ret <8 x i8> %vset_lane
    408 }
    409 
    410 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
    411 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
    412 ; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
    413   %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    414   ret <16 x i8> %vset_lane
    415 }
    416 
    417 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
    418 ; CHECK-LABEL: test_vdup_n_u8:
    419 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
    420   %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
    421   %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
    422   %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
    423   %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
    424   %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
    425   %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
    426   %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
    427   %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
    428   ret <8 x i8> %vecinit7.i
    429 }
    430 
    431 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
    432 ; CHECK-LABEL: test_vdup_n_u16:
    433 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
    434   %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
    435   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
    436   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
    437   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
    438   ret <4 x i16> %vecinit3.i
    439 }
    440 
    441 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
    442 ; CHECK-LABEL: test_vdup_n_u32:
    443 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
    444   %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
    445   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
    446   ret <2 x i32> %vecinit1.i
    447 }
    448 
    449 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
    450 ; CHECK-LABEL: test_vdup_n_u64:
    451 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    452   %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
    453   ret <1 x i64> %vecinit.i
    454 }
    455 
    456 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
    457 ; CHECK-LABEL: test_vdupq_n_u8:
    458 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
    459   %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
    460   %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
    461   %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
    462   %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
    463   %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
    464   %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
    465   %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
    466   %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
    467   %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
    468   %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
    469   %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
    470   %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
    471   %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
    472   %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
    473   %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
    474   %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
    475   ret <16 x i8> %vecinit15.i
    476 }
    477 
    478 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
    479 ; CHECK-LABEL: test_vdupq_n_u16:
    480 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
    481   %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
    482   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
    483   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
    484   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
    485   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
    486   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
    487   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
    488   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
    489   ret <8 x i16> %vecinit7.i
    490 }
    491 
    492 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
    493 ; CHECK-LABEL: test_vdupq_n_u32:
    494 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
    495   %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
    496   %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
    497   %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
    498   %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
    499   ret <4 x i32> %vecinit3.i
    500 }
    501 
    502 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
    503 ; CHECK-LABEL: test_vdupq_n_u64:
    504 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
    505   %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
    506   %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
    507   ret <2 x i64> %vecinit1.i
    508 }
    509 
    510 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
    511 ; CHECK-LABEL: test_vdup_lane_s8:
    512 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    513   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    514   ret <8 x i8> %shuffle
    515 }
    516 
    517 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
    518 ; CHECK-LABEL: test_vdup_lane_s16:
    519 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    520   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    521   ret <4 x i16> %shuffle
    522 }
    523 
    524 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
    525 ; CHECK-LABEL: test_vdup_lane_s32:
    526 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    527   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
    528   ret <2 x i32> %shuffle
    529 }
    530 
    531 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
    532 ; CHECK-LABEL: test_vdupq_lane_s8:
    533 ; CHECK: {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    534   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    535   ret <16 x i8> %shuffle
    536 }
    537 
    538 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
    539 ; CHECK-LABEL: test_vdupq_lane_s16:
    540 ; CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    541   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    542   ret <8 x i16> %shuffle
    543 }
    544 
    545 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
    546 ; CHECK-LABEL: test_vdupq_lane_s32:
    547 ; CHECK: {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    548   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    549   ret <4 x i32> %shuffle
    550 }
    551 
    552 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
    553 ; CHECK-LABEL: test_vdupq_lane_s64:
    554 ; CHECK: {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    555   %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
    556   ret <2 x i64> %shuffle
    557 }
    558 
    559 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
    560 ; CHECK-LABEL: test_vdup_laneq_s8:
    561 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    562   %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    563   ret <8 x i8> %shuffle
    564 }
    565 
    566 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
    567 ; CHECK-LABEL: test_vdup_laneq_s16:
    568 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    569   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    570   ret <4 x i16> %shuffle
    571 }
    572 
    573 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
    574 ; CHECK-LABEL: test_vdup_laneq_s32:
    575 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    576   %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
    577   ret <2 x i32> %shuffle
    578 }
    579 
    580 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
    581 ; CHECK-LABEL: test_vdupq_laneq_s8:
    582 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    583   %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    584   ret <16 x i8> %shuffle
    585 }
    586 
    587 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
    588 ; CHECK-LABEL: test_vdupq_laneq_s16:
    589 ; CHECK: {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    590   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    591   ret <8 x i16> %shuffle
    592 }
    593 
    594 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
    595 ; CHECK-LABEL: test_vdupq_laneq_s32:
    596 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    597   %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    598   ret <4 x i32> %shuffle
    599 }
    600 
    601 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
    602 ; CHECK-LABEL: test_vdupq_laneq_s64:
    603 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    604   %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
    605   ret <2 x i64> %shuffle
    606 }
    607 
    608 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
    609 ; CHECK-LABEL: test_bitcastv8i8toi64:
    610    %res = bitcast <8 x i8> %in to i64
    611 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    612    ret i64 %res
    613 }
    614 
    615 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
    616 ; CHECK-LABEL: test_bitcastv4i16toi64:
    617    %res = bitcast <4 x i16> %in to i64
    618 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    619    ret i64 %res
    620 }
    621 
    622 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
    623 ; CHECK-LABEL: test_bitcastv2i32toi64:
    624    %res = bitcast <2 x i32> %in to i64
    625 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    626    ret i64 %res
    627 }
    628 
    629 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
    630 ; CHECK-LABEL: test_bitcastv2f32toi64:
    631    %res = bitcast <2 x float> %in to i64
    632 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    633    ret i64 %res
    634 }
    635 
    636 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
    637 ; CHECK-LABEL: test_bitcastv1i64toi64:
    638    %res = bitcast <1 x i64> %in to i64
    639 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    640    ret i64 %res
    641 }
    642 
    643 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
    644 ; CHECK-LABEL: test_bitcastv1f64toi64:
    645    %res = bitcast <1 x double> %in to i64
    646 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    647    ret i64 %res
    648 }
    649 
    650 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
    651 ; CHECK-LABEL: test_bitcasti64tov8i8:
    652    %res = bitcast i64 %in to <8 x i8>
    653 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    654    ret <8 x i8> %res
    655 }
    656 
    657 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
    658 ; CHECK-LABEL: test_bitcasti64tov4i16:
    659    %res = bitcast i64 %in to <4 x i16>
    660 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    661    ret <4 x i16> %res
    662 }
    663 
    664 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
    665 ; CHECK-LABEL: test_bitcasti64tov2i32:
    666    %res = bitcast i64 %in to <2 x i32>
    667 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    668    ret <2 x i32> %res
    669 }
    670 
    671 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
    672 ; CHECK-LABEL: test_bitcasti64tov2f32:
    673    %res = bitcast i64 %in to <2 x float>
    674 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    675    ret <2 x float> %res
    676 }
    677 
    678 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
    679 ; CHECK-LABEL: test_bitcasti64tov1i64:
    680    %res = bitcast i64 %in to <1 x i64>
    681 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    682    ret <1 x i64> %res
    683 }
    684 
    685 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
    686 ; CHECK-LABEL: test_bitcasti64tov1f64:
    687    %res = bitcast i64 %in to <1 x double>
    688 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    689    ret <1 x double> %res
    690 }
    691 
    692 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
    693 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
    694 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    695 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    696   %sub.i = sub <8 x i8> zeroinitializer, %a
    697   %1 = bitcast <8 x i8> %sub.i to <1 x double>
    698   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    699   ret <1 x i64> %vcvt.i
    700 }
    701 
    702 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
    703 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
    704 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
    705 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    706   %sub.i = sub <4 x i16> zeroinitializer, %a
    707   %1 = bitcast <4 x i16> %sub.i to <1 x double>
    708   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    709   ret <1 x i64> %vcvt.i
    710 }
    711 
    712 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
    713 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
    714 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    715 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    716   %sub.i = sub <2 x i32> zeroinitializer, %a
    717   %1 = bitcast <2 x i32> %sub.i to <1 x double>
    718   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    719   ret <1 x i64> %vcvt.i
    720 }
    721 
    722 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
    723 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
    724 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
    725 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    726   %sub.i = sub <1 x i64> zeroinitializer, %a
    727   %1 = bitcast <1 x i64> %sub.i to <1 x double>
    728   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    729   ret <1 x i64> %vcvt.i
    730 }
    731 
    732 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
    733 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
    734 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    735 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    736   %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
    737   %1 = bitcast <2 x float> %sub.i to <1 x double>
    738   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    739   ret <1 x i64> %vcvt.i
    740 }
    741 
    742 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
    743 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
    744 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    745 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    746   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
    747   %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
    748   %sub.i = sub <8 x i8> zeroinitializer, %1
    749   ret <8 x i8> %sub.i
    750 }
    751 
     752 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
         ; Converts %a to <1 x double> with sitofp, reinterprets the bits as <4 x i16>
         ; and negates each lane (0 - x). Expects scvtf directly followed by neg on .4h.
     753 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
     754 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
     755 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
     756   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
     757   %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
     758   %sub.i = sub <4 x i16> zeroinitializer, %1
     759   ret <4 x i16> %sub.i
     760 }
    761 
     762 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
         ; Converts %a to <1 x double> with sitofp, reinterprets the bits as <2 x i32>
         ; and negates each lane (0 - x). Expects scvtf directly followed by neg on .2s.
     763 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
     764 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
     765 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     766   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
     767   %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
     768   %sub.i = sub <2 x i32> zeroinitializer, %1
     769   ret <2 x i32> %sub.i
     770 }
    771 
     772 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
         ; Converts %a to <1 x double> with sitofp, reinterprets the bits as <1 x i64>
         ; and negates it (0 - x). Expects scvtf directly followed by a scalar neg on a
         ; d register.
     773 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
     774 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
     775 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
     776   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
     777   %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
     778   %sub.i = sub <1 x i64> zeroinitializer, %1
     779   ret <1 x i64> %sub.i
     780 }
    781 
     782 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
         ; Converts %a to <1 x double> with sitofp, reinterprets the bits as
         ; <2 x float> and computes (-0.0 - x) per lane. Expects scvtf directly followed
         ; by a vector fneg on .2s.
     783 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
     784 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
     785 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     786   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
     787   %1 = bitcast <1 x double> %vcvt.i to <2 x float>
     788   %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
     789   ret <2 x float> %sub.i
     790 }
    791 
     792 ; Test inserting a scalar element into lane 0 of an undef vector.
     793 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
         ; Places i8 %a in lane 0 of an otherwise undef <8 x i8>. Expects an fmov from a
         ; w register into an s register.
     794 ; CHECK-LABEL: scalar_to_vector.v8i8:
     795 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     796   %b = insertelement <8 x i8> undef, i8 %a, i32 0
     797   ret <8 x i8> %b
     798 }
    799 
     800 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
         ; Places i8 %a in lane 0 of an otherwise undef <16 x i8>. Expects an fmov from
         ; a w register into an s register.
     801 ; CHECK-LABEL: scalar_to_vector.v16i8:
     802 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     803   %b = insertelement <16 x i8> undef, i8 %a, i32 0
     804   ret <16 x i8> %b
     805 }
    806 
     807 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
         ; Places i16 %a in lane 0 of an otherwise undef <4 x i16>. Expects an fmov from
         ; a w register into an s register.
     808 ; CHECK-LABEL: scalar_to_vector.v4i16:
     809 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     810   %b = insertelement <4 x i16> undef, i16 %a, i32 0
     811   ret <4 x i16> %b
     812 }
    813 
     814 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
         ; Places i16 %a in lane 0 of an otherwise undef <8 x i16>. Expects an fmov from
         ; a w register into an s register.
     815 ; CHECK-LABEL: scalar_to_vector.v8i16:
     816 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     817   %b = insertelement <8 x i16> undef, i16 %a, i32 0
     818   ret <8 x i16> %b
     819 }
    820 
     821 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
         ; Places i32 %a in lane 0 of an otherwise undef <2 x i32>. Expects an fmov from
         ; a w register into an s register.
     822 ; CHECK-LABEL: scalar_to_vector.v2i32:
     823 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     824   %b = insertelement <2 x i32> undef, i32 %a, i32 0
     825   ret <2 x i32> %b
     826 }
    827 
     828 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
         ; Places i32 %a in lane 0 of an otherwise undef <4 x i32>. Expects an fmov from
         ; a w register into an s register.
     829 ; CHECK-LABEL: scalar_to_vector.v4i32:
     830 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     831   %b = insertelement <4 x i32> undef, i32 %a, i32 0
     832   ret <4 x i32> %b
     833 }
    834 
     835 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
         ; Places i64 %a in lane 0 of an otherwise undef <2 x i64>. Expects an fmov from
         ; an x register into a d register (the 64-bit form of the tests above).
     836 ; CHECK-LABEL: scalar_to_vector.v2i64:
     837 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
     838   %b = insertelement <2 x i64> undef, i64 %a, i32 0
     839   ret <2 x i64> %b
     840 }
    841 
     842 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
         ; Extracts the single element of <1 x i8> %a and inserts it into every lane
         ; (0-7) of an <8 x i8>. Expects the eight inserts to become one dup of byte
         ; lane 0 of v0 into v0.8b.
     843 ; CHECK-LABEL: testDUP.v1i8:
     844 ; CHECK: dup v0.8b, v0.b[0]
     845   %b = extractelement <1 x i8> %a, i32 0
     846   %c = insertelement <8 x i8> undef, i8 %b, i32 0
     847   %d = insertelement <8 x i8> %c, i8 %b, i32 1
     848   %e = insertelement <8 x i8> %d, i8 %b, i32 2
     849   %f = insertelement <8 x i8> %e, i8 %b, i32 3
     850   %g = insertelement <8 x i8> %f, i8 %b, i32 4
     851   %h = insertelement <8 x i8> %g, i8 %b, i32 5
     852   %i = insertelement <8 x i8> %h, i8 %b, i32 6
     853   %j = insertelement <8 x i8> %i, i8 %b, i32 7
     854   ret <8 x i8> %j
     855 }
    856 
     857 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
         ; Extracts the single element of <1 x i16> %a and inserts it into every lane
         ; (0-7) of an <8 x i16>. Expects the eight inserts to become one dup of
         ; halfword lane 0 of v0 into v0.8h.
     858 ; CHECK-LABEL: testDUP.v1i16:
     859 ; CHECK: dup v0.8h, v0.h[0]
     860   %b = extractelement <1 x i16> %a, i32 0
     861   %c = insertelement <8 x i16> undef, i16 %b, i32 0
     862   %d = insertelement <8 x i16> %c, i16 %b, i32 1
     863   %e = insertelement <8 x i16> %d, i16 %b, i32 2
     864   %f = insertelement <8 x i16> %e, i16 %b, i32 3
     865   %g = insertelement <8 x i16> %f, i16 %b, i32 4
     866   %h = insertelement <8 x i16> %g, i16 %b, i32 5
     867   %i = insertelement <8 x i16> %h, i16 %b, i32 6
     868   %j = insertelement <8 x i16> %i, i16 %b, i32 7
     869   ret <8 x i16> %j
     870 }
    871 
     872 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
         ; Extracts the single element of <1 x i32> %a and inserts it into every lane
         ; (0-3) of a <4 x i32>. Expects the four inserts to become one dup of word
         ; lane 0 of v0 into v0.4s.
     873 ; CHECK-LABEL: testDUP.v1i32:
     874 ; CHECK: dup v0.4s, v0.s[0]
     875   %b = extractelement <1 x i32> %a, i32 0
     876   %c = insertelement <4 x i32> undef, i32 %b, i32 0
     877   %d = insertelement <4 x i32> %c, i32 %b, i32 1
     878   %e = insertelement <4 x i32> %d, i32 %b, i32 2
     879   %f = insertelement <4 x i32> %e, i32 %b, i32 3
     880   ret <4 x i32> %f
     881 }
    882 
     883 define <8 x i8> @getl(<16 x i8> %x) #0 {
         ; Rebuilds an <8 x i8> lane by lane from lanes 0-7 of <16 x i8> %x, i.e. the
         ; low half of the input. The only pattern checked after the label is "ret";
         ; it does not by itself forbid other instructions before the return.
         ; NOTE(review): presumably the intent is that no lane copies are emitted at
         ; all - confirm, and consider a stricter check if so.
     884 ; CHECK-LABEL: getl:
     885 ; CHECK: ret
     886   %vecext = extractelement <16 x i8> %x, i32 0
     887   %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
     888   %vecext1 = extractelement <16 x i8> %x, i32 1
     889   %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
     890   %vecext3 = extractelement <16 x i8> %x, i32 2
     891   %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
     892   %vecext5 = extractelement <16 x i8> %x, i32 3
     893   %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
     894   %vecext7 = extractelement <16 x i8> %x, i32 4
     895   %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
     896   %vecext9 = extractelement <16 x i8> %x, i32 5
     897   %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
     898   %vecext11 = extractelement <16 x i8> %x, i32 6
     899   %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
     900   %vecext13 = extractelement <16 x i8> %x, i32 7
     901   %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
     902   ret <8 x i8> %vecinit14
     903 }
    904 
     905 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
     906 ; CHECK: str q0
     907 ; CHECK: add x[[PTR:[0-9]+]], {{.*}}, w0, sxtw #1
     908 ; CHECK-DAG: ld1 { v[[R:[0-9]+]].h }[0], [x[[PTR]]]
     909 ; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
     910 ; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
     911 ; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
     912 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) {
         ; Builds a <4 x i16> whose lane 0 is read from %x at the runtime index %idx and
         ; whose lanes 1-3 are copied from the constant lanes 1-3 of %x. The patterns
         ; above expect q0 to be stored, an address formed from w0 sign-extended and
         ; shifted left by 1, a single-lane ld1 into lane 0 from that address, and
         ; three ins instructions (in any order) for lanes 1-3.
     913   %tmp = extractelement <8 x i16> %x, i32 %idx
     914   %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
     915   %tmp3 = extractelement <8 x i16> %x, i32 1
     916   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
     917   %tmp5 = extractelement <8 x i16> %x, i32 2
     918   %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
     919   %tmp7 = extractelement <8 x i16> %x, i32 3
     920   %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
     921   ret <4 x i16> %tmp8
     922 }
    923 
     924 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
     925 ; CHECK: str h0, [{{.*}}, w0, sxtw #1]
     926 ; CHECK-DAG: ldr d[[R:[0-9]+]]
     927 ; CHECK-DAG: ins v[[R]].h[1], v0.h[1]
     928 ; CHECK-DAG: ins v[[R]].h[2], v0.h[2]
     929 ; CHECK-DAG: ins v[[R]].h[3], v0.h[3]
     930 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) {
         ; Inserts lane 0 of %x into an undef <4 x i16> at the runtime index %idx, then
         ; overwrites lanes 1-3 with the constant lanes 1-3 of %x. The patterns above
         ; expect h0 to be stored at an address indexed by w0 (sign-extended, shifted
         ; left by 1), the vector to be reloaded with ldr into a d register, and three
         ; ins instructions (in any order) for lanes 1-3.
     931   %tmp = extractelement <8 x i16> %x, i32 0
     932   %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx
     933   %tmp3 = extractelement <8 x i16> %x, i32 1
     934   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
     935   %tmp5 = extractelement <8 x i16> %x, i32 2
     936   %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
     937   %tmp7 = extractelement <8 x i16> %x, i32 3
     938   %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
     939   ret <4 x i16> %tmp8
     940 }
    941 
     942 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
         ; Truncates lane 1 of <2 x i32> %a to i16 and inserts it into all four lanes of
         ; a <4 x i16>. Expects one dup from halfword lane 2 of v0, i.e. the extract,
         ; trunc and inserts are selected as a single lane-indexed dup.
     943 ; CHECK-LABEL: test_dup_v2i32_v4i16:
     944 ; CHECK: dup v0.4h, v0.h[2]
     945 entry:
     946   %x = extractelement <2 x i32> %a, i32 1
     947   %vget_lane = trunc i32 %x to i16
     948   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
     949   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
     950   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     951   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
     952   ret <4 x i16> %vecinit3.i
     953 }
    954 
     955 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
         ; Truncates lane 3 of <4 x i32> %a to i16 and inserts it into all eight lanes
         ; of an <8 x i16>. Expects one dup from halfword lane 6 of v0.
     956 ; CHECK-LABEL: test_dup_v4i32_v8i16:
     957 ; CHECK: dup v0.8h, v0.h[6]
     958 entry:
     959   %x = extractelement <4 x i32> %a, i32 3
     960   %vget_lane = trunc i32 %x to i16
     961   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
     962   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
     963   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     964   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
     965   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
     966   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
     967   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
     968   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
     969   ret <8 x i16> %vecinit7.i
     970 }
    971 
     972 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
         ; Truncates the single element of <1 x i64> %a to i16 and inserts it into all
         ; four lanes of a <4 x i16>. Expects one dup from halfword lane 0 of v0.
     973 ; CHECK-LABEL: test_dup_v1i64_v4i16:
     974 ; CHECK: dup v0.4h, v0.h[0]
     975 entry:
     976   %x = extractelement <1 x i64> %a, i32 0
     977   %vget_lane = trunc i64 %x to i16
     978   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
     979   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
     980   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     981   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
     982   ret <4 x i16> %vecinit3.i
     983 }
    984 
     985 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
         ; Truncates the single element of <1 x i64> %a to i32 and inserts it into both
         ; lanes of a <2 x i32>. Expects one dup from word lane 0 of v0.
     986 ; CHECK-LABEL: test_dup_v1i64_v2i32:
     987 ; CHECK: dup v0.2s, v0.s[0]
     988 entry:
     989   %x = extractelement <1 x i64> %a, i32 0
     990   %vget_lane = trunc i64 %x to i32
     991   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
     992   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
     993   ret <2 x i32> %vecinit1.i
     994 }
    995 
     996 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
         ; Truncates lane 1 of <2 x i64> %a to i16 and inserts it into all eight lanes
         ; of an <8 x i16>. Expects one dup from halfword lane 4 of v0.
     997 ; CHECK-LABEL: test_dup_v2i64_v8i16:
     998 ; CHECK: dup v0.8h, v0.h[4]
     999 entry:
    1000   %x = extractelement <2 x i64> %a, i32 1
    1001   %vget_lane = trunc i64 %x to i16
    1002   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
    1003   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
    1004   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    1005   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    1006   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
    1007   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
    1008   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
    1009   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
    1010   ret <8 x i16> %vecinit7.i
    1011 }
   1012 
    1013 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
         ; Truncates lane 1 of <2 x i64> %a to i32 and inserts it into all four lanes of
         ; a <4 x i32>. Expects one dup from word lane 2 of v0.
    1014 ; CHECK-LABEL: test_dup_v2i64_v4i32:
    1015 ; CHECK: dup v0.4s, v0.s[2]
    1016 entry:
    1017   %x = extractelement <2 x i64> %a, i32 1
    1018   %vget_lane = trunc i64 %x to i32
    1019   %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
    1020   %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
    1021   %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
    1022   %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
    1023   ret <4 x i32> %vecinit3.i
    1024 }
   1025 
    1026 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
         ; Truncates lane 1 of the 128-bit <4 x i32> %a to i16 and inserts it into all
         ; four lanes of a 64-bit <4 x i16>. Expects one dup from halfword lane 2 of v0.
    1027 ; CHECK-LABEL: test_dup_v4i32_v4i16:
    1028 ; CHECK: dup v0.4h, v0.h[2]
    1029 entry:
    1030   %x = extractelement <4 x i32> %a, i32 1
    1031   %vget_lane = trunc i32 %x to i16
    1032   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
    1033   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    1034   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    1035   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    1036   ret <4 x i16> %vecinit3.i
    1037 }
   1038 
    1039 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
         ; Truncates lane 0 of the 128-bit <2 x i64> %a to i16 and inserts it into all
         ; four lanes of a 64-bit <4 x i16>. Expects one dup from halfword lane 0 of v0.
    1040 ; CHECK-LABEL: test_dup_v2i64_v4i16:
    1041 ; CHECK: dup v0.4h, v0.h[0]
    1042 entry:
    1043   %x = extractelement <2 x i64> %a, i32 0
    1044   %vget_lane = trunc i64 %x to i16
    1045   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
    1046   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    1047   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    1048   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    1049   ret <4 x i16> %vecinit3.i
    1050 }
   1051 
    1052 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
         ; Truncates lane 0 of the 128-bit <2 x i64> %a to i32 and inserts it into both
         ; lanes of a 64-bit <2 x i32>. Expects one dup from word lane 0 of v0.
    1053 ; CHECK-LABEL: test_dup_v2i64_v2i32:
    1054 ; CHECK: dup v0.2s, v0.s[0]
    1055 entry:
    1056   %x = extractelement <2 x i64> %a, i32 0
    1057   %vget_lane = trunc i64 %x to i32
    1058   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
    1059   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
    1060   ret <2 x i32> %vecinit1.i
    1061 }
   1062 
   1063 
    1064 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
         ; Calls the fmaxv intrinsic on %a, round-trips the float result through a
         ; <1 x float> (insert then extract) and places it in lane 0 of an undef
         ; <2 x float>. Expects fmaxp into an s register directly followed by ret,
         ; i.e. no extra instruction to move the scalar into the vector.
    1065 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
    1066 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
    1067 ; CHECK-NEXT: ret
    1068 entry:
    1069   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
    1070   %1 = insertelement <1 x float> undef, float %0, i32 0
    1071   %2 = extractelement <1 x float> %1, i32 0
    1072   %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
    1073   ret <2 x float> %vecinit1.i
    1074 }
   1075 
    1076 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
         ; Calls the fmaxv intrinsic on %a, round-trips the float result through a
         ; <1 x float> (insert then extract) and places it in lane 0 of an undef
         ; <4 x float>. Expects fmaxp into an s register directly followed by ret,
         ; i.e. no extra instruction to move the scalar into the vector.
    1077 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
    1078 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
    1079 ; CHECK-NEXT: ret
    1080 entry:
    1081   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
    1082   %1 = insertelement <1 x float> undef, float %0, i32 0
    1083   %2 = extractelement <1 x float> %1, i32 0
    1084   %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
    1085   ret <4 x float> %vecinit1.i
    1086 }
   1087 
   1088 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
   1089 
    1090 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
         ; Takes lane 0 of %a and inserts it into lane 1 of an undef <2 x i32>; lane 0
         ; of the result stays undef. Expects a dup of word lane 0 across .2s.
    1091 ; CHECK-LABEL: test_concat_undef_v1i32:
    1092 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
    1093 entry:
    1094   %0 = extractelement <2 x i32> %a, i32 0
    1095   %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
    1096   ret <2 x i32> %vecinit1.i
    1097 }
   1098 
   1099 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
   1100 
    1101 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
         ; Applies the scalar sqabs intrinsic to %a and places the result in lane 0 of
         ; an undef <2 x i32>; lane 1 stays undef. Expects sqabs on s registers
         ; directly followed by ret.
    1102 ; CHECK-LABEL: test_concat_v1i32_undef:
    1103 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
    1104 ; CHECK-NEXT: ret
    1105 entry:
    1106   %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
    1107   %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
    1108   ret <2 x i32> %vecinit.i432
    1109 }
   1110 
    1111 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
         ; Takes lane 0 of %a and inserts it into both lanes of a <2 x i32>. Expects a
         ; dup of word lane 0 across .2s.
    1112 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
    1113 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
    1114 entry:
    1115   %0 = extractelement <2 x i32> %a, i32 0
    1116   %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
    1117   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
    1118   ret <2 x i32> %vecinit1.i
    1119 }
   1120 
    1121 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
         ; Applies the scalar sqabs intrinsic to %a and to %b, puts each result in lane
         ; 0 of its own undef <2 x i32>, then combines them with a shufflevector whose
         ; mask <0, 2> selects lane 0 of each operand. Expects two sqabs instructions
         ; and an ins into word lane 1 from a w register.
    1122 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
    1123 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
    1124 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
    1125 ; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
    1126 entry:
    1127   %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
    1128   %d = insertelement <2 x i32> undef, i32 %c, i32 0
    1129   %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
    1130   %f = insertelement <2 x i32> undef, i32 %e, i32 0
    1131   %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
    1132   ret <2 x i32> %h
    1133 }
   1134 
    1135 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
         ; A single shufflevector takes lanes 0-7 of %x (mask 0-7) followed by lanes 0-7
         ; of %y (mask 16-23), i.e. the low 64 bits of each input. Expects one ins that
         ; copies doubleword lane 0 into doubleword lane 1.
    1136 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
    1137 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1138 entry:
    1139   %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
    1140   ret <16 x i8> %vecinit30
    1141 }
   1142 
    1143 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
         ; Copies the eight lanes of the 64-bit %x one by one into lanes 0-7 of a
         ; <16 x i8>, then a shufflevector keeps those lanes and appends lanes 0-7 of
         ; %y (mask 16-23). Expects one ins that copies doubleword lane 0 into
         ; doubleword lane 1.
    1144 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
    1145 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1146 entry:
    1147   %vecext = extractelement <8 x i8> %x, i32 0
    1148   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
    1149   %vecext1 = extractelement <8 x i8> %x, i32 1
    1150   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
    1151   %vecext3 = extractelement <8 x i8> %x, i32 2
    1152   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
    1153   %vecext5 = extractelement <8 x i8> %x, i32 3
    1154   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
    1155   %vecext7 = extractelement <8 x i8> %x, i32 4
    1156   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
    1157   %vecext9 = extractelement <8 x i8> %x, i32 5
    1158   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
    1159   %vecext11 = extractelement <8 x i8> %x, i32 6
    1160   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
    1161   %vecext13 = extractelement <8 x i8> %x, i32 7
    1162   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
    1163   %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
    1164   ret <16 x i8> %vecinit30
    1165 }
   1166 
    1167 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
         ; Builds the result purely from extractelement/insertelement pairs: lanes 0-7
         ; come from lanes 0-7 of the 128-bit %x and lanes 8-15 from lanes 0-7 of the
         ; 64-bit %y. Expects the sixteen inserts to become one ins that copies
         ; doubleword lane 0 into doubleword lane 1.
    1168 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
    1169 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1170 entry:
    1171   %vecext = extractelement <16 x i8> %x, i32 0
    1172   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
    1173   %vecext1 = extractelement <16 x i8> %x, i32 1
    1174   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
    1175   %vecext3 = extractelement <16 x i8> %x, i32 2
    1176   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
    1177   %vecext5 = extractelement <16 x i8> %x, i32 3
    1178   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
    1179   %vecext7 = extractelement <16 x i8> %x, i32 4
    1180   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
    1181   %vecext9 = extractelement <16 x i8> %x, i32 5
    1182   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
    1183   %vecext11 = extractelement <16 x i8> %x, i32 6
    1184   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
    1185   %vecext13 = extractelement <16 x i8> %x, i32 7
    1186   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
    1187   %vecext15 = extractelement <8 x i8> %y, i32 0
    1188   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
    1189   %vecext17 = extractelement <8 x i8> %y, i32 1
    1190   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
    1191   %vecext19 = extractelement <8 x i8> %y, i32 2
    1192   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
    1193   %vecext21 = extractelement <8 x i8> %y, i32 3
    1194   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
    1195   %vecext23 = extractelement <8 x i8> %y, i32 4
    1196   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
    1197   %vecext25 = extractelement <8 x i8> %y, i32 5
    1198   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
    1199   %vecext27 = extractelement <8 x i8> %y, i32 6
    1200   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
    1201   %vecext29 = extractelement <8 x i8> %y, i32 7
    1202   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
    1203   ret <16 x i8> %vecinit30
    1204 }
   1205 
    1206 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
         ; Builds the result purely from extractelement/insertelement pairs: lanes 0-7
         ; come from the eight lanes of %x and lanes 8-15 from the eight lanes of %y
         ; (both 64-bit inputs). Expects the sixteen inserts to become one ins that
         ; copies doubleword lane 0 into doubleword lane 1.
    1207 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
    1208 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1209 entry:
    1210   %vecext = extractelement <8 x i8> %x, i32 0
    1211   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
    1212   %vecext1 = extractelement <8 x i8> %x, i32 1
    1213   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
    1214   %vecext3 = extractelement <8 x i8> %x, i32 2
    1215   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
    1216   %vecext5 = extractelement <8 x i8> %x, i32 3
    1217   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
    1218   %vecext7 = extractelement <8 x i8> %x, i32 4
    1219   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
    1220   %vecext9 = extractelement <8 x i8> %x, i32 5
    1221   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
    1222   %vecext11 = extractelement <8 x i8> %x, i32 6
    1223   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
    1224   %vecext13 = extractelement <8 x i8> %x, i32 7
    1225   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
    1226   %vecext15 = extractelement <8 x i8> %y, i32 0
    1227   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
    1228   %vecext17 = extractelement <8 x i8> %y, i32 1
    1229   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
    1230   %vecext19 = extractelement <8 x i8> %y, i32 2
    1231   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
    1232   %vecext21 = extractelement <8 x i8> %y, i32 3
    1233   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
    1234   %vecext23 = extractelement <8 x i8> %y, i32 4
    1235   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
    1236   %vecext25 = extractelement <8 x i8> %y, i32 5
    1237   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
    1238   %vecext27 = extractelement <8 x i8> %y, i32 6
    1239   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
    1240   %vecext29 = extractelement <8 x i8> %y, i32 7
    1241   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
    1242   ret <16 x i8> %vecinit30
    1243 }
   1244 
    1245 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
         ; A single shufflevector takes lanes 0-3 of %x (mask 0-3) followed by lanes 0-3
         ; of %y (mask 8-11), i.e. the low 64 bits of each input. Expects one ins that
         ; copies doubleword lane 0 into doubleword lane 1.
    1246 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
    1247 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1248 entry:
    1249   %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    1250   ret <8 x i16> %vecinit14
    1251 }
   1252 
    1253 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
         ; Copies the four lanes of the 64-bit %x one by one into lanes 0-3 of an
         ; <8 x i16>, then a shufflevector keeps those lanes and appends lanes 0-3 of
         ; %y (mask 8-11). Expects one ins that copies doubleword lane 0 into
         ; doubleword lane 1.
    1254 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
    1255 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1256 entry:
    1257   %vecext = extractelement <4 x i16> %x, i32 0
    1258   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
    1259   %vecext1 = extractelement <4 x i16> %x, i32 1
    1260   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
    1261   %vecext3 = extractelement <4 x i16> %x, i32 2
    1262   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
    1263   %vecext5 = extractelement <4 x i16> %x, i32 3
    1264   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
    1265   %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    1266   ret <8 x i16> %vecinit14
    1267 }
   1268 
    1269 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
         ; Builds the result purely from extractelement/insertelement pairs: lanes 0-3
         ; come from lanes 0-3 of the 128-bit %x and lanes 4-7 from the four lanes of
         ; the 64-bit %y. Expects one ins that copies doubleword lane 0 into doubleword
         ; lane 1.
    1270 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
    1271 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1272 entry:
    1273   %vecext = extractelement <8 x i16> %x, i32 0
    1274   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
    1275   %vecext1 = extractelement <8 x i16> %x, i32 1
    1276   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
    1277   %vecext3 = extractelement <8 x i16> %x, i32 2
    1278   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
    1279   %vecext5 = extractelement <8 x i16> %x, i32 3
    1280   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
    1281   %vecext7 = extractelement <4 x i16> %y, i32 0
    1282   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
    1283   %vecext9 = extractelement <4 x i16> %y, i32 1
    1284   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
    1285   %vecext11 = extractelement <4 x i16> %y, i32 2
    1286   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
    1287   %vecext13 = extractelement <4 x i16> %y, i32 3
    1288   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
    1289   ret <8 x i16> %vecinit14
    1290 }
   1291 
    1292 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
         ; Builds the result purely from extractelement/insertelement pairs: lanes 0-3
         ; come from the four lanes of %x and lanes 4-7 from the four lanes of %y (both
         ; 64-bit inputs). Expects one ins that copies doubleword lane 0 into
         ; doubleword lane 1.
    1293 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
    1294 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1295 entry:
    1296   %vecext = extractelement <4 x i16> %x, i32 0
    1297   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
    1298   %vecext1 = extractelement <4 x i16> %x, i32 1
    1299   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
    1300   %vecext3 = extractelement <4 x i16> %x, i32 2
    1301   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
    1302   %vecext5 = extractelement <4 x i16> %x, i32 3
    1303   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
    1304   %vecext7 = extractelement <4 x i16> %y, i32 0
    1305   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
    1306   %vecext9 = extractelement <4 x i16> %y, i32 1
    1307   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
    1308   %vecext11 = extractelement <4 x i16> %y, i32 2
    1309   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
    1310   %vecext13 = extractelement <4 x i16> %y, i32 3
    1311   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
    1312   ret <8 x i16> %vecinit14
    1313 }
   1314 
    1315 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
         ; A single shufflevector takes lanes 0-1 of %x (mask 0, 1) followed by lanes
         ; 0-1 of %y (mask 4, 5), i.e. the low 64 bits of each input. Expects one ins
         ; that copies doubleword lane 0 into doubleword lane 1.
    1316 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
    1317 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1318 entry:
    1319   %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    1320   ret <4 x i32> %vecinit6
    1321 }
   1322 
    1323 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
         ; Copies both lanes of the 64-bit %x into lanes 0-1 of a <4 x i32>, then a
         ; shufflevector keeps those lanes and appends lanes 0-1 of %y (mask 4, 5).
         ; Expects one ins that copies doubleword lane 0 into doubleword lane 1.
    1324 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
    1325 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1326 entry:
    1327   %vecext = extractelement <2 x i32> %x, i32 0
    1328   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
    1329   %vecext1 = extractelement <2 x i32> %x, i32 1
    1330   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
    1331   %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    1332   ret <4 x i32> %vecinit6
    1333 }
   1334 
    1335 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
         ; Builds the result purely from extractelement/insertelement pairs: lanes 0-1
         ; come from lanes 0-1 of the 128-bit %x and lanes 2-3 from both lanes of the
         ; 64-bit %y. Expects one ins that copies doubleword lane 0 into doubleword
         ; lane 1.
    1336 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
    1337 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1338 entry:
    1339   %vecext = extractelement <4 x i32> %x, i32 0
    1340   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
    1341   %vecext1 = extractelement <4 x i32> %x, i32 1
    1342   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
    1343   %vecext3 = extractelement <2 x i32> %y, i32 0
    1344   %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
    1345   %vecext5 = extractelement <2 x i32> %y, i32 1
    1346   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
    1347   ret <4 x i32> %vecinit6
    1348 }
   1349 
    1350 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
         ; A widening shufflevector with the identity mask <0, 1, 2, 3> concatenates
         ; the two 64-bit inputs %x and %y into one <4 x i32>. Expects one ins that
         ; copies doubleword lane 0 into doubleword lane 1.
    1351 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
    1352 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1353 entry:
    1354   %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    1355   ret <4 x i32> %vecinit6
    1356 }
   1357 
    1358 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
         ; A single shufflevector with mask <0, 2> takes lane 0 of %x and lane 0 of %y.
         ; Unlike the narrower-element concat tests in this file, the expected
         ; instruction here is zip1 on .2d rather than ins.
    1359 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
    1360 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    1361 entry:
    1362   %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
    1363   ret <2 x i64> %vecinit2
    1364 }
   1365 
    1366 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
         ; Puts the single element of %x in lane 0 of a <2 x i64>, then a shufflevector
         ; with mask <0, 2> keeps that lane and appends lane 0 of %y. Expects zip1 on
         ; .2d.
    1367 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
    1368 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    1369 entry:
    1370   %vecext = extractelement <1 x i64> %x, i32 0
    1371   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
    1372   %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
    1373   ret <2 x i64> %vecinit2
    1374 }
   1375 
    1376 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
         ; Builds the result from extractelement/insertelement pairs: lane 0 comes from
         ; lane 0 of %x and lane 1 from the single element of %y. Expects one ins that
         ; copies doubleword lane 0 into doubleword lane 1.
    1377 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
    1378 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1379 entry:
    1380   %vecext = extractelement <2 x i64> %x, i32 0
    1381   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
    1382   %vecext1 = extractelement <1 x i64> %y, i32 0
    1383   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
    1384   ret <2 x i64> %vecinit2
    1385 }
   1386 
    1387 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
         ; Builds the result from extractelement/insertelement pairs: lane 0 is the
         ; single element of %x and lane 1 the single element of %y. Expects one ins
         ; that copies doubleword lane 0 into doubleword lane 1.
    1388 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
    1389 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1390 entry:
    1391   %vecext = extractelement <1 x i64> %x, i32 0
    1392   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
    1393   %vecext1 = extractelement <1 x i64> %y, i32 0
    1394   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
    1395   ret <2 x i64> %vecinit2
    1396 }
   1397 
   1398 
   1399 define <4 x i16> @concat_vector_v4i16_const() {
        ; Splats a constant-zero <1 x i16> across 4 lanes (every mask index is 0),
        ; so the result is an all-zero <4 x i16>. The expected instruction is a
        ; movi of #0 into a d register.
   1400 ; CHECK-LABEL: concat_vector_v4i16_const:
   1401 ; CHECK: movi {{d[0-9]+}}, #0
   1402  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
   1403  ret <4 x i16> %r
   1404 }
   1405 
   1406 define <4 x i16> @concat_vector_v4i16_const_one() {
        ; Splats the constant <i16 1> across 4 lanes (every mask index is 0).
        ; A non-zero splat needs an element-sized immediate, so the expected
        ; instruction is a movi of #1 with a .4h arrangement.
   1407 ; CHECK-LABEL: concat_vector_v4i16_const_one:
   1408 ; CHECK: movi {{v[0-9]+}}.4h, #1
   1409  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
   1410  ret <4 x i16> %r
   1411 }
   1412 
   1413 define <4 x i32> @concat_vector_v4i32_const() {
        ; Splats a constant-zero <1 x i32> across 4 lanes (every mask index is 0),
        ; giving an all-zero 128-bit result. The expected instruction is a movi
        ; of #0 with a .2d arrangement.
   1414 ; CHECK-LABEL: concat_vector_v4i32_const:
   1415 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1416  %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
   1417  ret <4 x i32> %r
   1418 }
   1419 
   1420 define <8 x i8> @concat_vector_v8i8_const() {
        ; Splats a constant-zero <1 x i8> across 8 lanes (every mask index is 0),
        ; giving an all-zero 64-bit result. The expected instruction is a movi
        ; of #0 into a d register.
   1421 ; CHECK-LABEL: concat_vector_v8i8_const:
   1422 ; CHECK: movi {{d[0-9]+}}, #0
   1423  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
   1424  ret <8 x i8> %r
   1425 }
   1426 
   1427 define <8 x i16> @concat_vector_v8i16_const() {
        ; Splats a constant-zero <1 x i16> across 8 lanes (every mask index is 0),
        ; giving an all-zero 128-bit result. The expected instruction is a movi
        ; of #0 with a .2d arrangement.
   1428 ; CHECK-LABEL: concat_vector_v8i16_const:
   1429 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1430  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
   1431  ret <8 x i16> %r
   1432 }
   1433 
   1434 define <8 x i16> @concat_vector_v8i16_const_one() {
        ; Splats the constant <i16 1> across 8 lanes (every mask index is 0).
        ; The expected instruction is a movi of #1 with a .8h arrangement.
   1435 ; CHECK-LABEL: concat_vector_v8i16_const_one:
   1436 ; CHECK: movi {{v[0-9]+}}.8h, #1
   1437  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
   1438  ret <8 x i16> %r
   1439 }
   1440 
   1441 define <16 x i8> @concat_vector_v16i8_const() {
        ; Splats a constant-zero <1 x i8> across 16 lanes (every mask index is 0),
        ; giving an all-zero 128-bit result. The expected instruction is a movi
        ; of #0 with a .2d arrangement.
   1442 ; CHECK-LABEL: concat_vector_v16i8_const:
   1443 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1444  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
   1445  ret <16 x i8> %r
   1446 }
   1447 
   1448 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
        ; Splats the only element of the non-constant <1 x i16> %a across 4 lanes
        ; (every mask index is 0). The expected instruction is a dup from lane
        ; h[0] with a .4h arrangement.
        ; NOTE(review): the pattern hard-codes v0 for both operands instead of
        ; using a register regex -- presumably the argument and the result both
        ; live in v0 under the calling convention; confirm before changing.
   1449 ; CHECK-LABEL: concat_vector_v4i16:
   1450 ; CHECK: dup v0.4h, v0.h[0]
   1451  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
   1452  ret <4 x i16> %r
   1453 }
   1454 
   1455 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
        ; Splats the only element of the non-constant <1 x i32> %a across 4 lanes
        ; (every mask index is 0). The expected instruction is a dup from lane
        ; s[0] with a .4s arrangement.
        ; NOTE(review): the pattern hard-codes v0 for both operands instead of
        ; using a register regex -- presumably the argument and the result both
        ; live in v0 under the calling convention; confirm before changing.
   1456 ; CHECK-LABEL: concat_vector_v4i32:
   1457 ; CHECK: dup v0.4s, v0.s[0]
   1458  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
   1459  ret <4 x i32> %r
   1460 }
   1461 
   1462 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
        ; Splats the only element of the non-constant <1 x i8> %a across 8 lanes
        ; (every mask index is 0). The expected instruction is a dup from lane
        ; b[0] with a .8b arrangement.
        ; NOTE(review): the pattern hard-codes v0 for both operands instead of
        ; using a register regex -- presumably the argument and the result both
        ; live in v0 under the calling convention; confirm before changing.
   1463 ; CHECK-LABEL: concat_vector_v8i8:
   1464 ; CHECK: dup v0.8b, v0.b[0]
   1465  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
   1466  ret <8 x i8> %r
   1467 }
   1468 
   1469 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
        ; Splats the only element of the non-constant <1 x i16> %a across 8 lanes
        ; (every mask index is 0). The expected instruction is a dup from lane
        ; h[0] with a .8h arrangement.
        ; NOTE(review): the pattern hard-codes v0 for both operands instead of
        ; using a register regex -- presumably the argument and the result both
        ; live in v0 under the calling convention; confirm before changing.
   1470 ; CHECK-LABEL: concat_vector_v8i16:
   1471 ; CHECK: dup v0.8h, v0.h[0]
   1472  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
   1473  ret <8 x i16> %r
   1474 }
   1475 
   1476 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
        ; Splats the only element of the non-constant <1 x i8> %a across 16 lanes
        ; (every mask index is 0). The expected instruction is a dup from lane
        ; b[0] with a .16b arrangement.
        ; NOTE(review): the pattern hard-codes v0 for both operands instead of
        ; using a register regex -- presumably the argument and the result both
        ; live in v0 under the calling convention; confirm before changing.
   1477 ; CHECK-LABEL: concat_vector_v16i8:
   1478 ; CHECK: dup v0.16b, v0.b[0]
   1479  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
   1480  ret <16 x i8> %r
   1481 }
   1482