Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
      2 
      3 
      4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
        ; Inserts scalar i8 %tmp2 into lane 15 of <16 x i8> %tmp1; expects a lane mov from a w register.
      5 ; CHECK-LABEL: ins16bw:
      6 ; CHECK: mov {{v[0-9]+}}.b[15], {{w[0-9]+}}
      7   %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
      8   ret <16 x i8> %tmp3
      9 }
     10 
     11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
        ; Inserts scalar i16 %tmp2 into lane 6 of <8 x i16> %tmp1; expects a lane mov from a w register.
     12 ; CHECK-LABEL: ins8hw:
     13 ; CHECK: mov {{v[0-9]+}}.h[6], {{w[0-9]+}}
     14   %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
     15   ret <8 x i16> %tmp3
     16 }
     17 
     18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
        ; Inserts scalar i32 %tmp2 into lane 2 of <4 x i32> %tmp1; expects a lane mov from a w register.
     19 ; CHECK-LABEL: ins4sw:
     20 ; CHECK: mov {{v[0-9]+}}.s[2], {{w[0-9]+}}
     21   %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
     22   ret <4 x i32> %tmp3
     23 }
     24 
     25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
        ; Inserts scalar i64 %tmp2 into lane 1 of <2 x i64> %tmp1; expects a lane mov from an x register.
     26 ; CHECK-LABEL: ins2dw:
     27 ; CHECK: mov {{v[0-9]+}}.d[1], {{x[0-9]+}}
     28   %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
     29   ret <2 x i64> %tmp3
     30 }
     31 
     32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
        ; Inserts scalar i8 %tmp2 into lane 5 of the 64-bit vector <8 x i8> %tmp1; expects a lane mov from a w register.
     33 ; CHECK-LABEL: ins8bw:
     34 ; CHECK: mov {{v[0-9]+}}.b[5], {{w[0-9]+}}
     35   %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
     36   ret <8 x i8> %tmp3
     37 }
     38 
     39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
        ; Inserts scalar i16 %tmp2 into lane 3 of the 64-bit vector <4 x i16> %tmp1; expects a lane mov from a w register.
     40 ; CHECK-LABEL: ins4hw:
     41 ; CHECK: mov {{v[0-9]+}}.h[3], {{w[0-9]+}}
     42   %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
     43   ret <4 x i16> %tmp3
     44 }
     45 
     46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
        ; Inserts scalar i32 %tmp2 into lane 1 of the 64-bit vector <2 x i32> %tmp1; expects a lane mov from a w register.
     47 ; CHECK-LABEL: ins2sw:
     48 ; CHECK: mov {{v[0-9]+}}.s[1], {{w[0-9]+}}
     49   %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
     50   ret <2 x i32> %tmp3
     51 }
     52 
     53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
        ; Copies lane 2 of %tmp1 into lane 15 of %tmp2 (both <16 x i8>); expects an element-to-element mov.
     54 ; CHECK-LABEL: ins16b16:
     55 ; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
     56   %tmp3 = extractelement <16 x i8> %tmp1, i32 2
     57   %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
     58   ret <16 x i8> %tmp4
     59 }
     60 
     61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
        ; Copies lane 2 of %tmp1 into lane 7 of %tmp2 (both <8 x i16>); expects an element-to-element mov.
     62 ; CHECK-LABEL: ins8h8:
     63 ; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
     64   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
     65   %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
     66   ret <8 x i16> %tmp4
     67 }
     68 
     69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
        ; Copies lane 2 of %tmp1 into lane 1 of %tmp2 (both <4 x i32>); expects an element-to-element mov.
     70 ; CHECK-LABEL: ins4s4:
     71 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     72   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
     73   %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
     74   ret <4 x i32> %tmp4
     75 }
     76 
     77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
        ; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x i64>); expects an element-to-element mov.
     78 ; CHECK-LABEL: ins2d2:
     79 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     80   %tmp3 = extractelement <2 x i64> %tmp1, i32 0
     81   %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
     82   ret <2 x i64> %tmp4
     83 }
     84 
     85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
        ; Copies float lane 2 of %tmp1 into lane 1 of %tmp2 (both <4 x float>); expects an element-to-element mov on .s lanes.
     86 ; CHECK-LABEL: ins4f4:
     87 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     88   %tmp3 = extractelement <4 x float> %tmp1, i32 2
     89   %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
     90   ret <4 x float> %tmp4
     91 }
     92 
     93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
        ; Copies double lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x double>); expects an element-to-element mov on .d lanes.
     94 ; CHECK-LABEL: ins2df2:
     95 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     96   %tmp3 = extractelement <2 x double> %tmp1, i32 0
     97   %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
     98   ret <2 x double> %tmp4
     99 }
    100 
    101 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
        ; Copies lane 2 of the 64-bit <8 x i8> %tmp1 into lane 15 of the 128-bit <16 x i8> %tmp2; expects an element mov.
    102 ; CHECK-LABEL: ins8b16:
    103 ; CHECK: mov {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
    104   %tmp3 = extractelement <8 x i8> %tmp1, i32 2
    105   %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15
    106   ret <16 x i8> %tmp4
    107 }
    108 
    109 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
        ; Copies lane 2 of the 64-bit <4 x i16> %tmp1 into lane 7 of the 128-bit <8 x i16> %tmp2; expects an element mov.
    110 ; CHECK-LABEL: ins4h8:
    111 ; CHECK: mov {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
    112   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    113   %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7
    114   ret <8 x i16> %tmp4
    115 }
    116 
    117 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
        ; Copies lane 1 of the 64-bit <2 x i32> %tmp1 into lane 1 of the 128-bit <4 x i32> %tmp2; expects an element mov.
    118 ; CHECK-LABEL: ins2s4:
    119 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    120   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
    121   %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1
    122   ret <4 x i32> %tmp4
    123 }
    124 
    125 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
        ; Copies the single lane of <1 x i64> %tmp1 into lane 1 of <2 x i64> %tmp2; expects an element mov on .d lanes.
    126 ; CHECK-LABEL: ins1d2:
    127 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    128   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
    129   %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1
    130   ret <2 x i64> %tmp4
    131 }
    132 
    133 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
        ; Copies float lane 1 of <2 x float> %tmp1 into lane 1 of <4 x float> %tmp2; expects an element mov on .s lanes.
    134 ; CHECK-LABEL: ins2f4:
    135 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    136   %tmp3 = extractelement <2 x float> %tmp1, i32 1
    137   %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1
    138   ret <4 x float> %tmp4
    139 }
    140 
    141 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
        ; Places the single lane of <1 x double> %tmp1 into lane 1 of <2 x double> %tmp2; here a zip1 on .2d operands is expected rather than a lane mov.
    142 ; CHECK-LABEL: ins1f2:
    143 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    144   %tmp3 = extractelement <1 x double> %tmp1, i32 0
    145   %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1
    146   ret <2 x double> %tmp4
    147 }
    148 
    149 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
        ; Copies lane 2 of the 128-bit <16 x i8> %tmp1 into lane 7 of the 64-bit <8 x i8> %tmp2; expects an element mov.
    150 ; CHECK-LABEL: ins16b8:
    151 ; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
    152   %tmp3 = extractelement <16 x i8> %tmp1, i32 2
    153   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7
    154   ret <8 x i8> %tmp4
    155 }
    156 
    157 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
        ; Copies lane 2 of the 128-bit <8 x i16> %tmp1 into lane 3 of the 64-bit <4 x i16> %tmp2; expects an element mov.
    158 ; CHECK-LABEL: ins8h4:
    159 ; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    160   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    161   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
    162   ret <4 x i16> %tmp4
    163 }
    164 
    165 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
        ; Copies lane 2 of the 128-bit <4 x i32> %tmp1 into lane 1 of the 64-bit <2 x i32> %tmp2; expects an element mov.
    166 ; CHECK-LABEL: ins4s2:
    167 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    168   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
    169   %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
    170   ret <2 x i32> %tmp4
    171 }
    172 
    173 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
        ; Copies lane 0 of <2 x i64> %tmp1 into the single lane of <1 x i64> %tmp2; expects an element mov on .d[0].
    174 ; CHECK-LABEL: ins2d1:
    175 ; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    176   %tmp3 = extractelement <2 x i64> %tmp1, i32 0
    177   %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
    178   ret <1 x i64> %tmp4
    179 }
    180 
    181 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
        ; Copies float lane 2 of <4 x float> %tmp1 into lane 1 of <2 x float> %tmp2; expects an element mov on .s lanes.
    182 ; CHECK-LABEL: ins4f2:
    183 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    184   %tmp3 = extractelement <4 x float> %tmp1, i32 2
    185   %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
    186   ret <2 x float> %tmp4
    187 }
    188 
    189 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
        ; Places lane 1 of <2 x double> %tmp1 into the single lane of <1 x double> %tmp2; here a dup from .d[1] is expected rather than a lane mov.
    190 ; CHECK-LABEL: ins2f1:
    191 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1]
    192   %tmp3 = extractelement <2 x double> %tmp1, i32 1
    193   %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
    194   ret <1 x double> %tmp4
    195 }
    196 
    197 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
        ; Copies lane 2 of %tmp1 into lane 4 of %tmp2 (both 64-bit <8 x i8>); expects an element-to-element mov.
    198 ; CHECK-LABEL: ins8b8:
    199 ; CHECK: mov {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
    200   %tmp3 = extractelement <8 x i8> %tmp1, i32 2
    201   %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4
    202   ret <8 x i8> %tmp4
    203 }
    204 
    205 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
        ; Copies lane 2 of %tmp1 into lane 3 of %tmp2 (both 64-bit <4 x i16>); expects an element-to-element mov.
    206 ; CHECK-LABEL: ins4h4:
    207 ; CHECK: mov {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    208   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    209   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3
    210   ret <4 x i16> %tmp4
    211 }
    212 
    213 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
        ; Copies lane 0 of %tmp1 into lane 1 of %tmp2 (both 64-bit <2 x i32>); expects an element-to-element mov.
    214 ; CHECK-LABEL: ins2s2:
    215 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    216   %tmp3 = extractelement <2 x i32> %tmp1, i32 0
    217   %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1
    218   ret <2 x i32> %tmp4
    219 }
    220 
    221 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
        ; Copies the single lane of %tmp1 into the single lane of %tmp2 (both <1 x i64>); expects a mov between .d[0] elements.
    222 ; CHECK-LABEL: ins1d1:
    223 ; CHECK: mov {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    224   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
    225   %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0
    226   ret <1 x i64> %tmp4
    227 }
    228 
    229 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
        ; Copies float lane 0 of %tmp1 into lane 1 of %tmp2 (both <2 x float>); expects an element mov on .s lanes.
    230 ; CHECK-LABEL: ins2f2:
    231 ; CHECK: mov {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    232   %tmp3 = extractelement <2 x float> %tmp1, i32 0
    233   %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1
    234   ret <2 x float> %tmp4
    235 }
    236 
    237 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
        ; Replaces the single lane of <1 x double> %tmp2 with the single lane of %tmp1; this is a negative test that expects no mov naming a v register.
    238 ; CHECK-LABEL: ins1df1:
    239 ; CHECK-NOT: mov {{v[0-9]+}}
    240   %tmp3 = extractelement <1 x double> %tmp1, i32 0
    241   %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0
    242   ret <1 x double> %tmp4
    243 }
    244 
    245 define i32 @umovw16b(<16 x i8> %tmp1) {
        ; Extracts byte lane 8 of a <16 x i8> and zero-extends it to i32; expects umov into a w register.
    246 ; CHECK-LABEL: umovw16b:
    247 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    248   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
    249   %tmp4 = zext i8 %tmp3 to i32
    250   ret i32 %tmp4
    251 }
    252 
    253 define i32 @umovw8h(<8 x i16> %tmp1) {
        ; Extracts halfword lane 2 of an <8 x i16> and zero-extends it to i32; expects umov into a w register.
    254 ; CHECK-LABEL: umovw8h:
    255 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    256   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    257   %tmp4 = zext i16 %tmp3 to i32
    258   ret i32 %tmp4
    259 }
    260 
    261 define i32 @umovw4s(<4 x i32> %tmp1) {
        ; Extracts i32 lane 2 of a <4 x i32> with no extension; expects a mov from .s[2] into a w register.
    262 ; CHECK-LABEL: umovw4s:
    263 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
    264   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
    265   ret i32 %tmp3
    266 }
    267 
    268 define i64 @umovx2d(<2 x i64> %tmp1) {
        ; Extracts i64 lane 1 of a <2 x i64>; expects a mov from .d[1] into an x register.
    269 ; CHECK-LABEL: umovx2d:
    270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
    271   %tmp3 = extractelement <2 x i64> %tmp1, i32 1
    272   ret i64 %tmp3
    273 }
    274 
    275 define i32 @umovw8b(<8 x i8> %tmp1) {
        ; Extracts byte lane 7 of a 64-bit <8 x i8> and zero-extends it to i32.
        ; The mnemonic is spelled umov in full: a bare "mov" pattern only matched
        ; as a substring and would have accepted smov as well.
    276 ; CHECK-LABEL: umovw8b:
    277 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
    278   %tmp3 = extractelement <8 x i8> %tmp1, i32 7
    279   %tmp4 = zext i8 %tmp3 to i32
    280   ret i32 %tmp4
    281 }
    282 
    283 define i32 @umovw4h(<4 x i16> %tmp1) {
        ; Extracts halfword lane 2 of a 64-bit <4 x i16> and zero-extends it to i32.
        ; The mnemonic is spelled umov in full: a bare "mov" pattern only matched
        ; as a substring and would have accepted smov as well.
    284 ; CHECK-LABEL: umovw4h:
    285 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    286   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    287   %tmp4 = zext i16 %tmp3 to i32
    288   ret i32 %tmp4
    289 }
    290 
    291 define i32 @umovw2s(<2 x i32> %tmp1) {
        ; Extracts i32 lane 1 of a 64-bit <2 x i32> with no extension; expects a mov from .s[1] into a w register.
    292 ; CHECK-LABEL: umovw2s:
    293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
    294   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
    295   ret i32 %tmp3
    296 }
    297 
    298 define i64 @umovx1d(<1 x i64> %tmp1) {
        ; Extracts the only lane of a <1 x i64>; expects an fmov from a d register into an x register.
    299 ; CHECK-LABEL: umovx1d:
    300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    301   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
    302   ret i64 %tmp3
    303 }
    304 
    305 define i32 @smovw16b(<16 x i8> %tmp1) {
        ; Extracts byte lane 8, sign-extends it to i32 and adds the result to itself; expects smov into a w register.
    306 ; CHECK-LABEL: smovw16b:
    307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    308   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
    309   %tmp4 = sext i8 %tmp3 to i32
    310   %tmp5 = add i32 %tmp4, %tmp4
    311   ret i32 %tmp5
    312 }
    313 
    314 define i32 @smovw8h(<8 x i16> %tmp1) {
        ; Extracts halfword lane 2, sign-extends it to i32 and adds the result to itself; expects smov into a w register.
    315 ; CHECK-LABEL: smovw8h:
    316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    317   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    318   %tmp4 = sext i16 %tmp3 to i32
    319   %tmp5 = add i32 %tmp4, %tmp4
    320   ret i32 %tmp5
    321 }
    322 
    323 define i64 @smovx16b(<16 x i8> %tmp1) {
        ; Extracts byte lane 8 of a <16 x i8> and sign-extends it to i64; expects smov into an x register.
    324 ; CHECK-LABEL: smovx16b:
    325 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
    326   %tmp3 = extractelement <16 x i8> %tmp1, i32 8
    327   %tmp4 = sext i8 %tmp3 to i64
    328   ret i64 %tmp4
    329 }
    330 
    331 define i64 @smovx8h(<8 x i16> %tmp1) {
        ; Extracts halfword lane 2 of an <8 x i16> and sign-extends it to i64; expects smov into an x register.
    332 ; CHECK-LABEL: smovx8h:
    333 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
    334   %tmp3 = extractelement <8 x i16> %tmp1, i32 2
    335   %tmp4 = sext i16 %tmp3 to i64
    336   ret i64 %tmp4
    337 }
    338 
    339 define i64 @smovx4s(<4 x i32> %tmp1) {
        ; Extracts word lane 2 of a <4 x i32> and sign-extends it to i64; expects smov into an x register.
    340 ; CHECK-LABEL: smovx4s:
    341 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
    342   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
    343   %tmp4 = sext i32 %tmp3 to i64
    344   ret i64 %tmp4
    345 }
    346 
    347 define i32 @smovw8b(<8 x i8> %tmp1) {
        ; Extracts byte lane 4 of a 64-bit <8 x i8>, sign-extends to i32 and adds the result to itself; expects smov into a w register.
    348 ; CHECK-LABEL: smovw8b:
    349 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
    350   %tmp3 = extractelement <8 x i8> %tmp1, i32 4
    351   %tmp4 = sext i8 %tmp3 to i32
    352   %tmp5 = add i32 %tmp4, %tmp4
    353   ret i32 %tmp5
    354 }
    355 
    356 define i32 @smovw4h(<4 x i16> %tmp1) {
        ; Extracts halfword lane 2 of a 64-bit <4 x i16>, sign-extends to i32 and adds the result to itself; expects smov into a w register.
    357 ; CHECK-LABEL: smovw4h:
    358 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    359   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    360   %tmp4 = sext i16 %tmp3 to i32
    361   %tmp5 = add i32 %tmp4, %tmp4
    362   ret i32 %tmp5
    363 }
    364 
    365 define i32 @smovx8b(<8 x i8> %tmp1) {
        ; Extracts byte lane 6 of a 64-bit <8 x i8> and sign-extends it to i32; the pattern accepts either an x or a w destination.
    366 ; CHECK-LABEL: smovx8b:
    367 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
    368   %tmp3 = extractelement <8 x i8> %tmp1, i32 6
    369   %tmp4 = sext i8 %tmp3 to i32
    370   ret i32 %tmp4
    371 }
    372 
    373 define i32 @smovx4h(<4 x i16> %tmp1) {
        ; Extracts halfword lane 2 of a 64-bit <4 x i16> and sign-extends it to i32; the pattern accepts either an x or a w destination.
    374 ; CHECK-LABEL: smovx4h:
    375 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
    376   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    377   %tmp4 = sext i16 %tmp3 to i32
    378   ret i32 %tmp4
    379 }
    380 
    381 define i64 @smovx2s(<2 x i32> %tmp1) {
        ; Extracts word lane 1 of a 64-bit <2 x i32> and sign-extends it to i64; expects smov into an x register.
    382 ; CHECK-LABEL: smovx2s:
    383 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
    384   %tmp3 = extractelement <2 x i32> %tmp1, i32 1
    385   %tmp4 = sext i32 %tmp3 to i64
    386   ret i64 %tmp4
    387 }
    388 
    389 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
        ; Shuffle keeps %v1 except position 5, which takes mask index 11 (lane 3 of %v2); expects a single element mov.
    390 ; CHECK-LABEL: test_vcopy_lane_s8:
    391 ; CHECK: mov  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
    392   %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
    393   ret <8 x i8> %vset_lane
    394 }
    395 
    396 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
        ; Shuffle keeps %v1 except position 14, which takes mask index 22 (lane 6 of %v2); expects a single element mov.
    397 ; CHECK-LABEL: test_vcopyq_laneq_s8:
    398 ; CHECK: mov  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
    399   %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
    400   ret <16 x i8> %vset_lane
    401 }
    402 
    403 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
        ; Operand roles are swapped: positions 0-6 come from %v2 (mask indices 8-14) and position 7 from lane 0 of %v1; expects a single element mov.
    404 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
    405 ; CHECK: mov {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
    406   %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
    407   ret <8 x i8> %vset_lane
    408 }
    409 
    410 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
        ; Operand roles are swapped: position 0 comes from lane 15 of %v1 and positions 1-15 from %v2 (mask indices 17-31); expects a single element mov.
    411 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
    412 ; CHECK: mov {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
    413   %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    414   ret <16 x i8> %vset_lane
    415 }
    416 
    417 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
        ; Builds an <8 x i8> with every lane set to %v1 through chained insertelement; expects one dup .8b from a w register.
    418 ; CHECK-LABEL: test_vdup_n_u8:
    419 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
    420   %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0
    421   %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1
    422   %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2
    423   %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3
    424   %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4
    425   %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5
    426   %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6
    427   %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7
    428   ret <8 x i8> %vecinit7.i
    429 }
    430 
    431 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
        ; Builds a <4 x i16> with every lane set to %v1 through chained insertelement; expects one dup .4h from a w register.
    432 ; CHECK-LABEL: test_vdup_n_u16:
    433 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
    434   %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0
    435   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1
    436   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2
    437   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3
    438   ret <4 x i16> %vecinit3.i
    439 }
    440 
    441 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
        ; Builds a <2 x i32> with both lanes set to %v1; expects one dup .2s from a w register.
    442 ; CHECK-LABEL: test_vdup_n_u32:
    443 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
    444   %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0
    445   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1
    446   ret <2 x i32> %vecinit1.i
    447 }
    448 
    449 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
        ; Builds a <1 x i64> from %v1; with a single lane the expected output is an fmov from an x register to a d register.
    450 ; CHECK-LABEL: test_vdup_n_u64:
    451 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    452   %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0
    453   ret <1 x i64> %vecinit.i
    454 }
    455 
    456 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
        ; Builds a <16 x i8> with every lane set to %v1 through chained insertelement; expects one dup .16b from a w register.
    457 ; CHECK-LABEL: test_vdupq_n_u8:
    458 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
    459   %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0
    460   %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1
    461   %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2
    462   %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3
    463   %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4
    464   %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5
    465   %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6
    466   %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7
    467   %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8
    468   %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9
    469   %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10
    470   %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11
    471   %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12
    472   %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13
    473   %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14
    474   %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15
    475   ret <16 x i8> %vecinit15.i
    476 }
    477 
    478 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
        ; Builds an <8 x i16> with every lane set to %v1 through chained insertelement; expects one dup .8h from a w register.
    479 ; CHECK-LABEL: test_vdupq_n_u16:
    480 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
    481   %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0
    482   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1
    483   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2
    484   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3
    485   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4
    486   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5
    487   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6
    488   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7
    489   ret <8 x i16> %vecinit7.i
    490 }
    491 
    492 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
        ; Builds a <4 x i32> with every lane set to %v1 through chained insertelement; expects one dup .4s from a w register.
    493 ; CHECK-LABEL: test_vdupq_n_u32:
    494 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
    495   %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0
    496   %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1
    497   %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2
    498   %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3
    499   ret <4 x i32> %vecinit3.i
    500 }
    501 
    502 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
        ; Builds a <2 x i64> with both lanes set to %v1; expects one dup .2d from an x register.
    503 ; CHECK-LABEL: test_vdupq_n_u64:
    504 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
    505   %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0
    506   %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1
    507   ret <2 x i64> %vecinit1.i
    508 }
    509 
    510 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
        ; Splats lane 5 of an <8 x i8> across all 8 lanes via shufflevector; expects dup .8b from .b[5].
    511 ; CHECK-LABEL: test_vdup_lane_s8:
    512 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    513   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    514   ret <8 x i8> %shuffle
    515 }
    516 
    517 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
        ; Splats lane 2 of a <4 x i16> across all 4 lanes via shufflevector; expects dup .4h from .h[2].
    518 ; CHECK-LABEL: test_vdup_lane_s16:
    519 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    520   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    521   ret <4 x i16> %shuffle
    522 }
    523 
    524 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
        ; Splats lane 1 of a <2 x i32> across both lanes via shufflevector; expects dup .2s from .s[1].
    525 ; CHECK-LABEL: test_vdup_lane_s32:
    526 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    527   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
    528   ret <2 x i32> %shuffle
    529 }
    530 
    531 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
        ; Splats lane 5 of a 64-bit <8 x i8> across a 128-bit <16 x i8> via shufflevector.
        ; The pattern names the dup mnemonic so an unrelated instruction with the
        ; same operands cannot satisfy it.
    532 ; CHECK-LABEL: test_vdupq_lane_s8:
    533 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    534   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    535   ret <16 x i8> %shuffle
    536 }
    537 
    538 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
        ; Splats lane 2 of a 64-bit <4 x i16> across a 128-bit <8 x i16> via shufflevector.
        ; The pattern names the dup mnemonic so an unrelated instruction with the
        ; same operands cannot satisfy it.
    539 ; CHECK-LABEL: test_vdupq_lane_s16:
    540 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    541   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    542   ret <8 x i16> %shuffle
    543 }
    544 
    545 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
        ; Splats lane 1 of a 64-bit <2 x i32> across a 128-bit <4 x i32> via shufflevector.
        ; The pattern names the dup mnemonic so an unrelated instruction with the
        ; same operands cannot satisfy it.
    546 ; CHECK-LABEL: test_vdupq_lane_s32:
    547 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    548   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    549   ret <4 x i32> %shuffle
    550 }
    551 
    552 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
        ; Splats the single lane of a <1 x i64> across a <2 x i64> (all-zero shuffle mask).
        ; The pattern names the dup mnemonic so an unrelated instruction with the
        ; same operands cannot satisfy it.
    553 ; CHECK-LABEL: test_vdupq_lane_s64:
    554 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    555   %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
    556   ret <2 x i64> %shuffle
    557 }
    558 
    559 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
        ; Splats lane 5 of a 128-bit <16 x i8> into a 64-bit <8 x i8> via shufflevector; expects dup .8b from .b[5].
    560 ; CHECK-LABEL: test_vdup_laneq_s8:
    561 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    562   %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    563   ret <8 x i8> %shuffle
    564 }
    565 
    566 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
        ; Splats lane 2 of a 128-bit <8 x i16> into a 64-bit <4 x i16> via shufflevector; expects dup .4h from .h[2].
    567 ; CHECK-LABEL: test_vdup_laneq_s16:
    568 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    569   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    570   ret <4 x i16> %shuffle
    571 }
    572 
    573 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
        ; Splats lane 1 of a 128-bit <4 x i32> into a 64-bit <2 x i32> via shufflevector; expects dup .2s from .s[1].
    574 ; CHECK-LABEL: test_vdup_laneq_s32:
    575 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    576   %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
    577   ret <2 x i32> %shuffle
    578 }
    579 
    580 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
        ; Splats lane 5 of a <16 x i8> across all 16 lanes via shufflevector; expects dup .16b from .b[5].
    581 ; CHECK-LABEL: test_vdupq_laneq_s8:
    582 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    583   %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    584   ret <16 x i8> %shuffle
    585 }
    586 
    587 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
        ; Splats lane 2 of an <8 x i16> across all 8 lanes via shufflevector.
        ; The pattern names the dup mnemonic so an unrelated instruction with the
        ; same operands cannot satisfy it.
    588 ; CHECK-LABEL: test_vdupq_laneq_s16:
    589 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    590   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    591   ret <8 x i16> %shuffle
    592 }
    593 
    594 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
        ; Splats lane 1 of a <4 x i32> across all 4 lanes via shufflevector; expects dup .4s from .s[1].
    595 ; CHECK-LABEL: test_vdupq_laneq_s32:
    596 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    597   %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    598   ret <4 x i32> %shuffle
    599 }
    600 
    601 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
        ; Splats lane 0 of a <2 x i64> across both lanes (all-zero shuffle mask); expects dup .2d from .d[0].
    602 ; CHECK-LABEL: test_vdupq_laneq_s64:
    603 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    604   %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
    605   ret <2 x i64> %shuffle
    606 }
    607 
    608 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
        ; Bitcasts an <8 x i8> argument to a scalar i64 result; expects an fmov from a d register to an x register.
    609 ; CHECK-LABEL: test_bitcastv8i8toi64:
    610    %res = bitcast <8 x i8> %in to i64
    611 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    612    ret i64 %res
    613 }
    614 
    615 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
        ; Bitcasts a <4 x i16> argument to a scalar i64 result; expects an fmov from a d register to an x register.
    616 ; CHECK-LABEL: test_bitcastv4i16toi64:
    617    %res = bitcast <4 x i16> %in to i64
    618 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    619    ret i64 %res
    620 }
    621 
    622 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
        ; Bitcasts a <2 x i32> argument to a scalar i64 result; expects an fmov from a d register to an x register.
    623 ; CHECK-LABEL: test_bitcastv2i32toi64:
    624    %res = bitcast <2 x i32> %in to i64
    625 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    626    ret i64 %res
    627 }
    628 
    629 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
        ; Bitcasts a <2 x float> argument to a scalar i64 result; expects an fmov from a d register to an x register.
    630 ; CHECK-LABEL: test_bitcastv2f32toi64:
    631    %res = bitcast <2 x float> %in to i64
    632 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    633    ret i64 %res
    634 }
    635 
    636 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
        ; Bitcasts a <1 x i64> argument to a scalar i64 result; expects an fmov from a d register to an x register.
    637 ; CHECK-LABEL: test_bitcastv1i64toi64:
    638    %res = bitcast <1 x i64> %in to i64
    639 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    640    ret i64 %res
    641 }
    642 
    643 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
        ; Bitcasts a <1 x double> argument to a scalar i64 result; expects an fmov from a d register to an x register.
    644 ; CHECK-LABEL: test_bitcastv1f64toi64:
    645    %res = bitcast <1 x double> %in to i64
    646 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    647    ret i64 %res
    648 }
    649 
    650 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
        ; Bitcasts a scalar i64 argument to an <8 x i8> result; expects an fmov from an x register to a d register.
    651 ; CHECK-LABEL: test_bitcasti64tov8i8:
    652    %res = bitcast i64 %in to <8 x i8>
    653 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    654    ret <8 x i8> %res
    655 }
    656 
    657 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
        ; Bitcasts a scalar i64 argument to a <4 x i16> result; expects an fmov from an x register to a d register.
    658 ; CHECK-LABEL: test_bitcasti64tov4i16:
    659    %res = bitcast i64 %in to <4 x i16>
    660 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    661    ret <4 x i16> %res
    662 }
    663 
    664 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
        ; Bitcasts a scalar i64 argument to a <2 x i32> result; expects an fmov from an x register to a d register.
    665 ; CHECK-LABEL: test_bitcasti64tov2i32:
    666    %res = bitcast i64 %in to <2 x i32>
    667 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    668    ret <2 x i32> %res
    669 }
    670 
    671 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
        ; Bitcasts a scalar i64 argument to a <2 x float> result; expects an fmov from an x register to a d register.
    672 ; CHECK-LABEL: test_bitcasti64tov2f32:
    673    %res = bitcast i64 %in to <2 x float>
    674 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    675    ret <2 x float> %res
    676 }
    677 
    678 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
        ; Bitcasts a scalar i64 argument to a <1 x i64> result; expects an fmov from an x register to a d register.
    679 ; CHECK-LABEL: test_bitcasti64tov1i64:
    680    %res = bitcast i64 %in to <1 x i64>
    681 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    682    ret <1 x i64> %res
    683 }
    684 
    685 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
        ; Bitcasts a scalar i64 argument to a <1 x double> result; expects an fmov from an x register to a d register.
    686 ; CHECK-LABEL: test_bitcasti64tov1f64:
    687    %res = bitcast i64 %in to <1 x double>
    688 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    689    ret <1 x double> %res
    690 }
    691 
    692 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
        ; Negates an <8 x i8>, bitcasts it to <1 x double>, then converts with fptosi.
        ; Expects neg on .8b followed on the very next line by fcvtzs, i.e. no
        ; instruction emitted for the bitcast in between.
    693 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
    694 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    695 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    696   %sub.i = sub <8 x i8> zeroinitializer, %a
    697   %1 = bitcast <8 x i8> %sub.i to <1 x double>
    698   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    699   ret <1 x i64> %vcvt.i
    700 }
    701 
    702 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
        ; Negates a <4 x i16>, bitcasts it to <1 x double>, then converts with fptosi.
        ; Expects neg on .4h followed on the very next line by fcvtzs.
    703 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
    704 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
    705 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    706   %sub.i = sub <4 x i16> zeroinitializer, %a
    707   %1 = bitcast <4 x i16> %sub.i to <1 x double>
    708   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    709   ret <1 x i64> %vcvt.i
    710 }
    711 
    712 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
        ; Negates a <2 x i32>, bitcasts it to <1 x double>, then converts with fptosi.
        ; Expects neg on .2s followed on the very next line by fcvtzs.
    713 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
    714 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    715 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    716   %sub.i = sub <2 x i32> zeroinitializer, %a
    717   %1 = bitcast <2 x i32> %sub.i to <1 x double>
    718   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    719   ret <1 x i64> %vcvt.i
    720 }
    721 
    722 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
        ; Negates a <1 x i64>, bitcasts it to <1 x double>, then converts with fptosi.
        ; Expects a scalar neg on d registers followed on the very next line by fcvtzs.
    723 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
    724 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
    725 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    726   %sub.i = sub <1 x i64> zeroinitializer, %a
    727   %1 = bitcast <1 x i64> %sub.i to <1 x double>
    728   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    729   ret <1 x i64> %vcvt.i
    730 }
    731 
    732 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
        ; Negates a <2 x float> (fsub from -0.0), bitcasts it to <1 x double>, then converts with fptosi.
        ; Expects fneg on .2s followed on the very next line by fcvtzs.
    733 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
    734 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    735 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    736   %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
    737   %1 = bitcast <2 x float> %sub.i to <1 x double>
    738   %vcvt.i = fptosi <1 x double> %1 to <1 x i64>
    739   ret <1 x i64> %vcvt.i
    740 }
    741 
    742 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
        ; Converts a <1 x i64> with sitofp, bitcasts the <1 x double> to <8 x i8>, then negates.
        ; Expects scvtf followed on the very next line by neg on .8b.
    743 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
    744 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    745 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    746   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
    747   %1 = bitcast <1 x double> %vcvt.i to <8 x i8>
    748   %sub.i = sub <8 x i8> zeroinitializer, %1
    749   ret <8 x i8> %sub.i
    750 }
    751 
    752 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
        ; Converts a <1 x i64> with sitofp, bitcasts the <1 x double> to <4 x i16>, then negates.
        ; Expects scvtf followed on the very next line by neg on .4h.
    753 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
    754 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    755 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
    756   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
    757   %1 = bitcast <1 x double> %vcvt.i to <4 x i16>
    758   %sub.i = sub <4 x i16> zeroinitializer, %1
    759   ret <4 x i16> %sub.i
    760 }
    761 
    762 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 { ; bitcast <1 x double> to <2 x i32> between an int-to-fp convert and a word-wise negate
    763 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
    764 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    765 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    766   %vcvt.i = sitofp <1 x i64> %a to <1 x double> ; signed int-to-fp conversion
    767   %1 = bitcast <1 x double> %vcvt.i to <2 x i32> ; reinterpret the double as two 32-bit lanes
    768   %sub.i = sub <2 x i32> zeroinitializer, %1 ; 0 - %1, i.e. per-lane integer negation
    769   ret <2 x i32> %sub.i
    770 }
    771 
    772 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 { ; bitcast <1 x double> to <1 x i64> between an int-to-fp convert and a 64-bit negate
    773 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
    774 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    775 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
    776   %vcvt.i = sitofp <1 x i64> %a to <1 x double> ; signed int-to-fp conversion
    777   %1 = bitcast <1 x double> %vcvt.i to <1 x i64> ; reinterpret the double's bits as a 64-bit integer
    778   %sub.i = sub <1 x i64> zeroinitializer, %1 ; 0 - %1, i.e. integer negation
    779   ret <1 x i64> %sub.i
    780 }
    781 
    782 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 { ; bitcast <1 x double> to <2 x float> between an int-to-fp convert and a vector fneg
    783 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
    784 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    785 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    786   %vcvt.i = sitofp <1 x i64> %a to <1 x double> ; signed int-to-fp conversion
    787   %1 = bitcast <1 x double> %vcvt.i to <2 x float> ; reinterpret the double as two floats
    788   %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1 ; -0.0 - %1, the fsub form of floating-point negation
    789   ret <2 x float> %sub.i
    790 }
    791 
    792 ; Test insert element into an undef vector
    793 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) { ; place scalar %a in lane 0 of an otherwise-undef <8 x i8>
    794 ; CHECK-LABEL: scalar_to_vector.v8i8:
    795 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    796   %b = insertelement <8 x i8> undef, i8 %a, i32 0 ; only lane 0 is defined, the rest stay undef
    797   ret <8 x i8> %b
    798 }
    799 
    800 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) { ; place scalar %a in lane 0 of an otherwise-undef <16 x i8>
    801 ; CHECK-LABEL: scalar_to_vector.v16i8:
    802 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    803   %b = insertelement <16 x i8> undef, i8 %a, i32 0 ; only lane 0 is defined, the rest stay undef
    804   ret <16 x i8> %b
    805 }
    806 
    807 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) { ; place scalar %a in lane 0 of an otherwise-undef <4 x i16>
    808 ; CHECK-LABEL: scalar_to_vector.v4i16:
    809 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    810   %b = insertelement <4 x i16> undef, i16 %a, i32 0 ; only lane 0 is defined, the rest stay undef
    811   ret <4 x i16> %b
    812 }
    813 
    814 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) { ; place scalar %a in lane 0 of an otherwise-undef <8 x i16>
    815 ; CHECK-LABEL: scalar_to_vector.v8i16:
    816 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    817   %b = insertelement <8 x i16> undef, i16 %a, i32 0 ; only lane 0 is defined, the rest stay undef
    818   ret <8 x i16> %b
    819 }
    820 
    821 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) { ; place scalar %a in lane 0 of an otherwise-undef <2 x i32>
    822 ; CHECK-LABEL: scalar_to_vector.v2i32:
    823 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    824   %b = insertelement <2 x i32> undef, i32 %a, i32 0 ; only lane 0 is defined, lane 1 stays undef
    825   ret <2 x i32> %b
    826 }
    827 
    828 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) { ; place scalar %a in lane 0 of an otherwise-undef <4 x i32>
    829 ; CHECK-LABEL: scalar_to_vector.v4i32:
    830 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    831   %b = insertelement <4 x i32> undef, i32 %a, i32 0 ; only lane 0 is defined, the rest stay undef
    832   ret <4 x i32> %b
    833 }
    834 
    835 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) { ; place 64-bit scalar %a in lane 0 of an otherwise-undef <2 x i64>
    836 ; CHECK-LABEL: scalar_to_vector.v2i64:
    837 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    838   %b = insertelement <2 x i64> undef, i64 %a, i32 0 ; only lane 0 is defined, lane 1 stays undef
    839   ret <2 x i64> %b
    840 }
    841 
    842 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) { ; splat the single element of %a into all eight lanes of an <8 x i8>
    843 ; CHECK-LABEL: testDUP.v1i8:
    844 ; CHECK: dup v0.8b, v0.b[0]
    845   %b = extractelement <1 x i8> %a, i32 0 ; the one element that gets replicated
    846   %c = insertelement <8 x i8> undef, i8 %b, i32 0 ; lanes 0..7 below are all filled with %b
    847   %d = insertelement <8 x i8> %c, i8 %b, i32 1
    848   %e = insertelement <8 x i8> %d, i8 %b, i32 2
    849   %f = insertelement <8 x i8> %e, i8 %b, i32 3
    850   %g = insertelement <8 x i8> %f, i8 %b, i32 4
    851   %h = insertelement <8 x i8> %g, i8 %b, i32 5
    852   %i = insertelement <8 x i8> %h, i8 %b, i32 6
    853   %j = insertelement <8 x i8> %i, i8 %b, i32 7
    854   ret <8 x i8> %j
    855 }
    856 
    857 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) { ; splat the single element of %a into all eight lanes of an <8 x i16>
    858 ; CHECK-LABEL: testDUP.v1i16:
    859 ; CHECK: dup v0.8h, v0.h[0]
    860   %b = extractelement <1 x i16> %a, i32 0 ; the one element that gets replicated
    861   %c = insertelement <8 x i16> undef, i16 %b, i32 0 ; lanes 0..7 below are all filled with %b
    862   %d = insertelement <8 x i16> %c, i16 %b, i32 1
    863   %e = insertelement <8 x i16> %d, i16 %b, i32 2
    864   %f = insertelement <8 x i16> %e, i16 %b, i32 3
    865   %g = insertelement <8 x i16> %f, i16 %b, i32 4
    866   %h = insertelement <8 x i16> %g, i16 %b, i32 5
    867   %i = insertelement <8 x i16> %h, i16 %b, i32 6
    868   %j = insertelement <8 x i16> %i, i16 %b, i32 7
    869   ret <8 x i16> %j
    870 }
    871 
    872 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) { ; splat the single element of %a into all four lanes of a <4 x i32>
    873 ; CHECK-LABEL: testDUP.v1i32:
    874 ; CHECK: dup v0.4s, v0.s[0]
    875   %b = extractelement <1 x i32> %a, i32 0 ; the one element that gets replicated
    876   %c = insertelement <4 x i32> undef, i32 %b, i32 0 ; lanes 0..3 below are all filled with %b
    877   %d = insertelement <4 x i32> %c, i32 %b, i32 1
    878   %e = insertelement <4 x i32> %d, i32 %b, i32 2
    879   %f = insertelement <4 x i32> %e, i32 %b, i32 3
    880   ret <4 x i32> %f
    881 }
    882 
    883 define <8 x i8> @getl(<16 x i8> %x) #0 { ; return lanes 0..7 of %x as an <8 x i8>; the only instruction checked for is ret
    884 ; CHECK-LABEL: getl:
    885 ; CHECK: ret
    886   %vecext = extractelement <16 x i8> %x, i32 0 ; each extract/insert pair copies lane k of %x to lane k of the result
    887   %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
    888   %vecext1 = extractelement <16 x i8> %x, i32 1
    889   %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
    890   %vecext3 = extractelement <16 x i8> %x, i32 2
    891   %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
    892   %vecext5 = extractelement <16 x i8> %x, i32 3
    893   %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
    894   %vecext7 = extractelement <16 x i8> %x, i32 4
    895   %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
    896   %vecext9 = extractelement <16 x i8> %x, i32 5
    897   %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
    898   %vecext11 = extractelement <16 x i8> %x, i32 6
    899   %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
    900   %vecext13 = extractelement <16 x i8> %x, i32 7
    901   %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
    902   ret <8 x i8> %vecinit14
    903 }
    904 
    905 ; CHECK-LABEL: test_extracts_inserts_varidx_extract:
    906 ; CHECK: str q0
    907 ; CHECK-DAG: and [[MASKED_IDX:x[0-9]+]], x0, #0x7
    908 ; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #3
    909 ; CHECK-DAG: ldr h[[R:[0-9]+]], {{\[}}[[PTR]]{{\]}}
    910 ; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
    911 ; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
    912 ; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
    913 define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) { ; build a <4 x i16> whose lane 0 comes from a variable-index extract and lanes 1..3 from constant lanes of %x
    914   %tmp = extractelement <8 x i16> %x, i32 %idx ; variable index; the checks expect a store of q0, a masked index and a halfword load
    915   %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0
    916   %tmp3 = extractelement <8 x i16> %x, i32 1 ; remaining lanes use constant indices and are checked as lane-to-lane mov
    917   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
    918   %tmp5 = extractelement <8 x i16> %x, i32 2
    919   %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
    920   %tmp7 = extractelement <8 x i16> %x, i32 3
    921   %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
    922   ret <4 x i16> %tmp8
    923 }
    924 
    925 ; CHECK-LABEL: test_extracts_inserts_varidx_insert:
    926 ; CHECK: and [[MASKED_IDX:x[0-9]+]], x0, #0x3
    927 ; CHECK: bfi [[PTR:x[0-9]+]], [[MASKED_IDX]], #1, #2
    928 ; CHECK: str h0, {{\[}}[[PTR]]{{\]}}
    929 ; CHECK-DAG: ldr d[[R:[0-9]+]]
    930 ; CHECK-DAG: mov v[[R]].h[1], v0.h[1]
    931 ; CHECK-DAG: mov v[[R]].h[2], v0.h[2]
    932 ; CHECK-DAG: mov v[[R]].h[3], v0.h[3]
    933 define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) { ; build a <4 x i16> by inserting lane 0 of %x at a variable index, then overwriting lanes 1..3 from constant lanes of %x
    934   %tmp = extractelement <8 x i16> %x, i32 0
    935   %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx ; variable index; the checks capture the address register as a pattern variable so they do not depend on register allocation
    936   %tmp3 = extractelement <8 x i16> %x, i32 1 ; remaining lanes use constant indices and are checked as lane-to-lane mov
    937   %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1
    938   %tmp5 = extractelement <8 x i16> %x, i32 2
    939   %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2
    940   %tmp7 = extractelement <8 x i16> %x, i32 3
    941   %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3
    942   ret <4 x i16> %tmp8
    943 }
    944 
    945 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) { ; splat the low 16 bits of lane 1 of %a across a <4 x i16>
    946 ; CHECK-LABEL: test_dup_v2i32_v4i16:
    947 ; CHECK: dup v0.4h, v0.h[2]
    948 entry:
    949   %x = extractelement <2 x i32> %a, i32 1
    950   %vget_lane = trunc i32 %x to i16 ; keep only the low 16 bits of the 32-bit lane
    951   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 ; lanes 0..3 below are all filled with the truncated value
    952   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    953   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    954   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    955   ret <4 x i16> %vecinit3.i
    956 }
    957 
    958 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) { ; splat the low 16 bits of lane 3 of %a across an <8 x i16>
    959 ; CHECK-LABEL: test_dup_v4i32_v8i16:
    960 ; CHECK: dup v0.8h, v0.h[6]
    961 entry:
    962   %x = extractelement <4 x i32> %a, i32 3
    963   %vget_lane = trunc i32 %x to i16 ; keep only the low 16 bits of the 32-bit lane
    964   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 ; lanes 0..7 below are all filled with the truncated value
    965   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
    966   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    967   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    968   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
    969   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
    970   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
    971   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
    972   ret <8 x i16> %vecinit7.i
    973 }
    974 
    975 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) { ; splat the low 16 bits of the single 64-bit element of %a across a <4 x i16>
    976 ; CHECK-LABEL: test_dup_v1i64_v4i16:
    977 ; CHECK: dup v0.4h, v0.h[0]
    978 entry:
    979   %x = extractelement <1 x i64> %a, i32 0
    980   %vget_lane = trunc i64 %x to i16 ; keep only the low 16 bits of the 64-bit element
    981   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 ; lanes 0..3 below are all filled with the truncated value
    982   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    983   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    984   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    985   ret <4 x i16> %vecinit3.i
    986 }
    987 
    988 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) { ; splat the low 32 bits of the single 64-bit element of %a across a <2 x i32>
    989 ; CHECK-LABEL: test_dup_v1i64_v2i32:
    990 ; CHECK: dup v0.2s, v0.s[0]
    991 entry:
    992   %x = extractelement <1 x i64> %a, i32 0
    993   %vget_lane = trunc i64 %x to i32 ; keep only the low 32 bits of the 64-bit element
    994   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 ; both lanes are filled with the truncated value
    995   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
    996   ret <2 x i32> %vecinit1.i
    997 }
    998 
    999 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { ; splat the low 16 bits of lane 1 of %a across an <8 x i16>
   1000 ; CHECK-LABEL: test_dup_v2i64_v8i16:
   1001 ; CHECK: dup v0.8h, v0.h[4]
   1002 entry:
   1003   %x = extractelement <2 x i64> %a, i32 1
   1004   %vget_lane = trunc i64 %x to i16 ; keep only the low 16 bits of the 64-bit lane
   1005   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 ; lanes 0..7 below are all filled with the truncated value
   1006   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
   1007   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
   1008   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
   1009   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
   1010   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
   1011   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
   1012   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
   1013   ret <8 x i16> %vecinit7.i
   1014 }
   1015 
   1016 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { ; splat the low 32 bits of lane 1 of %a across a <4 x i32>
   1017 ; CHECK-LABEL: test_dup_v2i64_v4i32:
   1018 ; CHECK: dup v0.4s, v0.s[2]
   1019 entry:
   1020   %x = extractelement <2 x i64> %a, i32 1
   1021   %vget_lane = trunc i64 %x to i32 ; keep only the low 32 bits of the 64-bit lane
   1022   %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 ; lanes 0..3 below are all filled with the truncated value
   1023   %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
   1024   %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
   1025   %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
   1026   ret <4 x i32> %vecinit3.i
   1027 }
   1028 
   1029 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { ; splat the low 16 bits of lane 1 of a 128-bit %a across a 64-bit <4 x i16>
   1030 ; CHECK-LABEL: test_dup_v4i32_v4i16:
   1031 ; CHECK: dup v0.4h, v0.h[2]
   1032 entry:
   1033   %x = extractelement <4 x i32> %a, i32 1
   1034   %vget_lane = trunc i32 %x to i16 ; keep only the low 16 bits of the 32-bit lane
   1035   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 ; lanes 0..3 below are all filled with the truncated value
   1036   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
   1037   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
   1038   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
   1039   ret <4 x i16> %vecinit3.i
   1040 }
   1041 
   1042 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { ; splat the low 16 bits of lane 0 of a 128-bit %a across a 64-bit <4 x i16>
   1043 ; CHECK-LABEL: test_dup_v2i64_v4i16:
   1044 ; CHECK: dup v0.4h, v0.h[0]
   1045 entry:
   1046   %x = extractelement <2 x i64> %a, i32 0
   1047   %vget_lane = trunc i64 %x to i16 ; keep only the low 16 bits of the 64-bit lane
   1048   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 ; lanes 0..3 below are all filled with the truncated value
   1049   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
   1050   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
   1051   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
   1052   ret <4 x i16> %vecinit3.i
   1053 }
   1054 
   1055 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { ; splat the low 32 bits of lane 0 of a 128-bit %a across a 64-bit <2 x i32>
   1056 ; CHECK-LABEL: test_dup_v2i64_v2i32:
   1057 ; CHECK: dup v0.2s, v0.s[0]
   1058 entry:
   1059   %x = extractelement <2 x i64> %a, i32 0
   1060   %vget_lane = trunc i64 %x to i32 ; keep only the low 32 bits of the 64-bit lane
   1061   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 ; both lanes are filled with the truncated value
   1062   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
   1063   ret <2 x i32> %vecinit1.i
   1064 }
   1065 
   1066 
   1067 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) { ; put a scalar float result into lane 0 of an undef <2 x float>; the checks expect ret right after fmaxp
   1068 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
   1069 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
   1070 ; CHECK-NEXT: ret
   1071 entry:
   1072   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) ; scalar produced by the fmaxv intrinsic
   1073   %1 = insertelement <1 x float> undef, float %0, i32 0 ; round trip through <1 x float>: insert then extract the same element
   1074   %2 = extractelement <1 x float> %1, i32 0
   1075   %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0 ; only lane 0 is defined
   1076   ret <2 x float> %vecinit1.i
   1077 }
   1078 
   1079 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) { ; put a scalar float result into lane 0 of an undef <4 x float>; the checks expect ret right after fmaxp
   1080 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
   1081 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
   1082 ; CHECK-NEXT: ret
   1083 entry:
   1084   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) ; scalar produced by the fmaxv intrinsic
   1085   %1 = insertelement <1 x float> undef, float %0, i32 0 ; round trip through <1 x float>: insert then extract the same element
   1086   %2 = extractelement <1 x float> %1, i32 0
   1087   %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0 ; only lane 0 is defined
   1088   ret <4 x float> %vecinit1.i
   1089 }
   1090 
   1091 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
   1092 
   1093 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { ; result has lane 0 undef and lane 1 equal to lane 0 of %a; checked as a dup from s[0]
   1094 ; CHECK-LABEL: test_concat_undef_v1i32:
   1095 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
   1096 entry:
   1097   %0 = extractelement <2 x i32> %a, i32 0
   1098   %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 ; lane 0 is left undef
   1099   ret <2 x i32> %vecinit1.i
   1100 }
   1101 
   1102 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
   1103 
   1104 define <2 x i32> @test_concat_v1i32_undef(i32 %a) { ; result has the sqabs value in lane 0 and lane 1 undef; the checks expect ret right after sqabs
   1105 ; CHECK-LABEL: test_concat_v1i32_undef:
   1106 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
   1107 ; CHECK-NEXT: ret
   1108 entry:
   1109   %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) ; scalar produced by the sqabs intrinsic
   1110   %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0 ; lane 1 is left undef
   1111   ret <2 x i32> %vecinit.i432
   1112 }
   1113 
   1114 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { ; both result lanes are lane 0 of %a; checked as a dup from s[0]
   1115 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
   1116 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
   1117 entry:
   1118   %0 = extractelement <2 x i32> %a, i32 0
   1119   %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 ; the same scalar goes into lane 0 and lane 1
   1120   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
   1121   ret <2 x i32> %vecinit1.i
   1122 }
   1123 
   1124 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) { ; combine two different sqabs results into the two lanes of a <2 x i32>
   1125 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
   1126 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
   1127 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
   1128 ; CHECK: mov {{v[0-9]+}}.s[1], w{{[0-9]+}}
   1129 entry:
   1130   %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
   1131   %d = insertelement <2 x i32> undef, i32 %c, i32 0 ; first result in lane 0 of %d
   1132   %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
   1133   %f = insertelement <2 x i32> undef, i32 %e, i32 0 ; second result in lane 0 of %f
   1134   %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2> ; mask index 0 is lane 0 of %d, index 2 is lane 0 of %f
   1135   ret <2 x i32> %h
   1136 }
   1137 
   1138 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { ; concatenate the low halves of two <16 x i8> vectors
   1139 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
   1140 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1141 entry:
   1142   %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; mask 0..7 is lanes 0..7 of %x, mask 16..23 is lanes 0..7 of %y
   1143   ret <16 x i8> %vecinit30
   1144 }
   1145 
   1146 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; concatenate an <8 x i8> with the low half of a <16 x i8>
   1147 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
   1148 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1149 entry:
   1150   %vecext = extractelement <8 x i8> %x, i32 0 ; each extract/insert pair copies lane k of %x to lane k of a <16 x i8>
   1151   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
   1152   %vecext1 = extractelement <8 x i8> %x, i32 1
   1153   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
   1154   %vecext3 = extractelement <8 x i8> %x, i32 2
   1155   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
   1156   %vecext5 = extractelement <8 x i8> %x, i32 3
   1157   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
   1158   %vecext7 = extractelement <8 x i8> %x, i32 4
   1159   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
   1160   %vecext9 = extractelement <8 x i8> %x, i32 5
   1161   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
   1162   %vecext11 = extractelement <8 x i8> %x, i32 6
   1163   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
   1164   %vecext13 = extractelement <8 x i8> %x, i32 7
   1165   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
   1166   %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; mask 0..7 is the rebuilt low half, mask 16..23 is lanes 0..7 of %y
   1167   ret <16 x i8> %vecinit30
   1168 }
   1169 
   1170 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { ; concatenate the low half of a <16 x i8> with an <8 x i8>, built element by element
   1171 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
   1172 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1173 entry:
   1174   %vecext = extractelement <16 x i8> %x, i32 0 ; result lanes 0..7 are lanes 0..7 of %x
   1175   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
   1176   %vecext1 = extractelement <16 x i8> %x, i32 1
   1177   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
   1178   %vecext3 = extractelement <16 x i8> %x, i32 2
   1179   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
   1180   %vecext5 = extractelement <16 x i8> %x, i32 3
   1181   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
   1182   %vecext7 = extractelement <16 x i8> %x, i32 4
   1183   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
   1184   %vecext9 = extractelement <16 x i8> %x, i32 5
   1185   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
   1186   %vecext11 = extractelement <16 x i8> %x, i32 6
   1187   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
   1188   %vecext13 = extractelement <16 x i8> %x, i32 7
   1189   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
   1190   %vecext15 = extractelement <8 x i8> %y, i32 0 ; result lanes 8..15 are lanes 0..7 of %y
   1191   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
   1192   %vecext17 = extractelement <8 x i8> %y, i32 1
   1193   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
   1194   %vecext19 = extractelement <8 x i8> %y, i32 2
   1195   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
   1196   %vecext21 = extractelement <8 x i8> %y, i32 3
   1197   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
   1198   %vecext23 = extractelement <8 x i8> %y, i32 4
   1199   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
   1200   %vecext25 = extractelement <8 x i8> %y, i32 5
   1201   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
   1202   %vecext27 = extractelement <8 x i8> %y, i32 6
   1203   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
   1204   %vecext29 = extractelement <8 x i8> %y, i32 7
   1205   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
   1206   ret <16 x i8> %vecinit30
   1207 }
   1208 
   1209 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { ; concatenate two <8 x i8> vectors into a <16 x i8>, built element by element
   1210 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
   1211 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1212 entry:
   1213   %vecext = extractelement <8 x i8> %x, i32 0 ; result lanes 0..7 are lanes 0..7 of %x
   1214   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
   1215   %vecext1 = extractelement <8 x i8> %x, i32 1
   1216   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
   1217   %vecext3 = extractelement <8 x i8> %x, i32 2
   1218   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
   1219   %vecext5 = extractelement <8 x i8> %x, i32 3
   1220   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
   1221   %vecext7 = extractelement <8 x i8> %x, i32 4
   1222   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
   1223   %vecext9 = extractelement <8 x i8> %x, i32 5
   1224   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
   1225   %vecext11 = extractelement <8 x i8> %x, i32 6
   1226   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
   1227   %vecext13 = extractelement <8 x i8> %x, i32 7
   1228   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
   1229   %vecext15 = extractelement <8 x i8> %y, i32 0 ; result lanes 8..15 are lanes 0..7 of %y
   1230   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
   1231   %vecext17 = extractelement <8 x i8> %y, i32 1
   1232   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
   1233   %vecext19 = extractelement <8 x i8> %y, i32 2
   1234   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
   1235   %vecext21 = extractelement <8 x i8> %y, i32 3
   1236   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
   1237   %vecext23 = extractelement <8 x i8> %y, i32 4
   1238   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
   1239   %vecext25 = extractelement <8 x i8> %y, i32 5
   1240   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
   1241   %vecext27 = extractelement <8 x i8> %y, i32 6
   1242   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
   1243   %vecext29 = extractelement <8 x i8> %y, i32 7
   1244   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
   1245   ret <16 x i8> %vecinit30
   1246 }
   1247 
   1248 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { ; concatenate the low halves of two <8 x i16> vectors
   1249 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
   1250 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1251 entry:
   1252   %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> ; mask 0..3 is lanes 0..3 of %x, mask 8..11 is lanes 0..3 of %y
   1253   ret <8 x i16> %vecinit14
   1254 }
   1255 
   1256 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { ; concatenate a <4 x i16> with the low half of an <8 x i16>
   1257 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
   1258 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1259 entry:
   1260   %vecext = extractelement <4 x i16> %x, i32 0 ; each extract/insert pair copies lane k of %x to lane k of an <8 x i16>
   1261   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
   1262   %vecext1 = extractelement <4 x i16> %x, i32 1
   1263   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
   1264   %vecext3 = extractelement <4 x i16> %x, i32 2
   1265   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
   1266   %vecext5 = extractelement <4 x i16> %x, i32 3
   1267   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
   1268   %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> ; mask 0..3 is the rebuilt low half, mask 8..11 is lanes 0..3 of %y
   1269   ret <8 x i16> %vecinit14
   1270 }
   1271 
   1272 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { ; concatenate the low half of an <8 x i16> with a <4 x i16>, built element by element
   1273 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
   1274 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1275 entry:
   1276   %vecext = extractelement <8 x i16> %x, i32 0 ; result lanes 0..3 are lanes 0..3 of %x
   1277   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
   1278   %vecext1 = extractelement <8 x i16> %x, i32 1
   1279   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
   1280   %vecext3 = extractelement <8 x i16> %x, i32 2
   1281   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
   1282   %vecext5 = extractelement <8 x i16> %x, i32 3
   1283   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
   1284   %vecext7 = extractelement <4 x i16> %y, i32 0 ; result lanes 4..7 are lanes 0..3 of %y
   1285   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
   1286   %vecext9 = extractelement <4 x i16> %y, i32 1
   1287   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
   1288   %vecext11 = extractelement <4 x i16> %y, i32 2
   1289   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
   1290   %vecext13 = extractelement <4 x i16> %y, i32 3
   1291   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
   1292   ret <8 x i16> %vecinit14
   1293 }
   1294 
   1295 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { ; concatenate two <4 x i16> vectors into an <8 x i16>, built element by element
   1296 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
   1297 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1298 entry:
   1299   %vecext = extractelement <4 x i16> %x, i32 0 ; result lanes 0..3 are lanes 0..3 of %x
   1300   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
   1301   %vecext1 = extractelement <4 x i16> %x, i32 1
   1302   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
   1303   %vecext3 = extractelement <4 x i16> %x, i32 2
   1304   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
   1305   %vecext5 = extractelement <4 x i16> %x, i32 3
   1306   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
   1307   %vecext7 = extractelement <4 x i16> %y, i32 0 ; result lanes 4..7 are lanes 0..3 of %y
   1308   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
   1309   %vecext9 = extractelement <4 x i16> %y, i32 1
   1310   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
   1311   %vecext11 = extractelement <4 x i16> %y, i32 2
   1312   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
   1313   %vecext13 = extractelement <4 x i16> %y, i32 3
   1314   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
   1315   ret <8 x i16> %vecinit14
   1316 }
   1317 
   1318 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { ; concatenate the low halves of two <4 x i32> vectors
   1319 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
   1320 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1321 entry:
   1322   %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; mask 0..1 is lanes 0..1 of %x, mask 4..5 is lanes 0..1 of %y
   1323   ret <4 x i32> %vecinit6
   1324 }
   1325 
   1326 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { ; concatenate a <2 x i32> with the low half of a <4 x i32>
   1327 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
   1328 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1329 entry:
   1330   %vecext = extractelement <2 x i32> %x, i32 0 ; copy both lanes of %x into lanes 0..1 of a <4 x i32>
   1331   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   1332   %vecext1 = extractelement <2 x i32> %x, i32 1
   1333   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
   1334   %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; mask 0..1 is the rebuilt low half, mask 4..5 is lanes 0..1 of %y
   1335   ret <4 x i32> %vecinit6
   1336 }
   1337 
   1338 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { ; concatenate the low half of a <4 x i32> with a <2 x i32>, built element by element
   1339 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
   1340 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1341 entry:
   1342   %vecext = extractelement <4 x i32> %x, i32 0 ; result lanes 0..1 are lanes 0..1 of %x
   1343   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   1344   %vecext1 = extractelement <4 x i32> %x, i32 1
   1345   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
   1346   %vecext3 = extractelement <2 x i32> %y, i32 0 ; result lanes 2..3 are lanes 0..1 of %y
   1347   %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
   1348   %vecext5 = extractelement <2 x i32> %y, i32 1
   1349   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
   1350   ret <4 x i32> %vecinit6
   1351 }
   1352 
   1353 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { ; concatenate two <2 x i32> vectors into a <4 x i32>
   1354 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
   1355 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1356 entry:
   1357   %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; mask 0..1 is all of %x, mask 2..3 is all of %y
   1358   ret <4 x i32> %vecinit6
   1359 }
   1360 
   1361 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { ; combine lane 0 of two <2 x i64> vectors; checked as zip1 on .2d
   1362 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
   1363 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   1364 entry:
   1365   %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> ; mask index 0 is lane 0 of %x, index 2 is lane 0 of %y
   1366   ret <2 x i64> %vecinit2
   1367 }
   1368 
   1369 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { ; combine a <1 x i64> with lane 0 of a <2 x i64>; checked as zip1 on .2d
   1370 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
   1371 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   1372 entry:
   1373   %vecext = extractelement <1 x i64> %x, i32 0
   1374   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 ; widen %x to <2 x i64> with lane 1 undef
   1375   %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> ; mask index 0 is the widened %x, index 2 is lane 0 of %y
   1376   ret <2 x i64> %vecinit2
   1377 }
   1378 
   1379 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { ; combine lane 0 of a <2 x i64> with a <1 x i64>; checked as zip1 on .2d
   1380 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
   1381 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   1382 entry:
   1383   %vecext = extractelement <2 x i64> %x, i32 0 ; result lane 0 is lane 0 of %x
   1384   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
   1385   %vecext1 = extractelement <1 x i64> %y, i32 0 ; result lane 1 is the single element of %y
   1386   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
   1387   ret <2 x i64> %vecinit2
   1388 }
   1389 
   1390 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { ; concatenate two <1 x i64> vectors into a <2 x i64>; checked as a d[0] to d[1] lane mov
   1391 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
   1392 ; CHECK: mov {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1393 entry:
   1394   %vecext = extractelement <1 x i64> %x, i32 0 ; result lane 0 is the single element of %x
   1395   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
   1396   %vecext1 = extractelement <1 x i64> %y, i32 0 ; result lane 1 is the single element of %y
   1397   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
   1398   ret <2 x i64> %vecinit2
   1399 }
   1400 
   1401 
   1402 define <4 x i16> @concat_vector_v4i16_const() {
        ; Splats lane 0 of a constant-zero <1 x i16> across four lanes (all-zero
        ; shuffle mask). The pattern below expects a movi of #0 into a d register.
   1403 ; CHECK-LABEL: concat_vector_v4i16_const:
   1404 ; CHECK: movi {{d[0-9]+}}, #0
   1405  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
   1406  ret <4 x i16> %r
   1407 }
   1408 
   1409 define <4 x i16> @concat_vector_v4i16_const_one() {
        ; Splats the constant <i16 1> across four lanes (all-zero shuffle mask).
        ; The pattern below expects a movi of #1 using the .4h arrangement.
   1410 ; CHECK-LABEL: concat_vector_v4i16_const_one:
   1411 ; CHECK: movi {{v[0-9]+}}.4h, #1
   1412  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
   1413  ret <4 x i16> %r
   1414 }
   1415 
   1416 define <4 x i32> @concat_vector_v4i32_const() {
        ; Splats lane 0 of a constant-zero <1 x i32> across four lanes (all-zero
        ; shuffle mask). The pattern below expects a movi of #0 using the .2d
        ; arrangement.
   1417 ; CHECK-LABEL: concat_vector_v4i32_const:
   1418 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1419  %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
   1420  ret <4 x i32> %r
   1421 }
   1422 
   1423 define <8 x i8> @concat_vector_v8i8_const() {
        ; Splats lane 0 of a constant-zero <1 x i8> across eight lanes (all-zero
        ; shuffle mask). The pattern below expects a movi of #0 into a d register.
   1424 ; CHECK-LABEL: concat_vector_v8i8_const:
   1425 ; CHECK: movi {{d[0-9]+}}, #0
   1426  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
   1427  ret <8 x i8> %r
   1428 }
   1429 
   1430 define <8 x i16> @concat_vector_v8i16_const() {
        ; Splats lane 0 of a constant-zero <1 x i16> across eight lanes (all-zero
        ; shuffle mask). The pattern below expects a movi of #0 using the .2d
        ; arrangement.
   1431 ; CHECK-LABEL: concat_vector_v8i16_const:
   1432 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1433  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
   1434  ret <8 x i16> %r
   1435 }
   1436 
   1437 define <8 x i16> @concat_vector_v8i16_const_one() {
        ; Splats the constant <i16 1> across eight lanes (all-zero shuffle mask).
        ; The pattern below expects a movi of #1 using the .8h arrangement.
   1438 ; CHECK-LABEL: concat_vector_v8i16_const_one:
   1439 ; CHECK: movi {{v[0-9]+}}.8h, #1
   1440  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
   1441  ret <8 x i16> %r
   1442 }
   1443 
   1444 define <16 x i8> @concat_vector_v16i8_const() {
        ; Splats lane 0 of a constant-zero <1 x i8> across sixteen lanes (all-zero
        ; shuffle mask). The pattern below expects a movi of #0 using the .2d
        ; arrangement.
   1445 ; CHECK-LABEL: concat_vector_v16i8_const:
   1446 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1447  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
   1448  ret <16 x i8> %r
   1449 }
   1450 
   1451 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
        ; Splats the single element of the argument %a across four i16 lanes
        ; (all-zero shuffle mask). The pattern below expects a dup from h[0] of v0
        ; into v0 with the .4h arrangement.
   1452 ; CHECK-LABEL: concat_vector_v4i16:
   1453 ; CHECK: dup v0.4h, v0.h[0]
   1454  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
   1455  ret <4 x i16> %r
   1456 }
   1457 
   1458 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
        ; Splats the single element of the argument %a across four i32 lanes
        ; (all-zero shuffle mask). The pattern below expects a dup from s[0] of v0
        ; into v0 with the .4s arrangement.
   1459 ; CHECK-LABEL: concat_vector_v4i32:
   1460 ; CHECK: dup v0.4s, v0.s[0]
   1461  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
   1462  ret <4 x i32> %r
   1463 }
   1464 
   1465 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
        ; Splats the single element of the argument %a across eight i8 lanes
        ; (all-zero shuffle mask). The pattern below expects a dup from b[0] of v0
        ; into v0 with the .8b arrangement.
   1466 ; CHECK-LABEL: concat_vector_v8i8:
   1467 ; CHECK: dup v0.8b, v0.b[0]
   1468  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
   1469  ret <8 x i8> %r
   1470 }
   1471 
   1472 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
        ; Splats the single element of the argument %a across eight i16 lanes
        ; (all-zero shuffle mask). The pattern below expects a dup from h[0] of v0
        ; into v0 with the .8h arrangement.
   1473 ; CHECK-LABEL: concat_vector_v8i16:
   1474 ; CHECK: dup v0.8h, v0.h[0]
   1475  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
   1476  ret <8 x i16> %r
   1477 }
   1478 
   1479 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
        ; Splats the single element of the argument %a across sixteen i8 lanes
        ; (all-zero shuffle mask). The pattern below expects a dup from b[0] of v0
        ; into v0 with the .16b arrangement.
   1480 ; CHECK-LABEL: concat_vector_v16i8:
   1481 ; CHECK: dup v0.16b, v0.b[0]
   1482  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
   1483  ret <16 x i8> %r
   1484 }
   1485