Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
      2 
      3 
      4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) {
        ; Insert the scalar i8 argument into lane 15; expects INS from a W register.
      5 ; CHECK-LABEL: ins16bw:
      6 ; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
      7   %vec = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
      8   ret <16 x i8> %vec
      9 }
     10 
     11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) {
        ; Insert the scalar i16 argument into lane 6; expects INS from a W register.
     12 ; CHECK-LABEL: ins8hw:
     13 ; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
     14   %vec = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
     15   ret <8 x i16> %vec
     16 }
     17 
     18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) {
        ; Insert the scalar i32 argument into lane 2; expects INS from a W register.
     19 ; CHECK-LABEL: ins4sw:
     20 ; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
     21   %vec = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
     22   ret <4 x i32> %vec
     23 }
     24 
     25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) {
        ; Insert the scalar i64 argument into lane 1; expects INS from an X register.
     26 ; CHECK-LABEL: ins2dw:
     27 ; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
     28   %vec = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
     29   ret <2 x i64> %vec
     30 }
     31 
     32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) {
        ; 64-bit vector variant: insert the scalar i8 into lane 5 via INS from a W register.
     33 ; CHECK-LABEL: ins8bw:
     34 ; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
     35   %vec = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
     36   ret <8 x i8> %vec
     37 }
     38 
     39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) {
        ; 64-bit vector variant: insert the scalar i16 into lane 3 via INS from a W register.
     40 ; CHECK-LABEL: ins4hw:
     41 ; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
     42   %vec = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
     43   ret <4 x i16> %vec
     44 }
     45 
     46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) {
        ; 64-bit vector variant: insert the scalar i32 into lane 1 via INS from a W register.
     47 ; CHECK-LABEL: ins2sw:
     48 ; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
     49   %vec = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
     50   ret <2 x i32> %vec
     51 }
     52 
     53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) {
        ; Copy byte lane 2 of %tmp1 into lane 15 of %tmp2; expects a lane-to-lane INS.
     54 ; CHECK-LABEL: ins16b16:
     55 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
     56   %elt = extractelement <16 x i8> %tmp1, i32 2
     57   %res = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
     58   ret <16 x i8> %res
     59 }
     60 
     61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) {
        ; Copy halfword lane 2 of %tmp1 into lane 7 of %tmp2; expects a lane-to-lane INS.
     62 ; CHECK-LABEL: ins8h8:
     63 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
     64   %elt = extractelement <8 x i16> %tmp1, i32 2
     65   %res = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
     66   ret <8 x i16> %res
     67 }
     68 
     69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) {
        ; Copy word lane 2 of %tmp1 into lane 1 of %tmp2; expects a lane-to-lane INS.
     70 ; CHECK-LABEL: ins4s4:
     71 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     72   %elt = extractelement <4 x i32> %tmp1, i32 2
     73   %res = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
     74   ret <4 x i32> %res
     75 }
     76 
     77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) {
        ; Copy doubleword lane 0 of %tmp1 into lane 1 of %tmp2; expects a lane-to-lane INS.
     78 ; CHECK-LABEL: ins2d2:
     79 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     80   %elt = extractelement <2 x i64> %tmp1, i32 0
     81   %res = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
     82   ret <2 x i64> %res
     83 }
     84 
     85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) {
        ; Float variant: copy lane 2 of %tmp1 into lane 1 of %tmp2 with a lane-to-lane INS.
     86 ; CHECK-LABEL: ins4f4:
     87 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     88   %elt = extractelement <4 x float> %tmp1, i32 2
     89   %res = insertelement <4 x float> %tmp2, float %elt, i32 1
     90   ret <4 x float> %res
     91 }
     92 
     93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) {
        ; Double variant: copy lane 0 of %tmp1 into lane 1 of %tmp2 with a lane-to-lane INS.
     94 ; CHECK-LABEL: ins2df2:
     95 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     96   %elt = extractelement <2 x double> %tmp1, i32 0
     97   %res = insertelement <2 x double> %tmp2, double %elt, i32 1
     98   ret <2 x double> %res
     99 }
    100 
    100 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) {
        ; 64-bit source, 128-bit destination: byte lane 2 of %tmp1 goes to lane 15 of %tmp2.
    101 ; CHECK-LABEL: ins8b16:
    102 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
    103   %elt = extractelement <8 x i8> %tmp1, i32 2
    104   %res = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
    105   ret <16 x i8> %res
    106 }
    108 
    108 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) {
        ; 64-bit source, 128-bit destination: halfword lane 2 of %tmp1 goes to lane 7 of %tmp2.
    109 ; CHECK-LABEL: ins4h8:
    110 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
    111   %elt = extractelement <4 x i16> %tmp1, i32 2
    112   %res = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
    113   ret <8 x i16> %res
    114 }
    116 
    116 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) {
        ; 64-bit source, 128-bit destination: word lane 1 of %tmp1 goes to lane 1 of %tmp2.
    117 ; CHECK-LABEL: ins2s4:
    118 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    119   %elt = extractelement <2 x i32> %tmp1, i32 1
    120   %res = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
    121   ret <4 x i32> %res
    122 }
    124 
    124 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) {
        ; 64-bit source, 128-bit destination: the single lane of %tmp1 goes to lane 1 of %tmp2.
    125 ; CHECK-LABEL: ins1d2:
    126 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    127   %elt = extractelement <1 x i64> %tmp1, i32 0
    128   %res = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
    129   ret <2 x i64> %res
    130 }
    132 
    132 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) {
        ; 64-bit float source, 128-bit destination: lane 1 of %tmp1 goes to lane 1 of %tmp2.
    133 ; CHECK-LABEL: ins2f4:
    134 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    135   %elt = extractelement <2 x float> %tmp1, i32 1
    136   %res = insertelement <4 x float> %tmp2, float %elt, i32 1
    137   ret <4 x float> %res
    138 }
    140 
    140 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) {
        ; 64-bit double source, 128-bit destination: the single lane of %tmp1 goes to lane 1 of %tmp2.
    141 ; CHECK-LABEL: ins1f2:
    142 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    143   %elt = extractelement <1 x double> %tmp1, i32 0
    144   %res = insertelement <2 x double> %tmp2, double %elt, i32 1
    145   ret <2 x double> %res
    146 }
    148 
    148 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) {
        ; 128-bit source, 64-bit destination: byte lane 2 of %tmp1 goes to lane 7 of %tmp2.
    149 ; CHECK-LABEL: ins16b8:
    150 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
    151   %elt = extractelement <16 x i8> %tmp1, i32 2
    152   %res = insertelement <8 x i8> %tmp2, i8 %elt, i32 7
    153   ret <8 x i8> %res
    154 }
    156 
    156 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) {
        ; 128-bit source, 64-bit destination: halfword lane 2 of %tmp1 goes to lane 3 of %tmp2.
    157 ; CHECK-LABEL: ins8h4:
    158 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    159   %elt = extractelement <8 x i16> %tmp1, i32 2
    160   %res = insertelement <4 x i16> %tmp2, i16 %elt, i32 3
    161   ret <4 x i16> %res
    162 }
    164 
    164 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) {
        ; 128-bit source, 64-bit destination: word lane 2 of %tmp1 goes to lane 1 of %tmp2.
    165 ; CHECK-LABEL: ins4s2:
    166 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    167   %elt = extractelement <4 x i32> %tmp1, i32 2
    168   %res = insertelement <2 x i32> %tmp2, i32 %elt, i32 1
    169   ret <2 x i32> %res
    170 }
    172 
    172 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) {
        ; 128-bit source, 64-bit destination: lane 0 of %tmp1 replaces the single lane of %tmp2.
    173 ; CHECK-LABEL: ins2d1:
    174 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    175   %elt = extractelement <2 x i64> %tmp1, i32 0
    176   %res = insertelement <1 x i64> %tmp2, i64 %elt, i32 0
    177   ret <1 x i64> %res
    178 }
    180 
    180 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) {
        ; 128-bit float source, 64-bit destination: lane 2 of %tmp1 goes to lane 1 of %tmp2.
    181 ; CHECK-LABEL: ins4f2:
    182 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    183   %elt = extractelement <4 x float> %tmp1, i32 2
    184   %res = insertelement <2 x float> %tmp2, float %elt, i32 1
    185   ret <2 x float> %res
    186 }
    188 
    188 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) {
        ; Lane 1 of %tmp1 replaces the single lane of %tmp2; the expected code is a
        ; scalar MOV into a D register from the .d[1] lane rather than an INS.
    189 ; CHECK-LABEL: ins2f1:
    190 ; CHECK: mov {{d[0-9]+}}, {{v[0-9]+}}.d[1]
    191   %elt = extractelement <2 x double> %tmp1, i32 1
    192   %res = insertelement <1 x double> %tmp2, double %elt, i32 0
    193   ret <1 x double> %res
    194 }
    196 
    196 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) {
        ; 64-bit to 64-bit: byte lane 2 of %tmp1 goes to lane 4 of %tmp2.
    197 ; CHECK-LABEL: ins8b8:
    198 ; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
    199   %elt = extractelement <8 x i8> %tmp1, i32 2
    200   %res = insertelement <8 x i8> %tmp2, i8 %elt, i32 4
    201   ret <8 x i8> %res
    202 }
    204 
    204 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) {
        ; 64-bit to 64-bit: halfword lane 2 of %tmp1 goes to lane 3 of %tmp2.
    205 ; CHECK-LABEL: ins4h4:
    206 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    207   %elt = extractelement <4 x i16> %tmp1, i32 2
    208   %res = insertelement <4 x i16> %tmp2, i16 %elt, i32 3
    209   ret <4 x i16> %res
    210 }
    212 
    212 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) {
        ; 64-bit to 64-bit: word lane 0 of %tmp1 goes to lane 1 of %tmp2.
    213 ; CHECK-LABEL: ins2s2:
    214 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    215   %elt = extractelement <2 x i32> %tmp1, i32 0
    216   %res = insertelement <2 x i32> %tmp2, i32 %elt, i32 1
    217   ret <2 x i32> %res
    218 }
    220 
    220 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) {
        ; Single-lane to single-lane: lane 0 of %tmp1 replaces lane 0 of %tmp2.
    221 ; CHECK-LABEL: ins1d1:
    222 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    223   %elt = extractelement <1 x i64> %tmp1, i32 0
    224   %res = insertelement <1 x i64> %tmp2, i64 %elt, i32 0
    225   ret <1 x i64> %res
    226 }
    228 
    228 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) {
        ; 64-bit float to 64-bit float: lane 0 of %tmp1 goes to lane 1 of %tmp2.
    229 ; CHECK-LABEL: ins2f2:
    230 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    231   %elt = extractelement <2 x float> %tmp1, i32 0
    232   %res = insertelement <2 x float> %tmp2, float %elt, i32 1
    233   ret <2 x float> %res
    234 }
    236 
    236 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) {
        ; The only lane of %tmp1 replaces the only lane of %tmp2, so the whole
        ; result is %tmp1; the test requires that no vector INS is emitted.
    237 ; CHECK-LABEL: ins1df1:
    238 ; CHECK-NOT: ins {{v[0-9]+}}
    239   %elt = extractelement <1 x double> %tmp1, i32 0
    240   %res = insertelement <1 x double> %tmp2, double %elt, i32 0
    241   ret <1 x double> %res
    242 }
    244 
    244 define i32 @umovw16b(<16 x i8> %tmp1) {
        ; Zero-extend byte lane 8 to i32; expects UMOV into a W register.
    245 ; CHECK-LABEL: umovw16b:
    246 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    247   %lane = extractelement <16 x i8> %tmp1, i32 8
    248   %wide = zext i8 %lane to i32
    249   ret i32 %wide
    250 }
    252 
    252 define i32 @umovw8h(<8 x i16> %tmp1) {
        ; Zero-extend halfword lane 2 to i32; expects UMOV into a W register.
    253 ; CHECK-LABEL: umovw8h:
    254 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    255   %lane = extractelement <8 x i16> %tmp1, i32 2
    256   %wide = zext i16 %lane to i32
    257   ret i32 %wide
    258 }
    260 
    260 define i32 @umovw4s(<4 x i32> %tmp1) {
        ; Plain extract of word lane 2 (no extension); expects MOV into a W register.
    261 ; CHECK-LABEL: umovw4s:
    262 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
    263   %lane = extractelement <4 x i32> %tmp1, i32 2
    264   ret i32 %lane
    265 }
    267 
    268 define i64 @umovx2d(<2 x i64> %tmp1) {
        ; Plain extract of doubleword lane 1; expects MOV into an X register.
    269 ; CHECK-LABEL: umovx2d:
    270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
    271   %lane = extractelement <2 x i64> %tmp1, i32 1
    272   ret i64 %lane
    273 }
    274 
    275 define i32 @umovw8b(<8 x i8> %tmp1) {
        ; Zero-extend byte lane 7 of a 64-bit vector to i32. The pattern names
        ; UMOV explicitly: a bare "mov" is a substring of both "umov" and "smov",
        ; so it could not catch a wrongly sign-extending selection, and it was
        ; inconsistent with the 128-bit umovw16b / umovw8h tests.
    276 ; CHECK-LABEL: umovw8b:
    277 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
    278   %tmp3 = extractelement <8 x i8> %tmp1, i32 7
    279   %tmp4 = zext i8 %tmp3 to i32
    280   ret i32 %tmp4
    281 }
    282 
    283 define i32 @umovw4h(<4 x i16> %tmp1) {
        ; Zero-extend halfword lane 2 of a 64-bit vector to i32. The pattern names
        ; UMOV explicitly: a bare "mov" is a substring of both "umov" and "smov",
        ; so it could not catch a wrongly sign-extending selection, and it was
        ; inconsistent with the 128-bit umovw16b / umovw8h tests.
    284 ; CHECK-LABEL: umovw4h:
    285 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    286   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    287   %tmp4 = zext i16 %tmp3 to i32
    288   ret i32 %tmp4
    289 }
    290 
    291 define i32 @umovw2s(<2 x i32> %tmp1) {
        ; Plain extract of word lane 1 from a 64-bit vector; expects MOV into a W register.
    292 ; CHECK-LABEL: umovw2s:
    293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
    294   %lane = extractelement <2 x i32> %tmp1, i32 1
    295   ret i32 %lane
    296 }
    297 
    298 define i64 @umovx1d(<1 x i64> %tmp1) {
        ; Extract the only lane of a <1 x i64>; expects FMOV from a D register to an X register.
    299 ; CHECK-LABEL: umovx1d:
    300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    301   %lane = extractelement <1 x i64> %tmp1, i32 0
    302   ret i64 %lane
    303 }
    304 
    305 define i32 @smovw16b(<16 x i8> %tmp1) {
        ; Sign-extend byte lane 8 to i32 and add it to itself; expects SMOV into a W register.
    306 ; CHECK-LABEL: smovw16b:
    307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    308   %lane = extractelement <16 x i8> %tmp1, i32 8
    309   %wide = sext i8 %lane to i32
    310   %twice = add i32 %wide, %wide
    311   ret i32 %twice
    312 }
    313 
    314 define i32 @smovw8h(<8 x i16> %tmp1) {
        ; Sign-extend halfword lane 2 to i32 and add it to itself; expects SMOV into a W register.
    315 ; CHECK-LABEL: smovw8h:
    316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    317   %lane = extractelement <8 x i16> %tmp1, i32 2
    318   %wide = sext i16 %lane to i32
    319   %twice = add i32 %wide, %wide
    320   ret i32 %twice
    321 }
    322 
    323 define i64 @smovx16b(<16 x i8> %tmp1) {
        ; Sign-extend byte lane 8 to i64; expects SMOV into an X register.
    324 ; CHECK-LABEL: smovx16b:
    325 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8]
    326   %lane = extractelement <16 x i8> %tmp1, i32 8
    327   %wide = sext i8 %lane to i64
    328   ret i64 %wide
    329 }
    330 
    331 define i64 @smovx8h(<8 x i16> %tmp1) {
        ; Sign-extend halfword lane 2 to i64; expects SMOV into an X register.
    332 ; CHECK-LABEL: smovx8h:
    333 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2]
    334   %lane = extractelement <8 x i16> %tmp1, i32 2
    335   %wide = sext i16 %lane to i64
    336   ret i64 %wide
    337 }
    338 
    339 define i64 @smovx4s(<4 x i32> %tmp1) {
        ; Sign-extend word lane 2 to i64; expects SMOV into an X register.
    340 ; CHECK-LABEL: smovx4s:
    341 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
    342   %lane = extractelement <4 x i32> %tmp1, i32 2
    343   %wide = sext i32 %lane to i64
    344   ret i64 %wide
    345 }
    346 
    347 define i32 @smovw8b(<8 x i8> %tmp1) {
        ; 64-bit vector: sign-extend byte lane 4 to i32 and add it to itself; expects SMOV into a W register.
    348 ; CHECK-LABEL: smovw8b:
    349 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
    350   %lane = extractelement <8 x i8> %tmp1, i32 4
    351   %wide = sext i8 %lane to i32
    352   %twice = add i32 %wide, %wide
    353   ret i32 %twice
    354 }
    355 
    356 define i32 @smovw4h(<4 x i16> %tmp1) {
        ; 64-bit vector: sign-extend halfword lane 2 to i32 and add it to itself; expects SMOV into a W register.
    357 ; CHECK-LABEL: smovw4h:
    358 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    359   %lane = extractelement <4 x i16> %tmp1, i32 2
    360   %wide = sext i16 %lane to i32
    361   %twice = add i32 %wide, %wide
    362   ret i32 %twice
    363 }
    364 
    365 define i32 @smovx8b(<8 x i8> %tmp1) {
        ; Sign-extend byte lane 6 of a 64-bit vector. Despite the "x" in the name the
        ; function returns i32, and the pattern accepts either an X or a W destination.
    366 ; CHECK-LABEL: smovx8b:
    367 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
    368   %lane = extractelement <8 x i8> %tmp1, i32 6
    369   %wide = sext i8 %lane to i32
    370   ret i32 %wide
    371 }
    372 
    373 define i32 @smovx4h(<4 x i16> %tmp1) {
        ; Sign-extend halfword lane 2 of a 64-bit vector. Despite the "x" in the name the
        ; function returns i32, and the pattern accepts either an X or a W destination.
    374 ; CHECK-LABEL: smovx4h:
    375 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
    376   %lane = extractelement <4 x i16> %tmp1, i32 2
    377   %wide = sext i16 %lane to i32
    378   ret i32 %wide
    379 }
    380 
    381 define i64 @smovx2s(<2 x i32> %tmp1) {
        ; 64-bit vector: sign-extend word lane 1 to i64; expects SMOV into an X register.
    382 ; CHECK-LABEL: smovx2s:
    383 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
    384   %lane = extractelement <2 x i32> %tmp1, i32 1
    385   %wide = sext i32 %lane to i64
    386   ret i64 %wide
    387 }
    388 
    389 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) {
        ; Shuffle that keeps %v1 except for lane 5, which takes mask index 11
        ; (lane 3 of %v2); expects a single lane-to-lane INS.
    390 ; CHECK-LABEL: test_vcopy_lane_s8:
    391 ; CHECK: ins  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
    392   %copied = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
    393   ret <8 x i8> %copied
    394 }
    395 
    396 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) {
        ; Shuffle that keeps %v1 except for lane 14, which takes mask index 22
        ; (lane 6 of %v2); expects a single lane-to-lane INS.
    397 ; CHECK-LABEL: test_vcopyq_laneq_s8:
    398 ; CHECK: ins  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
    399   %copied = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
    400   ret <16 x i8> %copied
    401 }
    402 
    403 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) {
        ; Operand roles swapped: lanes 0-6 come from %v2 (mask 8-14) and lane 7 is
        ; lane 0 of %v1; expects a single INS into b[7] from b[0].
    404 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
    405 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
    406   %copied = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
    407   ret <8 x i8> %copied
    408 }
    409 
    410 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) {
        ; Operand roles swapped: lane 0 is lane 15 of %v1 and lanes 1-15 come from
        ; %v2 (mask 17-31); expects a single INS into b[0] from b[15].
    411 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
    412 ; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
    413   %copied = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    414   ret <16 x i8> %copied
    415 }
    416 
    417 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 {
        ; Splat built by inserting %v1 into each of the 8 lanes in turn; the chain
        ; is expected to collapse into one DUP from a W register.
    418 ; CHECK-LABEL: test_vdup_n_u8:
    419 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
    420   %splat0 = insertelement <8 x i8> undef, i8 %v1, i32 0
    421   %splat1 = insertelement <8 x i8> %splat0, i8 %v1, i32 1
    422   %splat2 = insertelement <8 x i8> %splat1, i8 %v1, i32 2
    423   %splat3 = insertelement <8 x i8> %splat2, i8 %v1, i32 3
    424   %splat4 = insertelement <8 x i8> %splat3, i8 %v1, i32 4
    425   %splat5 = insertelement <8 x i8> %splat4, i8 %v1, i32 5
    426   %splat6 = insertelement <8 x i8> %splat5, i8 %v1, i32 6
    427   %splat7 = insertelement <8 x i8> %splat6, i8 %v1, i32 7
    428   ret <8 x i8> %splat7
    429 }
    430 
    431 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 {
        ; Splat built by inserting %v1 into each of the 4 lanes; expects one DUP from a W register.
    432 ; CHECK-LABEL: test_vdup_n_u16:
    433 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
    434   %splat0 = insertelement <4 x i16> undef, i16 %v1, i32 0
    435   %splat1 = insertelement <4 x i16> %splat0, i16 %v1, i32 1
    436   %splat2 = insertelement <4 x i16> %splat1, i16 %v1, i32 2
    437   %splat3 = insertelement <4 x i16> %splat2, i16 %v1, i32 3
    438   ret <4 x i16> %splat3
    439 }
    440 
    441 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 {
        ; Splat built by inserting %v1 into both lanes; expects one DUP from a W register.
    442 ; CHECK-LABEL: test_vdup_n_u32:
    443 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
    444   %splat0 = insertelement <2 x i32> undef, i32 %v1, i32 0
    445   %splat1 = insertelement <2 x i32> %splat0, i32 %v1, i32 1
    446   ret <2 x i32> %splat1
    447 }
    448 
    449 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 {
        ; Single-lane case: %v1 is placed in lane 0 of an undef <1 x i64>; expects
        ; FMOV from an X register to a D register.
    450 ; CHECK-LABEL: test_vdup_n_u64:
    451 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    452   %splat0 = insertelement <1 x i64> undef, i64 %v1, i32 0
    453   ret <1 x i64> %splat0
    454 }
    455 
    456 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 {
        ; Splat built by inserting %v1 into each of the 16 lanes in turn; the chain
        ; is expected to collapse into one DUP from a W register.
    457 ; CHECK-LABEL: test_vdupq_n_u8:
    458 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
    459   %splat0 = insertelement <16 x i8> undef, i8 %v1, i32 0
    460   %splat1 = insertelement <16 x i8> %splat0, i8 %v1, i32 1
    461   %splat2 = insertelement <16 x i8> %splat1, i8 %v1, i32 2
    462   %splat3 = insertelement <16 x i8> %splat2, i8 %v1, i32 3
    463   %splat4 = insertelement <16 x i8> %splat3, i8 %v1, i32 4
    464   %splat5 = insertelement <16 x i8> %splat4, i8 %v1, i32 5
    465   %splat6 = insertelement <16 x i8> %splat5, i8 %v1, i32 6
    466   %splat7 = insertelement <16 x i8> %splat6, i8 %v1, i32 7
    467   %splat8 = insertelement <16 x i8> %splat7, i8 %v1, i32 8
    468   %splat9 = insertelement <16 x i8> %splat8, i8 %v1, i32 9
    469   %splat10 = insertelement <16 x i8> %splat9, i8 %v1, i32 10
    470   %splat11 = insertelement <16 x i8> %splat10, i8 %v1, i32 11
    471   %splat12 = insertelement <16 x i8> %splat11, i8 %v1, i32 12
    472   %splat13 = insertelement <16 x i8> %splat12, i8 %v1, i32 13
    473   %splat14 = insertelement <16 x i8> %splat13, i8 %v1, i32 14
    474   %splat15 = insertelement <16 x i8> %splat14, i8 %v1, i32 15
    475   ret <16 x i8> %splat15
    476 }
    477 
    478 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 {
        ; Splat built by inserting %v1 into each of the 8 lanes; expects one DUP from a W register.
    479 ; CHECK-LABEL: test_vdupq_n_u16:
    480 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
    481   %splat0 = insertelement <8 x i16> undef, i16 %v1, i32 0
    482   %splat1 = insertelement <8 x i16> %splat0, i16 %v1, i32 1
    483   %splat2 = insertelement <8 x i16> %splat1, i16 %v1, i32 2
    484   %splat3 = insertelement <8 x i16> %splat2, i16 %v1, i32 3
    485   %splat4 = insertelement <8 x i16> %splat3, i16 %v1, i32 4
    486   %splat5 = insertelement <8 x i16> %splat4, i16 %v1, i32 5
    487   %splat6 = insertelement <8 x i16> %splat5, i16 %v1, i32 6
    488   %splat7 = insertelement <8 x i16> %splat6, i16 %v1, i32 7
    489   ret <8 x i16> %splat7
    490 }
    491 
    492 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 {
        ; Splat built by inserting %v1 into each of the 4 lanes; expects one DUP from a W register.
    493 ; CHECK-LABEL: test_vdupq_n_u32:
    494 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
    495   %splat0 = insertelement <4 x i32> undef, i32 %v1, i32 0
    496   %splat1 = insertelement <4 x i32> %splat0, i32 %v1, i32 1
    497   %splat2 = insertelement <4 x i32> %splat1, i32 %v1, i32 2
    498   %splat3 = insertelement <4 x i32> %splat2, i32 %v1, i32 3
    499   ret <4 x i32> %splat3
    500 }
    501 
    502 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 {
        ; Splat built by inserting %v1 into both lanes; expects one DUP from an X register.
    503 ; CHECK-LABEL: test_vdupq_n_u64:
    504 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
    505   %splat0 = insertelement <2 x i64> undef, i64 %v1, i32 0
    506   %splat1 = insertelement <2 x i64> %splat0, i64 %v1, i32 1
    507   ret <2 x i64> %splat1
    508 }
    509 
    510 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 {
        ; All-5 shuffle mask broadcasts byte lane 5; expects a lane-indexed DUP.
    511 ; CHECK-LABEL: test_vdup_lane_s8:
    512 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    513   %splat = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    514   ret <8 x i8> %splat
    515 }
    516 
    517 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 {
        ; All-2 shuffle mask broadcasts halfword lane 2; expects a lane-indexed DUP.
    518 ; CHECK-LABEL: test_vdup_lane_s16:
    519 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    520   %splat = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    521   ret <4 x i16> %splat
    522 }
    523 
    524 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 {
        ; All-1 shuffle mask broadcasts word lane 1; expects a lane-indexed DUP.
    525 ; CHECK-LABEL: test_vdup_lane_s32:
    526 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    527   %splat = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
    528   ret <2 x i32> %splat
    529 }
    530 
    531 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 {
        ; Broadcast byte lane 5 of a 64-bit vector across a 128-bit result. The
        ; pattern is anchored on the DUP mnemonic, like the sibling dup tests, so
        ; it verifies the instruction and not just an operand list.
    532 ; CHECK-LABEL: test_vdupq_lane_s8:
    533 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    534   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    535   ret <16 x i8> %shuffle
    536 }
    537 
    538 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 {
        ; Broadcast halfword lane 2 of a 64-bit vector across a 128-bit result. The
        ; pattern is anchored on the DUP mnemonic, like the sibling dup tests, so
        ; it verifies the instruction and not just an operand list.
    539 ; CHECK-LABEL: test_vdupq_lane_s16:
    540 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    541   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    542   ret <8 x i16> %shuffle
    543 }
    544 
    545 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 {
        ; Broadcast word lane 1 of a 64-bit vector across a 128-bit result. The
        ; pattern is anchored on the DUP mnemonic, like the sibling dup tests, so
        ; it verifies the instruction and not just an operand list.
    546 ; CHECK-LABEL: test_vdupq_lane_s32:
    547 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    548   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    549   ret <4 x i32> %shuffle
    550 }
    551 
    552 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 {
        ; Broadcast the single lane of a <1 x i64> across a 128-bit result. The
        ; pattern is anchored on the DUP mnemonic, like the sibling dup tests, so
        ; it verifies the instruction and not just an operand list.
    553 ; CHECK-LABEL: test_vdupq_lane_s64:
    554 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    555   %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
    556   ret <2 x i64> %shuffle
    557 }
    558 
    559 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 {
        ; Broadcast byte lane 5 of a 128-bit vector into a 64-bit result; expects a lane-indexed DUP.
    560 ; CHECK-LABEL: test_vdup_laneq_s8:
    561 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    562   %splat = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    563   ret <8 x i8> %splat
    564 }
    565 
    566 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 {
        ; Broadcast halfword lane 2 of a 128-bit vector into a 64-bit result; expects a lane-indexed DUP.
    567 ; CHECK-LABEL: test_vdup_laneq_s16:
    568 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    569   %splat = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    570   ret <4 x i16> %splat
    571 }
    572 
    573 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 {
        ; Broadcast word lane 1 of a 128-bit vector into a 64-bit result; expects a lane-indexed DUP.
    574 ; CHECK-LABEL: test_vdup_laneq_s32:
    575 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    576   %splat = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
    577   ret <2 x i32> %splat
    578 }
    579 
    580 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 {
        ; Broadcast byte lane 5 of a 128-bit vector across a 128-bit result; expects a lane-indexed DUP.
    581 ; CHECK-LABEL: test_vdupq_laneq_s8:
    582 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    583   %splat = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    584   ret <16 x i8> %splat
    585 }
    586 
    587 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 {
        ; Broadcast halfword lane 2 of a 128-bit vector across a 128-bit result. The
        ; pattern is anchored on the DUP mnemonic, like the neighbouring laneq
        ; tests, so it verifies the instruction and not just an operand list.
    588 ; CHECK-LABEL: test_vdupq_laneq_s16:
    589 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    590   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    591   ret <8 x i16> %shuffle
    592 }
    593 
    594 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 {
        ; Broadcast word lane 1 of a 128-bit vector across a 128-bit result; expects a lane-indexed DUP.
    595 ; CHECK-LABEL: test_vdupq_laneq_s32:
    596 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    597   %splat = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    598   ret <4 x i32> %splat
    599 }
    600 
    601 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 {
        ; Broadcast doubleword lane 0 (zeroinitializer mask) across both lanes; expects a lane-indexed DUP.
    602 ; CHECK-LABEL: test_vdupq_laneq_s64:
    603 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    604   %splat = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
    605   ret <2 x i64> %splat
    606 }
    607 
    608 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) {
        ; Reinterpret a <8 x i8> as i64; expects FMOV from a D register to an X register.
    609 ; CHECK-LABEL: test_bitcastv8i8toi64:
    610    %cast = bitcast <8 x i8> %in to i64
    611 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    612    ret i64 %cast
    613 }
    614 
    615 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) {
        ; Reinterpret a <4 x i16> as i64; expects FMOV from a D register to an X register.
    616 ; CHECK-LABEL: test_bitcastv4i16toi64:
    617    %cast = bitcast <4 x i16> %in to i64
    618 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    619    ret i64 %cast
    620 }
    621 
    622 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) {
        ; Reinterpret a <2 x i32> as i64; expects FMOV from a D register to an X register.
    623 ; CHECK-LABEL: test_bitcastv2i32toi64:
    624    %cast = bitcast <2 x i32> %in to i64
    625 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    626    ret i64 %cast
    627 }
    628 
    629 define i64 @test_bitcastv2f32toi64(<2 x float> %in) {
        ; Reinterpret a <2 x float> as i64; expects FMOV from a D register to an X register.
    630 ; CHECK-LABEL: test_bitcastv2f32toi64:
    631    %cast = bitcast <2 x float> %in to i64
    632 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    633    ret i64 %cast
    634 }
    635 
    636 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) {
        ; Reinterpret a <1 x i64> as i64; expects FMOV from a D register to an X register.
    637 ; CHECK-LABEL: test_bitcastv1i64toi64:
    638    %cast = bitcast <1 x i64> %in to i64
    639 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    640    ret i64 %cast
    641 }
    642 
    643 define i64 @test_bitcastv1f64toi64(<1 x double> %in) {
        ; Reinterpret a <1 x double> as i64; expects FMOV from a D register to an X register.
    644 ; CHECK-LABEL: test_bitcastv1f64toi64:
    645    %cast = bitcast <1 x double> %in to i64
    646 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    647    ret i64 %cast
    648 }
    649 
    649 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) {
        ; Reinterpret an i64 as <8 x i8>; expects FMOV from an X register to a D register.
    650 ; CHECK-LABEL: test_bitcasti64tov8i8:
    651    %cast = bitcast i64 %in to <8 x i8>
    652 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    653    ret <8 x i8> %cast
    654 }
    656 
    656 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) {
        ; Reinterpret an i64 as <4 x i16>; expects FMOV from an X register to a D register.
    657 ; CHECK-LABEL: test_bitcasti64tov4i16:
    658    %cast = bitcast i64 %in to <4 x i16>
    659 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    660    ret <4 x i16> %cast
    661 }
    663 
    663 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) {
        ; Reinterpret an i64 as <2 x i32>; expects FMOV from an X register to a D register.
    664 ; CHECK-LABEL: test_bitcasti64tov2i32:
    665    %cast = bitcast i64 %in to <2 x i32>
    666 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    667    ret <2 x i32> %cast
    668 }
    670 
    670 define <2 x float> @test_bitcasti64tov2f32(i64 %in) {
        ; Reinterpret an i64 as <2 x float>; expects FMOV from an X register to a D register.
    671 ; CHECK-LABEL: test_bitcasti64tov2f32:
    672    %cast = bitcast i64 %in to <2 x float>
    673 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    674    ret <2 x float> %cast
    675 }
    677 
    677 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) {
        ; Reinterpret an i64 as <1 x i64>; expects FMOV from an X register to a D register.
    678 ; CHECK-LABEL: test_bitcasti64tov1i64:
    679    %cast = bitcast i64 %in to <1 x i64>
    680 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    681    ret <1 x i64> %cast
    682 }
    684 
    684 define <1 x double> @test_bitcasti64tov1f64(i64 %in) {
        ; Reinterpret an i64 as <1 x double>; expects FMOV from an X register to a D register.
    685 ; CHECK-LABEL: test_bitcasti64tov1f64:
    686    %cast = bitcast i64 %in to <1 x double>
    687 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    688    ret <1 x double> %cast
    689 }
    691 
    691 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 {
        ; Negate a <8 x i8>, reinterpret the bits as <1 x double>, then convert to
        ; signed i64. FCVTZS must directly follow NEG, i.e. the bitcast costs nothing.
    692 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
    693 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    694 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    695   %neg = sub <8 x i8> zeroinitializer, %a
    696   %cast = bitcast <8 x i8> %neg to <1 x double>
    697   %cvt = fptosi <1 x double> %cast to <1 x i64>
    698   ret <1 x i64> %cvt
    699 }
    701 
    701 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 {
        ; Negate a <4 x i16>, reinterpret the bits as <1 x double>, then convert to
        ; signed i64. FCVTZS must directly follow NEG, i.e. the bitcast costs nothing.
    702 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
    703 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
    704 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    705   %neg = sub <4 x i16> zeroinitializer, %a
    706   %cast = bitcast <4 x i16> %neg to <1 x double>
    707   %cvt = fptosi <1 x double> %cast to <1 x i64>
    708   ret <1 x i64> %cvt
    709 }
    711 
    711 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 {
        ; Negate a <2 x i32>, reinterpret the bits as <1 x double>, then convert to
        ; signed i64. FCVTZS must directly follow NEG, i.e. the bitcast costs nothing.
    712 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
    713 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    714 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    715   %neg = sub <2 x i32> zeroinitializer, %a
    716   %cast = bitcast <2 x i32> %neg to <1 x double>
    717   %cvt = fptosi <1 x double> %cast to <1 x i64>
    718   ret <1 x i64> %cvt
    719 }
    721 
    721 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 {
        ; Negate a <1 x i64> (scalar NEG on a D register), reinterpret as <1 x double>,
        ; then convert to signed i64. FCVTZS must directly follow NEG.
    722 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
    723 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
    724 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    725   %neg = sub <1 x i64> zeroinitializer, %a
    726   %cast = bitcast <1 x i64> %neg to <1 x double>
    727   %cvt = fptosi <1 x double> %cast to <1 x i64>
    728   ret <1 x i64> %cvt
    729 }
    731 
    731 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 {
        ; Float negate (written as -0.0 - %a), reinterpret as <1 x double>, then
        ; convert to signed i64. FCVTZS must directly follow FNEG.
    732 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
    733 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    734 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    735   %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
    736   %cast = bitcast <2 x float> %neg to <1 x double>
    737   %cvt = fptosi <1 x double> %cast to <1 x i64>
    738   ret <1 x i64> %cvt
    739 }
    741 
    741 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 {
        ; Convert <1 x i64> to double, reinterpret the bits as <8 x i8>, then negate.
        ; NEG must directly follow SCVTF, i.e. the bitcast costs nothing.
    742 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
    743 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    744 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    745   %cvt = sitofp <1 x i64> %a to <1 x double>
    746   %cast = bitcast <1 x double> %cvt to <8 x i8>
    747   %neg = sub <8 x i8> zeroinitializer, %cast
    748   ret <8 x i8> %neg
    749 }
    751 
    751 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 {
        ; Convert <1 x i64> to double, reinterpret the bits as <4 x i16>, then negate.
        ; NEG must directly follow SCVTF, i.e. the bitcast costs nothing.
    752 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
    753 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    754 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
    755   %cvt = sitofp <1 x i64> %a to <1 x double>
    756   %cast = bitcast <1 x double> %cvt to <4 x i16>
    757   %neg = sub <4 x i16> zeroinitializer, %cast
    758   ret <4 x i16> %neg
    759 }
    761 
    ; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
    ; <2 x i32>, and negates each 32-bit lane (0 - lane).
    ; Expected code: scvtf, with a vector neg on .2s lanes on the very next line.
    762 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
    763 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
    764 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    765 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    766   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
    767   %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
    768   %sub.i = sub <2 x i32> zeroinitializer, %1
    769   ret <2 x i32> %sub.i
    770 }
    771 
    ; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
    ; <1 x i64> again, and negates that integer (0 - value).
    ; Expected code: scvtf, with a scalar neg on a d register on the very next line.
    772 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
    773 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
    774 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    775 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
    776   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
    777   %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
    778   %sub.i = sub <1 x i64> zeroinitializer, %1
    779   ret <1 x i64> %sub.i
    780 }
    781 
    ; Converts a signed <1 x i64> to <1 x double>, reinterprets the bits as
    ; <2 x float>, and computes (-0.0 - lane) for each float lane.
    ; Expected code: scvtf, with a vector fneg on .2s lanes on the very next line.
    782 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
    783 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
    784 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    785 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    786   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
    787   %1 = bitcast <1 x double> %vcvt.i to <2 x float>
    788   %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
    789   ret <2 x float> %sub.i
    790 }
    791 
    792 ; Test inserting an element into an undef vector
    ; Inserts an i8 scalar into lane 0 of an otherwise undef <8 x i8>.
    ; Expected code: one fmov from a w register into an s register.
    793 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
    794 ; CHECK-LABEL: scalar_to_vector.v8i8:
    795 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    796   %b = insertelement <8 x i8> undef, i8 %a, i32 0
    797   ret <8 x i8> %b
    798 }
    799 
    ; Inserts an i8 scalar into lane 0 of an otherwise undef <16 x i8>.
    ; Expected code: one fmov from a w register into an s register.
    800 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
    801 ; CHECK-LABEL: scalar_to_vector.v16i8:
    802 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    803   %b = insertelement <16 x i8> undef, i8 %a, i32 0
    804   ret <16 x i8> %b
    805 }
    806 
    ; Inserts an i16 scalar into lane 0 of an otherwise undef <4 x i16>.
    ; Expected code: one fmov from a w register into an s register.
    807 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
    808 ; CHECK-LABEL: scalar_to_vector.v4i16:
    809 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    810   %b = insertelement <4 x i16> undef, i16 %a, i32 0
    811   ret <4 x i16> %b
    812 }
    813 
    ; Inserts an i16 scalar into lane 0 of an otherwise undef <8 x i16>.
    ; Expected code: one fmov from a w register into an s register.
    814 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
    815 ; CHECK-LABEL: scalar_to_vector.v8i16:
    816 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    817   %b = insertelement <8 x i16> undef, i16 %a, i32 0
    818   ret <8 x i16> %b
    819 }
    820 
    ; Inserts an i32 scalar into lane 0 of an otherwise undef <2 x i32>.
    ; Expected code: one fmov from a w register into an s register.
    821 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
    822 ; CHECK-LABEL: scalar_to_vector.v2i32:
    823 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    824   %b = insertelement <2 x i32> undef, i32 %a, i32 0
    825   ret <2 x i32> %b
    826 }
    827 
    ; Inserts an i32 scalar into lane 0 of an otherwise undef <4 x i32>.
    ; Expected code: one fmov from a w register into an s register.
    828 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
    829 ; CHECK-LABEL: scalar_to_vector.v4i32:
    830 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
    831   %b = insertelement <4 x i32> undef, i32 %a, i32 0
    832   ret <4 x i32> %b
    833 }
    834 
    ; Inserts an i64 scalar into lane 0 of an otherwise undef <2 x i64>.
    ; Expected code: one fmov from an x register into a d register (the 64-bit
    ; counterpart of the w-to-s move expected by the narrower variants).
    835 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
    836 ; CHECK-LABEL: scalar_to_vector.v2i64:
    837 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    838   %b = insertelement <2 x i64> undef, i64 %a, i32 0
    839   ret <2 x i64> %b
    840 }
    841 
    ; Extracts the only element of a <1 x i8> and inserts it into every lane
    ; (0 through 7) of an <8 x i8>, i.e. a splat written as an insert chain.
    ; Expected code: a single dup of byte lane 0 of v0 across v0.8b.
    842 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
    843 ; CHECK-LABEL: testDUP.v1i8:
    844 ; CHECK: dup v0.8b, v0.b[0]
    845   %b = extractelement <1 x i8> %a, i32 0
    846   %c = insertelement <8 x i8> undef, i8 %b, i32 0
    847   %d = insertelement <8 x i8> %c, i8 %b, i32 1
    848   %e = insertelement <8 x i8> %d, i8 %b, i32 2
    849   %f = insertelement <8 x i8> %e, i8 %b, i32 3
    850   %g = insertelement <8 x i8> %f, i8 %b, i32 4
    851   %h = insertelement <8 x i8> %g, i8 %b, i32 5
    852   %i = insertelement <8 x i8> %h, i8 %b, i32 6
    853   %j = insertelement <8 x i8> %i, i8 %b, i32 7
    854   ret <8 x i8> %j
    855 }
    856 
    ; Extracts the only element of a <1 x i16> and inserts it into every lane
    ; (0 through 7) of an <8 x i16>, i.e. a splat written as an insert chain.
    ; Expected code: a single dup of halfword lane 0 of v0 across v0.8h.
    857 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
    858 ; CHECK-LABEL: testDUP.v1i16:
    859 ; CHECK: dup v0.8h, v0.h[0]
    860   %b = extractelement <1 x i16> %a, i32 0
    861   %c = insertelement <8 x i16> undef, i16 %b, i32 0
    862   %d = insertelement <8 x i16> %c, i16 %b, i32 1
    863   %e = insertelement <8 x i16> %d, i16 %b, i32 2
    864   %f = insertelement <8 x i16> %e, i16 %b, i32 3
    865   %g = insertelement <8 x i16> %f, i16 %b, i32 4
    866   %h = insertelement <8 x i16> %g, i16 %b, i32 5
    867   %i = insertelement <8 x i16> %h, i16 %b, i32 6
    868   %j = insertelement <8 x i16> %i, i16 %b, i32 7
    869   ret <8 x i16> %j
    870 }
    871 
    ; Extracts the only element of a <1 x i32> and inserts it into every lane
    ; (0 through 3) of a <4 x i32>, i.e. a splat written as an insert chain.
    ; Expected code: a single dup of word lane 0 of v0 across v0.4s.
    872 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
    873 ; CHECK-LABEL: testDUP.v1i32:
    874 ; CHECK: dup v0.4s, v0.s[0]
    875   %b = extractelement <1 x i32> %a, i32 0
    876   %c = insertelement <4 x i32> undef, i32 %b, i32 0
    877   %d = insertelement <4 x i32> %c, i32 %b, i32 1
    878   %e = insertelement <4 x i32> %d, i32 %b, i32 2
    879   %f = insertelement <4 x i32> %e, i32 %b, i32 3
    880   ret <4 x i32> %f
    881 }
    882 
    ; Rebuilds an <8 x i8> from lanes 0-7 of a <16 x i8>, using one
    ; extractelement/insertelement pair per lane (same lane index on both sides).
    ; The FileCheck pattern only requires that a ret is emitted.
    ; NOTE(review): presumably the intent is that taking the low half needs no
    ; extra instructions; the pattern as written does not enforce that - confirm.
    883 define <8 x i8> @getl(<16 x i8> %x) #0 {
    884 ; CHECK-LABEL: getl:
    885 ; CHECK: ret
    886   %vecext = extractelement <16 x i8> %x, i32 0
    887   %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
    888   %vecext1 = extractelement <16 x i8> %x, i32 1
    889   %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
    890   %vecext3 = extractelement <16 x i8> %x, i32 2
    891   %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
    892   %vecext5 = extractelement <16 x i8> %x, i32 3
    893   %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
    894   %vecext7 = extractelement <16 x i8> %x, i32 4
    895   %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
    896   %vecext9 = extractelement <16 x i8> %x, i32 5
    897   %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
    898   %vecext11 = extractelement <16 x i8> %x, i32 6
    899   %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
    900   %vecext13 = extractelement <16 x i8> %x, i32 7
    901   %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
    902   ret <8 x i8> %vecinit14
    903 }
    904 
    ; Extracts lane 1 of a <2 x i32>, truncates it to i16, and inserts the
    ; result into all four lanes of a <4 x i16>.
    ; Expected code: a dup from halfword lane 2 of v0 (2 x the i32 lane index),
    ; so the truncation is expressed purely through the choice of source lane.
    905 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
    906 ; CHECK-LABEL: test_dup_v2i32_v4i16:
    907 ; CHECK: dup v0.4h, v0.h[2]
    908 entry:
    909   %x = extractelement <2 x i32> %a, i32 1
    910   %vget_lane = trunc i32 %x to i16
    911   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
    912   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    913   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    914   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    915   ret <4 x i16> %vecinit3.i
    916 }
    917 
    ; Extracts lane 3 of a <4 x i32>, truncates it to i16, and inserts the
    ; result into all eight lanes of an <8 x i16>.
    ; Expected code: a dup from halfword lane 6 of v0 (2 x the i32 lane index).
    918 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
    919 ; CHECK-LABEL: test_dup_v4i32_v8i16:
    920 ; CHECK: dup v0.8h, v0.h[6]
    921 entry:
    922   %x = extractelement <4 x i32> %a, i32 3
    923   %vget_lane = trunc i32 %x to i16
    924   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
    925   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
    926   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    927   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    928   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
    929   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
    930   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
    931   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
    932   ret <8 x i16> %vecinit7.i
    933 }
    934 
    ; Extracts the only lane of a <1 x i64>, truncates it to i16, and inserts
    ; the result into all four lanes of a <4 x i16>.
    ; Expected code: a dup from halfword lane 0 of v0.
    935 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
    936 ; CHECK-LABEL: test_dup_v1i64_v4i16:
    937 ; CHECK: dup v0.4h, v0.h[0]
    938 entry:
    939   %x = extractelement <1 x i64> %a, i32 0
    940   %vget_lane = trunc i64 %x to i16
    941   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
    942   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    943   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    944   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    945   ret <4 x i16> %vecinit3.i
    946 }
    947 
    ; Extracts the only lane of a <1 x i64>, truncates it to i32, and inserts
    ; the result into both lanes of a <2 x i32>.
    ; Expected code: a dup from word lane 0 of v0.
    948 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
    949 ; CHECK-LABEL: test_dup_v1i64_v2i32:
    950 ; CHECK: dup v0.2s, v0.s[0]
    951 entry:
    952   %x = extractelement <1 x i64> %a, i32 0
    953   %vget_lane = trunc i64 %x to i32
    954   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
    955   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
    956   ret <2 x i32> %vecinit1.i
    957 }
    958 
    ; Extracts lane 1 of a <2 x i64>, truncates it to i16, and inserts the
    ; result into all eight lanes of an <8 x i16>.
    ; Expected code: a dup from halfword lane 4 of v0 (4 x the i64 lane index).
    959 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
    960 ; CHECK-LABEL: test_dup_v2i64_v8i16:
    961 ; CHECK: dup v0.8h, v0.h[4]
    962 entry:
    963   %x = extractelement <2 x i64> %a, i32 1
    964   %vget_lane = trunc i64 %x to i16
    965   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
    966   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
    967   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    968   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    969   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
    970   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
    971   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
    972   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
    973   ret <8 x i16> %vecinit7.i
    974 }
    975 
    ; Extracts lane 1 of a <2 x i64>, truncates it to i32, and inserts the
    ; result into all four lanes of a <4 x i32>.
    ; Expected code: a dup from word lane 2 of v0 (2 x the i64 lane index).
    976 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
    977 ; CHECK-LABEL: test_dup_v2i64_v4i32:
    978 ; CHECK: dup v0.4s, v0.s[2]
    979 entry:
    980   %x = extractelement <2 x i64> %a, i32 1
    981   %vget_lane = trunc i64 %x to i32
    982   %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
    983   %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
    984   %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
    985   %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
    986   ret <4 x i32> %vecinit3.i
    987 }
    988 
    ; Extracts lane 1 of a 128-bit <4 x i32>, truncates it to i16, and inserts
    ; the result into all four lanes of a 64-bit <4 x i16>.
    ; Expected code: a dup from halfword lane 2 of v0 (2 x the i32 lane index)
    ; into the .4h arrangement.
    989 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
    990 ; CHECK-LABEL: test_dup_v4i32_v4i16:
    991 ; CHECK: dup v0.4h, v0.h[2]
    992 entry:
    993   %x = extractelement <4 x i32> %a, i32 1
    994   %vget_lane = trunc i32 %x to i16
    995   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
    996   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    997   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    998   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    999   ret <4 x i16> %vecinit3.i
   1000 }
   1001 
   ; Extracts lane 0 of a 128-bit <2 x i64>, truncates it to i16, and inserts
   ; the result into all four lanes of a 64-bit <4 x i16>.
   ; Expected code: a dup from halfword lane 0 of v0 into the .4h arrangement.
   1002 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
   1003 ; CHECK-LABEL: test_dup_v2i64_v4i16:
   1004 ; CHECK: dup v0.4h, v0.h[0]
   1005 entry:
   1006   %x = extractelement <2 x i64> %a, i32 0
   1007   %vget_lane = trunc i64 %x to i16
   1008   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
   1009   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
   1010   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
   1011   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
   1012   ret <4 x i16> %vecinit3.i
   1013 }
   1014 
   ; Extracts lane 0 of a 128-bit <2 x i64>, truncates it to i32, and inserts
   ; the result into both lanes of a 64-bit <2 x i32>.
   ; Expected code: a dup from word lane 0 of v0 into the .2s arrangement.
   1015 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
   1016 ; CHECK-LABEL: test_dup_v2i64_v2i32:
   1017 ; CHECK: dup v0.2s, v0.s[0]
   1018 entry:
   1019   %x = extractelement <2 x i64> %a, i32 0
   1020   %vget_lane = trunc i64 %x to i32
   1021   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
   1022   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
   1023   ret <2 x i32> %vecinit1.i
   1024 }
   1025 
   1026 
   ; Calls the fmaxv intrinsic on a <2 x float>, round-trips the float result
   ; through a <1 x float> (insert then extract of lane 0), and inserts it into
   ; lane 0 of an otherwise undef <2 x float>.
   ; Expected code: fmaxp into an s register with ret on the very next line,
   ; i.e. no additional instruction between the reduction and the return.
   1027 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
   1028 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
   1029 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
   1030 ; CHECK-NEXT: ret
   1031 entry:
   1032   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
   1033   %1 = insertelement <1 x float> undef, float %0, i32 0
   1034   %2 = extractelement <1 x float> %1, i32 0
   1035   %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
   1036   ret <2 x float> %vecinit1.i
   1037 }
   1038 
   ; Calls the fmaxv intrinsic on a <2 x float>, round-trips the float result
   ; through a <1 x float> (insert then extract of lane 0), and inserts it into
   ; lane 0 of an otherwise undef <4 x float>.
   ; Expected code: fmaxp into an s register with ret on the very next line,
   ; i.e. no additional instruction between the reduction and the return.
   1039 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
   1040 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
   1041 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
   1042 ; CHECK-NEXT: ret
   1043 entry:
   1044   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
   1045   %1 = insertelement <1 x float> undef, float %0, i32 0
   1046   %2 = extractelement <1 x float> %1, i32 0
   1047   %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
   1048   ret <4 x float> %vecinit1.i
   1049 }
   1050 
   1051 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
   1052 
   ; Extracts lane 0 of a <2 x i32> and inserts it into lane 1 of an undef
   ; <2 x i32>; lane 0 of the result is left undef.
   ; Expected code: a dup of word lane 0 across a .2s vector.
   1053 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
   1054 ; CHECK-LABEL: test_concat_undef_v1i32:
   1055 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
   1056 entry:
   1057   %0 = extractelement <2 x i32> %a, i32 0
   1058   %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
   1059   ret <2 x i32> %vecinit1.i
   1060 }
   1061 
   1062 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
   1063 
   ; Applies the scalar sqabs.i32 intrinsic to an i32 and inserts the result
   ; into lane 0 of an undef <2 x i32>; lane 1 of the result is left undef.
   ; Expected code: sqabs on s registers with ret on the very next line, i.e.
   ; no additional instruction to place the scalar in the vector.
   1064 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
   1065 ; CHECK-LABEL: test_concat_v1i32_undef:
   1066 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
   1067 ; CHECK-NEXT: ret
   1068 entry:
   1069   %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
   1070   %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
   1071   ret <2 x i32> %vecinit.i432
   1072 }
   1073 
   ; Extracts lane 0 of a <2 x i32> and inserts that same value into both
   ; lanes of a new <2 x i32>.
   ; Expected code: a dup of word lane 0 across a .2s vector.
   1074 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
   1075 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
   1076 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
   1077 entry:
   1078   %0 = extractelement <2 x i32> %a, i32 0
   1079   %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
   1080   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
   1081   ret <2 x i32> %vecinit1.i
   1082 }
   1083 
   ; Applies the scalar sqabs.i32 intrinsic to two different i32 arguments,
   ; places each result in lane 0 of its own undef <2 x i32>, and combines them
   ; with a shuffle whose mask <0, 2> selects lane 0 of each operand.
   ; Expected code: two sqabs instructions on s registers, then an ins that
   ; writes word lane 1 from a w register.
   1084 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
   1085 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
   1086 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
   1087 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
   1088 ; CHECK: ins {{v[0-9]+}}.s[1], w{{[0-9]+}}
   1089 entry:
   1090   %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
   1091   %d = insertelement <2 x i32> undef, i32 %c, i32 0
   1092   %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
   1093   %f = insertelement <2 x i32> undef, i32 %e, i32 0
   1094   %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
   1095   ret <2 x i32> %h
   1096 }
   1097 
   ; Builds a <16 x i8> from the low eight lanes of %x (mask indices 0-7)
   ; followed by the low eight lanes of %y (mask indices 16-23).
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1098 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
   1099 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
   1100 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1101 entry:
   1102   %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
   1103   ret <16 x i8> %vecinit30
   1104 }
   1105 
   ; Builds a <16 x i8> whose low eight lanes are the eight lanes of the 64-bit
   ; %x (copied one lane at a time) and whose high eight lanes are the low
   ; eight lanes of the 128-bit %y (selected by the final shuffle).
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1106 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
   1107 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
   1108 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1109 entry:
   ; Lanes 0-7: element-by-element copy of %x into a widened vector.
   1110   %vecext = extractelement <8 x i8> %x, i32 0
   1111   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
   1112   %vecext1 = extractelement <8 x i8> %x, i32 1
   1113   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
   1114   %vecext3 = extractelement <8 x i8> %x, i32 2
   1115   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
   1116   %vecext5 = extractelement <8 x i8> %x, i32 3
   1117   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
   1118   %vecext7 = extractelement <8 x i8> %x, i32 4
   1119   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
   1120   %vecext9 = extractelement <8 x i8> %x, i32 5
   1121   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
   1122   %vecext11 = extractelement <8 x i8> %x, i32 6
   1123   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
   1124   %vecext13 = extractelement <8 x i8> %x, i32 7
   1125   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
   ; Lanes 8-15: mask indices 16-23 select lanes 0-7 of %y.
   1126   %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
   1127   ret <16 x i8> %vecinit30
   1128 }
   1129 
   ; Builds a <16 x i8> whose low eight lanes are lanes 0-7 of the 128-bit %x
   ; and whose high eight lanes are the eight lanes of the 64-bit %y, all
   ; copied one lane at a time with extractelement/insertelement pairs.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1130 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
   1131 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
   1132 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1133 entry:
   ; Lanes 0-7 of the result come from lanes 0-7 of %x.
   1134   %vecext = extractelement <16 x i8> %x, i32 0
   1135   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
   1136   %vecext1 = extractelement <16 x i8> %x, i32 1
   1137   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
   1138   %vecext3 = extractelement <16 x i8> %x, i32 2
   1139   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
   1140   %vecext5 = extractelement <16 x i8> %x, i32 3
   1141   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
   1142   %vecext7 = extractelement <16 x i8> %x, i32 4
   1143   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
   1144   %vecext9 = extractelement <16 x i8> %x, i32 5
   1145   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
   1146   %vecext11 = extractelement <16 x i8> %x, i32 6
   1147   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
   1148   %vecext13 = extractelement <16 x i8> %x, i32 7
   1149   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
   ; Lanes 8-15 of the result come from lanes 0-7 of %y.
   1150   %vecext15 = extractelement <8 x i8> %y, i32 0
   1151   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
   1152   %vecext17 = extractelement <8 x i8> %y, i32 1
   1153   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
   1154   %vecext19 = extractelement <8 x i8> %y, i32 2
   1155   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
   1156   %vecext21 = extractelement <8 x i8> %y, i32 3
   1157   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
   1158   %vecext23 = extractelement <8 x i8> %y, i32 4
   1159   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
   1160   %vecext25 = extractelement <8 x i8> %y, i32 5
   1161   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
   1162   %vecext27 = extractelement <8 x i8> %y, i32 6
   1163   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
   1164   %vecext29 = extractelement <8 x i8> %y, i32 7
   1165   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
   1166   ret <16 x i8> %vecinit30
   1167 }
   1168 
   ; Concatenates two 64-bit <8 x i8> vectors into a <16 x i8>: %x fills lanes
   ; 0-7 and %y fills lanes 8-15, all copied one lane at a time with
   ; extractelement/insertelement pairs.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1169 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
   1170 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
   1171 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1172 entry:
   ; Lanes 0-7 of the result come from %x.
   1173   %vecext = extractelement <8 x i8> %x, i32 0
   1174   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
   1175   %vecext1 = extractelement <8 x i8> %x, i32 1
   1176   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
   1177   %vecext3 = extractelement <8 x i8> %x, i32 2
   1178   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
   1179   %vecext5 = extractelement <8 x i8> %x, i32 3
   1180   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
   1181   %vecext7 = extractelement <8 x i8> %x, i32 4
   1182   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
   1183   %vecext9 = extractelement <8 x i8> %x, i32 5
   1184   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
   1185   %vecext11 = extractelement <8 x i8> %x, i32 6
   1186   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
   1187   %vecext13 = extractelement <8 x i8> %x, i32 7
   1188   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
   ; Lanes 8-15 of the result come from %y.
   1189   %vecext15 = extractelement <8 x i8> %y, i32 0
   1190   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
   1191   %vecext17 = extractelement <8 x i8> %y, i32 1
   1192   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
   1193   %vecext19 = extractelement <8 x i8> %y, i32 2
   1194   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
   1195   %vecext21 = extractelement <8 x i8> %y, i32 3
   1196   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
   1197   %vecext23 = extractelement <8 x i8> %y, i32 4
   1198   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
   1199   %vecext25 = extractelement <8 x i8> %y, i32 5
   1200   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
   1201   %vecext27 = extractelement <8 x i8> %y, i32 6
   1202   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
   1203   %vecext29 = extractelement <8 x i8> %y, i32 7
   1204   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
   1205   ret <16 x i8> %vecinit30
   1206 }
   1207 
   ; Builds an <8 x i16> from the low four lanes of %x (mask indices 0-3)
   ; followed by the low four lanes of %y (mask indices 8-11).
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1208 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
   1209 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
   1210 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1211 entry:
   1212   %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   1213   ret <8 x i16> %vecinit14
   1214 }
   1215 
   ; Builds an <8 x i16> whose low four lanes are the four lanes of the 64-bit
   ; %x (copied one lane at a time) and whose high four lanes are the low four
   ; lanes of the 128-bit %y (selected by the final shuffle).
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1216 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
   1217 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
   1218 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1219 entry:
   1220   %vecext = extractelement <4 x i16> %x, i32 0
   1221   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
   1222   %vecext1 = extractelement <4 x i16> %x, i32 1
   1223   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
   1224   %vecext3 = extractelement <4 x i16> %x, i32 2
   1225   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
   1226   %vecext5 = extractelement <4 x i16> %x, i32 3
   1227   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
   ; Lanes 4-7: mask indices 8-11 select lanes 0-3 of %y.
   1228   %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   1229   ret <8 x i16> %vecinit14
   1230 }
   1231 
   ; Builds an <8 x i16> whose low four lanes are lanes 0-3 of the 128-bit %x
   ; and whose high four lanes are the four lanes of the 64-bit %y, all copied
   ; one lane at a time with extractelement/insertelement pairs.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1232 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
   1233 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
   1234 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1235 entry:
   ; Lanes 0-3 of the result come from lanes 0-3 of %x.
   1236   %vecext = extractelement <8 x i16> %x, i32 0
   1237   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
   1238   %vecext1 = extractelement <8 x i16> %x, i32 1
   1239   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
   1240   %vecext3 = extractelement <8 x i16> %x, i32 2
   1241   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
   1242   %vecext5 = extractelement <8 x i16> %x, i32 3
   1243   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
   ; Lanes 4-7 of the result come from lanes 0-3 of %y.
   1244   %vecext7 = extractelement <4 x i16> %y, i32 0
   1245   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
   1246   %vecext9 = extractelement <4 x i16> %y, i32 1
   1247   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
   1248   %vecext11 = extractelement <4 x i16> %y, i32 2
   1249   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
   1250   %vecext13 = extractelement <4 x i16> %y, i32 3
   1251   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
   1252   ret <8 x i16> %vecinit14
   1253 }
   1254 
   ; Concatenates two 64-bit <4 x i16> vectors into an <8 x i16>: %x fills
   ; lanes 0-3 and %y fills lanes 4-7, all copied one lane at a time with
   ; extractelement/insertelement pairs.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1255 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
   1256 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
   1257 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1258 entry:
   ; Lanes 0-3 of the result come from %x.
   1259   %vecext = extractelement <4 x i16> %x, i32 0
   1260   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
   1261   %vecext1 = extractelement <4 x i16> %x, i32 1
   1262   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
   1263   %vecext3 = extractelement <4 x i16> %x, i32 2
   1264   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
   1265   %vecext5 = extractelement <4 x i16> %x, i32 3
   1266   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
   ; Lanes 4-7 of the result come from %y.
   1267   %vecext7 = extractelement <4 x i16> %y, i32 0
   1268   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
   1269   %vecext9 = extractelement <4 x i16> %y, i32 1
   1270   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
   1271   %vecext11 = extractelement <4 x i16> %y, i32 2
   1272   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
   1273   %vecext13 = extractelement <4 x i16> %y, i32 3
   1274   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
   1275   ret <8 x i16> %vecinit14
   1276 }
   1277 
   ; Builds a <4 x i32> from the low two lanes of %x (mask indices 0-1)
   ; followed by the low two lanes of %y (mask indices 4-5).
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1278 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
   1279 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
   1280 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1281 entry:
   1282   %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   1283   ret <4 x i32> %vecinit6
   1284 }
   1285 
   ; Builds a <4 x i32> whose low two lanes are the two lanes of the 64-bit %x
   ; (copied one lane at a time) and whose high two lanes are the low two
   ; lanes of the 128-bit %y (mask indices 4-5 of the final shuffle).
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1286 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
   1287 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
   1288 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1289 entry:
   1290   %vecext = extractelement <2 x i32> %x, i32 0
   1291   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   1292   %vecext1 = extractelement <2 x i32> %x, i32 1
   1293   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
   1294   %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   1295   ret <4 x i32> %vecinit6
   1296 }
   1297 
   ; Builds a <4 x i32> whose low two lanes are lanes 0-1 of the 128-bit %x
   ; and whose high two lanes are the two lanes of the 64-bit %y, all copied
   ; one lane at a time with extractelement/insertelement pairs.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1298 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
   1299 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
   1300 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1301 entry:
   1302   %vecext = extractelement <4 x i32> %x, i32 0
   1303   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   1304   %vecext1 = extractelement <4 x i32> %x, i32 1
   1305   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
   1306   %vecext3 = extractelement <2 x i32> %y, i32 0
   1307   %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
   1308   %vecext5 = extractelement <2 x i32> %y, i32 1
   1309   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
   1310   ret <4 x i32> %vecinit6
   1311 }
   1312 
   ; Concatenates two 64-bit <2 x i32> vectors into a <4 x i32> with a single
   ; widening shuffle: mask <0, 1, 2, 3> takes both lanes of %x, then both
   ; lanes of %y.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1313 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
   1314 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
   1315 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1316 entry:
   1317   %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1318   ret <4 x i32> %vecinit6
   1319 }
   1320 
   ; Builds a <2 x i64> from lane 0 of %x and lane 0 of %y (shuffle mask
   ; <0, 2>).
   ; Expected code: a zip1 on the .2d arrangement, unlike most of the other
   ; test_concat_* functions in this file, which expect an ins of d[0] into d[1].
   1321 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
   1322 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
   1323 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   1324 entry:
   1325   %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
   1326   ret <2 x i64> %vecinit2
   1327 }
   1328 
   ; Places the only lane of the 64-bit %x in lane 0 of an undef <2 x i64>,
   ; then shuffles with %y using mask <0, 2>, so the result is
   ; { %x[0], %y[0] }.
   ; Expected code: a zip1 on the .2d arrangement.
   1329 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
   1330 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
   1331 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
   1332 entry:
   1333   %vecext = extractelement <1 x i64> %x, i32 0
   1334   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
   1335   %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
   1336   ret <2 x i64> %vecinit2
   1337 }
   1338 
   ; Builds a <2 x i64> from lane 0 of the 128-bit %x and the only lane of the
   ; 64-bit %y, using extractelement/insertelement pairs.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1339 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
   1340 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
   1341 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1342 entry:
   1343   %vecext = extractelement <2 x i64> %x, i32 0
   1344   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
   1345   %vecext1 = extractelement <1 x i64> %y, i32 0
   1346   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
   1347   ret <2 x i64> %vecinit2
   1348 }
   1349 
   ; Concatenates two <1 x i64> vectors into a <2 x i64>: the only lane of %x
   ; goes to lane 0 and the only lane of %y goes to lane 1.
   ; Expected code: an ins that copies doubleword lane 0 of one vector into
   ; doubleword lane 1 of another.
   1350 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
   1351 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
   1352 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   1353 entry:
   1354   %vecext = extractelement <1 x i64> %x, i32 0
   1355   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
   1356   %vecext1 = extractelement <1 x i64> %y, i32 0
   1357   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
   1358   ret <2 x i64> %vecinit2
   1359 }
   1360 
   1361 
   ; Splats the single lane of a constant-zero <1 x i16> into a <4 x i16>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a movi of #0 into a d register.
   1362 define <4 x i16> @concat_vector_v4i16_const() {
   1363 ; CHECK-LABEL: concat_vector_v4i16_const:
   1364 ; CHECK: movi {{d[0-9]+}}, #0
   1365  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
   1366  ret <4 x i16> %r
   1367 }
   1368 
   ; Splats the single lane of the constant <1 x i16> <1> into a <4 x i16>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a movi of #0x1 into the .4h arrangement.
   1369 define <4 x i16> @concat_vector_v4i16_const_one() {
   1370 ; CHECK-LABEL: concat_vector_v4i16_const_one:
   1371 ; CHECK: movi {{v[0-9]+}}.4h, #0x1
   1372  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
   1373  ret <4 x i16> %r
   1374 }
   1375 
   ; Splats the single lane of a constant-zero <1 x i32> into a <4 x i32>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a movi of #0 into the .2d arrangement.
   1376 define <4 x i32> @concat_vector_v4i32_const() {
   1377 ; CHECK-LABEL: concat_vector_v4i32_const:
   1378 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1379  %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
   1380  ret <4 x i32> %r
   1381 }
   1382 
   ; Splats the single lane of a constant-zero <1 x i8> into an <8 x i8>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a movi of #0 into a d register.
   1383 define <8 x i8> @concat_vector_v8i8_const() {
   1384 ; CHECK-LABEL: concat_vector_v8i8_const:
   1385 ; CHECK: movi {{d[0-9]+}}, #0
   1386  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
   1387  ret <8 x i8> %r
   1388 }
   1389 
   ; Splats the single lane of a constant-zero <1 x i16> into an <8 x i16>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a movi of #0 into the .2d arrangement.
   1390 define <8 x i16> @concat_vector_v8i16_const() {
   1391 ; CHECK-LABEL: concat_vector_v8i16_const:
   1392 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1393  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
   1394  ret <8 x i16> %r
   1395 }
   1396 
   ; Splats the single lane of the constant <1 x i16> <1> into an <8 x i16>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a movi of #0x1 into the .8h arrangement.
   1397 define <8 x i16> @concat_vector_v8i16_const_one() {
   1398 ; CHECK-LABEL: concat_vector_v8i16_const_one:
   1399 ; CHECK: movi {{v[0-9]+}}.8h, #0x1
   1400  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
   1401  ret <8 x i16> %r
   1402 }
   1403 
   ; Splats the single lane of a constant-zero <1 x i8> into a <16 x i8>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a movi of #0 into the .2d arrangement.
   1404 define <16 x i8> @concat_vector_v16i8_const() {
   1405 ; CHECK-LABEL: concat_vector_v16i8_const:
   1406 ; CHECK: movi {{v[0-9]+}}.2d, #0
   1407  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
   1408  ret <16 x i8> %r
   1409 }
   1410 
   ; Splats the single lane of the <1 x i16> argument into a <4 x i16>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a dup of halfword lane 0 of v0 across v0.4h.
   1411 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
   1412 ; CHECK-LABEL: concat_vector_v4i16:
   1413 ; CHECK: dup v0.4h, v0.h[0]
   1414  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
   1415  ret <4 x i16> %r
   1416 }
   1417 
   ; Splats the single lane of the <1 x i32> argument into a <4 x i32>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a dup of word lane 0 of v0 across v0.4s.
   1418 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
   1419 ; CHECK-LABEL: concat_vector_v4i32:
   1420 ; CHECK: dup v0.4s, v0.s[0]
   1421  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
   1422  ret <4 x i32> %r
   1423 }
   1424 
   ; Splats the single lane of the <1 x i8> argument into an <8 x i8>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a dup of byte lane 0 of v0 across v0.8b.
   1425 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
   1426 ; CHECK-LABEL: concat_vector_v8i8:
   1427 ; CHECK: dup v0.8b, v0.b[0]
   1428  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
   1429  ret <8 x i8> %r
   1430 }
   1431 
   ; Splats the single lane of the <1 x i16> argument into an <8 x i16>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a dup of halfword lane 0 of v0 across v0.8h.
   1432 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
   1433 ; CHECK-LABEL: concat_vector_v8i16:
   1434 ; CHECK: dup v0.8h, v0.h[0]
   1435  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
   1436  ret <8 x i16> %r
   1437 }
   1438 
   ; Splats the single lane of the <1 x i8> argument into a <16 x i8>
   ; (the all-zero shuffle mask selects lane 0 for every result lane).
   ; Expected code: a dup of byte lane 0 of v0 across v0.16b.
   1439 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
   1440 ; CHECK-LABEL: concat_vector_v16i8:
   1441 ; CHECK: dup v0.16b, v0.b[0]
   1442  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
   1443  ret <16 x i8> %r
   1444 }
   1445