Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
      2 
      3 
      4 define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) { ; insert scalar %tmp2 into byte lane 15
      5 ; CHECK-LABEL: ins16bw:
      6 ; CHECK: ins {{v[0-9]+}}.b[15], {{w[0-9]+}}
      7   %ins = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15
      8   ret <16 x i8> %ins
      9 }
     10 
     11 define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) { ; insert scalar %tmp2 into halfword lane 6
     12 ; CHECK-LABEL: ins8hw:
     13 ; CHECK: ins {{v[0-9]+}}.h[6], {{w[0-9]+}}
     14   %ins = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6
     15   ret <8 x i16> %ins
     16 }
     17 
     18 define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) { ; insert scalar %tmp2 into word lane 2
     19 ; CHECK-LABEL: ins4sw:
     20 ; CHECK: ins {{v[0-9]+}}.s[2], {{w[0-9]+}}
     21   %ins = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2
     22   ret <4 x i32> %ins
     23 }
     24 
     25 define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) { ; insert 64-bit scalar %tmp2 into lane 1
     26 ; CHECK-LABEL: ins2dw:
     27 ; CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}}
     28   %ins = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1
     29   ret <2 x i64> %ins
     30 }
     31 
     32 define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) { ; insert scalar %tmp2 into byte lane 5 of a 64-bit vector
     33 ; CHECK-LABEL: ins8bw:
     34 ; CHECK: ins {{v[0-9]+}}.b[5], {{w[0-9]+}}
     35   %ins = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5
     36   ret <8 x i8> %ins
     37 }
     38 
     39 define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) { ; insert scalar %tmp2 into halfword lane 3 of a 64-bit vector
     40 ; CHECK-LABEL: ins4hw:
     41 ; CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
     42   %ins = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3
     43   ret <4 x i16> %ins
     44 }
     45 
     46 define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { ; insert scalar %tmp2 into word lane 1 of a 64-bit vector
     47 ; CHECK-LABEL: ins2sw:
     48 ; CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
     49   %ins = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
     50   ret <2 x i32> %ins
     51 }
     52 
     53 define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) { ; copy byte lane 2 of %tmp1 into lane 15 of %tmp2
     54 ; CHECK-LABEL: ins16b16:
     55 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
     56   %elt = extractelement <16 x i8> %tmp1, i32 2
     57   %ins = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
     58   ret <16 x i8> %ins
     59 }
     60 
     61 define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) { ; copy halfword lane 2 of %tmp1 into lane 7 of %tmp2
     62 ; CHECK-LABEL: ins8h8:
     63 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
     64   %elt = extractelement <8 x i16> %tmp1, i32 2
     65   %ins = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
     66   ret <8 x i16> %ins
     67 }
     68 
     69 define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) { ; copy word lane 2 of %tmp1 into lane 1 of %tmp2
     70 ; CHECK-LABEL: ins4s4:
     71 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     72   %elt = extractelement <4 x i32> %tmp1, i32 2
     73   %ins = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
     74   ret <4 x i32> %ins
     75 }
     76 
     77 define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) { ; copy doubleword lane 0 of %tmp1 into lane 1 of %tmp2
     78 ; CHECK-LABEL: ins2d2:
     79 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     80   %elt = extractelement <2 x i64> %tmp1, i32 0
     81   %ins = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
     82   ret <2 x i64> %ins
     83 }
     84 
     85 define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) { ; copy float lane 2 of %tmp1 into lane 1 of %tmp2
     86 ; CHECK-LABEL: ins4f4:
     87 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
     88   %elt = extractelement <4 x float> %tmp1, i32 2
     89   %ins = insertelement <4 x float> %tmp2, float %elt, i32 1
     90   ret <4 x float> %ins
     91 }
     92 
     93 define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { ; copy double lane 0 of %tmp1 into lane 1 of %tmp2
     94 ; CHECK-LABEL: ins2df2:
     95 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
     96   %elt = extractelement <2 x double> %tmp1, i32 0
     97   %ins = insertelement <2 x double> %tmp2, double %elt, i32 1
     98   ret <2 x double> %ins
     99 }
    100 
    101 define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { ; copy byte lane 2 of 64-bit %tmp1 into lane 15 of 128-bit %tmp2
    102 ; CHECK-LABEL: ins8b16:
    103 ; CHECK: ins {{v[0-9]+}}.b[15], {{v[0-9]+}}.b[2]
    104   %elt = extractelement <8 x i8> %tmp1, i32 2
    105   %ins = insertelement <16 x i8> %tmp2, i8 %elt, i32 15
    106   ret <16 x i8> %ins
    107 }
    108 
    109 define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) { ; copy halfword lane 2 of 64-bit %tmp1 into lane 7 of 128-bit %tmp2
    110 ; CHECK-LABEL: ins4h8:
    111 ; CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[2]
    112   %elt = extractelement <4 x i16> %tmp1, i32 2
    113   %ins = insertelement <8 x i16> %tmp2, i16 %elt, i32 7
    114   ret <8 x i16> %ins
    115 }
    116 
    117 define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) { ; copy word lane 1 of 64-bit %tmp1 into lane 1 of 128-bit %tmp2
    118 ; CHECK-LABEL: ins2s4:
    119 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    120   %elt = extractelement <2 x i32> %tmp1, i32 1
    121   %ins = insertelement <4 x i32> %tmp2, i32 %elt, i32 1
    122   ret <4 x i32> %ins
    123 }
    124 
    125 define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) { ; copy the single i64 of %tmp1 into lane 1 of %tmp2
    126 ; CHECK-LABEL: ins1d2:
    127 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    128   %elt = extractelement <1 x i64> %tmp1, i32 0
    129   %ins = insertelement <2 x i64> %tmp2, i64 %elt, i32 1
    130   ret <2 x i64> %ins
    131 }
    132 
    133 define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { ; copy float lane 1 of 64-bit %tmp1 into lane 1 of 128-bit %tmp2
    134 ; CHECK-LABEL: ins2f4:
    135 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[1]
    136   %elt = extractelement <2 x float> %tmp1, i32 1
    137   %ins = insertelement <4 x float> %tmp2, float %elt, i32 1
    138   ret <4 x float> %ins
    139 }
    140 
    141 define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { ; copy the single double of %tmp1 into lane 1 of %tmp2
    142 ; CHECK-LABEL: ins1f2:
    143 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    144   %elt = extractelement <1 x double> %tmp1, i32 0
    145   %ins = insertelement <2 x double> %tmp2, double %elt, i32 1
    146   ret <2 x double> %ins
    147 }
    148 
    149 define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { ; copy byte lane 2 of 128-bit %tmp1 into lane 7 of 64-bit %tmp2
    150 ; CHECK-LABEL: ins16b8:
    151 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[2]
    152   %elt = extractelement <16 x i8> %tmp1, i32 2
    153   %ins = insertelement <8 x i8> %tmp2, i8 %elt, i32 7
    154   ret <8 x i8> %ins
    155 }
    156 
    157 define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) { ; copy halfword lane 2 of 128-bit %tmp1 into lane 3 of 64-bit %tmp2
    158 ; CHECK-LABEL: ins8h4:
    159 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    160   %elt = extractelement <8 x i16> %tmp1, i32 2
    161   %ins = insertelement <4 x i16> %tmp2, i16 %elt, i32 3
    162   ret <4 x i16> %ins
    163 }
    164 
    165 define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) { ; copy word lane 2 of 128-bit %tmp1 into lane 1 of 64-bit %tmp2
    166 ; CHECK-LABEL: ins4s2:
    167 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    168   %elt = extractelement <4 x i32> %tmp1, i32 2
    169   %ins = insertelement <2 x i32> %tmp2, i32 %elt, i32 1
    170   ret <2 x i32> %ins
    171 }
    172 
    173 define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) { ; copy lane 0 of %tmp1 into the only lane of %tmp2
    174 ; CHECK-LABEL: ins2d1:
    175 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    176   %elt = extractelement <2 x i64> %tmp1, i32 0
    177   %ins = insertelement <1 x i64> %tmp2, i64 %elt, i32 0
    178   ret <1 x i64> %ins
    179 }
    180 
    181 define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { ; copy float lane 2 of 128-bit %tmp1 into lane 1 of 64-bit %tmp2
    182 ; CHECK-LABEL: ins4f2:
    183 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[2]
    184   %elt = extractelement <4 x float> %tmp1, i32 2
    185   %ins = insertelement <2 x float> %tmp2, float %elt, i32 1
    186   ret <2 x float> %ins
    187 }
    188 
    189 define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { ; copy double lane 1 of %tmp1 into the only lane of %tmp2
    190 ; CHECK-LABEL: ins2f1:
    191 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
    192   %elt = extractelement <2 x double> %tmp1, i32 1
    193   %ins = insertelement <1 x double> %tmp2, double %elt, i32 0
    194   ret <1 x double> %ins
    195 }
    196 
    197 define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { ; copy byte lane 2 of %tmp1 into lane 4 of %tmp2
    198 ; CHECK-LABEL: ins8b8:
    199 ; CHECK: ins {{v[0-9]+}}.b[4], {{v[0-9]+}}.b[2]
    200   %elt = extractelement <8 x i8> %tmp1, i32 2
    201   %ins = insertelement <8 x i8> %tmp2, i8 %elt, i32 4
    202   ret <8 x i8> %ins
    203 }
    204 
    205 define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) { ; copy halfword lane 2 of %tmp1 into lane 3 of %tmp2
    206 ; CHECK-LABEL: ins4h4:
    207 ; CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[2]
    208   %elt = extractelement <4 x i16> %tmp1, i32 2
    209   %ins = insertelement <4 x i16> %tmp2, i16 %elt, i32 3
    210   ret <4 x i16> %ins
    211 }
    212 
    213 define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) { ; copy word lane 0 of %tmp1 into lane 1 of %tmp2
    214 ; CHECK-LABEL: ins2s2:
    215 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    216   %elt = extractelement <2 x i32> %tmp1, i32 0
    217   %ins = insertelement <2 x i32> %tmp2, i32 %elt, i32 1
    218   ret <2 x i32> %ins
    219 }
    220 
    221 define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) { ; copy the single i64 lane of %tmp1 into %tmp2
    222 ; CHECK-LABEL: ins1d1:
    223 ; CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0]
    224   %elt = extractelement <1 x i64> %tmp1, i32 0
    225   %ins = insertelement <1 x i64> %tmp2, i64 %elt, i32 0
    226   ret <1 x i64> %ins
    227 }
    228 
    229 define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) { ; copy float lane 0 of %tmp1 into lane 1 of %tmp2
    230 ; CHECK-LABEL: ins2f2:
    231 ; CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0]
    232   %elt = extractelement <2 x float> %tmp1, i32 0
    233   %ins = insertelement <2 x float> %tmp2, float %elt, i32 1
    234   ret <2 x float> %ins
    235 }
    236 
    237 define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) { ; whole-vector copy of a <1 x double>; no ins is expected
    238 ; CHECK-LABEL: ins1df1:
    239 ; CHECK-NOT: ins {{v[0-9]+}}
    240   %elt = extractelement <1 x double> %tmp1, i32 0
    241   %ins = insertelement <1 x double> %tmp2, double %elt, i32 0
    242   ret <1 x double> %ins
    243 }
    244 
    245 define i32 @umovw16b(<16 x i8> %tmp1) { ; zero-extend byte lane 8 to i32
    246 ; CHECK-LABEL: umovw16b:
    247 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    248   %elt = extractelement <16 x i8> %tmp1, i32 8
    249   %zext = zext i8 %elt to i32
    250   ret i32 %zext
    251 }
    252 
    253 define i32 @umovw8h(<8 x i16> %tmp1) { ; zero-extend halfword lane 2 to i32
    254 ; CHECK-LABEL: umovw8h:
    255 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    256   %elt = extractelement <8 x i16> %tmp1, i32 2
    257   %zext = zext i16 %elt to i32
    258   ret i32 %zext
    259 }
    260 
    261 define i32 @umovw4s(<4 x i32> %tmp1) { ; move word lane 2 to a general register
    262 ; CHECK-LABEL: umovw4s:
    263 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[2]
    264   %elt = extractelement <4 x i32> %tmp1, i32 2
    265   ret i32 %elt
    266 }
    267 
    268 define i64 @umovx2d(<2 x i64> %tmp1) { ; move doubleword lane 1 to a general register
    269 ; CHECK-LABEL: umovx2d:
    270 ; CHECK: mov {{x[0-9]+}}, {{v[0-9]+}}.d[1]
    271   %elt = extractelement <2 x i64> %tmp1, i32 1
    272   ret i64 %elt
    273 }
    274 
    275 define i32 @umovw8b(<8 x i8> %tmp1) { ; zero-extend byte lane 7 to i32; pin umov so smov/fmov cannot match
    276 ; CHECK-LABEL: umovw8b:
    277 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7]
    278   %tmp3 = extractelement <8 x i8> %tmp1, i32 7
    279   %tmp4 = zext i8 %tmp3 to i32
    280   ret i32 %tmp4
    281 }
    282 
    283 define i32 @umovw4h(<4 x i16> %tmp1) { ; zero-extend halfword lane 2 to i32; pin umov so smov/fmov cannot match
    284 ; CHECK-LABEL: umovw4h:
    285 ; CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    286   %tmp3 = extractelement <4 x i16> %tmp1, i32 2
    287   %tmp4 = zext i16 %tmp3 to i32
    288   ret i32 %tmp4
    289 }
    290 
    291 define i32 @umovw2s(<2 x i32> %tmp1) { ; move word lane 1 of a 64-bit vector to a general register
    292 ; CHECK-LABEL: umovw2s:
    293 ; CHECK: mov {{w[0-9]+}}, {{v[0-9]+}}.s[1]
    294   %elt = extractelement <2 x i32> %tmp1, i32 1
    295   ret i32 %elt
    296 }
    297 
    298 define i64 @umovx1d(<1 x i64> %tmp1) { ; move the single i64 lane to a general register via fmov
    299 ; CHECK-LABEL: umovx1d:
    300 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    301   %elt = extractelement <1 x i64> %tmp1, i32 0
    302   ret i64 %elt
    303 }
    304 
    305 define i32 @smovw16b(<16 x i8> %tmp1) { ; sign-extend byte lane 8 to i32, then add it to itself
    306 ; CHECK-LABEL: smovw16b:
    307 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[8]
    308   %elt = extractelement <16 x i8> %tmp1, i32 8
    309   %sext = sext i8 %elt to i32
    310   %sum = add i32 %sext, %sext
    311   ret i32 %sum
    312 }
    313 
    314 define i32 @smovw8h(<8 x i16> %tmp1) { ; sign-extend halfword lane 2 to i32, then add it to itself
    315 ; CHECK-LABEL: smovw8h:
    316 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    317   %elt = extractelement <8 x i16> %tmp1, i32 2
    318   %sext = sext i16 %elt to i32
    319   %sum = add i32 %sext, %sext
    320   ret i32 %sum
    321 }
    322 
    323 define i32 @smovx16b(<16 x i8> %tmp1) { ; sign-extend byte lane 8 to i32 and double it; CHECK accepts an x or w destination
    324 ; CHECK-LABEL: smovx16b:
    325 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8]
    326   %elt = extractelement <16 x i8> %tmp1, i32 8
    327   %sext = sext i8 %elt to i32
    328   %sum = add i32 %sext, %sext
    329   ret i32 %sum
    330 }
    331 
    332 define i32 @smovx8h(<8 x i16> %tmp1) { ; sign-extend halfword lane 2 to i32; CHECK accepts an x or w destination
    333 ; CHECK-LABEL: smovx8h:
    334 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
    335   %elt = extractelement <8 x i16> %tmp1, i32 2
    336   %sext = sext i16 %elt to i32
    337   ret i32 %sext
    338 }
    339 
    340 define i64 @smovx4s(<4 x i32> %tmp1) { ; sign-extend word lane 2 to i64
    341 ; CHECK-LABEL: smovx4s:
    342 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2]
    343   %elt = extractelement <4 x i32> %tmp1, i32 2
    344   %sext = sext i32 %elt to i64
    345   ret i64 %sext
    346 }
    347 
    348 define i32 @smovw8b(<8 x i8> %tmp1) { ; sign-extend byte lane 4 of a 64-bit vector to i32, then add it to itself
    349 ; CHECK-LABEL: smovw8b:
    350 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[4]
    351   %elt = extractelement <8 x i8> %tmp1, i32 4
    352   %sext = sext i8 %elt to i32
    353   %sum = add i32 %sext, %sext
    354   ret i32 %sum
    355 }
    356 
    357 define i32 @smovw4h(<4 x i16> %tmp1) { ; sign-extend halfword lane 2 of a 64-bit vector to i32, then add it to itself
    358 ; CHECK-LABEL: smovw4h:
    359 ; CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[2]
    360   %elt = extractelement <4 x i16> %tmp1, i32 2
    361   %sext = sext i16 %elt to i32
    362   %sum = add i32 %sext, %sext
    363   ret i32 %sum
    364 }
    365 
    366 define i32 @smovx8b(<8 x i8> %tmp1) { ; sign-extend byte lane 6 to i32; CHECK accepts an x or w destination
    367 ; CHECK-LABEL: smovx8b:
    368 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[6]
    369   %elt = extractelement <8 x i8> %tmp1, i32 6
    370   %sext = sext i8 %elt to i32
    371   ret i32 %sext
    372 }
    373 
    374 define i32 @smovx4h(<4 x i16> %tmp1) { ; sign-extend halfword lane 2 to i32; CHECK accepts an x or w destination
    375 ; CHECK-LABEL: smovx4h:
    376 ; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2]
    377   %elt = extractelement <4 x i16> %tmp1, i32 2
    378   %sext = sext i16 %elt to i32
    379   ret i32 %sext
    380 }
    381 
    382 define i64 @smovx2s(<2 x i32> %tmp1) { ; sign-extend word lane 1 of a 64-bit vector to i64
    383 ; CHECK-LABEL: smovx2s:
    384 ; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1]
    385   %elt = extractelement <2 x i32> %tmp1, i32 1
    386   %sext = sext i32 %elt to i64
    387   ret i64 %sext
    388 }
    389 
    390 define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) { ; shuffle keeps %v1 except lane 5, taken from lane 3 of %v2
    391 ; CHECK-LABEL: test_vcopy_lane_s8:
    392 ; CHECK: ins  {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
    393   %copy = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7>
    394   ret <8 x i8> %copy
    395 }
    396 
    397 define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) { ; shuffle keeps %v1 except lane 14, taken from lane 6 of %v2
    398 ; CHECK-LABEL: test_vcopyq_laneq_s8:
    399 ; CHECK: ins  {{v[0-9]+}}.b[14], {{v[0-9]+}}.b[6]
    400   %copy = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15>
    401   ret <16 x i8> %copy
    402 }
    403 
    404 define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) { ; shuffle keeps %v2 except lane 7, taken from lane 0 of %v1
    405 ; CHECK-LABEL: test_vcopy_lane_swap_s8:
    406 ; CHECK: ins {{v[0-9]+}}.b[7], {{v[0-9]+}}.b[0]
    407   %copy = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
    408   ret <8 x i8> %copy
    409 }
    410 
    411 define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) { ; shuffle keeps %v2 except lane 0, taken from lane 15 of %v1
    412 ; CHECK-LABEL: test_vcopyq_laneq_swap_s8:
    413 ; CHECK: ins {{v[0-9]+}}.b[0], {{v[0-9]+}}.b[15]
    414   %copy = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    415   ret <16 x i8> %copy
    416 }
    417 
    418 define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 { ; build an 8-lane splat of %v1 with an insertelement chain
    419 ; CHECK-LABEL: test_vdup_n_u8:
    420 ; CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
    421   %lane0 = insertelement <8 x i8> undef, i8 %v1, i32 0
    422   %lane1 = insertelement <8 x i8> %lane0, i8 %v1, i32 1
    423   %lane2 = insertelement <8 x i8> %lane1, i8 %v1, i32 2
    424   %lane3 = insertelement <8 x i8> %lane2, i8 %v1, i32 3
    425   %lane4 = insertelement <8 x i8> %lane3, i8 %v1, i32 4
    426   %lane5 = insertelement <8 x i8> %lane4, i8 %v1, i32 5
    427   %lane6 = insertelement <8 x i8> %lane5, i8 %v1, i32 6
    428   %lane7 = insertelement <8 x i8> %lane6, i8 %v1, i32 7
    429   ret <8 x i8> %lane7
    430 }
    431 
    432 define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 { ; build a 4-lane splat of %v1 with an insertelement chain
    433 ; CHECK-LABEL: test_vdup_n_u16:
    434 ; CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
    435   %lane0 = insertelement <4 x i16> undef, i16 %v1, i32 0
    436   %lane1 = insertelement <4 x i16> %lane0, i16 %v1, i32 1
    437   %lane2 = insertelement <4 x i16> %lane1, i16 %v1, i32 2
    438   %lane3 = insertelement <4 x i16> %lane2, i16 %v1, i32 3
    439   ret <4 x i16> %lane3
    440 }
    441 
    442 define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 { ; build a 2-lane splat of %v1 with an insertelement chain
    443 ; CHECK-LABEL: test_vdup_n_u32:
    444 ; CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
    445   %lane0 = insertelement <2 x i32> undef, i32 %v1, i32 0
    446   %lane1 = insertelement <2 x i32> %lane0, i32 %v1, i32 1
    447   ret <2 x i32> %lane1
    448 }
    449 
    450 define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 { ; place %v1 in the only lane of a <1 x i64>; expected as fmov
    451 ; CHECK-LABEL: test_vdup_n_u64:
    452 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    453   %lane0 = insertelement <1 x i64> undef, i64 %v1, i32 0
    454   ret <1 x i64> %lane0
    455 }
    456 
    457 define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 { ; build a 16-lane splat of %v1 with an insertelement chain
    458 ; CHECK-LABEL: test_vdupq_n_u8:
    459 ; CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
    460   %lane0 = insertelement <16 x i8> undef, i8 %v1, i32 0
    461   %lane1 = insertelement <16 x i8> %lane0, i8 %v1, i32 1
    462   %lane2 = insertelement <16 x i8> %lane1, i8 %v1, i32 2
    463   %lane3 = insertelement <16 x i8> %lane2, i8 %v1, i32 3
    464   %lane4 = insertelement <16 x i8> %lane3, i8 %v1, i32 4
    465   %lane5 = insertelement <16 x i8> %lane4, i8 %v1, i32 5
    466   %lane6 = insertelement <16 x i8> %lane5, i8 %v1, i32 6
    467   %lane7 = insertelement <16 x i8> %lane6, i8 %v1, i32 7
    468   %lane8 = insertelement <16 x i8> %lane7, i8 %v1, i32 8
    469   %lane9 = insertelement <16 x i8> %lane8, i8 %v1, i32 9
    470   %lane10 = insertelement <16 x i8> %lane9, i8 %v1, i32 10
    471   %lane11 = insertelement <16 x i8> %lane10, i8 %v1, i32 11
    472   %lane12 = insertelement <16 x i8> %lane11, i8 %v1, i32 12
    473   %lane13 = insertelement <16 x i8> %lane12, i8 %v1, i32 13
    474   %lane14 = insertelement <16 x i8> %lane13, i8 %v1, i32 14
    475   %lane15 = insertelement <16 x i8> %lane14, i8 %v1, i32 15
    476   ret <16 x i8> %lane15
    477 }
    478 
    479 define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 { ; build an 8-lane splat of %v1 with an insertelement chain
    480 ; CHECK-LABEL: test_vdupq_n_u16:
    481 ; CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
    482   %lane0 = insertelement <8 x i16> undef, i16 %v1, i32 0
    483   %lane1 = insertelement <8 x i16> %lane0, i16 %v1, i32 1
    484   %lane2 = insertelement <8 x i16> %lane1, i16 %v1, i32 2
    485   %lane3 = insertelement <8 x i16> %lane2, i16 %v1, i32 3
    486   %lane4 = insertelement <8 x i16> %lane3, i16 %v1, i32 4
    487   %lane5 = insertelement <8 x i16> %lane4, i16 %v1, i32 5
    488   %lane6 = insertelement <8 x i16> %lane5, i16 %v1, i32 6
    489   %lane7 = insertelement <8 x i16> %lane6, i16 %v1, i32 7
    490   ret <8 x i16> %lane7
    491 }
    492 
    493 define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 { ; build a 4-lane splat of %v1 with an insertelement chain
    494 ; CHECK-LABEL: test_vdupq_n_u32:
    495 ; CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
    496   %lane0 = insertelement <4 x i32> undef, i32 %v1, i32 0
    497   %lane1 = insertelement <4 x i32> %lane0, i32 %v1, i32 1
    498   %lane2 = insertelement <4 x i32> %lane1, i32 %v1, i32 2
    499   %lane3 = insertelement <4 x i32> %lane2, i32 %v1, i32 3
    500   ret <4 x i32> %lane3
    501 }
    502 
    503 define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 { ; build a 2-lane splat of %v1 with an insertelement chain
    504 ; CHECK-LABEL: test_vdupq_n_u64:
    505 ; CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
    506   %lane0 = insertelement <2 x i64> undef, i64 %v1, i32 0
    507   %lane1 = insertelement <2 x i64> %lane0, i64 %v1, i32 1
    508   ret <2 x i64> %lane1
    509 }
    510 
    511 define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 { ; broadcast byte lane 5 of %v1 to all 8 lanes
    512 ; CHECK-LABEL: test_vdup_lane_s8:
    513 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    514   %dup = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    515   ret <8 x i8> %dup
    516 }
    517 
    518 define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 { ; broadcast halfword lane 2 of %v1 to all 4 lanes
    519 ; CHECK-LABEL: test_vdup_lane_s16:
    520 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    521   %dup = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    522   ret <4 x i16> %dup
    523 }
    524 
    525 define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 { ; broadcast word lane 1 of %v1 to both lanes
    526 ; CHECK-LABEL: test_vdup_lane_s32:
    527 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    528   %dup = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
    529   ret <2 x i32> %dup
    530 }
    531 
    532 define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 { ; broadcast byte lane 5 of 64-bit %v1 to 16 lanes; the check pins the dup mnemonic
    533 ; CHECK-LABEL: test_vdupq_lane_s8:
    534 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    535   %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    536   ret <16 x i8> %shuffle
    537 }
    538 
    539 define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 { ; broadcast halfword lane 2 of 64-bit %v1 to 8 lanes; the check pins the dup mnemonic
    540 ; CHECK-LABEL: test_vdupq_lane_s16:
    541 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    542   %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    543   ret <8 x i16> %shuffle
    544 }
    545 
    546 define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 { ; broadcast word lane 1 of 64-bit %v1 to 4 lanes; the check pins the dup mnemonic
    547 ; CHECK-LABEL: test_vdupq_lane_s32:
    548 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    549   %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    550   ret <4 x i32> %shuffle
    551 }
    552 
    553 define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 { ; broadcast the single i64 lane of %v1 to 2 lanes; the check pins the dup mnemonic
    554 ; CHECK-LABEL: test_vdupq_lane_s64:
    555 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    556   %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer
    557   ret <2 x i64> %shuffle
    558 }
    559 
    560 define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 { ; broadcast byte lane 5 of 128-bit %v1 to 8 lanes
    561 ; CHECK-LABEL: test_vdup_laneq_s8:
    562 ; CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
    563   %dup = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    564   ret <8 x i8> %dup
    565 }
    566 
    567 define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 { ; broadcast halfword lane 2 of 128-bit %v1 to 4 lanes
    568 ; CHECK-LABEL: test_vdup_laneq_s16:
    569 ; CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
    570   %dup = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    571   ret <4 x i16> %dup
    572 }
    573 
    574 define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 { ; broadcast word lane 1 of 128-bit %v1 to 2 lanes
    575 ; CHECK-LABEL: test_vdup_laneq_s32:
    576 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
    577   %dup = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
    578   ret <2 x i32> %dup
    579 }
    580 
    581 define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 { ; broadcast byte lane 5 of %v1 to all 16 lanes
    582 ; CHECK-LABEL: test_vdupq_laneq_s8:
    583 ; CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
    584   %dup = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    585   ret <16 x i8> %dup
    586 }
    587 
    588 define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 { ; broadcast halfword lane 2 of %v1 to all 8 lanes; the check pins the dup mnemonic
    589 ; CHECK-LABEL: test_vdupq_laneq_s16:
    590 ; CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
    591   %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
    592   ret <8 x i16> %shuffle
    593 }
    594 
    595 define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 { ; broadcast word lane 1 of %v1 to all 4 lanes
    596 ; CHECK-LABEL: test_vdupq_laneq_s32:
    597 ; CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
    598   %dup = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
    599   ret <4 x i32> %dup
    600 }
    601 
    602 define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 { ; broadcast doubleword lane 0 of %v1 to both lanes
    603 ; CHECK-LABEL: test_vdupq_laneq_s64:
    604 ; CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
    605   %dup = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer
    606   ret <2 x i64> %dup
    607 }
    608 
    609 define i64 @test_bitcastv8i8toi64(<8 x i8> %in) { ; reinterpret a <8 x i8> vector as a scalar i64
    610 ; CHECK-LABEL: test_bitcastv8i8toi64:
    611    %cast = bitcast <8 x i8> %in to i64
    612 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    613    ret i64 %cast
    614 }
    615 
    616 define i64 @test_bitcastv4i16toi64(<4 x i16> %in) { ; reinterpret a <4 x i16> vector as a scalar i64
    617 ; CHECK-LABEL: test_bitcastv4i16toi64:
    618    %cast = bitcast <4 x i16> %in to i64
    619 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    620    ret i64 %cast
    621 }
    622 
    623 define i64 @test_bitcastv2i32toi64(<2 x i32> %in) { ; reinterpret a <2 x i32> vector as a scalar i64
    624 ; CHECK-LABEL: test_bitcastv2i32toi64:
    625    %cast = bitcast <2 x i32> %in to i64
    626 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    627    ret i64 %cast
    628 }
    629 
    630 define i64 @test_bitcastv2f32toi64(<2 x float> %in) { ; reinterpret a <2 x float> vector as a scalar i64
    631 ; CHECK-LABEL: test_bitcastv2f32toi64:
    632    %cast = bitcast <2 x float> %in to i64
    633 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    634    ret i64 %cast
    635 }
    636 
    637 define i64 @test_bitcastv1i64toi64(<1 x i64> %in) { ; reinterpret a <1 x i64> vector as a scalar i64
    638 ; CHECK-LABEL: test_bitcastv1i64toi64:
    639    %cast = bitcast <1 x i64> %in to i64
    640 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    641    ret i64 %cast
    642 }
    643 
    644 define i64 @test_bitcastv1f64toi64(<1 x double> %in) { ; reinterpret a <1 x double> vector as a scalar i64
    645 ; CHECK-LABEL: test_bitcastv1f64toi64:
    646    %cast = bitcast <1 x double> %in to i64
    647 ; CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}}
    648    ret i64 %cast
    649 }
    650 
    651 define <8 x i8> @test_bitcasti64tov8i8(i64 %in) { ; reinterpret a scalar i64 as a <8 x i8> vector
    652 ; CHECK-LABEL: test_bitcasti64tov8i8:
    653    %cast = bitcast i64 %in to <8 x i8>
    654 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    655    ret <8 x i8> %cast
    656 }
    657 
    658 define <4 x i16> @test_bitcasti64tov4i16(i64 %in) { ; reinterpret a scalar i64 as a <4 x i16> vector
    659 ; CHECK-LABEL: test_bitcasti64tov4i16:
    660    %cast = bitcast i64 %in to <4 x i16>
    661 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    662    ret <4 x i16> %cast
    663 }
    664 
    665 define <2 x i32> @test_bitcasti64tov2i32(i64 %in) { ; reinterpret a scalar i64 as a <2 x i32> vector
    666 ; CHECK-LABEL: test_bitcasti64tov2i32:
    667    %cast = bitcast i64 %in to <2 x i32>
    668 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    669    ret <2 x i32> %cast
    670 }
    671 
    672 define <2 x float> @test_bitcasti64tov2f32(i64 %in) { ; reinterpret a scalar i64 as a <2 x float> vector
    673 ; CHECK-LABEL: test_bitcasti64tov2f32:
    674    %cast = bitcast i64 %in to <2 x float>
    675 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    676    ret <2 x float> %cast
    677 }
    678 
    679 define <1 x i64> @test_bitcasti64tov1i64(i64 %in) { ; reinterpret a scalar i64 as a <1 x i64> vector
    680 ; CHECK-LABEL: test_bitcasti64tov1i64:
    681    %cast = bitcast i64 %in to <1 x i64>
    682 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    683    ret <1 x i64> %cast
    684 }
    685 
    686 define <1 x double> @test_bitcasti64tov1f64(i64 %in) { ; reinterpret a scalar i64 as a <1 x double> vector
    687 ; CHECK-LABEL: test_bitcasti64tov1f64:
    688    %cast = bitcast i64 %in to <1 x double>
    689 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
    690    ret <1 x double> %cast
    691 }
    692 
    693 define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { ; negate <8 x i8>, bitcast to <1 x double>, convert to <1 x i64>
    694 ; CHECK-LABEL: test_bitcastv8i8tov1f64:
    695 ; CHECK: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    696 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    697   %neg = sub <8 x i8> zeroinitializer, %a
    698   %cast = bitcast <8 x i8> %neg to <1 x double>
    699   %cvt = fptosi <1 x double> %cast to <1 x i64>
    700   ret <1 x i64> %cvt
    701 }
    702 
    703 define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { ; negate <4 x i16>, bitcast to <1 x double>, convert to <1 x i64>
    704 ; CHECK-LABEL: test_bitcastv4i16tov1f64:
    705 ; CHECK: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
    706 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    707   %neg = sub <4 x i16> zeroinitializer, %a
    708   %cast = bitcast <4 x i16> %neg to <1 x double>
    709   %cvt = fptosi <1 x double> %cast to <1 x i64>
    710   ret <1 x i64> %cvt
    711 }
    712 
    713 define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { ; negate <2 x i32>, bitcast to <1 x double>, convert to <1 x i64>
    714 ; CHECK-LABEL: test_bitcastv2i32tov1f64:
    715 ; CHECK: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    716 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    717   %neg = sub <2 x i32> zeroinitializer, %a
    718   %cast = bitcast <2 x i32> %neg to <1 x double>
    719   %cvt = fptosi <1 x double> %cast to <1 x i64>
    720   ret <1 x i64> %cvt
    721 }
    722 
    723 define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { ; negate <1 x i64>, bitcast to <1 x double>, convert to <1 x i64>
    724 ; CHECK-LABEL: test_bitcastv1i64tov1f64:
    725 ; CHECK: neg {{d[0-9]+}}, {{d[0-9]+}}
    726 ; CHECK-NEXT: fcvtzs {{[dx][0-9]+}}, {{d[0-9]+}}
    727   %neg = sub <1 x i64> zeroinitializer, %a
    728   %cast = bitcast <1 x i64> %neg to <1 x double>
    729   %cvt = fptosi <1 x double> %cast to <1 x i64>
    730   ret <1 x i64> %cvt
    731 }
    732 
    733 define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 { ; fneg <2 x float> (as -0.0 - %a), bitcast to <1 x double>, convert to <1 x i64>
    734 ; CHECK-LABEL: test_bitcastv2f32tov1f64:
    735 ; CHECK: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    736 ; CHECK-NEXT: fcvtzs {{[xd][0-9]+}}, {{d[0-9]+}}
    737   %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
    738   %cast = bitcast <2 x float> %neg to <1 x double>
    739   %cvt = fptosi <1 x double> %cast to <1 x i64>
    740   ret <1 x i64> %cvt
    741 }
    742 
    743 define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 { ; convert <1 x i64> to <1 x double>, bitcast to <8 x i8>, negate
    744 ; CHECK-LABEL: test_bitcastv1f64tov8i8:
    745 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    746 ; CHECK-NEXT: neg {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
    747   %cvt = sitofp <1 x i64> %a to <1 x double>
    748   %cast = bitcast <1 x double> %cvt to <8 x i8>
    749   %neg = sub <8 x i8> zeroinitializer, %cast
    750   ret <8 x i8> %neg
    751 }
    752 
    753 define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 { ; convert <1 x i64> to <1 x double>, bitcast to <4 x i16>, negate
    754 ; CHECK-LABEL: test_bitcastv1f64tov4i16:
    755 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
    756 ; CHECK-NEXT: neg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
    757   %cvt = sitofp <1 x i64> %a to <1 x double>
    758   %cast = bitcast <1 x double> %cvt to <4 x i16>
    759   %neg = sub <4 x i16> zeroinitializer, %cast
    760   ret <4 x i16> %neg
    761 }
    762 
     763 define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 {
         ; Converts %a to <1 x double> with sitofp, bitcasts that to <2 x i32>, and
         ; negates it lane-wise (sub from zero).
         ; Expected: scvtf immediately followed by a vector neg on .2s.
     764 ; CHECK-LABEL: test_bitcastv1f64tov2i32:
     765 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
     766 ; CHECK-NEXT: neg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     767   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
     768   %1 = bitcast <1 x double> %vcvt.i to <2 x i32>
     769   %sub.i = sub <2 x i32> zeroinitializer, %1
     770   ret <2 x i32> %sub.i
     771 }
    772 
     773 define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 {
         ; Converts %a to <1 x double> with sitofp, bitcasts that to <1 x i64>, and
         ; negates it (sub from zero).
         ; Expected: scvtf immediately followed by a scalar neg on d registers.
     774 ; CHECK-LABEL: test_bitcastv1f64tov1i64:
     775 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
     776 ; CHECK-NEXT: neg {{d[0-9]+}}, {{d[0-9]+}}
     777   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
     778   %1 = bitcast <1 x double> %vcvt.i to <1 x i64>
     779   %sub.i = sub <1 x i64> zeroinitializer, %1
     780   ret <1 x i64> %sub.i
     781 }
    782 
     783 define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 {
         ; Converts %a to <1 x double> with sitofp, bitcasts that to <2 x float>,
         ; and negates it lane-wise (fsub from -0.0 in both lanes).
         ; Expected: scvtf immediately followed by a vector fneg on .2s.
     784 ; CHECK-LABEL: test_bitcastv1f64tov2f32:
     785 ; CHECK: scvtf {{d[0-9]+}}, {{[xd][0-9]+}}
     786 ; CHECK-NEXT: fneg {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
     787   %vcvt.i = sitofp <1 x i64> %a to <1 x double>
     788   %1 = bitcast <1 x double> %vcvt.i to <2 x float>
     789   %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1
     790   ret <2 x float> %sub.i
     791 }
    792 
     793 ; Test inserting a scalar into lane 0 of an undef vector
     794 define <8 x i8> @scalar_to_vector.v8i8(i8 %a) {
         ; Inserts the i8 argument into lane 0 of an undef <8 x i8>.
         ; Expected: a single fmov from a w register into an s register.
     795 ; CHECK-LABEL: scalar_to_vector.v8i8:
     796 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     797   %b = insertelement <8 x i8> undef, i8 %a, i32 0
     798   ret <8 x i8> %b
     799 }
    800 
     801 define <16 x i8> @scalar_to_vector.v16i8(i8 %a) {
         ; Inserts the i8 argument into lane 0 of an undef <16 x i8>.
         ; Expected: a single fmov from a w register into an s register.
     802 ; CHECK-LABEL: scalar_to_vector.v16i8:
     803 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     804   %b = insertelement <16 x i8> undef, i8 %a, i32 0
     805   ret <16 x i8> %b
     806 }
    807 
     808 define <4 x i16> @scalar_to_vector.v4i16(i16 %a) {
         ; Inserts the i16 argument into lane 0 of an undef <4 x i16>.
         ; Expected: a single fmov from a w register into an s register.
     809 ; CHECK-LABEL: scalar_to_vector.v4i16:
     810 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     811   %b = insertelement <4 x i16> undef, i16 %a, i32 0
     812   ret <4 x i16> %b
     813 }
    814 
     815 define <8 x i16> @scalar_to_vector.v8i16(i16 %a) {
         ; Inserts the i16 argument into lane 0 of an undef <8 x i16>.
         ; Expected: a single fmov from a w register into an s register.
     816 ; CHECK-LABEL: scalar_to_vector.v8i16:
     817 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     818   %b = insertelement <8 x i16> undef, i16 %a, i32 0
     819   ret <8 x i16> %b
     820 }
    821 
     822 define <2 x i32> @scalar_to_vector.v2i32(i32 %a) {
         ; Inserts the i32 argument into lane 0 of an undef <2 x i32>.
         ; Expected: a single fmov from a w register into an s register.
     823 ; CHECK-LABEL: scalar_to_vector.v2i32:
     824 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     825   %b = insertelement <2 x i32> undef, i32 %a, i32 0
     826   ret <2 x i32> %b
     827 }
    828 
     829 define <4 x i32> @scalar_to_vector.v4i32(i32 %a) {
         ; Inserts the i32 argument into lane 0 of an undef <4 x i32>.
         ; Expected: a single fmov from a w register into an s register.
     830 ; CHECK-LABEL: scalar_to_vector.v4i32:
     831 ; CHECK: fmov {{s[0-9]+}}, {{w[0-9]+}}
     832   %b = insertelement <4 x i32> undef, i32 %a, i32 0
     833   ret <4 x i32> %b
     834 }
    835 
     836 define <2 x i64> @scalar_to_vector.v2i64(i64 %a) {
         ; Inserts the i64 argument into lane 0 of an undef <2 x i64>.
         ; Expected: a single fmov from an x register into a d register (the
         ; 64-bit counterpart of the w -> s moves used for narrower scalars).
     837 ; CHECK-LABEL: scalar_to_vector.v2i64:
     838 ; CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
     839   %b = insertelement <2 x i64> undef, i64 %a, i32 0
     840   ret <2 x i64> %b
     841 }
    842 
     843 define <8 x i8> @testDUP.v1i8(<1 x i8> %a) {
         ; Extracts the only element of the <1 x i8> argument and inserts it into
         ; every lane (0-7) of an <8 x i8>, i.e. a splat built one lane at a time.
         ; Expected: the insert chain collapses to one dup of byte lane 0 in v0.
     844 ; CHECK-LABEL: testDUP.v1i8:
     845 ; CHECK: dup v0.8b, v0.b[0]
     846   %b = extractelement <1 x i8> %a, i32 0
     847   %c = insertelement <8 x i8> undef, i8 %b, i32 0
     848   %d = insertelement <8 x i8> %c, i8 %b, i32 1
     849   %e = insertelement <8 x i8> %d, i8 %b, i32 2
     850   %f = insertelement <8 x i8> %e, i8 %b, i32 3
     851   %g = insertelement <8 x i8> %f, i8 %b, i32 4
     852   %h = insertelement <8 x i8> %g, i8 %b, i32 5
     853   %i = insertelement <8 x i8> %h, i8 %b, i32 6
     854   %j = insertelement <8 x i8> %i, i8 %b, i32 7
     855   ret <8 x i8> %j
     856 }
    857 
     858 define <8 x i16> @testDUP.v1i16(<1 x i16> %a) {
         ; Extracts the only element of the <1 x i16> argument and inserts it into
         ; every lane (0-7) of an <8 x i16>, i.e. a splat built one lane at a time.
         ; Expected: the insert chain collapses to one dup of halfword lane 0 in v0.
     859 ; CHECK-LABEL: testDUP.v1i16:
     860 ; CHECK: dup v0.8h, v0.h[0]
     861   %b = extractelement <1 x i16> %a, i32 0
     862   %c = insertelement <8 x i16> undef, i16 %b, i32 0
     863   %d = insertelement <8 x i16> %c, i16 %b, i32 1
     864   %e = insertelement <8 x i16> %d, i16 %b, i32 2
     865   %f = insertelement <8 x i16> %e, i16 %b, i32 3
     866   %g = insertelement <8 x i16> %f, i16 %b, i32 4
     867   %h = insertelement <8 x i16> %g, i16 %b, i32 5
     868   %i = insertelement <8 x i16> %h, i16 %b, i32 6
     869   %j = insertelement <8 x i16> %i, i16 %b, i32 7
     870   ret <8 x i16> %j
     871 }
    872 
     873 define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
         ; Extracts the only element of the <1 x i32> argument and inserts it into
         ; every lane (0-3) of a <4 x i32>, i.e. a splat built one lane at a time.
         ; Expected: the insert chain collapses to one dup of word lane 0 in v0.
     874 ; CHECK-LABEL: testDUP.v1i32:
     875 ; CHECK: dup v0.4s, v0.s[0]
     876   %b = extractelement <1 x i32> %a, i32 0
     877   %c = insertelement <4 x i32> undef, i32 %b, i32 0
     878   %d = insertelement <4 x i32> %c, i32 %b, i32 1
     879   %e = insertelement <4 x i32> %d, i32 %b, i32 2
     880   %f = insertelement <4 x i32> %e, i32 %b, i32 3
     881   ret <4 x i32> %f
     882 }
    883 
     884 define <8 x i8> @getl(<16 x i8> %x) #0 {
         ; Rebuilds an <8 x i8> from lanes 0-7 of the <16 x i8> argument, copying
         ; lane i of %x into lane i of the result one element at a time.
         ; The pattern only requires that a ret follows the label; presumably the
         ; intent is that the low half needs no extra instructions (not enforced
         ; by the pattern itself).
     885 ; CHECK-LABEL: getl:
     886 ; CHECK: ret
     887   %vecext = extractelement <16 x i8> %x, i32 0
     888   %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
     889   %vecext1 = extractelement <16 x i8> %x, i32 1
     890   %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
     891   %vecext3 = extractelement <16 x i8> %x, i32 2
     892   %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
     893   %vecext5 = extractelement <16 x i8> %x, i32 3
     894   %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
     895   %vecext7 = extractelement <16 x i8> %x, i32 4
     896   %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
     897   %vecext9 = extractelement <16 x i8> %x, i32 5
     898   %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
     899   %vecext11 = extractelement <16 x i8> %x, i32 6
     900   %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
     901   %vecext13 = extractelement <16 x i8> %x, i32 7
     902   %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
     903   ret <8 x i8> %vecinit14
     904 }
    905 
     906 define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) {
         ; Extracts lane 1 of the <2 x i32> argument, truncates it to i16, and
         ; inserts that value into all four lanes of a <4 x i16>.
         ; Expected: one dup from halfword lane 2 of v0 (presumably the low
         ; halfword of 32-bit lane 1), with no separate extract or truncate.
     907 ; CHECK-LABEL: test_dup_v2i32_v4i16:
     908 ; CHECK: dup v0.4h, v0.h[2]
     909 entry:
     910   %x = extractelement <2 x i32> %a, i32 1
     911   %vget_lane = trunc i32 %x to i16
     912   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
     913   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
     914   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     915   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
     916   ret <4 x i16> %vecinit3.i
     917 }
    918 
     919 define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) {
         ; Extracts lane 3 of the <4 x i32> argument, truncates it to i16, and
         ; inserts that value into all eight lanes of an <8 x i16>.
         ; Expected: one dup from halfword lane 6 of v0 (presumably the low
         ; halfword of 32-bit lane 3).
     920 ; CHECK-LABEL: test_dup_v4i32_v8i16:
     921 ; CHECK: dup v0.8h, v0.h[6]
     922 entry:
     923   %x = extractelement <4 x i32> %a, i32 3
     924   %vget_lane = trunc i32 %x to i16
     925   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
     926   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
     927   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     928   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
     929   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
     930   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
     931   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
     932   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
     933   ret <8 x i16> %vecinit7.i
     934 }
    935 
     936 define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) {
         ; Extracts the only element of the <1 x i64> argument, truncates it to
         ; i16, and inserts that value into all four lanes of a <4 x i16>.
         ; Expected: one dup from halfword lane 0 of v0.
     937 ; CHECK-LABEL: test_dup_v1i64_v4i16:
     938 ; CHECK: dup v0.4h, v0.h[0]
     939 entry:
     940   %x = extractelement <1 x i64> %a, i32 0
     941   %vget_lane = trunc i64 %x to i16
     942   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
     943   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
     944   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     945   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
     946   ret <4 x i16> %vecinit3.i
     947 }
    948 
     949 define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) {
         ; Extracts the only element of the <1 x i64> argument, truncates it to
         ; i32, and inserts that value into both lanes of a <2 x i32>.
         ; Expected: one dup from word lane 0 of v0.
     950 ; CHECK-LABEL: test_dup_v1i64_v2i32:
     951 ; CHECK: dup v0.2s, v0.s[0]
     952 entry:
     953   %x = extractelement <1 x i64> %a, i32 0
     954   %vget_lane = trunc i64 %x to i32
     955   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
     956   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
     957   ret <2 x i32> %vecinit1.i
     958 }
    959 
     960 define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) {
         ; Extracts lane 1 of the <2 x i64> argument, truncates it to i16, and
         ; inserts that value into all eight lanes of an <8 x i16>.
         ; Expected: one dup from halfword lane 4 of v0 (presumably the low
         ; halfword of 64-bit lane 1).
     961 ; CHECK-LABEL: test_dup_v2i64_v8i16:
     962 ; CHECK: dup v0.8h, v0.h[4]
     963 entry:
     964   %x = extractelement <2 x i64> %a, i32 1
     965   %vget_lane = trunc i64 %x to i16
     966   %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0
     967   %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1
     968   %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     969   %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3
     970   %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4
     971   %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5
     972   %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6
     973   %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7
     974   ret <8 x i16> %vecinit7.i
     975 }
    976 
     977 define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) {
         ; Extracts lane 1 of the <2 x i64> argument, truncates it to i32, and
         ; inserts that value into all four lanes of a <4 x i32>.
         ; Expected: one dup from word lane 2 of v0 (presumably the low word of
         ; 64-bit lane 1).
     978 ; CHECK-LABEL: test_dup_v2i64_v4i32:
     979 ; CHECK: dup v0.4s, v0.s[2]
     980 entry:
     981   %x = extractelement <2 x i64> %a, i32 1
     982   %vget_lane = trunc i64 %x to i32
     983   %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0
     984   %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1
     985   %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2
     986   %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3
     987   ret <4 x i32> %vecinit3.i
     988 }
    989 
     990 define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) {
         ; Extracts lane 1 of the 128-bit <4 x i32> argument, truncates it to i16,
         ; and inserts that value into all four lanes of a 64-bit <4 x i16>.
         ; Expected: one dup from halfword lane 2 of v0 (presumably the low
         ; halfword of 32-bit lane 1).
     991 ; CHECK-LABEL: test_dup_v4i32_v4i16:
     992 ; CHECK: dup v0.4h, v0.h[2]
     993 entry:
     994   %x = extractelement <4 x i32> %a, i32 1
     995   %vget_lane = trunc i32 %x to i16
     996   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
     997   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
     998   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
     999   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    1000   ret <4 x i16> %vecinit3.i
    1001 }
   1002 
    1003 define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) {
         ; Extracts lane 0 of the 128-bit <2 x i64> argument, truncates it to i16,
         ; and inserts that value into all four lanes of a 64-bit <4 x i16>.
         ; Expected: one dup from halfword lane 0 of v0.
    1004 ; CHECK-LABEL: test_dup_v2i64_v4i16:
    1005 ; CHECK: dup v0.4h, v0.h[0]
    1006 entry:
    1007   %x = extractelement <2 x i64> %a, i32 0
    1008   %vget_lane = trunc i64 %x to i16
    1009   %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0
    1010   %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1
    1011   %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2
    1012   %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3
    1013   ret <4 x i16> %vecinit3.i
    1014 }
   1015 
    1016 define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) {
         ; Extracts lane 0 of the 128-bit <2 x i64> argument, truncates it to i32,
         ; and inserts that value into both lanes of a 64-bit <2 x i32>.
         ; Expected: one dup from word lane 0 of v0.
    1017 ; CHECK-LABEL: test_dup_v2i64_v2i32:
    1018 ; CHECK: dup v0.2s, v0.s[0]
    1019 entry:
    1020   %x = extractelement <2 x i64> %a, i32 0
    1021   %vget_lane = trunc i64 %x to i32
    1022   %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0
    1023   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1
    1024   ret <2 x i32> %vecinit1.i
    1025 }
   1026 
   1027 
    1028 define <2 x float> @test_scalar_to_vector_f32_to_v2f32(<2 x float> %a) {
         ; Reduces %a with the fmaxv intrinsic, round-trips the scalar result
         ; through a <1 x float> (insert then extract lane 0), and places it in
         ; lane 0 of an undef <2 x float>.
         ; Expected: fmaxp writing an s register, with ret as the very next
         ; instruction, so no extra move into the vector register is allowed.
    1029 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v2f32:
    1030 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
    1031 ; CHECK-NEXT: ret
    1032 entry:
    1033   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
    1034   %1 = insertelement <1 x float> undef, float %0, i32 0
    1035   %2 = extractelement <1 x float> %1, i32 0
    1036   %vecinit1.i = insertelement <2 x float> undef, float %2, i32 0
    1037   ret <2 x float> %vecinit1.i
    1038 }
   1039 
    1040 define <4 x float> @test_scalar_to_vector_f32_to_v4f32(<2 x float> %a) {
         ; Reduces %a with the fmaxv intrinsic, round-trips the scalar result
         ; through a <1 x float> (insert then extract lane 0), and places it in
         ; lane 0 of an undef <4 x float>.
         ; Expected: fmaxp writing an s register, with ret as the very next
         ; instruction, so no extra move into the vector register is allowed.
    1041 ; CHECK-LABEL: test_scalar_to_vector_f32_to_v4f32:
    1042 ; CHECK: fmaxp s{{[0-9]+}}, v{{[0-9]+}}.2s
    1043 ; CHECK-NEXT: ret
    1044 entry:
    1045   %0 = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
    1046   %1 = insertelement <1 x float> undef, float %0, i32 0
    1047   %2 = extractelement <1 x float> %1, i32 0
    1048   %vecinit1.i = insertelement <4 x float> undef, float %2, i32 0
    1049   ret <4 x float> %vecinit1.i
    1050 }
   1051 
   1052 declare float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float>)
   1053 
    1054 define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) {
         ; Takes lane 0 of %a and inserts it into lane 1 of an undef <2 x i32>;
         ; lane 0 of the result is left undef.
         ; Expected: a dup of word lane 0 across .2s (filling the undef lane with
         ; the same value).
    1055 ; CHECK-LABEL: test_concat_undef_v1i32:
    1056 ; CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
    1057 entry:
    1058   %0 = extractelement <2 x i32> %a, i32 0
    1059   %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1
    1060   ret <2 x i32> %vecinit1.i
    1061 }
   1062 
   1063 declare i32 @llvm.aarch64.neon.sqabs.i32(i32) #4
   1064 
    1065 define <2 x i32> @test_concat_v1i32_undef(i32 %a) {
         ; Applies the scalar sqabs intrinsic to %a and inserts the result into
         ; lane 0 of an undef <2 x i32>; lane 1 is left undef.
         ; Expected: sqabs on s registers with ret as the very next instruction.
    1066 ; CHECK-LABEL: test_concat_v1i32_undef:
    1067 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
    1068 ; CHECK-NEXT: ret
    1069 entry:
    1070   %b = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
    1071   %vecinit.i432 = insertelement <2 x i32> undef, i32 %b, i32 0
    1072   ret <2 x i32> %vecinit.i432
    1073 }
   1074 
    1075 define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) {
         ; Takes lane 0 of %a and inserts it into both lanes of a <2 x i32>.
         ; Expected: a dup of word lane 0 across .2s.
    1076 ; CHECK-LABEL: test_concat_same_v1i32_v1i32:
    1077 ; CHECK: dup v{{[0-9]+}}.2s, v{{[0-9]+}}.s[0]
    1078 entry:
    1079   %0 = extractelement <2 x i32> %a, i32 0
    1080   %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0
    1081   %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1
    1082   ret <2 x i32> %vecinit1.i
    1083 }
   1084 
    1085 define <2 x i32> @test_concat_diff_v1i32_v1i32(i32 %a, i32 %b) {
         ; Applies the scalar sqabs intrinsic to %a and to %b, places each result
         ; in lane 0 of its own undef <2 x i32>, then shuffles with mask <0, 2> so
         ; the result is { sqabs(%a), sqabs(%b) }.
         ; Expected: two sqabs instructions, the second immediately followed by a
         ; zip1 on .2s that combines the two lane-0 values.
    1086 ; CHECK-LABEL: test_concat_diff_v1i32_v1i32:
    1087 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
    1088 ; CHECK: sqabs s{{[0-9]+}}, s{{[0-9]+}}
    1089 ; CHECK-NEXT: zip1 {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
    1090 entry:
    1091   %c = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a)
    1092   %d = insertelement <2 x i32> undef, i32 %c, i32 0
    1093   %e = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %b)
    1094   %f = insertelement <2 x i32> undef, i32 %e, i32 0
    1095   %h = shufflevector <2 x i32> %d, <2 x i32> %f, <2 x i32> <i32 0, i32 2>
    1096   ret <2 x i32> %h
    1097 }
   1098 
    1099 define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
         ; Shuffles lanes 0-7 of %x (mask 0-7) with lanes 0-7 of %y (mask 16-23),
         ; i.e. concatenates the low 64-bit halves of the two 128-bit inputs.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1100 ; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
    1101 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1102 entry:
    1103   %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
    1104   ret <16 x i8> %vecinit30
    1105 }
   1106 
    1107 define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
         ; Copies all eight lanes of the 64-bit %x into lanes 0-7 of a <16 x i8>
         ; one element at a time, then shuffles that (mask 0-7) with lanes 0-7 of
         ; the 128-bit %y (mask 16-23) to form the upper half.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1108 ; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
    1109 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1110 entry:
    1111   %vecext = extractelement <8 x i8> %x, i32 0
    1112   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
    1113   %vecext1 = extractelement <8 x i8> %x, i32 1
    1114   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
    1115   %vecext3 = extractelement <8 x i8> %x, i32 2
    1116   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
    1117   %vecext5 = extractelement <8 x i8> %x, i32 3
    1118   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
    1119   %vecext7 = extractelement <8 x i8> %x, i32 4
    1120   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
    1121   %vecext9 = extractelement <8 x i8> %x, i32 5
    1122   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
    1123   %vecext11 = extractelement <8 x i8> %x, i32 6
    1124   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
    1125   %vecext13 = extractelement <8 x i8> %x, i32 7
    1126   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
    1127   %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
    1128   ret <16 x i8> %vecinit30
    1129 }
   1130 
    1131 define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
         ; Builds a <16 x i8> element by element: lanes 0-7 come from lanes 0-7 of
         ; the 128-bit %x, lanes 8-15 come from lanes 0-7 of the 64-bit %y.
         ; Expected: the 16-step insert chain is recognised as a concat and emitted
         ; as an ins copying d[0] of one register into d[1] of another.
    1132 ; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
    1133 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1134 entry:
    1135   %vecext = extractelement <16 x i8> %x, i32 0
    1136   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
    1137   %vecext1 = extractelement <16 x i8> %x, i32 1
    1138   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
    1139   %vecext3 = extractelement <16 x i8> %x, i32 2
    1140   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
    1141   %vecext5 = extractelement <16 x i8> %x, i32 3
    1142   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
    1143   %vecext7 = extractelement <16 x i8> %x, i32 4
    1144   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
    1145   %vecext9 = extractelement <16 x i8> %x, i32 5
    1146   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
    1147   %vecext11 = extractelement <16 x i8> %x, i32 6
    1148   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
    1149   %vecext13 = extractelement <16 x i8> %x, i32 7
    1150   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
    1151   %vecext15 = extractelement <8 x i8> %y, i32 0
    1152   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
    1153   %vecext17 = extractelement <8 x i8> %y, i32 1
    1154   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
    1155   %vecext19 = extractelement <8 x i8> %y, i32 2
    1156   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
    1157   %vecext21 = extractelement <8 x i8> %y, i32 3
    1158   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
    1159   %vecext23 = extractelement <8 x i8> %y, i32 4
    1160   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
    1161   %vecext25 = extractelement <8 x i8> %y, i32 5
    1162   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
    1163   %vecext27 = extractelement <8 x i8> %y, i32 6
    1164   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
    1165   %vecext29 = extractelement <8 x i8> %y, i32 7
    1166   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
    1167   ret <16 x i8> %vecinit30
    1168 }
   1169 
    1170 define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
         ; Builds a <16 x i8> element by element: lanes 0-7 come from the eight
         ; lanes of %x, lanes 8-15 come from the eight lanes of %y (both 64-bit).
         ; Expected: the 16-step insert chain is recognised as a concat and emitted
         ; as an ins copying d[0] of one register into d[1] of another.
    1171 ; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
    1172 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1173 entry:
    1174   %vecext = extractelement <8 x i8> %x, i32 0
    1175   %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
    1176   %vecext1 = extractelement <8 x i8> %x, i32 1
    1177   %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
    1178   %vecext3 = extractelement <8 x i8> %x, i32 2
    1179   %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
    1180   %vecext5 = extractelement <8 x i8> %x, i32 3
    1181   %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
    1182   %vecext7 = extractelement <8 x i8> %x, i32 4
    1183   %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
    1184   %vecext9 = extractelement <8 x i8> %x, i32 5
    1185   %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
    1186   %vecext11 = extractelement <8 x i8> %x, i32 6
    1187   %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
    1188   %vecext13 = extractelement <8 x i8> %x, i32 7
    1189   %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
    1190   %vecext15 = extractelement <8 x i8> %y, i32 0
    1191   %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
    1192   %vecext17 = extractelement <8 x i8> %y, i32 1
    1193   %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
    1194   %vecext19 = extractelement <8 x i8> %y, i32 2
    1195   %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
    1196   %vecext21 = extractelement <8 x i8> %y, i32 3
    1197   %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
    1198   %vecext23 = extractelement <8 x i8> %y, i32 4
    1199   %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
    1200   %vecext25 = extractelement <8 x i8> %y, i32 5
    1201   %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
    1202   %vecext27 = extractelement <8 x i8> %y, i32 6
    1203   %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
    1204   %vecext29 = extractelement <8 x i8> %y, i32 7
    1205   %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
    1206   ret <16 x i8> %vecinit30
    1207 }
   1208 
    1209 define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
         ; Shuffles lanes 0-3 of %x (mask 0-3) with lanes 0-3 of %y (mask 8-11),
         ; i.e. concatenates the low 64-bit halves of the two 128-bit inputs.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1210 ; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
    1211 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1212 entry:
    1213   %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    1214   ret <8 x i16> %vecinit14
    1215 }
   1216 
    1217 define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
         ; Copies all four lanes of the 64-bit %x into lanes 0-3 of an <8 x i16>
         ; one element at a time, then shuffles that (mask 0-3) with lanes 0-3 of
         ; the 128-bit %y (mask 8-11) to form the upper half.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1218 ; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
    1219 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1220 entry:
    1221   %vecext = extractelement <4 x i16> %x, i32 0
    1222   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
    1223   %vecext1 = extractelement <4 x i16> %x, i32 1
    1224   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
    1225   %vecext3 = extractelement <4 x i16> %x, i32 2
    1226   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
    1227   %vecext5 = extractelement <4 x i16> %x, i32 3
    1228   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
    1229   %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
    1230   ret <8 x i16> %vecinit14
    1231 }
   1232 
    1233 define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
         ; Builds an <8 x i16> element by element: lanes 0-3 come from lanes 0-3 of
         ; the 128-bit %x, lanes 4-7 come from the four lanes of the 64-bit %y.
         ; Expected: the insert chain is recognised as a concat and emitted as an
         ; ins copying d[0] of one register into d[1] of another.
    1234 ; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
    1235 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1236 entry:
    1237   %vecext = extractelement <8 x i16> %x, i32 0
    1238   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
    1239   %vecext1 = extractelement <8 x i16> %x, i32 1
    1240   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
    1241   %vecext3 = extractelement <8 x i16> %x, i32 2
    1242   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
    1243   %vecext5 = extractelement <8 x i16> %x, i32 3
    1244   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
    1245   %vecext7 = extractelement <4 x i16> %y, i32 0
    1246   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
    1247   %vecext9 = extractelement <4 x i16> %y, i32 1
    1248   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
    1249   %vecext11 = extractelement <4 x i16> %y, i32 2
    1250   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
    1251   %vecext13 = extractelement <4 x i16> %y, i32 3
    1252   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
    1253   ret <8 x i16> %vecinit14
    1254 }
   1255 
    1256 define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
         ; Builds an <8 x i16> element by element: lanes 0-3 come from the four
         ; lanes of %x, lanes 4-7 come from the four lanes of %y (both 64-bit).
         ; Expected: the insert chain is recognised as a concat and emitted as an
         ; ins copying d[0] of one register into d[1] of another.
    1257 ; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
    1258 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1259 entry:
    1260   %vecext = extractelement <4 x i16> %x, i32 0
    1261   %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
    1262   %vecext1 = extractelement <4 x i16> %x, i32 1
    1263   %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
    1264   %vecext3 = extractelement <4 x i16> %x, i32 2
    1265   %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
    1266   %vecext5 = extractelement <4 x i16> %x, i32 3
    1267   %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
    1268   %vecext7 = extractelement <4 x i16> %y, i32 0
    1269   %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
    1270   %vecext9 = extractelement <4 x i16> %y, i32 1
    1271   %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
    1272   %vecext11 = extractelement <4 x i16> %y, i32 2
    1273   %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
    1274   %vecext13 = extractelement <4 x i16> %y, i32 3
    1275   %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
    1276   ret <8 x i16> %vecinit14
    1277 }
   1278 
    1279 define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
         ; Shuffles lanes 0-1 of %x (mask 0, 1) with lanes 0-1 of %y (mask 4, 5),
         ; i.e. concatenates the low 64-bit halves of the two 128-bit inputs.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1280 ; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
    1281 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1282 entry:
    1283   %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    1284   ret <4 x i32> %vecinit6
    1285 }
   1286 
    1287 define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
         ; Copies both lanes of the 64-bit %x into lanes 0-1 of a <4 x i32> one
         ; element at a time, then shuffles that (mask 0, 1) with lanes 0-1 of the
         ; 128-bit %y (mask 4, 5) to form the upper half.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1288 ; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
    1289 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1290 entry:
    1291   %vecext = extractelement <2 x i32> %x, i32 0
    1292   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
    1293   %vecext1 = extractelement <2 x i32> %x, i32 1
    1294   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
    1295   %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    1296   ret <4 x i32> %vecinit6
    1297 }
   1298 
    1299 define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
         ; Builds a <4 x i32> element by element: lanes 0-1 come from lanes 0-1 of
         ; the 128-bit %x, lanes 2-3 come from the two lanes of the 64-bit %y.
         ; Expected: the insert chain is recognised as a concat and emitted as an
         ; ins copying d[0] of one register into d[1] of another.
    1300 ; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
    1301 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1302 entry:
    1303   %vecext = extractelement <4 x i32> %x, i32 0
    1304   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
    1305   %vecext1 = extractelement <4 x i32> %x, i32 1
    1306   %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
    1307   %vecext3 = extractelement <2 x i32> %y, i32 0
    1308   %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
    1309   %vecext5 = extractelement <2 x i32> %y, i32 1
    1310   %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
    1311   ret <4 x i32> %vecinit6
    1312 }
   1313 
    1314 define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
         ; Concatenates two 64-bit <2 x i32> inputs into a <4 x i32> with a single
         ; widening shufflevector (mask 0-3 selects %x then %y in order).
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1315 ; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
    1316 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1317 entry:
    1318   %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    1319   ret <4 x i32> %vecinit6
    1320 }
   1321 
    1322 define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
         ; Shuffles lane 0 of %x (mask 0) with lane 0 of %y (mask 2) into a
         ; <2 x i64>.
         ; Expected: a zip1 on .2d (unlike the narrower-element concat tests in
         ; this file, which expect an ins of d[0] into d[1]).
    1323 ; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
    1324 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    1325 entry:
    1326   %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
    1327   ret <2 x i64> %vecinit2
    1328 }
   1329 
    1330 define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
         ; Places the only element of %x in lane 0 of a <2 x i64>, then shuffles
         ; that lane (mask 0) with lane 0 of %y (mask 2).
         ; Expected: a zip1 on .2d.
    1331 ; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
    1332 ; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
    1333 entry:
    1334   %vecext = extractelement <1 x i64> %x, i32 0
    1335   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
    1336   %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
    1337   ret <2 x i64> %vecinit2
    1338 }
   1339 
    1340 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
         ; Builds a <2 x i64> by extract/insert: lane 0 from lane 0 of the 128-bit
         ; %x, lane 1 from the only element of the 64-bit %y.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1341 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
    1342 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1343 entry:
    1344   %vecext = extractelement <2 x i64> %x, i32 0
    1345   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
    1346   %vecext1 = extractelement <1 x i64> %y, i32 0
    1347   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
    1348   ret <2 x i64> %vecinit2
    1349 }
   1350 
    1351 define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
         ; Builds a <2 x i64> by extract/insert: lane 0 from the only element of
         ; %x, lane 1 from the only element of %y.
         ; Expected: an ins copying d[0] of one register into d[1] of another.
    1352 ; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
    1353 ; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
    1354 entry:
    1355   %vecext = extractelement <1 x i64> %x, i32 0
    1356   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
    1357   %vecext1 = extractelement <1 x i64> %y, i32 0
    1358   %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
    1359   ret <2 x i64> %vecinit2
    1360 }
   1361 
   1362 
    1363 define <4 x i16> @concat_vector_v4i16_const() {
         ; Splats lane 0 of an all-zero <1 x i16> into a <4 x i16> via a
         ; shufflevector with an all-zero mask.
         ; Expected: the constant folds to a movi of #0 into a d register.
    1364 ; CHECK-LABEL: concat_vector_v4i16_const:
    1365 ; CHECK: movi {{d[0-9]+}}, #0
    1366  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer
    1367  ret <4 x i16> %r
    1368 }
   1369 
    1370 define <4 x i16> @concat_vector_v4i16_const_one() {
         ; Splats the constant <1 x i16> <1> into a <4 x i16> via a shufflevector
         ; with an all-zero mask.
         ; Expected: the constant folds to a movi of #0x1 on .4h.
    1371 ; CHECK-LABEL: concat_vector_v4i16_const_one:
    1372 ; CHECK: movi {{v[0-9]+}}.4h, #0x1
    1373  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer
    1374  ret <4 x i16> %r
    1375 }
   1376 
    1377 define <4 x i32> @concat_vector_v4i32_const() {
         ; Splats lane 0 of an all-zero <1 x i32> into a <4 x i32> via a
         ; shufflevector with an all-zero mask.
         ; Expected: the constant folds to a movi of #0 on .2d.
    1378 ; CHECK-LABEL: concat_vector_v4i32_const:
    1379 ; CHECK: movi {{v[0-9]+}}.2d, #0
    1380  %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer
    1381  ret <4 x i32> %r
    1382 }
   1383 
    1384 define <8 x i8> @concat_vector_v8i8_const() {
         ; Splats lane 0 of an all-zero <1 x i8> into an <8 x i8> via a
         ; shufflevector with an all-zero mask.
         ; Expected: the constant folds to a movi of #0 into a d register.
    1385 ; CHECK-LABEL: concat_vector_v8i8_const:
    1386 ; CHECK: movi {{d[0-9]+}}, #0
    1387  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer
    1388  ret <8 x i8> %r
    1389 }
   1390 
    1391 define <8 x i16> @concat_vector_v8i16_const() {
         ; Splats lane 0 of an all-zero <1 x i16> into an <8 x i16> via a
         ; shufflevector with an all-zero mask.
         ; Expected: the constant folds to a movi of #0 on .2d.
    1392 ; CHECK-LABEL: concat_vector_v8i16_const:
    1393 ; CHECK: movi {{v[0-9]+}}.2d, #0
    1394  %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer
    1395  ret <8 x i16> %r
    1396 }
   1397 
    1398 define <8 x i16> @concat_vector_v8i16_const_one() {
         ; Splats the constant <1 x i16> <1> into an <8 x i16> via a shufflevector
         ; with an all-zero mask.
         ; Expected: the constant folds to a movi of #0x1 on .8h.
    1399 ; CHECK-LABEL: concat_vector_v8i16_const_one:
    1400 ; CHECK: movi {{v[0-9]+}}.8h, #0x1
    1401  %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer
    1402  ret <8 x i16> %r
    1403 }
   1404 
    1405 define <16 x i8> @concat_vector_v16i8_const() {
         ; Splats lane 0 of an all-zero <1 x i8> into a <16 x i8> via a
         ; shufflevector with an all-zero mask.
         ; Expected: the constant folds to a movi of #0 on .2d.
    1406 ; CHECK-LABEL: concat_vector_v16i8_const:
    1407 ; CHECK: movi {{v[0-9]+}}.2d, #0
    1408  %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer
    1409  ret <16 x i8> %r
    1410 }
   1411 
    1412 define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) {
         ; Splats the only lane of the <1 x i16> argument into a <4 x i16> via a
         ; shufflevector with an all-zero mask.
         ; Expected: one dup of halfword lane 0 of v0 across .4h.
    1413 ; CHECK-LABEL: concat_vector_v4i16:
    1414 ; CHECK: dup v0.4h, v0.h[0]
    1415  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer
    1416  ret <4 x i16> %r
    1417 }
   1418 
    1419 define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) {
         ; Splats the only lane of the <1 x i32> argument into a <4 x i32> via a
         ; shufflevector with an all-zero mask.
         ; Expected: one dup of word lane 0 of v0 across .4s.
    1420 ; CHECK-LABEL: concat_vector_v4i32:
    1421 ; CHECK: dup v0.4s, v0.s[0]
    1422  %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer
    1423  ret <4 x i32> %r
    1424 }
   1425 
    1426 define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) {
         ; Splats the only lane of the <1 x i8> argument into an <8 x i8> via a
         ; shufflevector with an all-zero mask.
         ; Expected: one dup of byte lane 0 of v0 across .8b.
    1427 ; CHECK-LABEL: concat_vector_v8i8:
    1428 ; CHECK: dup v0.8b, v0.b[0]
    1429  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer
    1430  ret <8 x i8> %r
    1431 }
   1432 
    1433 define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) {
         ; Splats the only lane of the <1 x i16> argument into an <8 x i16> via a
         ; shufflevector with an all-zero mask.
         ; Expected: one dup of halfword lane 0 of v0 across .8h.
    1434 ; CHECK-LABEL: concat_vector_v8i16:
    1435 ; CHECK: dup v0.8h, v0.h[0]
    1436  %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer
    1437  ret <8 x i16> %r
    1438 }
   1439 
    1440 define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) {
         ; Splats the only lane of the <1 x i8> argument into a <16 x i8> via a
         ; shufflevector with an all-zero mask.
         ; Expected: one dup of byte lane 0 of v0 across .16b.
    1441 ; CHECK-LABEL: concat_vector_v16i8:
    1442 ; CHECK: dup v0.16b, v0.b[0]
    1443  %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer
    1444  ret <16 x i8> %r
    1445 }
   1446