Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s
      2 
      3 @ptr = global i8* null
      4 
      5 define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) { ; pre-index shape: the load uses the advanced pointer
      6 ; CHECK-LABEL: test_v8i8_pre_load:
      7 ; CHECK: ldr d0, [x0, #40]!
      8   %bumped = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
      9   %loaded = load <8 x i8>, <8 x i8>* %bumped, align 8
     10   store <8 x i8>* %bumped, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
     11   ret <8 x i8> %loaded
     12 }
     13 
     14 define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) { ; post-index shape: the load uses the original pointer
     15 ; CHECK-LABEL: test_v8i8_post_load:
     16 ; CHECK: ldr d0, [x0], #40
     17   %bumped = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     18   %loaded = load <8 x i8>, <8 x i8>* %addr, align 8
     19   store <8 x i8>* %bumped, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
     20   ret <8 x i8> %loaded
     21 }
     22 
     23 define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) { ; pre-index shape: the store uses the advanced pointer
     24 ; CHECK-LABEL: test_v8i8_pre_store:
     25 ; CHECK: str d0, [x0, #40]!
     26   %bumped = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     27   store <8 x i8> %in, <8 x i8>* %bumped, align 8
     28   store <8 x i8>* %bumped, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
     29   ret void
     30 }
     31 
     32 define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) { ; post-index shape: the store uses the original pointer
     33 ; CHECK-LABEL: test_v8i8_post_store:
     34 ; CHECK: str d0, [x0], #40
     35   %bumped = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     36   store <8 x i8> %in, <8 x i8>* %addr, align 8 ; data goes to the un-advanced address
     37   store <8 x i8>* %bumped, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
     38   ret void
     39 }
     40 
     41 define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) { ; pre-index shape: the load uses the advanced pointer
     42 ; CHECK-LABEL: test_v4i16_pre_load:
     43 ; CHECK: ldr d0, [x0, #40]!
     44   %bumped = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     45   %loaded = load <4 x i16>, <4 x i16>* %bumped, align 8
     46   store <4 x i16>* %bumped, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
     47   ret <4 x i16> %loaded
     48 }
     49 
     50 define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) { ; post-index shape: the load uses the original pointer
     51 ; CHECK-LABEL: test_v4i16_post_load:
     52 ; CHECK: ldr d0, [x0], #40
     53   %bumped = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     54   %loaded = load <4 x i16>, <4 x i16>* %addr, align 8
     55   store <4 x i16>* %bumped, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
     56   ret <4 x i16> %loaded
     57 }
     58 
     59 define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) { ; pre-index shape: the store uses the advanced pointer
     60 ; CHECK-LABEL: test_v4i16_pre_store:
     61 ; CHECK: str d0, [x0, #40]!
     62   %bumped = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     63   store <4 x i16> %in, <4 x i16>* %bumped, align 8
     64   store <4 x i16>* %bumped, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
     65   ret void
     66 }
     67 
     68 define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) { ; post-index shape: the store uses the original pointer
     69 ; CHECK-LABEL: test_v4i16_post_store:
     70 ; CHECK: str d0, [x0], #40
     71   %bumped = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     72   store <4 x i16> %in, <4 x i16>* %addr, align 8 ; data goes to the un-advanced address
     73   store <4 x i16>* %bumped, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
     74   ret void
     75 }
     76 
     77 define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) { ; pre-index shape: the load uses the advanced pointer
     78 ; CHECK-LABEL: test_v2i32_pre_load:
     79 ; CHECK: ldr d0, [x0, #40]!
     80   %bumped = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     81   %loaded = load <2 x i32>, <2 x i32>* %bumped, align 8
     82   store <2 x i32>* %bumped, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
     83   ret <2 x i32> %loaded
     84 }
     85 
     86 define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) { ; post-index shape: the load uses the original pointer
     87 ; CHECK-LABEL: test_v2i32_post_load:
     88 ; CHECK: ldr d0, [x0], #40
     89   %bumped = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     90   %loaded = load <2 x i32>, <2 x i32>* %addr, align 8
     91   store <2 x i32>* %bumped, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
     92   ret <2 x i32> %loaded
     93 }
     94 
     95 define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) { ; pre-index shape: the store uses the advanced pointer
     96 ; CHECK-LABEL: test_v2i32_pre_store:
     97 ; CHECK: str d0, [x0, #40]!
     98   %bumped = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
     99   store <2 x i32> %in, <2 x i32>* %bumped, align 8
    100   store <2 x i32>* %bumped, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
    101   ret void
    102 }
    103 
    104 define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) { ; post-index shape: the store uses the original pointer
    105 ; CHECK-LABEL: test_v2i32_post_store:
    106 ; CHECK: str d0, [x0], #40
    107   %bumped = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    108   store <2 x i32> %in, <2 x i32>* %addr, align 8 ; data goes to the un-advanced address
    109   store <2 x i32>* %bumped, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
    110   ret void
    111 }
    112 
    113 define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) { ; pre-index shape: the load uses the advanced pointer
    114 ; CHECK-LABEL: test_v2f32_pre_load:
    115 ; CHECK: ldr d0, [x0, #40]!
    116   %bumped = getelementptr <2 x float>, <2 x float>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    117   %loaded = load <2 x float>, <2 x float>* %bumped, align 8
    118   store <2 x float>* %bumped, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
    119   ret <2 x float> %loaded
    120 }
    121 
    122 define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) { ; post-index shape: the load uses the original pointer
    123 ; CHECK-LABEL: test_v2f32_post_load:
    124 ; CHECK: ldr d0, [x0], #40
    125   %bumped = getelementptr <2 x float>, <2 x float>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    126   %loaded = load <2 x float>, <2 x float>* %addr, align 8
    127   store <2 x float>* %bumped, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
    128   ret <2 x float> %loaded
    129 }
    130 
    131 define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) { ; pre-index shape: the store uses the advanced pointer
    132 ; CHECK-LABEL: test_v2f32_pre_store:
    133 ; CHECK: str d0, [x0, #40]!
    134   %bumped = getelementptr <2 x float>, <2 x float>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    135   store <2 x float> %in, <2 x float>* %bumped, align 8
    136   store <2 x float>* %bumped, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
    137   ret void
    138 }
    139 
    140 define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) { ; post-index shape: the store uses the original pointer
    141 ; CHECK-LABEL: test_v2f32_post_store:
    142 ; CHECK: str d0, [x0], #40
    143   %bumped = getelementptr <2 x float>, <2 x float>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    144   store <2 x float> %in, <2 x float>* %addr, align 8 ; data goes to the un-advanced address
    145   store <2 x float>* %bumped, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
    146   ret void
    147 }
    148 
    149 define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) { ; pre-index shape: the load uses the advanced pointer
    150 ; CHECK-LABEL: test_v1i64_pre_load:
    151 ; CHECK: ldr d0, [x0, #40]!
    152   %bumped = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    153   %loaded = load <1 x i64>, <1 x i64>* %bumped, align 8
    154   store <1 x i64>* %bumped, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
    155   ret <1 x i64> %loaded
    156 }
    157 
    158 define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) { ; post-index shape: the load uses the original pointer
    159 ; CHECK-LABEL: test_v1i64_post_load:
    160 ; CHECK: ldr d0, [x0], #40
    161   %bumped = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    162   %loaded = load <1 x i64>, <1 x i64>* %addr, align 8
    163   store <1 x i64>* %bumped, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
    164   ret <1 x i64> %loaded
    165 }
    166 
    167 define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) { ; pre-index shape: the store uses the advanced pointer
    168 ; CHECK-LABEL: test_v1i64_pre_store:
    169 ; CHECK: str d0, [x0, #40]!
    170   %bumped = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    171   store <1 x i64> %in, <1 x i64>* %bumped, align 8
    172   store <1 x i64>* %bumped, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
    173   ret void
    174 }
    175 
    176 define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) { ; post-index shape: the store uses the original pointer
    177 ; CHECK-LABEL: test_v1i64_post_store:
    178 ; CHECK: str d0, [x0], #40
    179   %bumped = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5 ; 5 vectors * 8 bytes = 40
    180   store <1 x i64> %in, <1 x i64>* %addr, align 8 ; data goes to the un-advanced address
    181   store <1 x i64>* %bumped, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
    182   ret void
    183 }
    184 
    185 define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) { ; pre-index shape: the load uses the advanced pointer
    186 ; CHECK-LABEL: test_v16i8_pre_load:
    187 ; CHECK: ldr q0, [x0, #80]!
    188   %bumped = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    189   %loaded = load <16 x i8>, <16 x i8>* %bumped, align 8
    190   store <16 x i8>* %bumped, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
    191   ret <16 x i8> %loaded
    192 }
    193 
    194 define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) { ; post-index shape: the load uses the original pointer
    195 ; CHECK-LABEL: test_v16i8_post_load:
    196 ; CHECK: ldr q0, [x0], #80
    197   %bumped = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    198   %loaded = load <16 x i8>, <16 x i8>* %addr, align 8
    199   store <16 x i8>* %bumped, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
    200   ret <16 x i8> %loaded
    201 }
    202 
    203 define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) { ; pre-index shape: the store uses the advanced pointer
    204 ; CHECK-LABEL: test_v16i8_pre_store:
    205 ; CHECK: str q0, [x0, #80]!
    206   %bumped = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    207   store <16 x i8> %in, <16 x i8>* %bumped, align 8
    208   store <16 x i8>* %bumped, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
    209   ret void
    210 }
    211 
    212 define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) { ; post-index shape: the store uses the original pointer
    213 ; CHECK-LABEL: test_v16i8_post_store:
    214 ; CHECK: str q0, [x0], #80
    215   %bumped = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    216   store <16 x i8> %in, <16 x i8>* %addr, align 8 ; data goes to the un-advanced address
    217   store <16 x i8>* %bumped, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
    218   ret void
    219 }
    220 
    221 define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) { ; pre-index shape: the load uses the advanced pointer
    222 ; CHECK-LABEL: test_v8i16_pre_load:
    223 ; CHECK: ldr q0, [x0, #80]!
    224   %bumped = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    225   %loaded = load <8 x i16>, <8 x i16>* %bumped, align 8
    226   store <8 x i16>* %bumped, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
    227   ret <8 x i16> %loaded
    228 }
    229 
    230 define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) { ; post-index shape: the load uses the original pointer
    231 ; CHECK-LABEL: test_v8i16_post_load:
    232 ; CHECK: ldr q0, [x0], #80
    233   %bumped = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    234   %loaded = load <8 x i16>, <8 x i16>* %addr, align 8
    235   store <8 x i16>* %bumped, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
    236   ret <8 x i16> %loaded
    237 }
    238 
    239 define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) { ; pre-index shape: the store uses the advanced pointer
    240 ; CHECK-LABEL: test_v8i16_pre_store:
    241 ; CHECK: str q0, [x0, #80]!
    242   %bumped = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    243   store <8 x i16> %in, <8 x i16>* %bumped, align 8
    244   store <8 x i16>* %bumped, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
    245   ret void
    246 }
    247 
    248 define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) { ; post-index shape: the store uses the original pointer
    249 ; CHECK-LABEL: test_v8i16_post_store:
    250 ; CHECK: str q0, [x0], #80
    251   %bumped = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    252   store <8 x i16> %in, <8 x i16>* %addr, align 8 ; data goes to the un-advanced address
    253   store <8 x i16>* %bumped, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
    254   ret void
    255 }
    256 
    257 define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) { ; pre-index shape: the load uses the advanced pointer
    258 ; CHECK-LABEL: test_v4i32_pre_load:
    259 ; CHECK: ldr q0, [x0, #80]!
    260   %bumped = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    261   %loaded = load <4 x i32>, <4 x i32>* %bumped, align 8
    262   store <4 x i32>* %bumped, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
    263   ret <4 x i32> %loaded
    264 }
    265 
    266 define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) { ; post-index shape: the load uses the original pointer
    267 ; CHECK-LABEL: test_v4i32_post_load:
    268 ; CHECK: ldr q0, [x0], #80
    269   %bumped = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    270   %loaded = load <4 x i32>, <4 x i32>* %addr, align 8
    271   store <4 x i32>* %bumped, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
    272   ret <4 x i32> %loaded
    273 }
    274 
    275 define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) { ; pre-index shape: the store uses the advanced pointer
    276 ; CHECK-LABEL: test_v4i32_pre_store:
    277 ; CHECK: str q0, [x0, #80]!
    278   %bumped = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    279   store <4 x i32> %in, <4 x i32>* %bumped, align 8
    280   store <4 x i32>* %bumped, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
    281   ret void
    282 }
    283 
    284 define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) { ; post-index shape: the store uses the original pointer
    285 ; CHECK-LABEL: test_v4i32_post_store:
    286 ; CHECK: str q0, [x0], #80
    287   %bumped = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    288   store <4 x i32> %in, <4 x i32>* %addr, align 8 ; data goes to the un-advanced address
    289   store <4 x i32>* %bumped, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
    290   ret void
    291 }
    292 
    293 
    294 define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) { ; pre-index shape: the load uses the advanced pointer
    295 ; CHECK-LABEL: test_v4f32_pre_load:
    296 ; CHECK: ldr q0, [x0, #80]!
    297   %bumped = getelementptr <4 x float>, <4 x float>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    298   %loaded = load <4 x float>, <4 x float>* %bumped, align 8
    299   store <4 x float>* %bumped, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
    300   ret <4 x float> %loaded
    301 }
    302 
    303 define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) { ; post-index shape: the load uses the original pointer
    304 ; CHECK-LABEL: test_v4f32_post_load:
    305 ; CHECK: ldr q0, [x0], #80
    306   %bumped = getelementptr <4 x float>, <4 x float>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    307   %loaded = load <4 x float>, <4 x float>* %addr, align 8
    308   store <4 x float>* %bumped, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
    309   ret <4 x float> %loaded
    310 }
    311 
    312 define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) { ; pre-index shape: the store uses the advanced pointer
    313 ; CHECK-LABEL: test_v4f32_pre_store:
    314 ; CHECK: str q0, [x0, #80]!
    315   %bumped = getelementptr <4 x float>, <4 x float>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    316   store <4 x float> %in, <4 x float>* %bumped, align 8
    317   store <4 x float>* %bumped, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
    318   ret void
    319 }
    320 
    321 define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) { ; post-index shape: the store uses the original pointer
    322 ; CHECK-LABEL: test_v4f32_post_store:
    323 ; CHECK: str q0, [x0], #80
    324   %bumped = getelementptr <4 x float>, <4 x float>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    325   store <4 x float> %in, <4 x float>* %addr, align 8 ; data goes to the un-advanced address
    326   store <4 x float>* %bumped, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
    327   ret void
    328 }
    329 
    330 
    331 define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) { ; pre-index shape: the load uses the advanced pointer
    332 ; CHECK-LABEL: test_v2i64_pre_load:
    333 ; CHECK: ldr q0, [x0, #80]!
    334   %bumped = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    335   %loaded = load <2 x i64>, <2 x i64>* %bumped, align 8
    336   store <2 x i64>* %bumped, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
    337   ret <2 x i64> %loaded
    338 }
    339 
    340 define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) { ; post-index shape: the load uses the original pointer
    341 ; CHECK-LABEL: test_v2i64_post_load:
    342 ; CHECK: ldr q0, [x0], #80
    343   %bumped = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    344   %loaded = load <2 x i64>, <2 x i64>* %addr, align 8
    345   store <2 x i64>* %bumped, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
    346   ret <2 x i64> %loaded
    347 }
    348 
    349 define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) { ; pre-index shape: the store uses the advanced pointer
    350 ; CHECK-LABEL: test_v2i64_pre_store:
    351 ; CHECK: str q0, [x0, #80]!
    352   %bumped = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    353   store <2 x i64> %in, <2 x i64>* %bumped, align 8
    354   store <2 x i64>* %bumped, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
    355   ret void
    356 }
    357 
    358 define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) { ; post-index shape: the store uses the original pointer
    359 ; CHECK-LABEL: test_v2i64_post_store:
    360 ; CHECK: str q0, [x0], #80
    361   %bumped = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    362   store <2 x i64> %in, <2 x i64>* %addr, align 8 ; data goes to the un-advanced address
    363   store <2 x i64>* %bumped, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
    364   ret void
    365 }
    366 
    367 
    368 define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) { ; pre-index shape: the load uses the advanced pointer
    369 ; CHECK-LABEL: test_v2f64_pre_load:
    370 ; CHECK: ldr q0, [x0, #80]!
    371   %bumped = getelementptr <2 x double>, <2 x double>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    372   %loaded = load <2 x double>, <2 x double>* %bumped, align 8
    373   store <2 x double>* %bumped, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
    374   ret <2 x double> %loaded
    375 }
    376 
    377 define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) { ; post-index shape: the load uses the original pointer
    378 ; CHECK-LABEL: test_v2f64_post_load:
    379 ; CHECK: ldr q0, [x0], #80
    380   %bumped = getelementptr <2 x double>, <2 x double>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    381   %loaded = load <2 x double>, <2 x double>* %addr, align 8
    382   store <2 x double>* %bumped, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
    383   ret <2 x double> %loaded
    384 }
    385 
    386 define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) { ; pre-index shape: the store uses the advanced pointer
    387 ; CHECK-LABEL: test_v2f64_pre_store:
    388 ; CHECK: str q0, [x0, #80]!
    389   %bumped = getelementptr <2 x double>, <2 x double>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    390   store <2 x double> %in, <2 x double>* %bumped, align 8
    391   store <2 x double>* %bumped, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
    392   ret void
    393 }
    394 
    395 define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) { ; post-index shape: the store uses the original pointer
    396 ; CHECK-LABEL: test_v2f64_post_store:
    397 ; CHECK: str q0, [x0], #80
    398   %bumped = getelementptr <2 x double>, <2 x double>* %addr, i32 5 ; 5 vectors * 16 bytes = 80
    399   store <2 x double> %in, <2 x double>* %addr, align 8 ; data goes to the un-advanced address
    400   store <2 x double>* %bumped, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
    401   ret void
    402 }
    403 
    404 define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) { ; store lane 3, return the pointer one element on
    405 ; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
    406 ; CHECK: st1.b { v0 }[3], [x0], #1
    407   %lane = extractelement <16 x i8> %in, i32 3
    408   store i8 %lane, i8* %addr
    409 
    410   %bumped = getelementptr i8, i8* %addr, i32 1 ; 1 element = 1 byte, the size just stored
    411   ret i8* %bumped
    412 }
    413 
    414 define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) { ; store lane 3, return the pointer two elements on
    415 ; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
    416 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
    417 ; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
    418   %lane = extractelement <16 x i8> %in, i32 3
    419   store i8 %lane, i8* %addr
    420 
    421   %bumped = getelementptr i8, i8* %addr, i32 2 ; 2 elements = 2 bytes, not the stored size
    422   ret i8* %bumped
    423 }
    424 
    425 
    426 define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) { ; store lane 3, return the pointer one element on
    427 ; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
    428 ; CHECK: st1.h { v0 }[3], [x0], #2
    429   %lane = extractelement <8 x i16> %in, i32 3
    430   store i16 %lane, i16* %addr
    431 
    432   %bumped = getelementptr i16, i16* %addr, i32 1 ; 1 element = 2 bytes, the size just stored
    433   ret i16* %bumped
    434 }
    435 
    436 define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) { ; store lane 3, return the pointer two elements on
    437 ; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
    438 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
    439 ; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
    440   %lane = extractelement <8 x i16> %in, i32 3
    441   store i16 %lane, i16* %addr
    442 
    443   %bumped = getelementptr i16, i16* %addr, i32 2 ; 2 elements = 4 bytes, not the stored size
    444   ret i16* %bumped
    445 }
    446 
    447 define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) { ; store lane 3, return the pointer one element on
    448 ; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
    449 ; CHECK: st1.s { v0 }[3], [x0], #4
    450   %lane = extractelement <4 x i32> %in, i32 3
    451   store i32 %lane, i32* %addr
    452 
    453   %bumped = getelementptr i32, i32* %addr, i32 1 ; 1 element = 4 bytes, the size just stored
    454   ret i32* %bumped
    455 }
    456 
    457 define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) { ; store lane 3, return the pointer two elements on
    458 ; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
    459 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
    460 ; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
    461   %lane = extractelement <4 x i32> %in, i32 3
    462   store i32 %lane, i32* %addr
    463 
    464   %bumped = getelementptr i32, i32* %addr, i32 2 ; 2 elements = 8 bytes, not the stored size
    465   ret i32* %bumped
    466 }
    467 
    468 define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) { ; store lane 3, return the pointer one element on
    469 ; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
    470 ; CHECK: st1.s { v0 }[3], [x0], #4
    471   %lane = extractelement <4 x float> %in, i32 3
    472   store float %lane, float* %addr
    473 
    474   %bumped = getelementptr float, float* %addr, i32 1 ; 1 element = 4 bytes, the size just stored
    475   ret float* %bumped
    476 }
    477 
    478 define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) { ; store lane 3, return the pointer two elements on
    479 ; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
    480 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
    481 ; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
    482   %lane = extractelement <4 x float> %in, i32 3
    483   store float %lane, float* %addr
    484 
    485   %bumped = getelementptr float, float* %addr, i32 2 ; 2 elements = 8 bytes, not the stored size
    486   ret float* %bumped
    487 }
    488 
    489 define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) { ; store lane 1, return the pointer one element on
    490 ; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
    491 ; CHECK: st1.d { v0 }[1], [x0], #8
    492   %lane = extractelement <2 x i64> %in, i64 1
    493   store i64 %lane, i64* %addr
    494 
    495   %bumped = getelementptr i64, i64* %addr, i64 1 ; 1 element = 8 bytes, the size just stored
    496   ret i64* %bumped
    497 }
    498 
    499 define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) { ; store lane 1, return the pointer two elements on
    500 ; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
    501 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
    502 ; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
    503   %lane = extractelement <2 x i64> %in, i64 1
    504   store i64 %lane, i64* %addr
    505 
    506   %bumped = getelementptr i64, i64* %addr, i64 2 ; 2 elements = 16 bytes, not the stored size
    507   ret i64* %bumped
    508 }
    509 
    510 define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) { ; store lane 1, return the pointer one element on
    511 ; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
    512 ; CHECK: st1.d { v0 }[1], [x0], #8
    513   %lane = extractelement <2 x double> %in, i32 1
    514   store double %lane, double* %addr
    515 
    516   %bumped = getelementptr double, double* %addr, i32 1 ; 1 element = 8 bytes, the size just stored
    517   ret double* %bumped
    518 }
    519 
    520 define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) { ; store lane 1, return the pointer two elements on
    521 ; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
    522 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
    523 ; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
    524   %lane = extractelement <2 x double> %in, i32 1
    525   store double %lane, double* %addr
    526 
    527   %bumped = getelementptr double, double* %addr, i32 2 ; 2 elements = 16 bytes, not the stored size
    528   ret double* %bumped
    529 }
    530 
    531 define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) { ; store lane 3, return the pointer one element on
    532 ; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
    533 ; CHECK: st1.b { v0 }[3], [x0], #1
    534   %lane = extractelement <8 x i8> %in, i32 3
    535   store i8 %lane, i8* %addr
    536 
    537   %bumped = getelementptr i8, i8* %addr, i32 1 ; 1 element = 1 byte, the size just stored
    538   ret i8* %bumped
    539 }
    540 
    541 define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) { ; store lane 3, return the pointer two elements on
    542 ; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
    543 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
    544 ; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
    545   %lane = extractelement <8 x i8> %in, i32 3
    546   store i8 %lane, i8* %addr
    547 
    548   %bumped = getelementptr i8, i8* %addr, i32 2 ; 2 elements = 2 bytes, not the stored size
    549   ret i8* %bumped
    550 }
    551 
    552 define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) { ; store lane 3, return the pointer one element on
    553 ; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
    554 ; CHECK: st1.h { v0 }[3], [x0], #2
    555   %lane = extractelement <4 x i16> %in, i32 3
    556   store i16 %lane, i16* %addr
    557 
    558   %bumped = getelementptr i16, i16* %addr, i32 1 ; 1 element = 2 bytes, the size just stored
    559   ret i16* %bumped
    560 }
    561 
    562 define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) { ; store lane 3, return the pointer two elements on
    563 ; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
    564 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
    565 ; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
    566   %lane = extractelement <4 x i16> %in, i32 3
    567   store i16 %lane, i16* %addr
    568 
    569   %bumped = getelementptr i16, i16* %addr, i32 2 ; 2 elements = 4 bytes, not the stored size
    570   ret i16* %bumped
    571 }
    572 
    573 define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) { ; store lane 1, return the pointer one element on
    574 ; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
    575 ; CHECK: st1.s { v0 }[1], [x0], #4
    576   %lane = extractelement <2 x i32> %in, i32 1
    577   store i32 %lane, i32* %addr
    578 
    579   %bumped = getelementptr i32, i32* %addr, i32 1 ; 1 element = 4 bytes, the size just stored
    580   ret i32* %bumped
    581 }
    582 
    583 define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) { ; store lane 1, return the pointer two elements on
    584 ; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
    585 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
    586 ; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
    587   %lane = extractelement <2 x i32> %in, i32 1
    588   store i32 %lane, i32* %addr
    589 
    590   %bumped = getelementptr i32, i32* %addr, i32 2 ; 2 elements = 8 bytes, not the stored size
    591   ret i32* %bumped
    592 }
    593 
    594 define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) { ; store lane 1, return the pointer one element on
    595 ; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
    596 ; CHECK: st1.s { v0 }[1], [x0], #4
    597   %lane = extractelement <2 x float> %in, i32 1
    598   store float %lane, float* %addr
    599 
    600   %bumped = getelementptr float, float* %addr, i32 1 ; 1 element = 4 bytes, the size just stored
    601   ret float* %bumped
    602 }
    603 
    604 define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) { ; store lane 1, return the pointer two elements on
    605 ; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
    606 ; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
    607 ; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
    608   %lane = extractelement <2 x float> %in, i32 1
    609   store float %lane, float* %addr
    610 
    611   %bumped = getelementptr float, float* %addr, i32 2 ; 2 elements = 8 bytes, not the stored size
    612   ret float* %bumped
    613 }
    614 
    615 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) { ; two-vector ld2, then store the pointer advanced by the bytes loaded
    616 ;CHECK-LABEL: test_v16i8_post_imm_ld2:
    617 ;CHECK: ld2.16b { v0, v1 }, [x0], #32
    618   %pair = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
    619   %next = getelementptr i8, i8* %A, i32 32 ; 32 x i8 = 32 bytes = 2 vectors * 16 bytes
    620   store i8* %next, i8** %ptr
    621   ret { <16 x i8>, <16 x i8> } %pair
    622 }
    623 
    624 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { ; two-vector ld2, then store the pointer advanced by a run-time amount
    625 ;CHECK-LABEL: test_v16i8_post_reg_ld2:
    626 ;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
    627   %pair = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
    628   %next = getelementptr i8, i8* %A, i64 %inc ; %inc is an i8 element count
    629   store i8* %next, i8** %ptr
    630   ret { <16 x i8>, <16 x i8> } %pair
    631 }
    632 
    633 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
    634 
    635 
    636 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) { ; two-vector ld2, then store the pointer advanced by the bytes loaded
    637 ;CHECK-LABEL: test_v8i8_post_imm_ld2:
    638 ;CHECK: ld2.8b { v0, v1 }, [x0], #16
    639   %pair = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
    640   %next = getelementptr i8, i8* %A, i32 16 ; 16 x i8 = 16 bytes = 2 vectors * 8 bytes
    641   store i8* %next, i8** %ptr
    642   ret { <8 x i8>, <8 x i8> } %pair
    643 }
    644 
    645 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) { ; two-vector ld2, then store the pointer advanced by a run-time amount
    646 ;CHECK-LABEL: test_v8i8_post_reg_ld2:
    647 ;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
    648   %pair = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
    649   %next = getelementptr i8, i8* %A, i64 %inc ; %inc is an i8 element count
    650   store i8* %next, i8** %ptr
    651   ret { <8 x i8>, <8 x i8> } %pair
    652 }
    653 
    654 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
    655 
    656 
    657 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) { ; two-vector ld2, then store the pointer advanced by the bytes loaded
    658 ;CHECK-LABEL: test_v8i16_post_imm_ld2:
    659 ;CHECK: ld2.8h { v0, v1 }, [x0], #32
    660   %pair = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
    661   %next = getelementptr i16, i16* %A, i32 16 ; 16 x i16 = 32 bytes = 2 vectors * 16 bytes
    662   store i16* %next, i16** %ptr
    663   ret { <8 x i16>, <8 x i16> } %pair
    664 }
    665 
    666 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { ; two-vector ld2, then store the pointer advanced by a run-time amount
    667 ;CHECK-LABEL: test_v8i16_post_reg_ld2:
    668 ;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
    669   %pair = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
    670   %next = getelementptr i16, i16* %A, i64 %inc ; %inc is an i16 element count
    671   store i16* %next, i16** %ptr
    672   ret { <8 x i16>, <8 x i16> } %pair
    673 }
    674 
    675 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)
    676 
    677 
    678 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) { ; two-vector ld2, then store the pointer advanced by the bytes loaded
    679 ;CHECK-LABEL: test_v4i16_post_imm_ld2:
    680 ;CHECK: ld2.4h { v0, v1 }, [x0], #16
    681   %pair = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
    682   %next = getelementptr i16, i16* %A, i32 8 ; 8 x i16 = 16 bytes = 2 vectors * 8 bytes
    683   store i16* %next, i16** %ptr
    684   ret { <4 x i16>, <4 x i16> } %pair
    685 }
    686 
    687 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) { ; two-vector ld2, then store the pointer advanced by a run-time amount
    688 ;CHECK-LABEL: test_v4i16_post_reg_ld2:
    689 ;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
    690   %pair = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
    691   %next = getelementptr i16, i16* %A, i64 %inc ; %inc is an i16 element count
    692   store i16* %next, i16** %ptr
    693   ret { <4 x i16>, <4 x i16> } %pair
    694 }
    695 
    696 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)
    697 
    698 
    699 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) { ; two-vector ld2, then store the pointer advanced by the bytes loaded
    700 ;CHECK-LABEL: test_v4i32_post_imm_ld2:
    701 ;CHECK: ld2.4s { v0, v1 }, [x0], #32
    702   %pair = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
    703   %next = getelementptr i32, i32* %A, i32 8 ; 8 x i32 = 32 bytes = 2 vectors * 16 bytes
    704   store i32* %next, i32** %ptr
    705   ret { <4 x i32>, <4 x i32> } %pair
    706 }
    707 
    708 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { ; two-vector ld2, then store the pointer advanced by a run-time amount
    709 ;CHECK-LABEL: test_v4i32_post_reg_ld2:
    710 ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
    711   %pair = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
    712   %next = getelementptr i32, i32* %A, i64 %inc ; %inc is an i32 element count
    713   store i32* %next, i32** %ptr
    714   ret { <4 x i32>, <4 x i32> } %pair
    715 }
    716 
    717 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)
    718 
    719 
    720 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) { ; two-vector ld2, then store the pointer advanced by the bytes loaded
    721 ;CHECK-LABEL: test_v2i32_post_imm_ld2:
    722 ;CHECK: ld2.2s { v0, v1 }, [x0], #16
    723   %pair = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
    724   %next = getelementptr i32, i32* %A, i32 4 ; 4 x i32 = 16 bytes = 2 vectors * 8 bytes
    725   store i32* %next, i32** %ptr
    726   ret { <2 x i32>, <2 x i32> } %pair
    727 }
    728 
    729 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) { ; two-vector ld2, then store the pointer advanced by a run-time amount
    730 ;CHECK-LABEL: test_v2i32_post_reg_ld2:
    731 ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
    732   %pair = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
    733   %next = getelementptr i32, i32* %A, i64 %inc ; %inc is an i32 element count
    734   store i32* %next, i32** %ptr
    735   ret { <2 x i32>, <2 x i32> } %pair
    736 }
    737 
    738 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
    739 
    740 
    741 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
    742 ;CHECK-LABEL: test_v2i64_post_imm_ld2:
    743 ;CHECK: ld2.2d { v0, v1 }, [x0], #32
    744   %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
    745   %tmp = getelementptr i64, i64* %A, i32 4
    746   store i64* %tmp, i64** %ptr
    747   ret { <2 x i64>, <2 x i64> } %ld2
    748 }
    749 
    750 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
    751 ;CHECK-LABEL: test_v2i64_post_reg_ld2:
    752 ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
    753   %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
    754   %tmp = getelementptr i64, i64* %A, i64 %inc
    755   store i64* %tmp, i64** %ptr
    756   ret { <2 x i64>, <2 x i64> } %ld2
    757 }
    758 
    759 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)
    760 
    761 
    762 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
    763 ;CHECK-LABEL: test_v1i64_post_imm_ld2:
    764 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
    765   %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
    766   %tmp = getelementptr i64, i64* %A, i32 2
    767   store i64* %tmp, i64** %ptr
    768   ret { <1 x i64>, <1 x i64> } %ld2
    769 }
    770 
    771 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
    772 ;CHECK-LABEL: test_v1i64_post_reg_ld2:
    773 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
    774   %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
    775   %tmp = getelementptr i64, i64* %A, i64 %inc
    776   store i64* %tmp, i64** %ptr
    777   ret { <1 x i64>, <1 x i64> } %ld2
    778 }
    779 
    780 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)
    781 
    782 
    783 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
    784 ;CHECK-LABEL: test_v4f32_post_imm_ld2:
    785 ;CHECK: ld2.4s { v0, v1 }, [x0], #32
    786   %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
    787   %tmp = getelementptr float, float* %A, i32 8
    788   store float* %tmp, float** %ptr
    789   ret { <4 x float>, <4 x float> } %ld2
    790 }
    791 
    792 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
    793 ;CHECK-LABEL: test_v4f32_post_reg_ld2:
    794 ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
    795   %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
    796   %tmp = getelementptr float, float* %A, i64 %inc
    797   store float* %tmp, float** %ptr
    798   ret { <4 x float>, <4 x float> } %ld2
    799 }
    800 
    801 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)
    802 
    803 
    804 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
    805 ;CHECK-LABEL: test_v2f32_post_imm_ld2:
    806 ;CHECK: ld2.2s { v0, v1 }, [x0], #16
    807   %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
    808   %tmp = getelementptr float, float* %A, i32 4
    809   store float* %tmp, float** %ptr
    810   ret { <2 x float>, <2 x float> } %ld2
    811 }
    812 
    813 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
    814 ;CHECK-LABEL: test_v2f32_post_reg_ld2:
    815 ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
    816   %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
    817   %tmp = getelementptr float, float* %A, i64 %inc
    818   store float* %tmp, float** %ptr
    819   ret { <2 x float>, <2 x float> } %ld2
    820 }
    821 
    822 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)
    823 
    824 
    825 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
    826 ;CHECK-LABEL: test_v2f64_post_imm_ld2:
    827 ;CHECK: ld2.2d { v0, v1 }, [x0], #32
    828   %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
    829   %tmp = getelementptr double, double* %A, i32 4
    830   store double* %tmp, double** %ptr
    831   ret { <2 x double>, <2 x double> } %ld2
    832 }
    833 
    834 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
    835 ;CHECK-LABEL: test_v2f64_post_reg_ld2:
    836 ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
    837   %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
    838   %tmp = getelementptr double, double* %A, i64 %inc
    839   store double* %tmp, double** %ptr
    840   ret { <2 x double>, <2 x double> } %ld2
    841 }
    842 
    843 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*)
    844 
    845 
    846 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
    847 ;CHECK-LABEL: test_v1f64_post_imm_ld2:
    848 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
    849   %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
    850   %tmp = getelementptr double, double* %A, i32 2
    851   store double* %tmp, double** %ptr
    852   ret { <1 x double>, <1 x double> } %ld2
    853 }
    854 
    855 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
    856 ;CHECK-LABEL: test_v1f64_post_reg_ld2:
    857 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
    858   %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
    859   %tmp = getelementptr double, double* %A, i64 %inc
    860   store double* %tmp, double** %ptr
    861   ret { <1 x double>, <1 x double> } %ld2
    862 }
    863 
    864 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*)
    865 
    866 
    867 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
    868 ;CHECK-LABEL: test_v16i8_post_imm_ld3:
    869 ;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
    870   %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
    871   %tmp = getelementptr i8, i8* %A, i32 48
    872   store i8* %tmp, i8** %ptr
    873   ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
    874 }
    875 
    876 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
    877 ;CHECK-LABEL: test_v16i8_post_reg_ld3:
    878 ;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
    879   %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
    880   %tmp = getelementptr i8, i8* %A, i64 %inc
    881   store i8* %tmp, i8** %ptr
    882   ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
    883 }
    884 
    885 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*)
    886 
    887 
    888 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
    889 ;CHECK-LABEL: test_v8i8_post_imm_ld3:
    890 ;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
    891   %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
    892   %tmp = getelementptr i8, i8* %A, i32 24
    893   store i8* %tmp, i8** %ptr
    894   ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
    895 }
    896 
    897 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
    898 ;CHECK-LABEL: test_v8i8_post_reg_ld3:
    899 ;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
    900   %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
    901   %tmp = getelementptr i8, i8* %A, i64 %inc
    902   store i8* %tmp, i8** %ptr
    903   ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
    904 }
    905 
    906 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*)
    907 
    908 
    909 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
    910 ;CHECK-LABEL: test_v8i16_post_imm_ld3:
    911 ;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
    912   %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
    913   %tmp = getelementptr i16, i16* %A, i32 24
    914   store i16* %tmp, i16** %ptr
    915   ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
    916 }
    917 
    918 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
    919 ;CHECK-LABEL: test_v8i16_post_reg_ld3:
    920 ;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
    921   %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
    922   %tmp = getelementptr i16, i16* %A, i64 %inc
    923   store i16* %tmp, i16** %ptr
    924   ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
    925 }
    926 
    927 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*)
    928 
    929 
    930 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
    931 ;CHECK-LABEL: test_v4i16_post_imm_ld3:
    932 ;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
    933   %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
    934   %tmp = getelementptr i16, i16* %A, i32 12
    935   store i16* %tmp, i16** %ptr
    936   ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
    937 }
    938 
    939 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
    940 ;CHECK-LABEL: test_v4i16_post_reg_ld3:
    941 ;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
    942   %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
    943   %tmp = getelementptr i16, i16* %A, i64 %inc
    944   store i16* %tmp, i16** %ptr
    945   ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
    946 }
    947 
    948 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*)
    949 
    950 
    951 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
    952 ;CHECK-LABEL: test_v4i32_post_imm_ld3:
    953 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
    954   %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
    955   %tmp = getelementptr i32, i32* %A, i32 12
    956   store i32* %tmp, i32** %ptr
    957   ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
    958 }
    959 
    960 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
    961 ;CHECK-LABEL: test_v4i32_post_reg_ld3:
    962 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
    963   %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
    964   %tmp = getelementptr i32, i32* %A, i64 %inc
    965   store i32* %tmp, i32** %ptr
    966   ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
    967 }
    968 
    969 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*)
    970 
    971 
    972 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
    973 ;CHECK-LABEL: test_v2i32_post_imm_ld3:
    974 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
    975   %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
    976   %tmp = getelementptr i32, i32* %A, i32 6
    977   store i32* %tmp, i32** %ptr
    978   ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
    979 }
    980 
    981 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
    982 ;CHECK-LABEL: test_v2i32_post_reg_ld3:
    983 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
    984   %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
    985   %tmp = getelementptr i32, i32* %A, i64 %inc
    986   store i32* %tmp, i32** %ptr
    987   ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
    988 }
    989 
    990 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*)
    991 
    992 
    993 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
    994 ;CHECK-LABEL: test_v2i64_post_imm_ld3:
    995 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
    996   %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
    997   %tmp = getelementptr i64, i64* %A, i32 6
    998   store i64* %tmp, i64** %ptr
    999   ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
   1000 }
   1001 
   1002 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
   1003 ;CHECK-LABEL: test_v2i64_post_reg_ld3:
   1004 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1005   %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
   1006   %tmp = getelementptr i64, i64* %A, i64 %inc
   1007   store i64* %tmp, i64** %ptr
   1008   ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
   1009 }
   1010 
   1011 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*)
   1012 
   1013 
   1014 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
   1015 ;CHECK-LABEL: test_v1i64_post_imm_ld3:
   1016 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
   1017   %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
   1018   %tmp = getelementptr i64, i64* %A, i32 3
   1019   store i64* %tmp, i64** %ptr
   1020   ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
   1021 }
   1022 
   1023 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
   1024 ;CHECK-LABEL: test_v1i64_post_reg_ld3:
   1025 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1026   %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
   1027   %tmp = getelementptr i64, i64* %A, i64 %inc
   1028   store i64* %tmp, i64** %ptr
   1029   ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
   1030 }
   1031 
   1032 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*)
   1033 
   1034 
   1035 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
   1036 ;CHECK-LABEL: test_v4f32_post_imm_ld3:
   1037 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
   1038   %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
   1039   %tmp = getelementptr float, float* %A, i32 12
   1040   store float* %tmp, float** %ptr
   1041   ret { <4 x float>, <4 x float>, <4 x float> } %ld3
   1042 }
   1043 
   1044 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
   1045 ;CHECK-LABEL: test_v4f32_post_reg_ld3:
   1046 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1047   %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
   1048   %tmp = getelementptr float, float* %A, i64 %inc
   1049   store float* %tmp, float** %ptr
   1050   ret { <4 x float>, <4 x float>, <4 x float> } %ld3
   1051 }
   1052 
   1053 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)
   1054 
   1055 
   1056 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
   1057 ;CHECK-LABEL: test_v2f32_post_imm_ld3:
   1058 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
   1059   %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
   1060   %tmp = getelementptr float, float* %A, i32 6
   1061   store float* %tmp, float** %ptr
   1062   ret { <2 x float>, <2 x float>, <2 x float> } %ld3
   1063 }
   1064 
   1065 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
   1066 ;CHECK-LABEL: test_v2f32_post_reg_ld3:
   1067 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1068   %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
   1069   %tmp = getelementptr float, float* %A, i64 %inc
   1070   store float* %tmp, float** %ptr
   1071   ret { <2 x float>, <2 x float>, <2 x float> } %ld3
   1072 }
   1073 
   1074 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*)
   1075 
   1076 
   1077 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
   1078 ;CHECK-LABEL: test_v2f64_post_imm_ld3:
   1079 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
   1080   %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
   1081   %tmp = getelementptr double, double* %A, i32 6
   1082   store double* %tmp, double** %ptr
   1083   ret { <2 x double>, <2 x double>, <2 x double> } %ld3
   1084 }
   1085 
   1086 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
   1087 ;CHECK-LABEL: test_v2f64_post_reg_ld3:
   1088 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1089   %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
   1090   %tmp = getelementptr double, double* %A, i64 %inc
   1091   store double* %tmp, double** %ptr
   1092   ret { <2 x double>, <2 x double>, <2 x double> } %ld3
   1093 }
   1094 
   1095 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*)
   1096 
   1097 
   1098 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
   1099 ;CHECK-LABEL: test_v1f64_post_imm_ld3:
   1100 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
   1101   %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
   1102   %tmp = getelementptr double, double* %A, i32 3
   1103   store double* %tmp, double** %ptr
   1104   ret { <1 x double>, <1 x double>, <1 x double> } %ld3
   1105 }
   1106 
   1107 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
   1108 ;CHECK-LABEL: test_v1f64_post_reg_ld3:
   1109 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1110   %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
   1111   %tmp = getelementptr double, double* %A, i64 %inc
   1112   store double* %tmp, double** %ptr
   1113   ret { <1 x double>, <1 x double>, <1 x double> } %ld3
   1114 }
   1115 
   1116 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*)
   1117 
   1118 
   1119 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
   1120 ;CHECK-LABEL: test_v16i8_post_imm_ld4:
   1121 ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
   1122   %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
   1123   %tmp = getelementptr i8, i8* %A, i32 64
   1124   store i8* %tmp, i8** %ptr
   1125   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
   1126 }
   1127 
   1128 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
   1129 ;CHECK-LABEL: test_v16i8_post_reg_ld4:
   1130 ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1131   %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
   1132   %tmp = getelementptr i8, i8* %A, i64 %inc
   1133   store i8* %tmp, i8** %ptr
   1134   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
   1135 }
   1136 
   1137 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)
   1138 
   1139 
   1140 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
   1141 ;CHECK-LABEL: test_v8i8_post_imm_ld4:
   1142 ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
   1143   %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
   1144   %tmp = getelementptr i8, i8* %A, i32 32
   1145   store i8* %tmp, i8** %ptr
   1146   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
   1147 }
   1148 
   1149 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
   1150 ;CHECK-LABEL: test_v8i8_post_reg_ld4:
   1151 ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1152   %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
   1153   %tmp = getelementptr i8, i8* %A, i64 %inc
   1154   store i8* %tmp, i8** %ptr
   1155   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
   1156 }
   1157 
   1158 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*)
   1159 
   1160 
   1161 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
   1162 ;CHECK-LABEL: test_v8i16_post_imm_ld4:
   1163 ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
   1164   %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
   1165   %tmp = getelementptr i16, i16* %A, i32 32
   1166   store i16* %tmp, i16** %ptr
   1167   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
   1168 }
   1169 
   1170 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
   1171 ;CHECK-LABEL: test_v8i16_post_reg_ld4:
   1172 ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1173   %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
   1174   %tmp = getelementptr i16, i16* %A, i64 %inc
   1175   store i16* %tmp, i16** %ptr
   1176   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
   1177 }
   1178 
   1179 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*)
   1180 
   1181 
   1182 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
   1183 ;CHECK-LABEL: test_v4i16_post_imm_ld4:
   1184 ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
   1185   %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
   1186   %tmp = getelementptr i16, i16* %A, i32 16
   1187   store i16* %tmp, i16** %ptr
   1188   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
   1189 }
   1190 
   1191 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
   1192 ;CHECK-LABEL: test_v4i16_post_reg_ld4:
   1193 ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1194   %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
   1195   %tmp = getelementptr i16, i16* %A, i64 %inc
   1196   store i16* %tmp, i16** %ptr
   1197   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
   1198 }
   1199 
   1200 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)
   1201 
   1202 
   1203 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
   1204 ;CHECK-LABEL: test_v4i32_post_imm_ld4:
   1205 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
   1206   %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
   1207   %tmp = getelementptr i32, i32* %A, i32 16
   1208   store i32* %tmp, i32** %ptr
   1209   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
   1210 }
   1211 
   1212 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
   1213 ;CHECK-LABEL: test_v4i32_post_reg_ld4:
   1214 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1215   %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
   1216   %tmp = getelementptr i32, i32* %A, i64 %inc
   1217   store i32* %tmp, i32** %ptr
   1218   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
   1219 }
   1220 
   1221 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*)
   1222 
   1223 
   1224 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
   1225 ;CHECK-LABEL: test_v2i32_post_imm_ld4:
   1226 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
   1227   %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
   1228   %tmp = getelementptr i32, i32* %A, i32 8
   1229   store i32* %tmp, i32** %ptr
   1230   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
   1231 }
   1232 
   1233 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
   1234 ;CHECK-LABEL: test_v2i32_post_reg_ld4:
   1235 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1236   %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
   1237   %tmp = getelementptr i32, i32* %A, i64 %inc
   1238   store i32* %tmp, i32** %ptr
   1239   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
   1240 }
   1241 
   1242 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*)
   1243 
   1244 
   1245 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
   1246 ;CHECK-LABEL: test_v2i64_post_imm_ld4:
   1247 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
   1248   %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
   1249   %tmp = getelementptr i64, i64* %A, i32 8
   1250   store i64* %tmp, i64** %ptr
   1251   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
   1252 }
   1253 
   1254 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
   1255 ;CHECK-LABEL: test_v2i64_post_reg_ld4:
   1256 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1257   %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
   1258   %tmp = getelementptr i64, i64* %A, i64 %inc
   1259   store i64* %tmp, i64** %ptr
   1260   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
   1261 }
   1262 
   1263 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*)
   1264 
   1265 
   1266 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
   1267 ;CHECK-LABEL: test_v1i64_post_imm_ld4:
   1268 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
   1269   %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
   1270   %tmp = getelementptr i64, i64* %A, i32 4
   1271   store i64* %tmp, i64** %ptr
   1272   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
   1273 }
   1274 
   1275 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
   1276 ;CHECK-LABEL: test_v1i64_post_reg_ld4:
   1277 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1278   %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
   1279   %tmp = getelementptr i64, i64* %A, i64 %inc
   1280   store i64* %tmp, i64** %ptr
   1281   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
   1282 }
   1283 
   1284 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*)
   1285 
   1286 
   1287 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
   1288 ;CHECK-LABEL: test_v4f32_post_imm_ld4:
   1289 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
   1290   %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
   1291   %tmp = getelementptr float, float* %A, i32 16
   1292   store float* %tmp, float** %ptr
   1293   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
   1294 }
   1295 
   1296 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
   1297 ;CHECK-LABEL: test_v4f32_post_reg_ld4:
   1298 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1299   %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
   1300   %tmp = getelementptr float, float* %A, i64 %inc
   1301   store float* %tmp, float** %ptr
   1302   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
   1303 }
   1304 
   1305 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*)
   1306 
   1307 
   1308 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
   1309 ;CHECK-LABEL: test_v2f32_post_imm_ld4:
   1310 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
   1311   %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
   1312   %tmp = getelementptr float, float* %A, i32 8
   1313   store float* %tmp, float** %ptr
   1314   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
   1315 }
   1316 
   1317 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
   1318 ;CHECK-LABEL: test_v2f32_post_reg_ld4:
   1319 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1320   %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
   1321   %tmp = getelementptr float, float* %A, i64 %inc
   1322   store float* %tmp, float** %ptr
   1323   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
   1324 }
   1325 
   1326 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*)
   1327 
   1328 
   1329 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
   1330 ;CHECK-LABEL: test_v2f64_post_imm_ld4:
   1331 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
   1332   %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
   1333   %tmp = getelementptr double, double* %A, i32 8
   1334   store double* %tmp, double** %ptr
   1335   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
   1336 }
   1337 
   1338 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { ; ld4 of four <2 x double>, pointer advanced by a runtime amount
   1339 ;CHECK-LABEL: test_v2f64_post_reg_ld4:
   1340 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1341   %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
   1342   %tmp = getelementptr double, double* %A, i64 %inc ; advance by %inc elements of double; register post-index expected above
   1343   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1344   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
   1345 }
   1346 
   1347 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*)
   1348 
   1349 
   1350 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) { ; ld4 of four <1 x double> (expected to be emitted as ld1.1d), pointer advanced by a constant
   1351 ;CHECK-LABEL: test_v1f64_post_imm_ld4:
   1352 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
   1353   %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
   1354   %tmp = getelementptr double, double* %A, i32 4 ; 4 x double = 32 bytes = 4 x <1 x double>, the #32 expected above
   1355   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1356   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
   1357 }
   1358 
   1359 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) { ; ld4 of four <1 x double> (expected to be emitted as ld1.1d), pointer advanced by a runtime amount
   1360 ;CHECK-LABEL: test_v1f64_post_reg_ld4:
   1361 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1362   %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
   1363   %tmp = getelementptr double, double* %A, i64 %inc ; advance by %inc elements of double; register post-index expected above
   1364   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1365   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
   1366 }
   1367 
   1368 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
   1369 
   1370 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) { ; ld1x2 of two <16 x i8>, pointer advanced by a constant
   1371 ;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
   1372 ;CHECK: ld1.16b { v0, v1 }, [x0], #32
   1373   %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
   1374   %tmp = getelementptr i8, i8* %A, i32 32 ; 32 x i8 = 32 bytes = 2 x <16 x i8>, the #32 expected above
   1375   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1376   ret { <16 x i8>, <16 x i8> } %ld1x2
   1377 }
   1378 
   1379 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { ; ld1x2 of two <16 x i8>, pointer advanced by a runtime amount
   1380 ;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
   1381 ;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
   1382   %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
   1383   %tmp = getelementptr i8, i8* %A, i64 %inc ; advance by %inc elements of i8; register post-index expected above
   1384   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1385   ret { <16 x i8>, <16 x i8> } %ld1x2
   1386 }
   1387 
   1388 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
   1389 
   1390 
   1391 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) { ; ld1x2 of two <8 x i8>, pointer advanced by a constant
   1392 ;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
   1393 ;CHECK: ld1.8b { v0, v1 }, [x0], #16
   1394   %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
   1395   %tmp = getelementptr i8, i8* %A, i32 16 ; 16 x i8 = 16 bytes = 2 x <8 x i8>, the #16 expected above
   1396   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1397   ret { <8 x i8>, <8 x i8> } %ld1x2
   1398 }
   1399 
   1400 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) { ; ld1x2 of two <8 x i8>, pointer advanced by a runtime amount
   1401 ;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
   1402 ;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
   1403   %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
   1404   %tmp = getelementptr i8, i8* %A, i64 %inc ; advance by %inc elements of i8; register post-index expected above
   1405   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1406   ret { <8 x i8>, <8 x i8> } %ld1x2
   1407 }
   1408 
   1409 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)
   1410 
   1411 
   1412 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) { ; ld1x2 of two <8 x i16>, pointer advanced by a constant
   1413 ;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
   1414 ;CHECK: ld1.8h { v0, v1 }, [x0], #32
   1415   %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
   1416   %tmp = getelementptr i16, i16* %A, i32 16 ; 16 x i16 = 32 bytes = 2 x <8 x i16>, the #32 expected above
   1417   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1418   ret { <8 x i16>, <8 x i16> } %ld1x2
   1419 }
   1420 
   1421 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { ; ld1x2 of two <8 x i16>, pointer advanced by a runtime amount
   1422 ;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
   1423 ;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
   1424   %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
   1425   %tmp = getelementptr i16, i16* %A, i64 %inc ; advance by %inc elements of i16; register post-index expected above
   1426   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1427   ret { <8 x i16>, <8 x i16> } %ld1x2
   1428 }
   1429 
   1430 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)
   1431 
   1432 
   1433 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) { ; ld1x2 of two <4 x i16>, pointer advanced by a constant
   1434 ;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
   1435 ;CHECK: ld1.4h { v0, v1 }, [x0], #16
   1436   %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
   1437   %tmp = getelementptr i16, i16* %A, i32 8 ; 8 x i16 = 16 bytes = 2 x <4 x i16>, the #16 expected above
   1438   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1439   ret { <4 x i16>, <4 x i16> } %ld1x2
   1440 }
   1441 
   1442 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) { ; ld1x2 of two <4 x i16>, pointer advanced by a runtime amount
   1443 ;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
   1444 ;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
   1445   %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
   1446   %tmp = getelementptr i16, i16* %A, i64 %inc ; advance by %inc elements of i16; register post-index expected above
   1447   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1448   ret { <4 x i16>, <4 x i16> } %ld1x2
   1449 }
   1450 
   1451 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)
   1452 
   1453 
   1454 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) { ; ld1x2 of two <4 x i32>, pointer advanced by a constant
   1455 ;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
   1456 ;CHECK: ld1.4s { v0, v1 }, [x0], #32
   1457   %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
   1458   %tmp = getelementptr i32, i32* %A, i32 8 ; 8 x i32 = 32 bytes = 2 x <4 x i32>, the #32 expected above
   1459   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1460   ret { <4 x i32>, <4 x i32> } %ld1x2
   1461 }
   1462 
   1463 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { ; ld1x2 of two <4 x i32>, pointer advanced by a runtime amount
   1464 ;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
   1465 ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
   1466   %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
   1467   %tmp = getelementptr i32, i32* %A, i64 %inc ; advance by %inc elements of i32; register post-index expected above
   1468   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1469   ret { <4 x i32>, <4 x i32> } %ld1x2
   1470 }
   1471 
   1472 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)
   1473 
   1474 
   1475 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) { ; ld1x2 of two <2 x i32>, pointer advanced by a constant
   1476 ;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
   1477 ;CHECK: ld1.2s { v0, v1 }, [x0], #16
   1478   %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
   1479   %tmp = getelementptr i32, i32* %A, i32 4 ; 4 x i32 = 16 bytes = 2 x <2 x i32>, the #16 expected above
   1480   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1481   ret { <2 x i32>, <2 x i32> } %ld1x2
   1482 }
   1483 
   1484 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) { ; ld1x2 of two <2 x i32>, pointer advanced by a runtime amount
   1485 ;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
   1486 ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
   1487   %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
   1488   %tmp = getelementptr i32, i32* %A, i64 %inc ; advance by %inc elements of i32; register post-index expected above
   1489   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1490   ret { <2 x i32>, <2 x i32> } %ld1x2
   1491 }
   1492 
   1493 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)
   1494 
   1495 
   1496 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) { ; ld1x2 of two <2 x i64>, pointer advanced by a constant
   1497 ;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
   1498 ;CHECK: ld1.2d { v0, v1 }, [x0], #32
   1499   %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
   1500   %tmp = getelementptr i64, i64* %A, i32 4 ; 4 x i64 = 32 bytes = 2 x <2 x i64>, the #32 expected above
   1501   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1502   ret { <2 x i64>, <2 x i64> } %ld1x2
   1503 }
   1504 
   1505 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { ; ld1x2 of two <2 x i64>, pointer advanced by a runtime amount
   1506 ;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
   1507 ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
   1508   %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
   1509   %tmp = getelementptr i64, i64* %A, i64 %inc ; advance by %inc elements of i64; register post-index expected above
   1510   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1511   ret { <2 x i64>, <2 x i64> } %ld1x2
   1512 }
   1513 
   1514 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)
   1515 
   1516 
   1517 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) { ; ld1x2 of two <1 x i64>, pointer advanced by a constant
   1518 ;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
   1519 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
   1520   %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
   1521   %tmp = getelementptr i64, i64* %A, i32 2 ; 2 x i64 = 16 bytes = 2 x <1 x i64>, the #16 expected above
   1522   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1523   ret { <1 x i64>, <1 x i64> } %ld1x2
   1524 }
   1525 
   1526 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) { ; ld1x2 of two <1 x i64>, pointer advanced by a runtime amount
   1527 ;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
   1528 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
   1529   %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
   1530   %tmp = getelementptr i64, i64* %A, i64 %inc ; advance by %inc elements of i64; register post-index expected above
   1531   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1532   ret { <1 x i64>, <1 x i64> } %ld1x2
   1533 }
   1534 
   1535 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)
   1536 
   1537 
   1538 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) { ; ld1x2 of two <4 x float>, pointer advanced by a constant
   1539 ;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
   1540 ;CHECK: ld1.4s { v0, v1 }, [x0], #32
   1541   %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
   1542   %tmp = getelementptr float, float* %A, i32 8 ; 8 x float = 32 bytes = 2 x <4 x float>, the #32 expected above
   1543   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1544   ret { <4 x float>, <4 x float> } %ld1x2
   1545 }
   1546 
   1547 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { ; ld1x2 of two <4 x float>, pointer advanced by a runtime amount
   1548 ;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
   1549 ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
   1550   %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
   1551   %tmp = getelementptr float, float* %A, i64 %inc ; advance by %inc elements of float; register post-index expected above
   1552   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1553   ret { <4 x float>, <4 x float> } %ld1x2
   1554 }
   1555 
   1556 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*)
   1557 
   1558 
   1559 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) { ; ld1x2 of two <2 x float>, pointer advanced by a constant
   1560 ;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
   1561 ;CHECK: ld1.2s { v0, v1 }, [x0], #16
   1562   %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
   1563   %tmp = getelementptr float, float* %A, i32 4 ; 4 x float = 16 bytes = 2 x <2 x float>, the #16 expected above
   1564   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1565   ret { <2 x float>, <2 x float> } %ld1x2
   1566 }
   1567 
   1568 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) { ; ld1x2 of two <2 x float>, pointer advanced by a runtime amount
   1569 ;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
   1570 ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
   1571   %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
   1572   %tmp = getelementptr float, float* %A, i64 %inc ; advance by %inc elements of float; register post-index expected above
   1573   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1574   ret { <2 x float>, <2 x float> } %ld1x2
   1575 }
   1576 
   1577 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*)
   1578 
   1579 
   1580 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) { ; ld1x2 of two <2 x double>, pointer advanced by a constant
   1581 ;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
   1582 ;CHECK: ld1.2d { v0, v1 }, [x0], #32
   1583   %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
   1584   %tmp = getelementptr double, double* %A, i32 4 ; 4 x double = 32 bytes = 2 x <2 x double>, the #32 expected above
   1585   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1586   ret { <2 x double>, <2 x double> } %ld1x2
   1587 }
   1588 
   1589 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { ; ld1x2 of two <2 x double>, pointer advanced by a runtime amount
   1590 ;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
   1591 ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
   1592   %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
   1593   %tmp = getelementptr double, double* %A, i64 %inc ; advance by %inc elements of double; register post-index expected above
   1594   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1595   ret { <2 x double>, <2 x double> } %ld1x2
   1596 }
   1597 
   1598 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*)
   1599 
   1600 
   1601 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) { ; ld1x2 of two <1 x double>, pointer advanced by a constant
   1602 ;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
   1603 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
   1604   %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
   1605   %tmp = getelementptr double, double* %A, i32 2 ; 2 x double = 16 bytes = 2 x <1 x double>, the #16 expected above
   1606   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1607   ret { <1 x double>, <1 x double> } %ld1x2
   1608 }
   1609 
   1610 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { ; ld1x2 of two <1 x double>, pointer advanced by a runtime amount
   1611 ;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
   1612 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
   1613   %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
   1614   %tmp = getelementptr double, double* %A, i64 %inc ; advance by %inc elements of double; register post-index expected above
   1615   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1616   ret { <1 x double>, <1 x double> } %ld1x2
   1617 }
   1618 
   1619 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*)
   1620 
   1621 
   1622 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) { ; ld1x3 of three <16 x i8>, pointer advanced by a constant
   1623 ;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
   1624 ;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
   1625   %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
   1626   %tmp = getelementptr i8, i8* %A, i32 48 ; 48 x i8 = 48 bytes = 3 x <16 x i8>, the #48 expected above
   1627   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1628   ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
   1629 }
   1630 
   1631 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { ; ld1x3 of three <16 x i8>, pointer advanced by a runtime amount
   1632 ;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
   1633 ;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1634   %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
   1635   %tmp = getelementptr i8, i8* %A, i64 %inc ; advance by %inc elements of i8; register post-index expected above
   1636   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1637   ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
   1638 }
   1639 
   1640 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*)
   1641 
   1642 
   1643 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) { ; ld1x3 of three <8 x i8>, pointer advanced by a constant
   1644 ;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
   1645 ;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
   1646   %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
   1647   %tmp = getelementptr i8, i8* %A, i32 24 ; 24 x i8 = 24 bytes = 3 x <8 x i8>, the #24 expected above
   1648   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1649   ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
   1650 }
   1651 
   1652 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { ; ld1x3 of three <8 x i8>, pointer advanced by a runtime amount
   1653 ;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
   1654 ;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1655   %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
   1656   %tmp = getelementptr i8, i8* %A, i64 %inc ; advance by %inc elements of i8; register post-index expected above
   1657   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1658   ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
   1659 }
   1660 
   1661 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*)
   1662 
   1663 
   1664 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) { ; ld1x3 of three <8 x i16>, pointer advanced by a constant
   1665 ;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
   1666 ;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
   1667   %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
   1668   %tmp = getelementptr i16, i16* %A, i32 24 ; 24 x i16 = 48 bytes = 3 x <8 x i16>, the #48 expected above
   1669   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1670   ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
   1671 }
   1672 
   1673 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { ; ld1x3 of three <8 x i16>, pointer advanced by a runtime amount
   1674 ;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
   1675 ;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1676   %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
   1677   %tmp = getelementptr i16, i16* %A, i64 %inc ; advance by %inc elements of i16; register post-index expected above
   1678   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1679   ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
   1680 }
   1681 
   1682 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*)
   1683 
   1684 
   1685 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) { ; ld1x3 of three <4 x i16>, pointer advanced by a constant
   1686 ;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
   1687 ;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
   1688   %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
   1689   %tmp = getelementptr i16, i16* %A, i32 12 ; 12 x i16 = 24 bytes = 3 x <4 x i16>, the #24 expected above
   1690   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1691   ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
   1692 }
   1693 
   1694 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { ; ld1x3 of three <4 x i16>, pointer advanced by a runtime amount
   1695 ;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
   1696 ;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1697   %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
   1698   %tmp = getelementptr i16, i16* %A, i64 %inc ; advance by %inc elements of i16; register post-index expected above
   1699   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1700   ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
   1701 }
   1702 
   1703 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*)
   1704 
   1705 
   1706 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) { ; ld1x3 of three <4 x i32>, pointer advanced by a constant
   1707 ;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
   1708 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
   1709   %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
   1710   %tmp = getelementptr i32, i32* %A, i32 12 ; 12 x i32 = 48 bytes = 3 x <4 x i32>, the #48 expected above
   1711   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1712   ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
   1713 }
   1714 
   1715 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { ; ld1x3 of three <4 x i32>, pointer advanced by a runtime amount
   1716 ;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
   1717 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1718   %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
   1719   %tmp = getelementptr i32, i32* %A, i64 %inc ; advance by %inc elements of i32; register post-index expected above
   1720   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1721   ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
   1722 }
   1723 
   1724 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*)
   1725 
   1726 
   1727 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) { ; ld1x3 of three <2 x i32>, pointer advanced by a constant
   1728 ;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
   1729 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
   1730   %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
   1731   %tmp = getelementptr i32, i32* %A, i32 6 ; 6 x i32 = 24 bytes = 3 x <2 x i32>, the #24 expected above
   1732   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1733   ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
   1734 }
   1735 
   1736 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { ; ld1x3 of three <2 x i32>, pointer advanced by a runtime amount
   1737 ;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
   1738 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1739   %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
   1740   %tmp = getelementptr i32, i32* %A, i64 %inc ; advance by %inc elements of i32; register post-index expected above
   1741   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1742   ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
   1743 }
   1744 
   1745 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*)
   1746 
   1747 
   1748 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) { ; ld1x3 of three <2 x i64>, pointer advanced by a constant
   1749 ;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
   1750 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
   1751   %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
   1752   %tmp = getelementptr i64, i64* %A, i32 6 ; 6 x i64 = 48 bytes = 3 x <2 x i64>, the #48 expected above
   1753   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1754   ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
   1755 }
   1756 
   1757 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { ; ld1x3 of three <2 x i64>, pointer advanced by a runtime amount
   1758 ;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
   1759 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1760   %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
   1761   %tmp = getelementptr i64, i64* %A, i64 %inc ; advance by %inc elements of i64; register post-index expected above
   1762   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1763   ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
   1764 }
   1765 
   1766 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*)
   1767 
   1768 
   1769 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) { ; ld1x3 of three <1 x i64>, pointer advanced by a constant
   1770 ;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
   1771 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
   1772   %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
   1773   %tmp = getelementptr i64, i64* %A, i32 3 ; 3 x i64 = 24 bytes = 3 x <1 x i64>, the #24 expected above
   1774   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1775   ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
   1776 }
   1777 
   1778 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { ; ld1x3 of three <1 x i64>, pointer advanced by a runtime amount
   1779 ;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
   1780 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1781   %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
   1782   %tmp = getelementptr i64, i64* %A, i64 %inc ; advance by %inc elements of i64; register post-index expected above
   1783   store i64* %tmp, i64** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1784   ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
   1785 }
   1786 
   1787 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*)
   1788 
   1789 
   1790 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) { ; ld1x3 of three <4 x float>, pointer advanced by a constant
   1791 ;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
   1792 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
   1793   %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
   1794   %tmp = getelementptr float, float* %A, i32 12 ; 12 x float = 48 bytes = 3 x <4 x float>, the #48 expected above
   1795   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1796   ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
   1797 }
   1798 
   1799 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { ; ld1x3 of three <4 x float>, pointer advanced by a runtime amount
   1800 ;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
   1801 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1802   %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
   1803   %tmp = getelementptr float, float* %A, i64 %inc ; advance by %inc elements of float; register post-index expected above
   1804   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1805   ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
   1806 }
   1807 
   1808 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*)
   1809 
   1810 
   1811 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) { ; ld1x3 of three <2 x float>, pointer advanced by a constant
   1812 ;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
   1813 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
   1814   %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
   1815   %tmp = getelementptr float, float* %A, i32 6 ; 6 x float = 24 bytes = 3 x <2 x float>, the #24 expected above
   1816   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1817   ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
   1818 }
   1819 
   1820 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { ; ld1x3 of three <2 x float>, pointer advanced by a runtime amount
   1821 ;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
   1822 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1823   %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
   1824   %tmp = getelementptr float, float* %A, i64 %inc ; advance by %inc elements of float; register post-index expected above
   1825   store float* %tmp, float** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1826   ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
   1827 }
   1828 
   1829 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*)
   1830 
   1831 
   1832 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) { ; ld1x3 of three <2 x double>, pointer advanced by a constant
   1833 ;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
   1834 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
   1835   %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
   1836   %tmp = getelementptr double, double* %A, i32 6 ; 6 x double = 48 bytes = 3 x <2 x double>, the #48 expected above
   1837   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1838   ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
   1839 }
   1840 
   1841 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { ; ld1x3 of three <2 x double>, pointer advanced by a runtime amount
   1842 ;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
   1843 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1844   %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
   1845   %tmp = getelementptr double, double* %A, i64 %inc ; advance by %inc elements of double; register post-index expected above
   1846   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1847   ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
   1848 }
   1849 
   1850 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*)
   1851 
   1852 
   1853 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) { ; ld1x3 of three <1 x double>, pointer advanced by a constant
   1854 ;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
   1855 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
   1856   %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
   1857   %tmp = getelementptr double, double* %A, i32 3 ; 3 x double = 24 bytes = 3 x <1 x double>, the #24 expected above
   1858   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1859   ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
   1860 }
   1861 
   1862 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { ; ld1x3 of three <1 x double>, pointer advanced by a runtime amount
   1863 ;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
   1864 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   1865   %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
   1866   %tmp = getelementptr double, double* %A, i64 %inc ; advance by %inc elements of double; register post-index expected above
   1867   store double* %tmp, double** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1868   ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
   1869 }
   1870 
   1871 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*)
   1872 
   1873 
   1874 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) { ; ld1x4 of four <16 x i8>, pointer advanced by a constant
   1875 ;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
   1876 ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
   1877   %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
   1878   %tmp = getelementptr i8, i8* %A, i32 64 ; 64 x i8 = 64 bytes = 4 x <16 x i8>, the #64 expected above
   1879   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1880   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
   1881 }
   1882 
   1883 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { ; ld1x4 of four <16 x i8>, pointer advanced by a runtime amount
   1884 ;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
   1885 ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1886   %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
   1887   %tmp = getelementptr i8, i8* %A, i64 %inc ; advance by %inc elements of i8; register post-index expected above
   1888   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1889   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
   1890 }
   1891 
   1892 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*)
   1893 
   1894 
   1895 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) { ; ld1x4 of four <8 x i8>, pointer advanced by a constant
   1896 ;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
   1897 ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
   1898   %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
   1899   %tmp = getelementptr i8, i8* %A, i32 32 ; 32 x i8 = 32 bytes = 4 x <8 x i8>, the #32 expected above
   1900   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1901   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
   1902 }
   1903 
   1904 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { ; ld1x4 of four <8 x i8>, pointer advanced by a runtime amount
   1905 ;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
   1906 ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1907   %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
   1908   %tmp = getelementptr i8, i8* %A, i64 %inc ; advance by %inc elements of i8; register post-index expected above
   1909   store i8* %tmp, i8** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1910   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
   1911 }
   1912 
   1913 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*)
   1914 
   1915 
   1916 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) { ; ld1x4 of four <8 x i16>, pointer advanced by a constant
   1917 ;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
   1918 ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
   1919   %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
   1920   %tmp = getelementptr i16, i16* %A, i32 32 ; 32 x i16 = 64 bytes = 4 x <8 x i16>, the #64 expected above
   1921   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1922   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
   1923 }
   1924 
   1925 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { ; ld1x4 of four <8 x i16>, pointer advanced by a runtime amount
   1926 ;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
   1927 ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1928   %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
   1929   %tmp = getelementptr i16, i16* %A, i64 %inc ; advance by %inc elements of i16; register post-index expected above
   1930   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1931   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
   1932 }
   1933 
   1934 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*)
   1935 
   1936 
   1937 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) { ; ld1x4 of four <4 x i16>, pointer advanced by a constant
   1938 ;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
   1939 ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
   1940   %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
   1941   %tmp = getelementptr i16, i16* %A, i32 16 ; 16 x i16 = 32 bytes = 4 x <4 x i16>, the #32 expected above
   1942   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1943   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
   1944 }
   1945 
   1946 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { ; ld1x4 of four <4 x i16>, pointer advanced by a runtime amount
   1947 ;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
   1948 ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1949   %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
   1950   %tmp = getelementptr i16, i16* %A, i64 %inc ; advance by %inc elements of i16; register post-index expected above
   1951   store i16* %tmp, i16** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1952   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
   1953 }
   1954 
   1955 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*)
   1956 
   1957 
   1958 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) { ; ld1x4 of four <4 x i32>, pointer advanced by a constant
   1959 ;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
   1960 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
   1961   %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
   1962   %tmp = getelementptr i32, i32* %A, i32 16 ; 16 x i32 = 64 bytes = 4 x <4 x i32>, the #64 expected above
   1963   store i32* %tmp, i32** %ptr ; sole use of %tmp: write the advanced pointer out via %ptr
   1964   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
   1965 }
   1966 
   1967 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
   1968 ;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
   1969 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1970   %quad = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) ; four-register ld1 from %A
   1971   %next = getelementptr i32, i32* %A, i64 %inc ; advance %A by %inc elements (register increment)
   1972   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   1973   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %quad
   1974 }
   1975 
   1976 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*)
   1977 
   1978 
   1979 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
   1980 ;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
   1981 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
   1982   %quad = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) ; four-register ld1 from %A
   1983   %next = getelementptr i32, i32* %A, i32 8 ; 8 x i32 = 32 bytes, the #32 expected above
   1984   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   1985   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
   1986 }
   1987 
   1988 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
   1989 ;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
   1990 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   1991   %quad = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) ; four-register ld1 from %A
   1992   %next = getelementptr i32, i32* %A, i64 %inc ; advance %A by %inc elements (register increment)
   1993   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   1994   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %quad
   1995 }
   1996 
   1997 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*)
   1998 
   1999 
   2000 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
   2001 ;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
   2002 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
   2003   %quad = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) ; four-register ld1 from %A
   2004   %next = getelementptr i64, i64* %A, i32 8 ; 8 x i64 = 64 bytes, the #64 expected above
   2005   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2006   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
   2007 }
   2008 
   2009 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
   2010 ;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
   2011 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2012   %quad = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) ; four-register ld1 from %A
   2013   %next = getelementptr i64, i64* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2014   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2015   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %quad
   2016 }
   2017 
   2018 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*)
   2019 
   2020 
   2021 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
   2022 ;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
   2023 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
   2024   %quad = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) ; four-register ld1 from %A
   2025   %next = getelementptr i64, i64* %A, i32 4 ; 4 x i64 = 32 bytes, the #32 expected above
   2026   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2027   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
   2028 }
   2029 
   2030 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
   2031 ;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
   2032 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2033   %quad = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) ; four-register ld1 from %A
   2034   %next = getelementptr i64, i64* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2035   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2036   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %quad
   2037 }
   2038 
   2039 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*)
   2040 
   2041 
   2042 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) {
   2043 ;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
   2044 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
   2045   %quad = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) ; four-register ld1 from %A
   2046   %next = getelementptr float, float* %A, i32 16 ; 16 x float = 64 bytes, the #64 expected above
   2047   store float* %next, float** %ptr ; keeps the bumped pointer live
   2048   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
   2049 }
   2050 
   2051 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
   2052 ;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
   2053 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2054   %quad = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) ; four-register ld1 from %A
   2055   %next = getelementptr float, float* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2056   store float* %next, float** %ptr ; keeps the bumped pointer live
   2057   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %quad
   2058 }
   2059 
   2060 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*)
   2061 
   2062 
   2063 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
   2064 ;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
   2065 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
   2066   %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) ; four-register ld1 from %A
   2067   %next = getelementptr float, float* %A, i32 8 ; 8 x float = 32 bytes, the #32 expected above
   2068   store float* %next, float** %ptr ; keeps the bumped pointer live
   2069   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
   2070 }
   2071 
   2072 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
   2073 ;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
   2074 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2075   %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A) ; four-register ld1 from %A
   2076   %next = getelementptr float, float* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2077   store float* %next, float** %ptr ; keeps the bumped pointer live
   2078   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
   2079 }
   2080 
   2081 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*)
   2082 
   2083 
   2084 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
   2085 ;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
   2086 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
   2087   %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) ; four-register ld1 from %A
   2088   %next = getelementptr double, double* %A, i32 8 ; 8 x double = 64 bytes, the #64 expected above
   2089   store double* %next, double** %ptr ; keeps the bumped pointer live
   2090   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
   2091 }
   2092 
   2093 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
   2094 ;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
   2095 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2096   %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A) ; four-register ld1 from %A
   2097   %next = getelementptr double, double* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2098   store double* %next, double** %ptr ; keeps the bumped pointer live
   2099   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
   2100 }
   2101 
   2102 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*)
   2103 
   2104 
   2105 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
   2106 ;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
   2107 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
   2108   %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) ; four-register ld1 from %A
   2109   %next = getelementptr double, double* %A, i32 4 ; 4 x double = 32 bytes, the #32 expected above
   2110   store double* %next, double** %ptr ; keeps the bumped pointer live
   2111   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
   2112 }
   2113 
   2114 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
   2115 ;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
   2116 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2117   %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A) ; four-register ld1 from %A
   2118   %next = getelementptr double, double* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2119   store double* %next, double** %ptr ; keeps the bumped pointer live
   2120   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
   2121 }
   2122 
   2123 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*)
   2124 
   2125 
   2126 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
   2127 ;CHECK-LABEL: test_v16i8_post_imm_ld2r:
   2128 ;CHECK: ld2r.16b { v0, v1 }, [x0], #2
   2129   %pair = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) ; two-register ld2r from %A
   2130   %next = getelementptr i8, i8* %A, i32 2 ; 2 x i8 = 2 bytes, the #2 expected above
   2131   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2132   ret { <16 x i8>, <16 x i8> } %pair
   2133 }
   2134 
   2135 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
   2136 ;CHECK-LABEL: test_v16i8_post_reg_ld2r:
   2137 ;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
   2138   %pair = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A) ; two-register ld2r from %A
   2139   %next = getelementptr i8, i8* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2140   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2141   ret { <16 x i8>, <16 x i8> } %pair
   2142 }
   2143 
   2144 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
   2145 
   2146 
   2147 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
   2148 ;CHECK-LABEL: test_v8i8_post_imm_ld2r:
   2149 ;CHECK: ld2r.8b { v0, v1 }, [x0], #2
   2150   %pair = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) ; two-register ld2r from %A
   2151   %next = getelementptr i8, i8* %A, i32 2 ; 2 x i8 = 2 bytes, the #2 expected above
   2152   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2153   ret { <8 x i8>, <8 x i8> } %pair
   2154 }
   2155 
   2156 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
   2157 ;CHECK-LABEL: test_v8i8_post_reg_ld2r:
   2158 ;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
   2159   %pair = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A) ; two-register ld2r from %A
   2160   %next = getelementptr i8, i8* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2161   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2162   ret { <8 x i8>, <8 x i8> } %pair
   2163 }
   2164 
   2165 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
   2166 
   2167 
   2168 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
   2169 ;CHECK-LABEL: test_v8i16_post_imm_ld2r:
   2170 ;CHECK: ld2r.8h { v0, v1 }, [x0], #4
   2171   %pair = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) ; two-register ld2r from %A
   2172   %next = getelementptr i16, i16* %A, i32 2 ; 2 x i16 = 4 bytes, the #4 expected above
   2173   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2174   ret { <8 x i16>, <8 x i16> } %pair
   2175 }
   2176 
   2177 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
   2178 ;CHECK-LABEL: test_v8i16_post_reg_ld2r:
   2179 ;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
   2180   %pair = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A) ; two-register ld2r from %A
   2181   %next = getelementptr i16, i16* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2182   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2183   ret { <8 x i16>, <8 x i16> } %pair
   2184 }
   2185 
   2186 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
   2187 
   2188 
   2189 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
   2190 ;CHECK-LABEL: test_v4i16_post_imm_ld2r:
   2191 ;CHECK: ld2r.4h { v0, v1 }, [x0], #4
   2192   %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) ; two-register ld2r from %A
   2193   %next = getelementptr i16, i16* %A, i32 2 ; 2 x i16 = 4 bytes, the #4 expected above
   2194   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2195   ret { <4 x i16>, <4 x i16> } %pair
   2196 }
   2197 
   2198 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
   2199 ;CHECK-LABEL: test_v4i16_post_reg_ld2r:
   2200 ;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
   2201   %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A) ; two-register ld2r from %A
   2202   %next = getelementptr i16, i16* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2203   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2204   ret { <4 x i16>, <4 x i16> } %pair
   2205 }
   2206 
   2207 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
   2208 
   2209 
   2210 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
   2211 ;CHECK-LABEL: test_v4i32_post_imm_ld2r:
   2212 ;CHECK: ld2r.4s { v0, v1 }, [x0], #8
   2213   %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) ; two-register ld2r from %A
   2214   %next = getelementptr i32, i32* %A, i32 2 ; 2 x i32 = 8 bytes, the #8 expected above
   2215   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2216   ret { <4 x i32>, <4 x i32> } %pair
   2217 }
   2218 
   2219 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
   2220 ;CHECK-LABEL: test_v4i32_post_reg_ld2r:
   2221 ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
   2222   %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A) ; two-register ld2r from %A
   2223   %next = getelementptr i32, i32* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2224   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2225   ret { <4 x i32>, <4 x i32> } %pair
   2226 }
   2227 
   2228 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
   2229 
   2230 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
   2231 ;CHECK-LABEL: test_v2i32_post_imm_ld2r:
   2232 ;CHECK: ld2r.2s { v0, v1 }, [x0], #8
   2233   %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) ; two-register ld2r from %A
   2234   %next = getelementptr i32, i32* %A, i32 2 ; 2 x i32 = 8 bytes, the #8 expected above
   2235   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2236   ret { <2 x i32>, <2 x i32> } %pair
   2237 }
   2238 
   2239 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
   2240 ;CHECK-LABEL: test_v2i32_post_reg_ld2r:
   2241 ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
   2242   %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A) ; two-register ld2r from %A
   2243   %next = getelementptr i32, i32* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2244   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2245   ret { <2 x i32>, <2 x i32> } %pair
   2246 }
   2247 
   2248 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
   2249 
   2250 
   2251 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
   2252 ;CHECK-LABEL: test_v2i64_post_imm_ld2r:
   2253 ;CHECK: ld2r.2d { v0, v1 }, [x0], #16
   2254   %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) ; two-register ld2r from %A
   2255   %next = getelementptr i64, i64* %A, i32 2 ; 2 x i64 = 16 bytes, the #16 expected above
   2256   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2257   ret { <2 x i64>, <2 x i64> } %pair
   2258 }
   2259 
   2260 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
   2261 ;CHECK-LABEL: test_v2i64_post_reg_ld2r:
   2262 ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
   2263   %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A) ; two-register ld2r from %A
   2264   %next = getelementptr i64, i64* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2265   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2266   ret { <2 x i64>, <2 x i64> } %pair
   2267 }
   2268 
   2269 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
   2270 
   2271 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
   2272 ;CHECK-LABEL: test_v1i64_post_imm_ld2r:
   2273 ;CHECK: ld2r.1d { v0, v1 }, [x0], #16
   2274   %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) ; two-register ld2r from %A
   2275   %next = getelementptr i64, i64* %A, i32 2 ; 2 x i64 = 16 bytes, the #16 expected above
   2276   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2277   ret { <1 x i64>, <1 x i64> } %pair
   2278 }
   2279 
   2280 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
   2281 ;CHECK-LABEL: test_v1i64_post_reg_ld2r:
   2282 ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
   2283   %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A) ; two-register ld2r from %A
   2284   %next = getelementptr i64, i64* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2285   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2286   ret { <1 x i64>, <1 x i64> } %pair
   2287 }
   2288 
   2289 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
   2290 
   2291 
   2292 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
   2293 ;CHECK-LABEL: test_v4f32_post_imm_ld2r:
   2294 ;CHECK: ld2r.4s { v0, v1 }, [x0], #8
   2295   %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) ; two-register ld2r from %A
   2296   %next = getelementptr float, float* %A, i32 2 ; 2 x float = 8 bytes, the #8 expected above
   2297   store float* %next, float** %ptr ; keeps the bumped pointer live
   2298   ret { <4 x float>, <4 x float> } %pair
   2299 }
   2300 
   2301 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
   2302 ;CHECK-LABEL: test_v4f32_post_reg_ld2r:
   2303 ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
   2304   %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A) ; two-register ld2r from %A
   2305   %next = getelementptr float, float* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2306   store float* %next, float** %ptr ; keeps the bumped pointer live
   2307   ret { <4 x float>, <4 x float> } %pair
   2308 }
   2309 
   2310 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly
   2311 
   2312 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
   2313 ;CHECK-LABEL: test_v2f32_post_imm_ld2r:
   2314 ;CHECK: ld2r.2s { v0, v1 }, [x0], #8
   2315   %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) ; two-register ld2r from %A
   2316   %next = getelementptr float, float* %A, i32 2 ; 2 x float = 8 bytes, the #8 expected above
   2317   store float* %next, float** %ptr ; keeps the bumped pointer live
   2318   ret { <2 x float>, <2 x float> } %pair
   2319 }
   2320 
   2321 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
   2322 ;CHECK-LABEL: test_v2f32_post_reg_ld2r:
   2323 ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
   2324   %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A) ; two-register ld2r from %A
   2325   %next = getelementptr float, float* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2326   store float* %next, float** %ptr ; keeps the bumped pointer live
   2327   ret { <2 x float>, <2 x float> } %pair
   2328 }
   2329 
   2330 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly
   2331 
   2332 
   2333 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
   2334 ;CHECK-LABEL: test_v2f64_post_imm_ld2r:
   2335 ;CHECK: ld2r.2d { v0, v1 }, [x0], #16
   2336   %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) ; two-register ld2r from %A
   2337   %next = getelementptr double, double* %A, i32 2 ; 2 x double = 16 bytes, the #16 expected above
   2338   store double* %next, double** %ptr ; keeps the bumped pointer live
   2339   ret { <2 x double>, <2 x double> } %pair
   2340 }
   2341 
   2342 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
   2343 ;CHECK-LABEL: test_v2f64_post_reg_ld2r:
   2344 ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
   2345   %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A) ; two-register ld2r from %A
   2346   %next = getelementptr double, double* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2347   store double* %next, double** %ptr ; keeps the bumped pointer live
   2348   ret { <2 x double>, <2 x double> } %pair
   2349 }
   2350 
   2351 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly
   2352 
   2353 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
   2354 ;CHECK-LABEL: test_v1f64_post_imm_ld2r:
   2355 ;CHECK: ld2r.1d { v0, v1 }, [x0], #16
   2356   %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) ; two-register ld2r from %A
   2357   %next = getelementptr double, double* %A, i32 2 ; 2 x double = 16 bytes, the #16 expected above
   2358   store double* %next, double** %ptr ; keeps the bumped pointer live
   2359   ret { <1 x double>, <1 x double> } %pair
   2360 }
   2361 
   2362 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
   2363 ;CHECK-LABEL: test_v1f64_post_reg_ld2r:
   2364 ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
   2365   %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A) ; two-register ld2r from %A
   2366   %next = getelementptr double, double* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2367   store double* %next, double** %ptr ; keeps the bumped pointer live
   2368   ret { <1 x double>, <1 x double> } %pair
   2369 }
   2370 
   2371 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly
   2372 
   2373 
   2374 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
   2375 ;CHECK-LABEL: test_v16i8_post_imm_ld3r:
   2376 ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
   2377   %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) ; three-register ld3r from %A
   2378   %next = getelementptr i8, i8* %A, i32 3 ; 3 x i8 = 3 bytes, the #3 expected above
   2379   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2380   ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
   2381 }
   2382 
   2383 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
   2384 ;CHECK-LABEL: test_v16i8_post_reg_ld3r:
   2385 ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2386   %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A) ; three-register ld3r from %A
   2387   %next = getelementptr i8, i8* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2388   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2389   ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
   2390 }
   2391 
   2392 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
   2393 
   2394 
   2395 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
   2396 ;CHECK-LABEL: test_v8i8_post_imm_ld3r:
   2397 ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
   2398   %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) ; three-register ld3r from %A
   2399   %next = getelementptr i8, i8* %A, i32 3 ; 3 x i8 = 3 bytes, the #3 expected above
   2400   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2401   ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
   2402 }
   2403 
   2404 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
   2405 ;CHECK-LABEL: test_v8i8_post_reg_ld3r:
   2406 ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2407   %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A) ; three-register ld3r from %A
   2408   %next = getelementptr i8, i8* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2409   store i8* %next, i8** %ptr ; keeps the bumped pointer live
   2410   ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
   2411 }
   2412 
   2413 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
   2414 
   2415 
   2416 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
   2417 ;CHECK-LABEL: test_v8i16_post_imm_ld3r:
   2418 ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
   2419   %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) ; three-register ld3r from %A
   2420   %next = getelementptr i16, i16* %A, i32 3 ; 3 x i16 = 6 bytes, the #6 expected above
   2421   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2422   ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
   2423 }
   2424 
   2425 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
   2426 ;CHECK-LABEL: test_v8i16_post_reg_ld3r:
   2427 ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2428   %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A) ; three-register ld3r from %A
   2429   %next = getelementptr i16, i16* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2430   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2431   ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
   2432 }
   2433 
   2434 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
   2435 
   2436 
   2437 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
   2438 ;CHECK-LABEL: test_v4i16_post_imm_ld3r:
   2439 ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
   2440   %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) ; three-register ld3r from %A
   2441   %next = getelementptr i16, i16* %A, i32 3 ; 3 x i16 = 6 bytes, the #6 expected above
   2442   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2443   ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
   2444 }
   2445 
   2446 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
   2447 ;CHECK-LABEL: test_v4i16_post_reg_ld3r:
   2448 ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2449   %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A) ; three-register ld3r from %A
   2450   %next = getelementptr i16, i16* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2451   store i16* %next, i16** %ptr ; keeps the bumped pointer live
   2452   ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
   2453 }
   2454 
   2455 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
   2456 
   2457 
   2458 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
   2459 ;CHECK-LABEL: test_v4i32_post_imm_ld3r:
   2460 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
   2461   %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) ; three-register ld3r from %A
   2462   %next = getelementptr i32, i32* %A, i32 3 ; 3 x i32 = 12 bytes, the #12 expected above
   2463   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2464   ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
   2465 }
   2466 
   2467 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
   2468 ;CHECK-LABEL: test_v4i32_post_reg_ld3r:
   2469 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2470   %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A) ; three-register ld3r from %A
   2471   %next = getelementptr i32, i32* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2472   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2473   ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
   2474 }
   2475 
   2476 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
   2477 
   2478 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
   2479 ;CHECK-LABEL: test_v2i32_post_imm_ld3r:
   2480 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
   2481   %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) ; three-register ld3r from %A
   2482   %next = getelementptr i32, i32* %A, i32 3 ; 3 x i32 = 12 bytes, the #12 expected above
   2483   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2484   ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
   2485 }
   2486 
   2487 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
   2488 ;CHECK-LABEL: test_v2i32_post_reg_ld3r:
   2489 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2490   %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A) ; three-register ld3r from %A
   2491   %next = getelementptr i32, i32* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2492   store i32* %next, i32** %ptr ; keeps the bumped pointer live
   2493   ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
   2494 }
   2495 
   2496 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
   2497 
   2498 
   2499 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
   2500 ;CHECK-LABEL: test_v2i64_post_imm_ld3r:
   2501 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
   2502   %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) ; three-register ld3r from %A
   2503   %next = getelementptr i64, i64* %A, i32 3 ; 3 x i64 = 24 bytes, the #24 expected above
   2504   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2505   ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
   2506 }
   2507 
   2508 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
   2509 ;CHECK-LABEL: test_v2i64_post_reg_ld3r:
   2510 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2511   %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A) ; three-register ld3r from %A
   2512   %next = getelementptr i64, i64* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2513   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2514   ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
   2515 }
   2516 
   2517 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
   2518 
   2519 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
   2520 ;CHECK-LABEL: test_v1i64_post_imm_ld3r:
   2521 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
   2522   %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) ; three-register ld3r from %A
   2523   %next = getelementptr i64, i64* %A, i32 3 ; 3 x i64 = 24 bytes, the #24 expected above
   2524   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2525   ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
   2526 }
   2527 
   2528 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
   2529 ;CHECK-LABEL: test_v1i64_post_reg_ld3r:
   2530 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2531   %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A) ; three-register ld3r from %A
   2532   %next = getelementptr i64, i64* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2533   store i64* %next, i64** %ptr ; keeps the bumped pointer live
   2534   ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
   2535 }
   2536 
   2537 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
   2538 
   2539 
   2540 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
   2541 ;CHECK-LABEL: test_v4f32_post_imm_ld3r:
   2542 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
   2543   %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) ; three-register ld3r from %A
   2544   %next = getelementptr float, float* %A, i32 3 ; 3 x float = 12 bytes, the #12 expected above
   2545   store float* %next, float** %ptr ; keeps the bumped pointer live
   2546   ret { <4 x float>, <4 x float>, <4 x float> } %triple
   2547 }
   2548 
   2549 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
   2550 ;CHECK-LABEL: test_v4f32_post_reg_ld3r:
   2551 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2552   %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A) ; three-register ld3r from %A
   2553   %next = getelementptr float, float* %A, i64 %inc ; advance %A by %inc elements (register increment)
   2554   store float* %next, float** %ptr ; keeps the bumped pointer live
   2555   ret { <4 x float>, <4 x float>, <4 x float> } %triple
   2556 }
   2557 
   2558 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly
   2559 
   2560 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
   2561 ;CHECK-LABEL: test_v2f32_post_imm_ld3r:
   2562 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
   2563   %triple = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A) ; three-register ld3r from %A
   2564   %next = getelementptr float, float* %A, i32 3 ; 3 x float = 12 bytes, the #12 expected above
   2565   store float* %next, float** %ptr ; keeps the bumped pointer live
   2566   ret { <2 x float>, <2 x float>, <2 x float> } %triple
   2567 }
   2568 
   2569 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
        ; Calls the ld3r intrinsic on %A, then stores &%A[%inc] (float elements) to %ptr.
        ; Expected codegen: ld3r.2s post-indexed on x0 by an x register.
   2570 ;CHECK-LABEL: test_v2f32_post_reg_ld3r:
   2571 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2572   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
   2573   %tmp = getelementptr float, float* %A, i64 %inc
   2574   store float* %tmp, float** %ptr
   2575   ret { <2 x float>, <2 x float>, <2 x float> } %ld3
   2576 }
   2577 
   2578 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly
   2579 
   2580 
   2581 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
        ; Calls the ld3r intrinsic on %A, then stores &%A[3] (3 doubles = 24 bytes past %A) to %ptr.
        ; Expected codegen: ld3r.2d with post-index immediate #24 on x0.
   2582 ;CHECK-LABEL: test_v2f64_post_imm_ld3r:
   2583 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
   2584   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
   2585   %tmp = getelementptr double, double* %A, i32 3
   2586   store double* %tmp, double** %ptr
   2587   ret { <2 x double>, <2 x double>, <2 x double> } %ld3
   2588 }
   2589 
   2590 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
        ; Calls the ld3r intrinsic on %A, then stores &%A[%inc] (double elements) to %ptr.
        ; Expected codegen: ld3r.2d post-indexed on x0 by an x register.
   2591 ;CHECK-LABEL: test_v2f64_post_reg_ld3r:
   2592 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2593   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
   2594   %tmp = getelementptr double, double* %A, i64 %inc
   2595   store double* %tmp, double** %ptr
   2596   ret { <2 x double>, <2 x double>, <2 x double> } %ld3
   2597 }
   2598 
   2599 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly
   2600 
   2601 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
        ; Calls the ld3r intrinsic on %A, then stores &%A[3] (3 doubles = 24 bytes past %A) to %ptr.
        ; Expected codegen: ld3r.1d with post-index immediate #24 on x0.
   2602 ;CHECK-LABEL: test_v1f64_post_imm_ld3r:
   2603 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
   2604   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
   2605   %tmp = getelementptr double, double* %A, i32 3
   2606   store double* %tmp, double** %ptr
   2607   ret { <1 x double>, <1 x double>, <1 x double> } %ld3
   2608 }
   2609 
   2610 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
        ; Calls the ld3r intrinsic on %A, then stores &%A[%inc] (double elements) to %ptr.
        ; Expected codegen: ld3r.1d post-indexed on x0 by an x register.
   2611 ;CHECK-LABEL: test_v1f64_post_reg_ld3r:
   2612 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   2613   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
   2614   %tmp = getelementptr double, double* %A, i64 %inc
   2615   store double* %tmp, double** %ptr
   2616   ret { <1 x double>, <1 x double>, <1 x double> } %ld3
   2617 }
   2618 
   2619 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly
   2620 
   2621 
   2622 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.16b with post-index immediate #4 on x0.
   2623 ;CHECK-LABEL: test_v16i8_post_imm_ld4r:
   2624 ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
   2625   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
   2626   %tmp = getelementptr i8, i8* %A, i32 4
   2627   store i8* %tmp, i8** %ptr
   2628   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
   2629 }
   2630 
   2631 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i8 elements) to %ptr.
        ; Expected codegen: ld4r.16b post-indexed on x0 by an x register.
   2632 ;CHECK-LABEL: test_v16i8_post_reg_ld4r:
   2633 ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2634   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
   2635   %tmp = getelementptr i8, i8* %A, i64 %inc
   2636   store i8* %tmp, i8** %ptr
   2637   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
   2638 }
   2639 
   2640 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
   2641 
   2642 
   2643 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.8b with post-index immediate #4 on x0.
   2644 ;CHECK-LABEL: test_v8i8_post_imm_ld4r:
   2645 ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
   2646   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
   2647   %tmp = getelementptr i8, i8* %A, i32 4
   2648   store i8* %tmp, i8** %ptr
   2649   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
   2650 }
   2651 
   2652 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i8 elements) to %ptr.
        ; Expected codegen: ld4r.8b post-indexed on x0 by an x register.
   2653 ;CHECK-LABEL: test_v8i8_post_reg_ld4r:
   2654 ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2655   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
   2656   %tmp = getelementptr i8, i8* %A, i64 %inc
   2657   store i8* %tmp, i8** %ptr
   2658   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
   2659 }
   2660 
   2661 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
   2662 
   2663 
   2664 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 x i16 = 8 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.8h with post-index immediate #8 on x0.
   2665 ;CHECK-LABEL: test_v8i16_post_imm_ld4r:
   2666 ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
   2667   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
   2668   %tmp = getelementptr i16, i16* %A, i32 4
   2669   store i16* %tmp, i16** %ptr
   2670   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
   2671 }
   2672 
   2673 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i16 elements) to %ptr.
        ; Expected codegen: ld4r.8h post-indexed on x0 by an x register.
   2674 ;CHECK-LABEL: test_v8i16_post_reg_ld4r:
   2675 ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2676   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
   2677   %tmp = getelementptr i16, i16* %A, i64 %inc
   2678   store i16* %tmp, i16** %ptr
   2679   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
   2680 }
   2681 
   2682 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
   2683 
   2684 
   2685 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 x i16 = 8 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.4h with post-index immediate #8 on x0.
   2686 ;CHECK-LABEL: test_v4i16_post_imm_ld4r:
   2687 ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
   2688   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
   2689   %tmp = getelementptr i16, i16* %A, i32 4
   2690   store i16* %tmp, i16** %ptr
   2691   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
   2692 }
   2693 
   2694 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i16 elements) to %ptr.
        ; Expected codegen: ld4r.4h post-indexed on x0 by an x register.
   2695 ;CHECK-LABEL: test_v4i16_post_reg_ld4r:
   2696 ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2697   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
   2698   %tmp = getelementptr i16, i16* %A, i64 %inc
   2699   store i16* %tmp, i16** %ptr
   2700   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
   2701 }
   2702 
   2703 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
   2704 
   2705 
   2706 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 x i32 = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.4s with post-index immediate #16 on x0.
   2707 ;CHECK-LABEL: test_v4i32_post_imm_ld4r:
   2708 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
   2709   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
   2710   %tmp = getelementptr i32, i32* %A, i32 4
   2711   store i32* %tmp, i32** %ptr
   2712   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
   2713 }
   2714 
   2715 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i32 elements) to %ptr.
        ; Expected codegen: ld4r.4s post-indexed on x0 by an x register.
   2716 ;CHECK-LABEL: test_v4i32_post_reg_ld4r:
   2717 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2718   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
   2719   %tmp = getelementptr i32, i32* %A, i64 %inc
   2720   store i32* %tmp, i32** %ptr
   2721   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
   2722 }
   2723 
   2724 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
   2725 
   2726 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 x i32 = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.2s with post-index immediate #16 on x0.
   2727 ;CHECK-LABEL: test_v2i32_post_imm_ld4r:
   2728 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
   2729   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
   2730   %tmp = getelementptr i32, i32* %A, i32 4
   2731   store i32* %tmp, i32** %ptr
   2732   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
   2733 }
   2734 
   2735 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i32 elements) to %ptr.
        ; Expected codegen: ld4r.2s post-indexed on x0 by an x register.
   2736 ;CHECK-LABEL: test_v2i32_post_reg_ld4r:
   2737 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2738   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
   2739   %tmp = getelementptr i32, i32* %A, i64 %inc
   2740   store i32* %tmp, i32** %ptr
   2741   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
   2742 }
   2743 
   2744 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
   2745 
   2746 
   2747 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 x i64 = 32 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.2d with post-index immediate #32 on x0.
   2748 ;CHECK-LABEL: test_v2i64_post_imm_ld4r:
   2749 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
   2750   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
   2751   %tmp = getelementptr i64, i64* %A, i32 4
   2752   store i64* %tmp, i64** %ptr
   2753   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
   2754 }
   2755 
   2756 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i64 elements) to %ptr.
        ; Expected codegen: ld4r.2d post-indexed on x0 by an x register.
   2757 ;CHECK-LABEL: test_v2i64_post_reg_ld4r:
   2758 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2759   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
   2760   %tmp = getelementptr i64, i64* %A, i64 %inc
   2761   store i64* %tmp, i64** %ptr
   2762   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
   2763 }
   2764 
   2765 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
   2766 
   2767 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 x i64 = 32 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.1d with post-index immediate #32 on x0.
   2768 ;CHECK-LABEL: test_v1i64_post_imm_ld4r:
   2769 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
   2770   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
   2771   %tmp = getelementptr i64, i64* %A, i32 4
   2772   store i64* %tmp, i64** %ptr
   2773   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
   2774 }
   2775 
   2776 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (i64 elements) to %ptr.
        ; Expected codegen: ld4r.1d post-indexed on x0 by an x register.
   2777 ;CHECK-LABEL: test_v1i64_post_reg_ld4r:
   2778 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2779   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
   2780   %tmp = getelementptr i64, i64* %A, i64 %inc
   2781   store i64* %tmp, i64** %ptr
   2782   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
   2783 }
   2784 
   2785 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
   2786 
   2787 
   2788 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 floats = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.4s with post-index immediate #16 on x0.
   2789 ;CHECK-LABEL: test_v4f32_post_imm_ld4r:
   2790 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
   2791   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
   2792   %tmp = getelementptr float, float* %A, i32 4
   2793   store float* %tmp, float** %ptr
   2794   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
   2795 }
   2796 
   2797 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (float elements) to %ptr.
        ; Expected codegen: ld4r.4s post-indexed on x0 by an x register.
   2798 ;CHECK-LABEL: test_v4f32_post_reg_ld4r:
   2799 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2800   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
   2801   %tmp = getelementptr float, float* %A, i64 %inc
   2802   store float* %tmp, float** %ptr
   2803   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
   2804 }
   2805 
   2806 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly
   2807 
   2808 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 floats = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.2s with post-index immediate #16 on x0.
   2809 ;CHECK-LABEL: test_v2f32_post_imm_ld4r:
   2810 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
   2811   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
   2812   %tmp = getelementptr float, float* %A, i32 4
   2813   store float* %tmp, float** %ptr
   2814   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
   2815 }
   2816 
   2817 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (float elements) to %ptr.
        ; Expected codegen: ld4r.2s post-indexed on x0 by an x register.
   2818 ;CHECK-LABEL: test_v2f32_post_reg_ld4r:
   2819 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2820   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
   2821   %tmp = getelementptr float, float* %A, i64 %inc
   2822   store float* %tmp, float** %ptr
   2823   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
   2824 }
   2825 
   2826 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly
   2827 
   2828 
   2829 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 doubles = 32 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.2d with post-index immediate #32 on x0.
   2830 ;CHECK-LABEL: test_v2f64_post_imm_ld4r:
   2831 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
   2832   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
   2833   %tmp = getelementptr double, double* %A, i32 4
   2834   store double* %tmp, double** %ptr
   2835   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
   2836 }
   2837 
   2838 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (double elements) to %ptr.
        ; Expected codegen: ld4r.2d post-indexed on x0 by an x register.
   2839 ;CHECK-LABEL: test_v2f64_post_reg_ld4r:
   2840 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2841   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
   2842   %tmp = getelementptr double, double* %A, i64 %inc
   2843   store double* %tmp, double** %ptr
   2844   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
   2845 }
   2846 
   2847 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly
   2848 
   2849 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[4] (4 doubles = 32 bytes past %A) to %ptr.
        ; Expected codegen: ld4r.1d with post-index immediate #32 on x0.
   2850 ;CHECK-LABEL: test_v1f64_post_imm_ld4r:
   2851 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
   2852   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
   2853   %tmp = getelementptr double, double* %A, i32 4
   2854   store double* %tmp, double** %ptr
   2855   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
   2856 }
   2857 
   2858 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
        ; Calls the ld4r intrinsic on %A, then stores &%A[%inc] (double elements) to %ptr.
        ; Expected codegen: ld4r.1d post-indexed on x0 by an x register.
   2859 ;CHECK-LABEL: test_v1f64_post_reg_ld4r:
   2860 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   2861   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
   2862   %tmp = getelementptr double, double* %A, i64 %inc
   2863   store double* %tmp, double** %ptr
   2864   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
   2865 }
   2866 
   2867 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly
   2868 
   2869 
   2870 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 bytes past %A) to %ptr.
        ; Expected codegen: ld2.b on lane [0] with post-index immediate #2 on x0.
   2871 ;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
   2872 ;CHECK: ld2.b { v0, v1 }[0], [x0], #2
   2873   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   2874   %tmp = getelementptr i8, i8* %A, i32 2
   2875   store i8* %tmp, i8** %ptr
   2876   ret { <16 x i8>, <16 x i8> } %ld2
   2877 }
   2878 
   2879 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i8 elements) to %ptr.
        ; Expected codegen: ld2.b on lane [0], post-indexed on x0 by an x register.
   2880 ;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
   2881 ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
   2882   %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   2883   %tmp = getelementptr i8, i8* %A, i64 %inc
   2884   store i8* %tmp, i8** %ptr
   2885   ret { <16 x i8>, <16 x i8> } %ld2
   2886 }
   2887 
   2888 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
   2889 
   2890 
   2891 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 bytes past %A) to %ptr.
        ; Expected codegen: ld2.b on lane [0] with post-index immediate #2 on x0.
   2892 ;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
   2893 ;CHECK: ld2.b { v0, v1 }[0], [x0], #2
   2894   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   2895   %tmp = getelementptr i8, i8* %A, i32 2
   2896   store i8* %tmp, i8** %ptr
   2897   ret { <8 x i8>, <8 x i8> } %ld2
   2898 }
   2899 
   2900 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i8 elements) to %ptr.
        ; Expected codegen: ld2.b on lane [0], post-indexed on x0 by an x register.
   2901 ;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
   2902 ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
   2903   %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   2904   %tmp = getelementptr i8, i8* %A, i64 %inc
   2905   store i8* %tmp, i8** %ptr
   2906   ret { <8 x i8>, <8 x i8> } %ld2
   2907 }
   2908 
   2909 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
   2910 
   2911 
   2912 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 x i16 = 4 bytes past %A) to %ptr.
        ; Expected codegen: ld2.h on lane [0] with post-index immediate #4 on x0.
   2913 ;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
   2914 ;CHECK: ld2.h { v0, v1 }[0], [x0], #4
   2915   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   2916   %tmp = getelementptr i16, i16* %A, i32 2
   2917   store i16* %tmp, i16** %ptr
   2918   ret { <8 x i16>, <8 x i16> } %ld2
   2919 }
   2920 
   2921 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i16 elements) to %ptr.
        ; Expected codegen: ld2.h on lane [0], post-indexed on x0 by an x register.
   2922 ;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
   2923 ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
   2924   %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   2925   %tmp = getelementptr i16, i16* %A, i64 %inc
   2926   store i16* %tmp, i16** %ptr
   2927   ret { <8 x i16>, <8 x i16> } %ld2
   2928 }
   2929 
   2930 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
   2931 
   2932 
   2933 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 x i16 = 4 bytes past %A) to %ptr.
        ; Expected codegen: ld2.h on lane [0] with post-index immediate #4 on x0.
   2934 ;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
   2935 ;CHECK: ld2.h { v0, v1 }[0], [x0], #4
   2936   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   2937   %tmp = getelementptr i16, i16* %A, i32 2
   2938   store i16* %tmp, i16** %ptr
   2939   ret { <4 x i16>, <4 x i16> } %ld2
   2940 }
   2941 
   2942 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i16 elements) to %ptr.
        ; Expected codegen: ld2.h on lane [0], post-indexed on x0 by an x register.
   2943 ;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
   2944 ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
   2945   %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   2946   %tmp = getelementptr i16, i16* %A, i64 %inc
   2947   store i16* %tmp, i16** %ptr
   2948   ret { <4 x i16>, <4 x i16> } %ld2
   2949 }
   2950 
   2951 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
   2952 
   2953 
   2954 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 x i32 = 8 bytes past %A) to %ptr.
        ; Expected codegen: ld2.s on lane [0] with post-index immediate #8 on x0.
   2955 ;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
   2956 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
   2957   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   2958   %tmp = getelementptr i32, i32* %A, i32 2
   2959   store i32* %tmp, i32** %ptr
   2960   ret { <4 x i32>, <4 x i32> } %ld2
   2961 }
   2962 
   2963 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i32 elements) to %ptr.
        ; Expected codegen: ld2.s on lane [0], post-indexed on x0 by an x register.
   2964 ;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
   2965 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   2966   %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   2967   %tmp = getelementptr i32, i32* %A, i64 %inc
   2968   store i32* %tmp, i32** %ptr
   2969   ret { <4 x i32>, <4 x i32> } %ld2
   2970 }
   2971 
   2972 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
   2973 
   2974 
   2975 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 x i32 = 8 bytes past %A) to %ptr.
        ; Expected codegen: ld2.s on lane [0] with post-index immediate #8 on x0.
   2976 ;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
   2977 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
   2978   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   2979   %tmp = getelementptr i32, i32* %A, i32 2
   2980   store i32* %tmp, i32** %ptr
   2981   ret { <2 x i32>, <2 x i32> } %ld2
   2982 }
   2983 
   2984 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i32 elements) to %ptr.
        ; Expected codegen: ld2.s on lane [0], post-indexed on x0 by an x register.
   2985 ;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
   2986 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   2987   %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   2988   %tmp = getelementptr i32, i32* %A, i64 %inc
   2989   store i32* %tmp, i32** %ptr
   2990   ret { <2 x i32>, <2 x i32> } %ld2
   2991 }
   2992 
   2993 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
   2994 
   2995 
   2996 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 x i64 = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld2.d on lane [0] with post-index immediate #16 on x0.
   2997 ;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
   2998 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
   2999   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   3000   %tmp = getelementptr i64, i64* %A, i32 2
   3001   store i64* %tmp, i64** %ptr
   3002   ret { <2 x i64>, <2 x i64> } %ld2
   3003 }
   3004 
   3005 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i64 elements) to %ptr.
        ; Expected codegen: ld2.d on lane [0], post-indexed on x0 by an x register.
   3006 ;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
   3007 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   3008   %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   3009   %tmp = getelementptr i64, i64* %A, i64 %inc
   3010   store i64* %tmp, i64** %ptr
   3011   ret { <2 x i64>, <2 x i64> } %ld2
   3012 }
   3013 
   3014 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
   3015 
   3016 
   3017 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 x i64 = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld2.d on lane [0] with post-index immediate #16 on x0.
   3018 ;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
   3019 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
   3020   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   3021   %tmp = getelementptr i64, i64* %A, i32 2
   3022   store i64* %tmp, i64** %ptr
   3023   ret { <1 x i64>, <1 x i64> } %ld2
   3024 }
   3025 
   3026 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (i64 elements) to %ptr.
        ; Expected codegen: ld2.d on lane [0], post-indexed on x0 by an x register.
   3027 ;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
   3028 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   3029   %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   3030   %tmp = getelementptr i64, i64* %A, i64 %inc
   3031   store i64* %tmp, i64** %ptr
   3032   ret { <1 x i64>, <1 x i64> } %ld2
   3033 }
   3034 
   3035 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
   3036 
   3037 
   3038 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 floats = 8 bytes past %A) to %ptr.
        ; Expected codegen: ld2.s on lane [0] with post-index immediate #8 on x0.
   3039 ;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
   3040 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
   3041   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   3042   %tmp = getelementptr float, float* %A, i32 2
   3043   store float* %tmp, float** %ptr
   3044   ret { <4 x float>, <4 x float> } %ld2
   3045 }
   3046 
   3047 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (float elements) to %ptr.
        ; Expected codegen: ld2.s on lane [0], post-indexed on x0 by an x register.
   3048 ;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
   3049 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   3050   %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   3051   %tmp = getelementptr float, float* %A, i64 %inc
   3052   store float* %tmp, float** %ptr
   3053   ret { <4 x float>, <4 x float> } %ld2
   3054 }
   3055 
   3056 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly
   3057 
   3058 
   3059 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 floats = 8 bytes past %A) to %ptr.
        ; Expected codegen: ld2.s on lane [0] with post-index immediate #8 on x0.
   3060 ;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
   3061 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
   3062   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   3063   %tmp = getelementptr float, float* %A, i32 2
   3064   store float* %tmp, float** %ptr
   3065   ret { <2 x float>, <2 x float> } %ld2
   3066 }
   3067 
   3068 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (float elements) to %ptr.
        ; Expected codegen: ld2.s on lane [0], post-indexed on x0 by an x register.
   3069 ;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
   3070 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   3071   %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   3072   %tmp = getelementptr float, float* %A, i64 %inc
   3073   store float* %tmp, float** %ptr
   3074   ret { <2 x float>, <2 x float> } %ld2
   3075 }
   3076 
   3077 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly
   3078 
   3079 
   3080 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 doubles = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld2.d on lane [0] with post-index immediate #16 on x0.
   3081 ;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
   3082 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
   3083   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   3084   %tmp = getelementptr double, double* %A, i32 2
   3085   store double* %tmp, double** %ptr
   3086   ret { <2 x double>, <2 x double> } %ld2
   3087 }
   3088 
   3089 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (double elements) to %ptr.
        ; Expected codegen: ld2.d on lane [0], post-indexed on x0 by an x register.
   3090 ;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
   3091 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   3092   %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   3093   %tmp = getelementptr double, double* %A, i64 %inc
   3094   store double* %tmp, double** %ptr
   3095   ret { <2 x double>, <2 x double> } %ld2
   3096 }
   3097 
   3098 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly
   3099 
   3100 
   3101 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[2] (2 doubles = 16 bytes past %A) to %ptr.
        ; Expected codegen: ld2.d on lane [0] with post-index immediate #16 on x0.
   3102 ;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
   3103 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
   3104   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   3105   %tmp = getelementptr double, double* %A, i32 2
   3106   store double* %tmp, double** %ptr
   3107   ret { <1 x double>, <1 x double> } %ld2
   3108 }
   3109 
   3110 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
        ; Calls the ld2lane intrinsic with vectors %B, %C, lane operand 0 and address %A,
        ; then stores &%A[%inc] (double elements) to %ptr.
        ; Expected codegen: ld2.d on lane [0], post-indexed on x0 by an x register.
   3111 ;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
   3112 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   3113   %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   3114   %tmp = getelementptr double, double* %A, i64 %inc
   3115   store double* %tmp, double** %ptr
   3116   ret { <1 x double>, <1 x double> } %ld2
   3117 }
   3118 
   3119 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly
   3120 
   3121 
   3122 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i8 elements (3 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3123 ;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
   3124 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
   3125   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   3126   %tmp = getelementptr i8, i8* %A, i32 3
   3127   store i8* %tmp, i8** %ptr
   3128   ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
   3129 }
   3130 
   3131 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i8 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3132 ;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
   3133 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3134   %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   3135   %tmp = getelementptr i8, i8* %A, i64 %inc
   3136   store i8* %tmp, i8** %ptr
   3137   ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
   3138 }
   3139 
   3140 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
   3141 
   3142 
   3143 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i8 elements (3 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3144 ;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
   3145 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
   3146   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   3147   %tmp = getelementptr i8, i8* %A, i32 3
   3148   store i8* %tmp, i8** %ptr
   3149   ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
   3150 }
   3151 
   3152 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i8 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3153 ;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
   3154 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3155   %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   3156   %tmp = getelementptr i8, i8* %A, i64 %inc
   3157   store i8* %tmp, i8** %ptr
   3158   ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
   3159 }
   3160 
   3161 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
   3162 
   3163 
   3164 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i16 elements (6 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3165 ;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
   3166 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
   3167   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   3168   %tmp = getelementptr i16, i16* %A, i32 3
   3169   store i16* %tmp, i16** %ptr
   3170   ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
   3171 }
   3172 
   3173 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i16 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3174 ;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
   3175 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3176   %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   3177   %tmp = getelementptr i16, i16* %A, i64 %inc
   3178   store i16* %tmp, i16** %ptr
   3179   ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
   3180 }
   3181 
   3182 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
   3183 
   3184 
   3185 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i16 elements (6 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3186 ;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
   3187 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
   3188   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   3189   %tmp = getelementptr i16, i16* %A, i32 3
   3190   store i16* %tmp, i16** %ptr
   3191   ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
   3192 }
   3193 
   3194 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i16 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3195 ;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
   3196 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3197   %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   3198   %tmp = getelementptr i16, i16* %A, i64 %inc
   3199   store i16* %tmp, i16** %ptr
   3200   ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
   3201 }
   3202 
   3203 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
   3204 
   3205 
   3206 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i32 elements (12 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3207 ;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
   3208 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
   3209   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   3210   %tmp = getelementptr i32, i32* %A, i32 3
   3211   store i32* %tmp, i32** %ptr
   3212   ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
   3213 }
   3214 
   3215 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i32 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3216 ;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
   3217 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3218   %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   3219   %tmp = getelementptr i32, i32* %A, i64 %inc
   3220   store i32* %tmp, i32** %ptr
   3221   ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
   3222 }
   3223 
   3224 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
   3225 
   3226 
   3227 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i32 elements (12 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3228 ;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
   3229 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
   3230   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   3231   %tmp = getelementptr i32, i32* %A, i32 3
   3232   store i32* %tmp, i32** %ptr
   3233   ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
   3234 }
   3235 
   3236 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i32 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3237 ;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
   3238 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3239   %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   3240   %tmp = getelementptr i32, i32* %A, i64 %inc
   3241   store i32* %tmp, i32** %ptr
   3242   ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
   3243 }
   3244 
   3245 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
   3246 
   3247 
   3248 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i64 elements (24 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3249 ;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
   3250 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
   3251   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   3252   %tmp = getelementptr i64, i64* %A, i32 3
   3253   store i64* %tmp, i64** %ptr
   3254   ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
   3255 }
   3256 
   3257 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i64 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3258 ;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
   3259 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3260   %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   3261   %tmp = getelementptr i64, i64* %A, i64 %inc
   3262   store i64* %tmp, i64** %ptr
   3263   ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
   3264 }
   3265 
   3266 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
   3267 
   3268 
   3269 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 i64 elements (24 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3270 ;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
   3271 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
   3272   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   3273   %tmp = getelementptr i64, i64* %A, i32 3
   3274   store i64* %tmp, i64** %ptr
   3275   ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
   3276 }
   3277 
   3278 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc i64 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3279 ;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
   3280 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3281   %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   3282   %tmp = getelementptr i64, i64* %A, i64 %inc
   3283   store i64* %tmp, i64** %ptr
   3284   ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
   3285 }
   3286 
   3287 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
   3288 
   3289 
   3290 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 float elements (12 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3291 ;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
   3292 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
   3293   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   3294   %tmp = getelementptr float, float* %A, i32 3
   3295   store float* %tmp, float** %ptr
   3296   ret { <4 x float>, <4 x float>, <4 x float> } %ld3
   3297 }
   3298 
   3299 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc float elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3300 ;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
   3301 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3302   %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   3303   %tmp = getelementptr float, float* %A, i64 %inc
   3304   store float* %tmp, float** %ptr
   3305   ret { <4 x float>, <4 x float>, <4 x float> } %ld3
   3306 }
   3307 
   3308 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
   3309 
   3310 
   3311 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 float elements (12 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3312 ;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
   3313 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
   3314   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   3315   %tmp = getelementptr float, float* %A, i32 3
   3316   store float* %tmp, float** %ptr
   3317   ret { <2 x float>, <2 x float>, <2 x float> } %ld3
   3318 }
   3319 
   3320 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc float elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3321 ;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
   3322 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3323   %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   3324   %tmp = getelementptr float, float* %A, i64 %inc
   3325   store float* %tmp, float** %ptr
   3326   ret { <2 x float>, <2 x float>, <2 x float> } %ld3
   3327 }
   3328 
   3329 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
   3330 
   3331 
   3332 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 double elements (24 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3333 ;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
   3334 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
   3335   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   3336   %tmp = getelementptr double, double* %A, i32 3
   3337   store double* %tmp, double** %ptr
   3338   ret { <2 x double>, <2 x double>, <2 x double> } %ld3
   3339 }
   3340 
   3341 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc double elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3342 ;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
   3343 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3344   %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   3345   %tmp = getelementptr double, double* %A, i64 %inc
   3346   store double* %tmp, double** %ptr
   3347   ret { <2 x double>, <2 x double>, <2 x double> } %ld3
   3348 }
   3349 
   3350 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
   3351 
   3352 
   3353 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + 3 double elements (24 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3354 ;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
   3355 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
   3356   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   3357   %tmp = getelementptr double, double* %A, i32 3
   3358   store double* %tmp, double** %ptr
   3359   ret { <1 x double>, <1 x double>, <1 x double> } %ld3
   3360 }
   3361 
   3362 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
        ; Single-lane ld3 (lane 0) from %A into %B/%C/%D, followed by a store of
        ; %A + %inc double elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3363 ;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
   3364 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   3365   %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   3366   %tmp = getelementptr double, double* %A, i64 %inc
   3367   store double* %tmp, double** %ptr
   3368   ret { <1 x double>, <1 x double>, <1 x double> } %ld3
   3369 }
   3370 
   3371 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
   3372 
   3373 
   3374 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i8 elements (4 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3375 ;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
   3376 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
   3377   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   3378   %tmp = getelementptr i8, i8* %A, i32 4
   3379   store i8* %tmp, i8** %ptr
   3380   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
   3381 }
   3382 
   3383 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i8 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3384 ;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
   3385 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3386   %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   3387   %tmp = getelementptr i8, i8* %A, i64 %inc
   3388   store i8* %tmp, i8** %ptr
   3389   ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
   3390 }
   3391 
   3392 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
   3393 
   3394 
   3395 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i8 elements (4 bytes) to %ptr. The expected instruction folds
        ; that pointer update into the load as a post-index immediate.
   3396 ;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
   3397 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
   3398   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   3399   %tmp = getelementptr i8, i8* %A, i32 4
   3400   store i8* %tmp, i8** %ptr
   3401   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
   3402 }
   3403 
   3404 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i8 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3405 ;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
   3406 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3407   %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   3408   %tmp = getelementptr i8, i8* %A, i64 %inc
   3409   store i8* %tmp, i8** %ptr
   3410   ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
   3411 }
   3412 
   3413 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
   3414 
   3415 
   3416 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i16 elements (8 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3417 ;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
   3418 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
   3419   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   3420   %tmp = getelementptr i16, i16* %A, i32 4
   3421   store i16* %tmp, i16** %ptr
   3422   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
   3423 }
   3424 
   3425 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i16 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3426 ;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
   3427 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3428   %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   3429   %tmp = getelementptr i16, i16* %A, i64 %inc
   3430   store i16* %tmp, i16** %ptr
   3431   ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
   3432 }
   3433 
   3434 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
   3435 
   3436 
   3437 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i16 elements (8 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3438 ;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
   3439 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
   3440   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   3441   %tmp = getelementptr i16, i16* %A, i32 4
   3442   store i16* %tmp, i16** %ptr
   3443   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
   3444 }
   3445 
   3446 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i16 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3447 ;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
   3448 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3449   %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   3450   %tmp = getelementptr i16, i16* %A, i64 %inc
   3451   store i16* %tmp, i16** %ptr
   3452   ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
   3453 }
   3454 
   3455 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
   3456 
   3457 
   3458 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i32 elements (16 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3459 ;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
   3460 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
   3461   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   3462   %tmp = getelementptr i32, i32* %A, i32 4
   3463   store i32* %tmp, i32** %ptr
   3464   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
   3465 }
   3466 
   3467 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i32 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3468 ;CHECK-LABEL: test_v4i32_post_reg_ld4lane:
   3469 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3470   %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   3471   %tmp = getelementptr i32, i32* %A, i64 %inc
   3472   store i32* %tmp, i32** %ptr
   3473   ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
   3474 }
   3475 
   3476 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
   3477 
   3478 
   3479 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i32 elements (16 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3480 ;CHECK-LABEL: test_v2i32_post_imm_ld4lane:
   3481 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
   3482   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   3483   %tmp = getelementptr i32, i32* %A, i32 4
   3484   store i32* %tmp, i32** %ptr
   3485   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
   3486 }
   3487 
   3488 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i32 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3489 ;CHECK-LABEL: test_v2i32_post_reg_ld4lane:
   3490 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3491   %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   3492   %tmp = getelementptr i32, i32* %A, i64 %inc
   3493   store i32* %tmp, i32** %ptr
   3494   ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
   3495 }
   3496 
   3497 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
   3498 
   3499 
   3500 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i64 elements (32 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3501 ;CHECK-LABEL: test_v2i64_post_imm_ld4lane:
   3502 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
   3503   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   3504   %tmp = getelementptr i64, i64* %A, i32 4
   3505   store i64* %tmp, i64** %ptr
   3506   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
   3507 }
   3508 
   3509 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i64 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3510 ;CHECK-LABEL: test_v2i64_post_reg_ld4lane:
   3511 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3512   %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   3513   %tmp = getelementptr i64, i64* %A, i64 %inc
   3514   store i64* %tmp, i64** %ptr
   3515   ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
   3516 }
   3517 
   3518 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
   3519 
   3520 
   3521 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 i64 elements (32 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3522 ;CHECK-LABEL: test_v1i64_post_imm_ld4lane:
   3523 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
   3524   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   3525   %tmp = getelementptr i64, i64* %A, i32 4
   3526   store i64* %tmp, i64** %ptr
   3527   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
   3528 }
   3529 
   3530 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc i64 elements to %ptr. The expected instruction folds that
        ; pointer update into the load as a register post-index; any x register
        ; is accepted for the increment.
   3531 ;CHECK-LABEL: test_v1i64_post_reg_ld4lane:
   3532 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3533   %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   3534   %tmp = getelementptr i64, i64* %A, i64 %inc
   3535   store i64* %tmp, i64** %ptr
   3536   ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
   3537 }
   3538 
   3539 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
   3540 
   3541 
   3542 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 float elements (16 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3543 ;CHECK-LABEL: test_v4f32_post_imm_ld4lane:
   3544 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
   3545   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   3546   %tmp = getelementptr float, float* %A, i32 4
   3547   store float* %tmp, float** %ptr
   3548   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
   3549 }
   3550 
   3551 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc float elements to %ptr. The expected instruction folds
        ; that pointer update into the load as a register post-index; any x
        ; register is accepted for the increment.
   3552 ;CHECK-LABEL: test_v4f32_post_reg_ld4lane:
   3553 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3554   %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   3555   %tmp = getelementptr float, float* %A, i64 %inc
   3556   store float* %tmp, float** %ptr
   3557   ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
   3558 }
   3559 
   3560 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
   3561 
   3562 
   3563 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 float elements (16 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3564 ;CHECK-LABEL: test_v2f32_post_imm_ld4lane:
   3565 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
   3566   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   3567   %tmp = getelementptr float, float* %A, i32 4
   3568   store float* %tmp, float** %ptr
   3569   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
   3570 }
   3571 
   3572 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + %inc float elements to %ptr. The expected instruction folds
        ; that pointer update into the load as a register post-index; any x
        ; register is accepted for the increment.
   3573 ;CHECK-LABEL: test_v2f32_post_reg_ld4lane:
   3574 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3575   %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   3576   %tmp = getelementptr float, float* %A, i64 %inc
   3577   store float* %tmp, float** %ptr
   3578   ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
   3579 }
   3580 
   3581 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
   3582 
   3583 
   3584 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
        ; Single-lane ld4 (lane 0) from %A into %B/%C/%D/%E, followed by a store
        ; of %A + 4 double elements (32 bytes) to %ptr. The expected instruction
        ; folds that pointer update into the load as a post-index immediate.
   3585 ;CHECK-LABEL: test_v2f64_post_imm_ld4lane:
   3586 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
   3587   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   3588   %tmp = getelementptr double, double* %A, i32 4
   3589   store double* %tmp, double** %ptr
   3590   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
   3591 }
   3592 
   3593 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
        ; Calls ld4lane on %A with lane index 0 and vectors %B-%E, then stores %A
        ; advanced by %inc doubles to %ptr. The pattern expects an ld4.d lane load
        ; post-indexed by an x register (any register number is accepted).
   3594 ;CHECK-LABEL: test_v2f64_post_reg_ld4lane:
   3595 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3596   %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   3597   %tmp = getelementptr double, double* %A, i64 %inc
   3598   store double* %tmp, double** %ptr
   3599   ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
   3600 }
   3601 
   3602 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
   3603 
   3604 
   3605 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
        ; Calls ld4lane on %A with lane index 0 and the <1 x double> vectors %B-%E,
        ; then stores %A advanced by 4 doubles (32 bytes) to %ptr. The pattern
        ; expects an ld4.d lane load with post-index immediate #32.
   3606 ;CHECK-LABEL: test_v1f64_post_imm_ld4lane:
   3607 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
   3608   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   3609   %tmp = getelementptr double, double* %A, i32 4
   3610   store double* %tmp, double** %ptr
   3611   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
   3612 }
   3613 
   3614 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
        ; Calls ld4lane on %A with lane index 0 and the <1 x double> vectors %B-%E,
        ; then stores %A advanced by %inc doubles to %ptr. The pattern expects an
        ; ld4.d lane load post-indexed by an x register (any register number).
   3615 ;CHECK-LABEL: test_v1f64_post_reg_ld4lane:
   3616 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   3617   %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   3618   %tmp = getelementptr double, double* %A, i64 %inc
   3619   store double* %tmp, double** %ptr
   3620   ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
   3621 }
   3622 
   3623 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
   3624 
   3625 
   3626 define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 32 bytes;
        ; %ptr is unused. The pattern expects st2.16b with post-index immediate #32.
   3627 ;CHECK-LABEL: test_v16i8_post_imm_st2:
   3628 ;CHECK: st2.16b { v0, v1 }, [x0], #32
   3629   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   3630   %tmp = getelementptr i8, i8* %A, i32 32
   3631   ret i8* %tmp
   3632 }
   3633 
   3634 define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc bytes;
        ; %ptr is unused. The pattern expects st2.16b post-indexed by an x register.
   3635 ;CHECK-LABEL: test_v16i8_post_reg_st2:
   3636 ;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
   3637   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   3638   %tmp = getelementptr i8, i8* %A, i64 %inc
   3639   ret i8* %tmp
   3640 }
   3641 
   3642 declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
   3643 
   3644 
   3645 define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 16 bytes;
        ; %ptr is unused. The pattern expects st2.8b with post-index immediate #16.
   3646 ;CHECK-LABEL: test_v8i8_post_imm_st2:
   3647 ;CHECK: st2.8b { v0, v1 }, [x0], #16
   3648   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   3649   %tmp = getelementptr i8, i8* %A, i32 16
   3650   ret i8* %tmp
   3651 }
   3652 
   3653 define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc bytes;
        ; %ptr is unused. The pattern expects st2.8b post-indexed by an x register.
   3654 ;CHECK-LABEL: test_v8i8_post_reg_st2:
   3655 ;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
   3656   call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   3657   %tmp = getelementptr i8, i8* %A, i64 %inc
   3658   ret i8* %tmp
   3659 }
   3660 
   3661 declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
   3662 
   3663 
   3664 define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 16 i16
        ; elements (32 bytes); %ptr is unused. The pattern expects st2.8h with
        ; post-index immediate #32.
   3665 ;CHECK-LABEL: test_v8i16_post_imm_st2:
   3666 ;CHECK: st2.8h { v0, v1 }, [x0], #32
   3667   call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   3668   %tmp = getelementptr i16, i16* %A, i32 16
   3669   ret i16* %tmp
   3670 }
   3671 
   3672 define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc i16
        ; elements; %ptr is unused. The pattern expects st2.8h post-indexed by an
        ; x register.
   3673 ;CHECK-LABEL: test_v8i16_post_reg_st2:
   3674 ;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
   3675   call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   3676   %tmp = getelementptr i16, i16* %A, i64 %inc
   3677   ret i16* %tmp
   3678 }
   3679 
   3680 declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
   3681 
   3682 
   3683 define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 8 i16
        ; elements (16 bytes); %ptr is unused. The pattern expects st2.4h with
        ; post-index immediate #16.
   3684 ;CHECK-LABEL: test_v4i16_post_imm_st2:
   3685 ;CHECK: st2.4h { v0, v1 }, [x0], #16
   3686   call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   3687   %tmp = getelementptr i16, i16* %A, i32 8
   3688   ret i16* %tmp
   3689 }
   3690 
   3691 define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc i16
        ; elements; %ptr is unused. The pattern expects st2.4h post-indexed by an
        ; x register.
   3692 ;CHECK-LABEL: test_v4i16_post_reg_st2:
   3693 ;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
   3694   call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   3695   %tmp = getelementptr i16, i16* %A, i64 %inc
   3696   ret i16* %tmp
   3697 }
   3698 
   3699 declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
   3700 
   3701 
   3702 define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 8 i32
        ; elements (32 bytes); %ptr is unused. The pattern expects st2.4s with
        ; post-index immediate #32.
   3703 ;CHECK-LABEL: test_v4i32_post_imm_st2:
   3704 ;CHECK: st2.4s { v0, v1 }, [x0], #32
   3705   call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   3706   %tmp = getelementptr i32, i32* %A, i32 8
   3707   ret i32* %tmp
   3708 }
   3709 
   3710 define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc i32
        ; elements; %ptr is unused. The pattern expects st2.4s post-indexed by an
        ; x register.
   3711 ;CHECK-LABEL: test_v4i32_post_reg_st2:
   3712 ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
   3713   call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   3714   %tmp = getelementptr i32, i32* %A, i64 %inc
   3715   ret i32* %tmp
   3716 }
   3717 
   3718 declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
   3719 
   3720 
   3721 define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 4 i32
        ; elements (16 bytes); %ptr is unused. The pattern expects st2.2s with
        ; post-index immediate #16.
   3722 ;CHECK-LABEL: test_v2i32_post_imm_st2:
   3723 ;CHECK: st2.2s { v0, v1 }, [x0], #16
   3724   call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   3725   %tmp = getelementptr i32, i32* %A, i32 4
   3726   ret i32* %tmp
   3727 }
   3728 
   3729 define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc i32
        ; elements; %ptr is unused. The pattern expects st2.2s post-indexed by an
        ; x register.
   3730 ;CHECK-LABEL: test_v2i32_post_reg_st2:
   3731 ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
   3732   call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   3733   %tmp = getelementptr i32, i32* %A, i64 %inc
   3734   ret i32* %tmp
   3735 }
   3736 
   3737 declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
   3738 
   3739 
   3740 define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 4 i64
        ; elements (32 bytes); %ptr is unused. The pattern expects st2.2d with
        ; post-index immediate #32.
   3741 ;CHECK-LABEL: test_v2i64_post_imm_st2:
   3742 ;CHECK: st2.2d { v0, v1 }, [x0], #32
   3743   call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   3744   %tmp = getelementptr i64, i64* %A, i64 4
   3745   ret i64* %tmp
   3746 }
   3747 
   3748 define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc i64
        ; elements; %ptr is unused. The pattern expects st2.2d post-indexed by an
        ; x register.
   3749 ;CHECK-LABEL: test_v2i64_post_reg_st2:
   3750 ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
   3751   call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   3752   %tmp = getelementptr i64, i64* %A, i64 %inc
   3753   ret i64* %tmp
   3754 }
   3755 
   3756 declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
   3757 
   3758 
   3759 define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
        ; Calls st2 on %A with the <1 x i64> vectors %B and %C and returns %A
        ; advanced by 2 i64 elements (16 bytes); %ptr is unused. Note the pattern
        ; expects st1.1d with a two-register list (not st2), post-index #16.
   3760 ;CHECK-LABEL: test_v1i64_post_imm_st2:
   3761 ;CHECK: st1.1d { v0, v1 }, [x0], #16
   3762   call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   3763   %tmp = getelementptr i64, i64* %A, i64 2
   3764   ret i64* %tmp
   3765 }
   3766 
   3767 define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with the <1 x i64> vectors %B and %C and returns %A
        ; advanced by %inc i64 elements; %ptr is unused. Note the pattern expects
        ; st1.1d with a two-register list (not st2), post-indexed by an x register.
   3768 ;CHECK-LABEL: test_v1i64_post_reg_st2:
   3769 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
   3770   call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   3771   %tmp = getelementptr i64, i64* %A, i64 %inc
   3772   ret i64* %tmp
   3773 }
   3774 
   3775 declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
   3776 
   3777 
   3778 define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 8 floats
        ; (32 bytes); %ptr is unused. The pattern expects st2.4s with post-index
        ; immediate #32.
   3779 ;CHECK-LABEL: test_v4f32_post_imm_st2:
   3780 ;CHECK: st2.4s { v0, v1 }, [x0], #32
   3781   call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   3782   %tmp = getelementptr float, float* %A, i32 8
   3783   ret float* %tmp
   3784 }
   3785 
   3786 define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc floats;
        ; %ptr is unused. The pattern expects st2.4s post-indexed by an x register.
   3787 ;CHECK-LABEL: test_v4f32_post_reg_st2:
   3788 ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
   3789   call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   3790   %tmp = getelementptr float, float* %A, i64 %inc
   3791   ret float* %tmp
   3792 }
   3793 
   3794 declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
   3795 
   3796 
   3797 define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 4 floats
        ; (16 bytes); %ptr is unused. The pattern expects st2.2s with post-index
        ; immediate #16.
   3798 ;CHECK-LABEL: test_v2f32_post_imm_st2:
   3799 ;CHECK: st2.2s { v0, v1 }, [x0], #16
   3800   call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   3801   %tmp = getelementptr float, float* %A, i32 4
   3802   ret float* %tmp
   3803 }
   3804 
   3805 define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc floats;
        ; %ptr is unused. The pattern expects st2.2s post-indexed by an x register.
   3806 ;CHECK-LABEL: test_v2f32_post_reg_st2:
   3807 ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
   3808   call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   3809   %tmp = getelementptr float, float* %A, i64 %inc
   3810   ret float* %tmp
   3811 }
   3812 
   3813 declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
   3814 
   3815 
   3816 define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by 4 doubles
        ; (32 bytes); %ptr is unused. The pattern expects st2.2d with post-index
        ; immediate #32.
   3817 ;CHECK-LABEL: test_v2f64_post_imm_st2:
   3818 ;CHECK: st2.2d { v0, v1 }, [x0], #32
   3819   call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   3820   %tmp = getelementptr double, double* %A, i64 4
   3821   ret double* %tmp
   3822 }
   3823 
   3824 define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with %B and %C and returns %A advanced by %inc doubles;
        ; %ptr is unused. The pattern expects st2.2d post-indexed by an x register.
   3825 ;CHECK-LABEL: test_v2f64_post_reg_st2:
   3826 ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
   3827   call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   3828   %tmp = getelementptr double, double* %A, i64 %inc
   3829   ret double* %tmp
   3830 }
   3831 
   3832 declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
   3833 
   3834 
   3835 define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
        ; Calls st2 on %A with the <1 x double> vectors %B and %C and returns %A
        ; advanced by 2 doubles (16 bytes); %ptr is unused. Note the pattern
        ; expects st1.1d with a two-register list (not st2), post-index #16.
   3836 ;CHECK-LABEL: test_v1f64_post_imm_st2:
   3837 ;CHECK: st1.1d { v0, v1 }, [x0], #16
   3838   call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   3839   %tmp = getelementptr double, double* %A, i64 2
   3840   ret double* %tmp
   3841 }
   3842 
   3843 define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
        ; Calls st2 on %A with the <1 x double> vectors %B and %C and returns %A
        ; advanced by %inc doubles; %ptr is unused. Note the pattern expects
        ; st1.1d with a two-register list (not st2), post-indexed by an x register.
   3844 ;CHECK-LABEL: test_v1f64_post_reg_st2:
   3845 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
   3846   call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   3847   %tmp = getelementptr double, double* %A, i64 %inc
   3848   ret double* %tmp
   3849 }
   3850 
   3851 declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
   3852 
   3853 
   3854 define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 48 bytes;
        ; %ptr is unused. The pattern expects st3.16b with post-index immediate #48.
   3855 ;CHECK-LABEL: test_v16i8_post_imm_st3:
   3856 ;CHECK: st3.16b { v0, v1, v2 }, [x0], #48
   3857   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   3858   %tmp = getelementptr i8, i8* %A, i32 48
   3859   ret i8* %tmp
   3860 }
   3861 
   3862 define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc bytes;
        ; %ptr is unused. The pattern expects st3.16b post-indexed by an x register.
   3863 ;CHECK-LABEL: test_v16i8_post_reg_st3:
   3864 ;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3865   call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   3866   %tmp = getelementptr i8, i8* %A, i64 %inc
   3867   ret i8* %tmp
   3868 }
   3869 
   3870 declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
   3871 
   3872 
   3873 define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 24 bytes;
        ; %ptr is unused. The pattern expects st3.8b with post-index immediate #24.
   3874 ;CHECK-LABEL: test_v8i8_post_imm_st3:
   3875 ;CHECK: st3.8b { v0, v1, v2 }, [x0], #24
   3876   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   3877   %tmp = getelementptr i8, i8* %A, i32 24
   3878   ret i8* %tmp
   3879 }
   3880 
   3881 define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc bytes;
        ; %ptr is unused. The pattern expects st3.8b post-indexed by an x register.
   3882 ;CHECK-LABEL: test_v8i8_post_reg_st3:
   3883 ;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3884   call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   3885   %tmp = getelementptr i8, i8* %A, i64 %inc
   3886   ret i8* %tmp
   3887 }
   3888 
   3889 declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
   3890 
   3891 
   3892 define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 24 i16
        ; elements (48 bytes); %ptr is unused. The pattern expects st3.8h with
        ; post-index immediate #48.
   3893 ;CHECK-LABEL: test_v8i16_post_imm_st3:
   3894 ;CHECK: st3.8h { v0, v1, v2 }, [x0], #48
   3895   call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   3896   %tmp = getelementptr i16, i16* %A, i32 24
   3897   ret i16* %tmp
   3898 }
   3899 
   3900 define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc i16
        ; elements; %ptr is unused. The pattern expects st3.8h post-indexed by an
        ; x register.
   3901 ;CHECK-LABEL: test_v8i16_post_reg_st3:
   3902 ;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3903   call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   3904   %tmp = getelementptr i16, i16* %A, i64 %inc
   3905   ret i16* %tmp
   3906 }
   3907 
   3908 declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
   3909 
   3910 
   3911 define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 12 i16
        ; elements (24 bytes); %ptr is unused. The pattern expects st3.4h with
        ; post-index immediate #24.
   3912 ;CHECK-LABEL: test_v4i16_post_imm_st3:
   3913 ;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
   3914   call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   3915   %tmp = getelementptr i16, i16* %A, i32 12
   3916   ret i16* %tmp
   3917 }
   3918 
   3919 define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc i16
        ; elements; %ptr is unused. The pattern expects st3.4h post-indexed by an
        ; x register.
   3920 ;CHECK-LABEL: test_v4i16_post_reg_st3:
   3921 ;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3922   call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   3923   %tmp = getelementptr i16, i16* %A, i64 %inc
   3924   ret i16* %tmp
   3925 }
   3926 
   3927 declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
   3928 
   3929 
   3930 define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 12 i32
        ; elements (48 bytes); %ptr is unused. The pattern expects st3.4s with
        ; post-index immediate #48.
   3931 ;CHECK-LABEL: test_v4i32_post_imm_st3:
   3932 ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
   3933   call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   3934   %tmp = getelementptr i32, i32* %A, i32 12
   3935   ret i32* %tmp
   3936 }
   3937 
   3938 define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc i32
        ; elements; %ptr is unused. The pattern expects st3.4s post-indexed by an
        ; x register.
   3939 ;CHECK-LABEL: test_v4i32_post_reg_st3:
   3940 ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3941   call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   3942   %tmp = getelementptr i32, i32* %A, i64 %inc
   3943   ret i32* %tmp
   3944 }
   3945 
   3946 declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
   3947 
   3948 
   3949 define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 6 i32
        ; elements (24 bytes); %ptr is unused. The pattern expects st3.2s with
        ; post-index immediate #24.
   3950 ;CHECK-LABEL: test_v2i32_post_imm_st3:
   3951 ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
   3952   call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   3953   %tmp = getelementptr i32, i32* %A, i32 6
   3954   ret i32* %tmp
   3955 }
   3956 
   3957 define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc i32
        ; elements; %ptr is unused. The pattern expects st3.2s post-indexed by an
        ; x register.
   3958 ;CHECK-LABEL: test_v2i32_post_reg_st3:
   3959 ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3960   call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   3961   %tmp = getelementptr i32, i32* %A, i64 %inc
   3962   ret i32* %tmp
   3963 }
   3964 
   3965 declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
   3966 
   3967 
   3968 define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 6 i64
        ; elements (48 bytes); %ptr is unused. The pattern expects st3.2d with
        ; post-index immediate #48.
   3969 ;CHECK-LABEL: test_v2i64_post_imm_st3:
   3970 ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
   3971   call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   3972   %tmp = getelementptr i64, i64* %A, i64 6
   3973   ret i64* %tmp
   3974 }
   3975 
   3976 define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc i64
        ; elements; %ptr is unused. The pattern expects st3.2d post-indexed by an
        ; x register.
   3977 ;CHECK-LABEL: test_v2i64_post_reg_st3:
   3978 ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3979   call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   3980   %tmp = getelementptr i64, i64* %A, i64 %inc
   3981   ret i64* %tmp
   3982 }
   3983 
   3984 declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
   3985 
   3986 
   3987 define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
        ; Calls st3 on %A with the <1 x i64> vectors %B, %C and %D and returns %A
        ; advanced by 3 i64 elements (24 bytes); %ptr is unused. Note the pattern
        ; expects st1.1d with a three-register list (not st3), post-index #24.
   3988 ;CHECK-LABEL: test_v1i64_post_imm_st3:
   3989 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
   3990   call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   3991   %tmp = getelementptr i64, i64* %A, i64 3
   3992   ret i64* %tmp
   3993 }
   3994 
   3995 define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with the <1 x i64> vectors %B, %C and %D and returns %A
        ; advanced by %inc i64 elements; %ptr is unused. Note the pattern expects
        ; st1.1d with a three-register list (not st3), post-indexed by an x register.
   3996 ;CHECK-LABEL: test_v1i64_post_reg_st3:
   3997 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   3998   call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   3999   %tmp = getelementptr i64, i64* %A, i64 %inc
   4000   ret i64* %tmp
   4001 }
   4002 
   4003 declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
   4004 
   4005 
   4006 define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 12 floats
        ; (48 bytes); %ptr is unused. The pattern expects st3.4s with post-index
        ; immediate #48.
   4007 ;CHECK-LABEL: test_v4f32_post_imm_st3:
   4008 ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
   4009   call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   4010   %tmp = getelementptr float, float* %A, i32 12
   4011   ret float* %tmp
   4012 }
   4013 
   4014 define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc
        ; floats; %ptr is unused. The pattern expects st3.4s post-indexed by an
        ; x register.
   4015 ;CHECK-LABEL: test_v4f32_post_reg_st3:
   4016 ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4017   call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   4018   %tmp = getelementptr float, float* %A, i64 %inc
   4019   ret float* %tmp
   4020 }
   4021 
   4022 declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
   4023 
   4024 
   4025 define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 6 floats
        ; (24 bytes); %ptr is unused. The pattern expects st3.2s with post-index
        ; immediate #24.
   4026 ;CHECK-LABEL: test_v2f32_post_imm_st3:
   4027 ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
   4028   call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   4029   %tmp = getelementptr float, float* %A, i32 6
   4030   ret float* %tmp
   4031 }
   4032 
   4033 define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc
        ; floats; %ptr is unused. The pattern expects st3.2s post-indexed by an
        ; x register.
   4034 ;CHECK-LABEL: test_v2f32_post_reg_st3:
   4035 ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4036   call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   4037   %tmp = getelementptr float, float* %A, i64 %inc
   4038   ret float* %tmp
   4039 }
   4040 
   4041 declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
   4042 
   4043 
   4044 define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by 6 doubles
        ; (48 bytes); %ptr is unused. The pattern expects st3.2d with post-index
        ; immediate #48.
   4045 ;CHECK-LABEL: test_v2f64_post_imm_st3:
   4046 ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
   4047   call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   4048   %tmp = getelementptr double, double* %A, i64 6
   4049   ret double* %tmp
   4050 }
   4051 
   4052 define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with %B, %C and %D and returns %A advanced by %inc
        ; doubles; %ptr is unused. The pattern expects st3.2d post-indexed by an
        ; x register.
   4053 ;CHECK-LABEL: test_v2f64_post_reg_st3:
   4054 ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4055   call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   4056   %tmp = getelementptr double, double* %A, i64 %inc
   4057   ret double* %tmp
   4058 }
   4059 
   4060 declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
   4061 
   4062 
   4063 define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
        ; Calls st3 on %A with the <1 x double> vectors %B, %C and %D and returns
        ; %A advanced by 3 doubles (24 bytes); %ptr is unused. Note the pattern
        ; expects st1.1d with a three-register list (not st3), post-index #24.
   4064 ;CHECK-LABEL: test_v1f64_post_imm_st3:
   4065 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
   4066   call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   4067   %tmp = getelementptr double, double* %A, i64 3
   4068   ret double* %tmp
   4069 }
   4070 
   4071 define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
        ; Calls st3 on %A with the <1 x double> vectors %B, %C and %D and returns
        ; %A advanced by %inc doubles; %ptr is unused. Note the pattern expects
        ; st1.1d with a three-register list (not st3), post-indexed by an x register.
   4072 ;CHECK-LABEL: test_v1f64_post_reg_st3:
   4073 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4074   call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   4075   %tmp = getelementptr double, double* %A, i64 %inc
   4076   ret double* %tmp
   4077 }
   4078 
   4079 declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
   4080 
   4081 
   4082 define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by 64 bytes; %ptr is
        ; unused. The pattern expects st4.16b with post-index immediate #64.
   4083 ;CHECK-LABEL: test_v16i8_post_imm_st4:
   4084 ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
   4085   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   4086   %tmp = getelementptr i8, i8* %A, i32 64
   4087   ret i8* %tmp
   4088 }
   4089 
   4090 define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by %inc bytes; %ptr
        ; is unused. The pattern expects st4.16b post-indexed by an x register.
   4091 ;CHECK-LABEL: test_v16i8_post_reg_st4:
   4092 ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4093   call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   4094   %tmp = getelementptr i8, i8* %A, i64 %inc
   4095   ret i8* %tmp
   4096 }
   4097 
   4098 declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
   4099 
   4100 
   4101 define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by 32 bytes; %ptr is
        ; unused. The pattern expects st4.8b with post-index immediate #32.
   4102 ;CHECK-LABEL: test_v8i8_post_imm_st4:
   4103 ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
   4104   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   4105   %tmp = getelementptr i8, i8* %A, i32 32
   4106   ret i8* %tmp
   4107 }
   4108 
   4109 define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by %inc bytes; %ptr
        ; is unused. The pattern expects st4.8b post-indexed by an x register.
   4110 ;CHECK-LABEL: test_v8i8_post_reg_st4:
   4111 ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4112   call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   4113   %tmp = getelementptr i8, i8* %A, i64 %inc
   4114   ret i8* %tmp
   4115 }
   4116 
   4117 declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
   4118 
   4119 
   4120 define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by 32 i16 elements
        ; (64 bytes); %ptr is unused. The pattern expects st4.8h with post-index
        ; immediate #64.
   4121 ;CHECK-LABEL: test_v8i16_post_imm_st4:
   4122 ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
   4123   call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   4124   %tmp = getelementptr i16, i16* %A, i32 32
   4125   ret i16* %tmp
   4126 }
   4127 
   4128 define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by %inc i16 elements;
        ; %ptr is unused. The pattern expects st4.8h post-indexed by an x register.
   4129 ;CHECK-LABEL: test_v8i16_post_reg_st4:
   4130 ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4131   call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   4132   %tmp = getelementptr i16, i16* %A, i64 %inc
   4133   ret i16* %tmp
   4134 }
   4135 
   4136 declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
   4137 
   4138 
   4139 define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by 16 i16 elements
        ; (32 bytes); %ptr is unused. The pattern expects st4.4h with post-index
        ; immediate #32.
   4140 ;CHECK-LABEL: test_v4i16_post_imm_st4:
   4141 ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
   4142   call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   4143   %tmp = getelementptr i16, i16* %A, i32 16
   4144   ret i16* %tmp
   4145 }
   4146 
   4147 define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by %inc i16 elements;
        ; %ptr is unused. The pattern expects st4.4h post-indexed by an x register.
   4148 ;CHECK-LABEL: test_v4i16_post_reg_st4:
   4149 ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4150   call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   4151   %tmp = getelementptr i16, i16* %A, i64 %inc
   4152   ret i16* %tmp
   4153 }
   4154 
   4155 declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>,<4 x i16>,  i16*)
   4156 
   4157 
   4158 define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by 16 i32 elements
        ; (64 bytes); %ptr is unused. The pattern expects st4.4s with post-index
        ; immediate #64.
   4159 ;CHECK-LABEL: test_v4i32_post_imm_st4:
   4160 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
   4161   call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   4162   %tmp = getelementptr i32, i32* %A, i32 16
   4163   ret i32* %tmp
   4164 }
   4165 
   4166 define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by %inc i32 elements;
        ; %ptr is unused. The pattern expects st4.4s post-indexed by an x register.
   4167 ;CHECK-LABEL: test_v4i32_post_reg_st4:
   4168 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4169   call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   4170   %tmp = getelementptr i32, i32* %A, i64 %inc
   4171   ret i32* %tmp
   4172 }
   4173 
   4174 declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>,<4 x i32>,  i32*)
   4175 
   4176 
   4177 define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by 8 i32 elements
        ; (32 bytes); %ptr is unused. The pattern expects st4.2s with post-index
        ; immediate #32.
   4178 ;CHECK-LABEL: test_v2i32_post_imm_st4:
   4179 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
   4180   call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   4181   %tmp = getelementptr i32, i32* %A, i32 8
   4182   ret i32* %tmp
   4183 }
   4184 
   4185 define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
        ; Calls st4 on %A with %B-%E and returns %A advanced by %inc i32 elements;
        ; %ptr is unused. The pattern expects st4.2s post-indexed by an x register.
   4186 ;CHECK-LABEL: test_v2i32_post_reg_st4:
   4187 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4188   call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   4189   %tmp = getelementptr i32, i32* %A, i64 %inc
   4190   ret i32* %tmp
   4191 }
   4192 
   4193 declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
   4194 
   4195 
   4196 define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
        ; Stores four <2 x i64> vectors at %A with the st4 intrinsic, then returns %A + 8 x i64 (64 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #64. %ptr is unused.
   4197 ;CHECK-LABEL: test_v2i64_post_imm_st4:
   4198 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
   4199   call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   4200   %tmp = getelementptr i64, i64* %A, i64 8
   4201   ret i64* %tmp
   4202 }
   4203 
   4204 define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
        ; Stores four <2 x i64> vectors at %A with the st4 intrinsic, then returns %A advanced by %inc i64 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4205 ;CHECK-LABEL: test_v2i64_post_reg_st4:
   4206 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4207   call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   4208   %tmp = getelementptr i64, i64* %A, i64 %inc
   4209   ret i64* %tmp
   4210 }
   4211 
   4212 declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>,<2 x i64>,  i64*)
   4213 
   4214 
   4215 define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
        ; Stores four <1 x i64> vectors at %A with the st4 intrinsic, then returns %A + 4 x i64 (32 bytes, their combined size).
        ; Expected codegen: a post-indexed four-register st1.1d (not an st4 mnemonic) with immediate #32. %ptr is unused.
   4216 ;CHECK-LABEL: test_v1i64_post_imm_st4:
   4217 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
   4218   call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   4219   %tmp = getelementptr i64, i64* %A, i64 4
   4220   ret i64* %tmp
   4221 }
   4222 
   4223 define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
        ; Stores four <1 x i64> vectors at %A with the st4 intrinsic, then returns %A advanced by %inc i64 elements (runtime value).
        ; Expected codegen: a four-register st1.1d (not an st4 mnemonic) with a register post-index. %ptr is unused.
   4224 ;CHECK-LABEL: test_v1i64_post_reg_st4:
   4225 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4226   call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   4227   %tmp = getelementptr i64, i64* %A, i64 %inc
   4228   ret i64* %tmp
   4229 }
   4230 
   4231 declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>,<1 x i64>,  i64*)
   4232 
   4233 
   4234 define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
        ; Stores four <4 x float> vectors at %A with the st4 intrinsic, then returns %A + 16 x float (64 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #64. %ptr is unused.
   4235 ;CHECK-LABEL: test_v4f32_post_imm_st4:
   4236 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
   4237   call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   4238   %tmp = getelementptr float, float* %A, i32 16
   4239   ret float* %tmp
   4240 }
   4241 
   4242 define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
        ; Stores four <4 x float> vectors at %A with the st4 intrinsic, then returns %A advanced by %inc float elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4243 ;CHECK-LABEL: test_v4f32_post_reg_st4:
   4244 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4245   call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   4246   %tmp = getelementptr float, float* %A, i64 %inc
   4247   ret float* %tmp
   4248 }
   4249 
   4250 declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
   4251 
   4252 
   4253 define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
        ; Stores four <2 x float> vectors at %A with the st4 intrinsic, then returns %A + 8 x float (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4254 ;CHECK-LABEL: test_v2f32_post_imm_st4:
   4255 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
   4256   call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   4257   %tmp = getelementptr float, float* %A, i32 8
   4258   ret float* %tmp
   4259 }
   4260 
   4261 define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
        ; Stores four <2 x float> vectors at %A with the st4 intrinsic, then returns %A advanced by %inc float elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4262 ;CHECK-LABEL: test_v2f32_post_reg_st4:
   4263 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4264   call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   4265   %tmp = getelementptr float, float* %A, i64 %inc
   4266   ret float* %tmp
   4267 }
   4268 
   4269 declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
   4270 
   4271 
   4272 define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
        ; Stores four <2 x double> vectors at %A with the st4 intrinsic, then returns %A + 8 x double (64 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #64. %ptr is unused.
   4273 ;CHECK-LABEL: test_v2f64_post_imm_st4:
   4274 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
   4275   call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   4276   %tmp = getelementptr double, double* %A, i64 8
   4277   ret double* %tmp
   4278 }
   4279 
   4280 define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
        ; Stores four <2 x double> vectors at %A with the st4 intrinsic, then returns %A advanced by %inc double elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4281 ;CHECK-LABEL: test_v2f64_post_reg_st4:
   4282 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4283   call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   4284   %tmp = getelementptr double, double* %A, i64 %inc
   4285   ret double* %tmp
   4286 }
   4287 
   4288 declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>,<2 x double>,  double*)
   4289 
   4290 
   4291 define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
        ; Stores four <1 x double> vectors at %A with the st4 intrinsic, then returns %A + 4 x double (32 bytes, their combined size).
        ; Expected codegen: a post-indexed four-register st1.1d (not an st4 mnemonic) with immediate #32. %ptr is unused.
   4292 ;CHECK-LABEL: test_v1f64_post_imm_st4:
   4293 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
   4294   call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   4295   %tmp = getelementptr double, double* %A, i64 4
   4296   ret double* %tmp
   4297 }
   4298 
   4299 define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
        ; Stores four <1 x double> vectors at %A with the st4 intrinsic, then returns %A advanced by %inc double elements (runtime value).
        ; Expected codegen: a four-register st1.1d (not an st4 mnemonic) with a register post-index. %ptr is unused.
   4300 ;CHECK-LABEL: test_v1f64_post_reg_st4:
   4301 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4302   call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   4303   %tmp = getelementptr double, double* %A, i64 %inc
   4304   ret double* %tmp
   4305 }
   4306 
   4307 declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
   4308 
   4309 
   4310 define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
        ; Stores two <16 x i8> vectors at %A with the st1x2 intrinsic, then returns %A + 32 x i8 (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4311 ;CHECK-LABEL: test_v16i8_post_imm_st1x2:
   4312 ;CHECK: st1.16b { v0, v1 }, [x0], #32
   4313   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   4314   %tmp = getelementptr i8, i8* %A, i32 32
   4315   ret i8* %tmp
   4316 }
   4317 
   4318 define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
        ; Stores two <16 x i8> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i8 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4319 ;CHECK-LABEL: test_v16i8_post_reg_st1x2:
   4320 ;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
   4321   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
   4322   %tmp = getelementptr i8, i8* %A, i64 %inc
   4323   ret i8* %tmp
   4324 }
   4325 
   4326 declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
   4327 
   4328 
   4329 define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
        ; Stores two <8 x i8> vectors at %A with the st1x2 intrinsic, then returns %A + 16 x i8 (16 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #16. %ptr is unused.
   4330 ;CHECK-LABEL: test_v8i8_post_imm_st1x2:
   4331 ;CHECK: st1.8b { v0, v1 }, [x0], #16
   4332   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   4333   %tmp = getelementptr i8, i8* %A, i32 16
   4334   ret i8* %tmp
   4335 }
   4336 
   4337 define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
        ; Stores two <8 x i8> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i8 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4338 ;CHECK-LABEL: test_v8i8_post_reg_st1x2:
   4339 ;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
   4340   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
   4341   %tmp = getelementptr i8, i8* %A, i64 %inc
   4342   ret i8* %tmp
   4343 }
   4344 
   4345 declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
   4346 
   4347 
   4348 define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
        ; Stores two <8 x i16> vectors at %A with the st1x2 intrinsic, then returns %A + 16 x i16 (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4349 ;CHECK-LABEL: test_v8i16_post_imm_st1x2:
   4350 ;CHECK: st1.8h { v0, v1 }, [x0], #32
   4351   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   4352   %tmp = getelementptr i16, i16* %A, i32 16
   4353   ret i16* %tmp
   4354 }
   4355 
   4356 define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
        ; Stores two <8 x i16> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i16 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4357 ;CHECK-LABEL: test_v8i16_post_reg_st1x2:
   4358 ;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
   4359   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
   4360   %tmp = getelementptr i16, i16* %A, i64 %inc
   4361   ret i16* %tmp
   4362 }
   4363 
   4364 declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
   4365 
   4366 
   4367 define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
        ; Stores two <4 x i16> vectors at %A with the st1x2 intrinsic, then returns %A + 8 x i16 (16 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #16. %ptr is unused.
   4368 ;CHECK-LABEL: test_v4i16_post_imm_st1x2:
   4369 ;CHECK: st1.4h { v0, v1 }, [x0], #16
   4370   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   4371   %tmp = getelementptr i16, i16* %A, i32 8
   4372   ret i16* %tmp
   4373 }
   4374 
   4375 define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
        ; Stores two <4 x i16> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i16 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4376 ;CHECK-LABEL: test_v4i16_post_reg_st1x2:
   4377 ;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
   4378   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
   4379   %tmp = getelementptr i16, i16* %A, i64 %inc
   4380   ret i16* %tmp
   4381 }
   4382 
   4383 declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
   4384 
   4385 
   4386 define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
        ; Stores two <4 x i32> vectors at %A with the st1x2 intrinsic, then returns %A + 8 x i32 (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4387 ;CHECK-LABEL: test_v4i32_post_imm_st1x2:
   4388 ;CHECK: st1.4s { v0, v1 }, [x0], #32
   4389   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   4390   %tmp = getelementptr i32, i32* %A, i32 8
   4391   ret i32* %tmp
   4392 }
   4393 
   4394 define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
        ; Stores two <4 x i32> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i32 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4395 ;CHECK-LABEL: test_v4i32_post_reg_st1x2:
   4396 ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
   4397   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
   4398   %tmp = getelementptr i32, i32* %A, i64 %inc
   4399   ret i32* %tmp
   4400 }
   4401 
   4402 declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
   4403 
   4404 
   4405 define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
        ; Stores two <2 x i32> vectors at %A with the st1x2 intrinsic, then returns %A + 4 x i32 (16 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #16. %ptr is unused.
   4406 ;CHECK-LABEL: test_v2i32_post_imm_st1x2:
   4407 ;CHECK: st1.2s { v0, v1 }, [x0], #16
   4408   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   4409   %tmp = getelementptr i32, i32* %A, i32 4
   4410   ret i32* %tmp
   4411 }
   4412 
   4413 define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
        ; Stores two <2 x i32> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i32 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4414 ;CHECK-LABEL: test_v2i32_post_reg_st1x2:
   4415 ;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
   4416   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
   4417   %tmp = getelementptr i32, i32* %A, i64 %inc
   4418   ret i32* %tmp
   4419 }
   4420 
   4421 declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
   4422 
   4423 
   4424 define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
        ; Stores two <2 x i64> vectors at %A with the st1x2 intrinsic, then returns %A + 4 x i64 (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4425 ;CHECK-LABEL: test_v2i64_post_imm_st1x2:
   4426 ;CHECK: st1.2d { v0, v1 }, [x0], #32
   4427   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   4428   %tmp = getelementptr i64, i64* %A, i64 4
   4429   ret i64* %tmp
   4430 }
   4431 
   4432 define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
        ; Stores two <2 x i64> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i64 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4433 ;CHECK-LABEL: test_v2i64_post_reg_st1x2:
   4434 ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
   4435   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
   4436   %tmp = getelementptr i64, i64* %A, i64 %inc
   4437   ret i64* %tmp
   4438 }
   4439 
   4440 declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
   4441 
   4442 
   4443 define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
        ; Stores two <1 x i64> vectors at %A with the st1x2 intrinsic, then returns %A + 2 x i64 (16 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #16. %ptr is unused.
   4444 ;CHECK-LABEL: test_v1i64_post_imm_st1x2:
   4445 ;CHECK: st1.1d { v0, v1 }, [x0], #16
   4446   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   4447   %tmp = getelementptr i64, i64* %A, i64 2
   4448   ret i64* %tmp
   4449 }
   4450 
   4451 define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
        ; Stores two <1 x i64> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc i64 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4452 ;CHECK-LABEL: test_v1i64_post_reg_st1x2:
   4453 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
   4454   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
   4455   %tmp = getelementptr i64, i64* %A, i64 %inc
   4456   ret i64* %tmp
   4457 }
   4458 
   4459 declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
   4460 
   4461 
   4462 define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
        ; Stores two <4 x float> vectors at %A with the st1x2 intrinsic, then returns %A + 8 x float (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4463 ;CHECK-LABEL: test_v4f32_post_imm_st1x2:
   4464 ;CHECK: st1.4s { v0, v1 }, [x0], #32
   4465   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   4466   %tmp = getelementptr float, float* %A, i32 8
   4467   ret float* %tmp
   4468 }
   4469 
   4470 define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
        ; Stores two <4 x float> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc float elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4471 ;CHECK-LABEL: test_v4f32_post_reg_st1x2:
   4472 ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
   4473   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
   4474   %tmp = getelementptr float, float* %A, i64 %inc
   4475   ret float* %tmp
   4476 }
   4477 
   4478 declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
   4479 
   4480 
   4481 define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
        ; Stores two <2 x float> vectors at %A with the st1x2 intrinsic, then returns %A + 4 x float (16 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #16. %ptr is unused.
   4482 ;CHECK-LABEL: test_v2f32_post_imm_st1x2:
   4483 ;CHECK: st1.2s { v0, v1 }, [x0], #16
   4484   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   4485   %tmp = getelementptr float, float* %A, i32 4
   4486   ret float* %tmp
   4487 }
   4488 
   4489 define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
        ; Stores two <2 x float> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc float elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4490 ;CHECK-LABEL: test_v2f32_post_reg_st1x2:
   4491 ;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
   4492   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
   4493   %tmp = getelementptr float, float* %A, i64 %inc
   4494   ret float* %tmp
   4495 }
   4496 
   4497 declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
   4498 
   4499 
   4500 define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
        ; Stores two <2 x double> vectors at %A with the st1x2 intrinsic, then returns %A + 4 x double (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4501 ;CHECK-LABEL: test_v2f64_post_imm_st1x2:
   4502 ;CHECK: st1.2d { v0, v1 }, [x0], #32
   4503   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   4504   %tmp = getelementptr double, double* %A, i64 4
   4505   ret double* %tmp
   4506 }
   4507 
   4508 define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
        ; Stores two <2 x double> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc double elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4509 ;CHECK-LABEL: test_v2f64_post_reg_st1x2:
   4510 ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
   4511   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
   4512   %tmp = getelementptr double, double* %A, i64 %inc
   4513   ret double* %tmp
   4514 }
   4515 
   4516 declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
   4517 
   4518 
   4519 define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
        ; Stores two <1 x double> vectors at %A with the st1x2 intrinsic, then returns %A + 2 x double (16 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #16. %ptr is unused.
   4520 ;CHECK-LABEL: test_v1f64_post_imm_st1x2:
   4521 ;CHECK: st1.1d { v0, v1 }, [x0], #16
   4522   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   4523   %tmp = getelementptr double, double* %A, i64 2
   4524   ret double* %tmp
   4525 }
   4526 
   4527 define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
        ; Stores two <1 x double> vectors at %A with the st1x2 intrinsic, then returns %A advanced by %inc double elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4528 ;CHECK-LABEL: test_v1f64_post_reg_st1x2:
   4529 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
   4530   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
   4531   %tmp = getelementptr double, double* %A, i64 %inc
   4532   ret double* %tmp
   4533 }
   4534 
   4535 declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
   4536 
   4537 
   4538 define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
        ; Stores three <16 x i8> vectors at %A with the st1x3 intrinsic, then returns %A + 48 x i8 (48 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #48. %ptr is unused.
   4539 ;CHECK-LABEL: test_v16i8_post_imm_st1x3:
   4540 ;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
   4541   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   4542   %tmp = getelementptr i8, i8* %A, i32 48
   4543   ret i8* %tmp
   4544 }
   4545 
   4546 define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
        ; Stores three <16 x i8> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i8 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4547 ;CHECK-LABEL: test_v16i8_post_reg_st1x3:
   4548 ;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4549   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
   4550   %tmp = getelementptr i8, i8* %A, i64 %inc
   4551   ret i8* %tmp
   4552 }
   4553 
   4554 declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
   4555 
   4556 
   4557 define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
        ; Stores three <8 x i8> vectors at %A with the st1x3 intrinsic, then returns %A + 24 x i8 (24 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #24. %ptr is unused.
   4558 ;CHECK-LABEL: test_v8i8_post_imm_st1x3:
   4559 ;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
   4560   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   4561   %tmp = getelementptr i8, i8* %A, i32 24
   4562   ret i8* %tmp
   4563 }
   4564 
   4565 define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
        ; Stores three <8 x i8> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i8 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4566 ;CHECK-LABEL: test_v8i8_post_reg_st1x3:
   4567 ;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4568   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
   4569   %tmp = getelementptr i8, i8* %A, i64 %inc
   4570   ret i8* %tmp
   4571 }
   4572 
   4573 declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
   4574 
   4575 
   4576 define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
        ; Stores three <8 x i16> vectors at %A with the st1x3 intrinsic, then returns %A + 24 x i16 (48 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #48. %ptr is unused.
   4577 ;CHECK-LABEL: test_v8i16_post_imm_st1x3:
   4578 ;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
   4579   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   4580   %tmp = getelementptr i16, i16* %A, i32 24
   4581   ret i16* %tmp
   4582 }
   4583 
   4584 define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
        ; Stores three <8 x i16> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i16 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4585 ;CHECK-LABEL: test_v8i16_post_reg_st1x3:
   4586 ;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4587   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
   4588   %tmp = getelementptr i16, i16* %A, i64 %inc
   4589   ret i16* %tmp
   4590 }
   4591 
   4592 declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
   4593 
   4594 
   4595 define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
        ; Stores three <4 x i16> vectors at %A with the st1x3 intrinsic, then returns %A + 12 x i16 (24 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #24. %ptr is unused.
   4596 ;CHECK-LABEL: test_v4i16_post_imm_st1x3:
   4597 ;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
   4598   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   4599   %tmp = getelementptr i16, i16* %A, i32 12
   4600   ret i16* %tmp
   4601 }
   4602 
   4603 define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
        ; Stores three <4 x i16> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i16 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4604 ;CHECK-LABEL: test_v4i16_post_reg_st1x3:
   4605 ;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4606   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
   4607   %tmp = getelementptr i16, i16* %A, i64 %inc
   4608   ret i16* %tmp
   4609 }
   4610 
   4611 declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
   4612 
   4613 
   4614 define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
        ; Stores three <4 x i32> vectors at %A with the st1x3 intrinsic, then returns %A + 12 x i32 (48 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #48. %ptr is unused.
   4615 ;CHECK-LABEL: test_v4i32_post_imm_st1x3:
   4616 ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
   4617   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   4618   %tmp = getelementptr i32, i32* %A, i32 12
   4619   ret i32* %tmp
   4620 }
   4621 
   4622 define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
        ; Stores three <4 x i32> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i32 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4623 ;CHECK-LABEL: test_v4i32_post_reg_st1x3:
   4624 ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4625   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
   4626   %tmp = getelementptr i32, i32* %A, i64 %inc
   4627   ret i32* %tmp
   4628 }
   4629 
   4630 declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
   4631 
   4632 
   4633 define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
        ; Stores three <2 x i32> vectors at %A with the st1x3 intrinsic, then returns %A + 6 x i32 (24 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #24. %ptr is unused.
   4634 ;CHECK-LABEL: test_v2i32_post_imm_st1x3:
   4635 ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
   4636   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   4637   %tmp = getelementptr i32, i32* %A, i32 6
   4638   ret i32* %tmp
   4639 }
   4640 
   4641 define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
        ; Stores three <2 x i32> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i32 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4642 ;CHECK-LABEL: test_v2i32_post_reg_st1x3:
   4643 ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4644   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
   4645   %tmp = getelementptr i32, i32* %A, i64 %inc
   4646   ret i32* %tmp
   4647 }
   4648 
   4649 declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
   4650 
   4651 
   4652 define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
        ; Stores three <2 x i64> vectors at %A with the st1x3 intrinsic, then returns %A + 6 x i64 (48 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #48. %ptr is unused.
   4653 ;CHECK-LABEL: test_v2i64_post_imm_st1x3:
   4654 ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
   4655   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   4656   %tmp = getelementptr i64, i64* %A, i64 6
   4657   ret i64* %tmp
   4658 }
   4659 
   4660 define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
        ; Stores three <2 x i64> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i64 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4661 ;CHECK-LABEL: test_v2i64_post_reg_st1x3:
   4662 ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4663   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
   4664   %tmp = getelementptr i64, i64* %A, i64 %inc
   4665   ret i64* %tmp
   4666 }
   4667 
   4668 declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
   4669 
   4670 
   4671 define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
        ; Stores three <1 x i64> vectors at %A with the st1x3 intrinsic, then returns %A + 3 x i64 (24 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #24. %ptr is unused.
   4672 ;CHECK-LABEL: test_v1i64_post_imm_st1x3:
   4673 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
   4674   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   4675   %tmp = getelementptr i64, i64* %A, i64 3
   4676   ret i64* %tmp
   4677 }
   4678 
   4679 define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
        ; Stores three <1 x i64> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc i64 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4680 ;CHECK-LABEL: test_v1i64_post_reg_st1x3:
   4681 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4682   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
   4683   %tmp = getelementptr i64, i64* %A, i64 %inc
   4684   ret i64* %tmp
   4685 }
   4686 
   4687 declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
   4688 
   4689 
   4690 define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
        ; Stores three <4 x float> vectors at %A with the st1x3 intrinsic, then returns %A + 12 x float (48 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #48. %ptr is unused.
   4691 ;CHECK-LABEL: test_v4f32_post_imm_st1x3:
   4692 ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
   4693   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   4694   %tmp = getelementptr float, float* %A, i32 12
   4695   ret float* %tmp
   4696 }
   4697 
   4698 define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
        ; Stores three <4 x float> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc float elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4699 ;CHECK-LABEL: test_v4f32_post_reg_st1x3:
   4700 ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4701   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
   4702   %tmp = getelementptr float, float* %A, i64 %inc
   4703   ret float* %tmp
   4704 }
   4705 
   4706 declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
   4707 
   4708 
   4709 define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
        ; Stores three <2 x float> vectors at %A with the st1x3 intrinsic, then returns %A + 6 x float (24 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #24. %ptr is unused.
   4710 ;CHECK-LABEL: test_v2f32_post_imm_st1x3:
   4711 ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
   4712   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   4713   %tmp = getelementptr float, float* %A, i32 6
   4714   ret float* %tmp
   4715 }
   4716 
   4717 define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
        ; Stores three <2 x float> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc float elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4718 ;CHECK-LABEL: test_v2f32_post_reg_st1x3:
   4719 ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4720   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
   4721   %tmp = getelementptr float, float* %A, i64 %inc
   4722   ret float* %tmp
   4723 }
   4724 
   4725 declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
   4726 
   4727 
   4728 define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
        ; Stores three <2 x double> vectors at %A with the st1x3 intrinsic, then returns %A + 6 x double (48 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #48. %ptr is unused.
   4729 ;CHECK-LABEL: test_v2f64_post_imm_st1x3:
   4730 ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
   4731   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   4732   %tmp = getelementptr double, double* %A, i64 6
   4733   ret double* %tmp
   4734 }
   4735 
   4736 define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
        ; Stores three <2 x double> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc double elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4737 ;CHECK-LABEL: test_v2f64_post_reg_st1x3:
   4738 ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4739   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
   4740   %tmp = getelementptr double, double* %A, i64 %inc
   4741   ret double* %tmp
   4742 }
   4743 
   4744 declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
   4745 
   4746 
   4747 define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
        ; Stores three <1 x double> vectors at %A with the st1x3 intrinsic, then returns %A + 3 x double (24 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #24. %ptr is unused.
   4748 ;CHECK-LABEL: test_v1f64_post_imm_st1x3:
   4749 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
   4750   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   4751   %tmp = getelementptr double, double* %A, i64 3
   4752   ret double* %tmp
   4753 }
   4754 
   4755 define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
        ; Stores three <1 x double> vectors at %A with the st1x3 intrinsic, then returns %A advanced by %inc double elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4756 ;CHECK-LABEL: test_v1f64_post_reg_st1x3:
   4757 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
   4758   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
   4759   %tmp = getelementptr double, double* %A, i64 %inc
   4760   ret double* %tmp
   4761 }
   4762 
   4763 declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
   4764 
   4765 
   4766 define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
        ; Stores four <16 x i8> vectors at %A with the st1x4 intrinsic, then returns %A + 64 x i8 (64 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #64. %ptr is unused.
   4767 ;CHECK-LABEL: test_v16i8_post_imm_st1x4:
   4768 ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
   4769   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   4770   %tmp = getelementptr i8, i8* %A, i32 64
   4771   ret i8* %tmp
   4772 }
   4773 
   4774 define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
        ; Stores four <16 x i8> vectors at %A with the st1x4 intrinsic, then returns %A advanced by %inc i8 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4775 ;CHECK-LABEL: test_v16i8_post_reg_st1x4:
   4776 ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4777   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
   4778   %tmp = getelementptr i8, i8* %A, i64 %inc
   4779   ret i8* %tmp
   4780 }
   4781 
   4782 declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
   4783 
   4784 
   4785 define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
        ; Stores four <8 x i8> vectors at %A with the st1x4 intrinsic, then returns %A + 32 x i8 (32 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #32. %ptr is unused.
   4786 ;CHECK-LABEL: test_v8i8_post_imm_st1x4:
   4787 ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
   4788   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   4789   %tmp = getelementptr i8, i8* %A, i32 32
   4790   ret i8* %tmp
   4791 }
   4792 
   4793 define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
        ; Stores four <8 x i8> vectors at %A with the st1x4 intrinsic, then returns %A advanced by %inc i8 elements (runtime value).
        ; Expected codegen: the pointer bump folds into the store as a register post-index. %ptr is unused.
   4794 ;CHECK-LABEL: test_v8i8_post_reg_st1x4:
   4795 ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4796   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
   4797   %tmp = getelementptr i8, i8* %A, i64 %inc
   4798   ret i8* %tmp
   4799 }
   4800 
   4801 declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
   4802 
   4803 
   4804 define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
        ; Stores four <8 x i16> vectors at %A with the st1x4 intrinsic, then returns %A + 32 x i16 (64 bytes, their combined size).
        ; Expected codegen: the pointer bump folds into the store as post-index immediate #64. %ptr is unused.
   4805 ;CHECK-LABEL: test_v8i16_post_imm_st1x4:
   4806 ;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
   4807   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   4808   %tmp = getelementptr i16, i16* %A, i32 32
   4809   ret i16* %tmp
   4810 }
   4811 
   4812 define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
   4813 ; CHECK-LABEL: test_v8i16_post_reg_st1x4:
   4814 ; CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4815   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
   4816   %next = getelementptr i16, i16* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4817   ret i16* %next
   4818 }
   4819 
   4820 declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
   4821 
   4822 
   4823 define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
   4824 ; CHECK-LABEL: test_v4i16_post_imm_st1x4:
   4825 ; CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
   4826   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   4827   %next = getelementptr i16, i16* %A, i32 16 ; 16 x 2 bytes = 32, the size of the four <4 x i16> stored
   4828   ret i16* %next
   4829 }
   4830 
   4831 define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
   4832 ; CHECK-LABEL: test_v4i16_post_reg_st1x4:
   4833 ; CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4834   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
   4835   %next = getelementptr i16, i16* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4836   ret i16* %next
   4837 }
   4838 
   4839 declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)
   4840 
   4841 
   4842 define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
   4843 ; CHECK-LABEL: test_v4i32_post_imm_st1x4:
   4844 ; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
   4845   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   4846   %next = getelementptr i32, i32* %A, i32 16 ; 16 x 4 bytes = 64, the size of the four <4 x i32> stored
   4847   ret i32* %next
   4848 }
   4849 
   4850 define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
   4851 ; CHECK-LABEL: test_v4i32_post_reg_st1x4:
   4852 ; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4853   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
   4854   %next = getelementptr i32, i32* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4855   ret i32* %next
   4856 }
   4857 
   4858 declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)
   4859 
   4860 
   4861 define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
   4862 ; CHECK-LABEL: test_v2i32_post_imm_st1x4:
   4863 ; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
   4864   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   4865   %next = getelementptr i32, i32* %A, i32 8 ; 8 x 4 bytes = 32, the size of the four <2 x i32> stored
   4866   ret i32* %next
   4867 }
   4868 
   4869 define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
   4870 ; CHECK-LABEL: test_v2i32_post_reg_st1x4:
   4871 ; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4872   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
   4873   %next = getelementptr i32, i32* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4874   ret i32* %next
   4875 }
   4876 
   4877 declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
   4878 
   4879 
   4880 define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
   4881 ; CHECK-LABEL: test_v2i64_post_imm_st1x4:
   4882 ; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
   4883   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   4884   %next = getelementptr i64, i64* %A, i64 8 ; 8 x 8 bytes = 64, the size of the four <2 x i64> stored
   4885   ret i64* %next
   4886 }
   4887 
   4888 define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
   4889 ; CHECK-LABEL: test_v2i64_post_reg_st1x4:
   4890 ; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4891   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
   4892   %next = getelementptr i64, i64* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4893   ret i64* %next
   4894 }
   4895 
   4896 declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)
   4897 
   4898 
   4899 define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
   4900 ; CHECK-LABEL: test_v1i64_post_imm_st1x4:
   4901 ; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
   4902   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   4903   %next = getelementptr i64, i64* %A, i64 4 ; 4 x 8 bytes = 32, the size of the four <1 x i64> stored
   4904   ret i64* %next
   4905 }
   4906 
   4907 define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
   4908 ; CHECK-LABEL: test_v1i64_post_reg_st1x4:
   4909 ; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4910   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
   4911   %next = getelementptr i64, i64* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4912   ret i64* %next
   4913 }
   4914 
   4915 declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)
   4916 
   4917 
   4918 define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
   4919 ; CHECK-LABEL: test_v4f32_post_imm_st1x4:
   4920 ; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
   4921   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   4922   %next = getelementptr float, float* %A, i32 16 ; 16 x 4 bytes = 64, the size of the four <4 x float> stored
   4923   ret float* %next
   4924 }
   4925 
   4926 define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
   4927 ; CHECK-LABEL: test_v4f32_post_reg_st1x4:
   4928 ; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4929   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
   4930   %next = getelementptr float, float* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4931   ret float* %next
   4932 }
   4933 
   4934 declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
   4935 
   4936 
   4937 define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
   4938 ; CHECK-LABEL: test_v2f32_post_imm_st1x4:
   4939 ; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
   4940   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   4941   %next = getelementptr float, float* %A, i32 8 ; 8 x 4 bytes = 32, the size of the four <2 x float> stored
   4942   ret float* %next
   4943 }
   4944 
   4945 define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
   4946 ; CHECK-LABEL: test_v2f32_post_reg_st1x4:
   4947 ; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4948   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
   4949   %next = getelementptr float, float* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4950   ret float* %next
   4951 }
   4952 
   4953 declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
   4954 
   4955 
   4956 define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
   4957 ; CHECK-LABEL: test_v2f64_post_imm_st1x4:
   4958 ; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
   4959   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   4960   %next = getelementptr double, double* %A, i64 8 ; 8 x 8 bytes = 64, the size of the four <2 x double> stored
   4961   ret double* %next
   4962 }
   4963 
   4964 define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
   4965 ; CHECK-LABEL: test_v2f64_post_reg_st1x4:
   4966 ; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4967   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
   4968   %next = getelementptr double, double* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4969   ret double* %next
   4970 }
   4971 
   4972 declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)
   4973 
   4974 
   4975 define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
   4976 ; CHECK-LABEL: test_v1f64_post_imm_st1x4:
   4977 ; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
   4978   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   4979   %next = getelementptr double, double* %A, i64 4 ; 4 x 8 bytes = 32, the size of the four <1 x double> stored
   4980   ret double* %next
   4981 }
   4982 
   4983 define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
   4984 ; CHECK-LABEL: test_v1f64_post_reg_st1x4:
   4985 ; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
   4986   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
   4987   %next = getelementptr double, double* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   4988   ret double* %next
   4989 }
   4990 
   4991 declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
   4992 
   4993 
   4994 define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) { ; NOTE(review): no FileCheck patterns in this function; presumably it only verifies that llc gets through this input - confirm
   4995   call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) ; passes two i64 constants (0 and 1) ahead of the pointer
   4996   %tmp = getelementptr i8, i8* %A, i32 2 ; %A advanced by 2 bytes
   4997   ret i8* %tmp
   4998 }
   4999 
   5000 define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) { ; NOTE(review): no FileCheck patterns in this function; presumably it only verifies that llc gets through this input - confirm
   5001   call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A) ; passes two i64 constants (0 and 1) ahead of the pointer
   5002   %tmp = getelementptr i8, i8* %A, i64 %inc ; %A advanced by the runtime value %inc (in bytes)
   5003   ret i8* %tmp
   5004 }
   5005 
   5006 declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone ; NOTE(review): returns void yet is marked readnone, so calls to it have no visible effect - confirm this is intentional
   5007 
   5008 
   5009 define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
   5010 ; CHECK-LABEL: test_v16i8_post_imm_st2lane:
   5011 ; CHECK: st2.b { v0, v1 }[0], [x0], #2
   5012   call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   5013   %next = getelementptr i8, i8* %A, i32 2 ; 2 x 1 byte = 2: one lane from each of the two vectors
   5014   ret i8* %next
   5015 }
   5016 
   5017 define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
   5018 ; CHECK-LABEL: test_v16i8_post_reg_st2lane:
   5019 ; CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
   5020   call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
   5021   %next = getelementptr i8, i8* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5022   ret i8* %next
   5023 }
   5024 
   5025 declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)
   5026 
   5027 
   5028 define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
   5029 ; CHECK-LABEL: test_v8i8_post_imm_st2lane:
   5030 ; CHECK: st2.b { v0, v1 }[0], [x0], #2
   5031   call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   5032   %next = getelementptr i8, i8* %A, i32 2 ; 2 x 1 byte = 2: one lane from each of the two vectors
   5033   ret i8* %next
   5034 }
   5035 
   5036 define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
   5037 ; CHECK-LABEL: test_v8i8_post_reg_st2lane:
   5038 ; CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
   5039   call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
   5040   %next = getelementptr i8, i8* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5041   ret i8* %next
   5042 }
   5043 
   5044 declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*)
   5045 
   5046 
   5047 define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
   5048 ; CHECK-LABEL: test_v8i16_post_imm_st2lane:
   5049 ; CHECK: st2.h { v0, v1 }[0], [x0], #4
   5050   call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   5051   %next = getelementptr i16, i16* %A, i32 2 ; 2 x 2 bytes = 4: one lane from each of the two vectors
   5052   ret i16* %next
   5053 }
   5054 
   5055 define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
   5056 ; CHECK-LABEL: test_v8i16_post_reg_st2lane:
   5057 ; CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
   5058   call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
   5059   %next = getelementptr i16, i16* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5060   ret i16* %next
   5061 }
   5062 
   5063 declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*)
   5064 
   5065 
   5066 define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
   5067 ; CHECK-LABEL: test_v4i16_post_imm_st2lane:
   5068 ; CHECK: st2.h { v0, v1 }[0], [x0], #4
   5069   call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   5070   %next = getelementptr i16, i16* %A, i32 2 ; 2 x 2 bytes = 4: one lane from each of the two vectors
   5071   ret i16* %next
   5072 }
   5073 
   5074 define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
   5075 ; CHECK-LABEL: test_v4i16_post_reg_st2lane:
   5076 ; CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
   5077   call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
   5078   %next = getelementptr i16, i16* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5079   ret i16* %next
   5080 }
   5081 
   5082 declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*)
   5083 
   5084 
   5085 define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
   5086 ; CHECK-LABEL: test_v4i32_post_imm_st2lane:
   5087 ; CHECK: st2.s { v0, v1 }[0], [x0], #8
   5088   call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   5089   %next = getelementptr i32, i32* %A, i32 2 ; 2 x 4 bytes = 8: one lane from each of the two vectors
   5090   ret i32* %next
   5091 }
   5092 
   5093 define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
   5094 ; CHECK-LABEL: test_v4i32_post_reg_st2lane:
   5095 ; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   5096   call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
   5097   %next = getelementptr i32, i32* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5098   ret i32* %next
   5099 }
   5100 
   5101 declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
   5102 
   5103 
   5104 define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
   5105 ; CHECK-LABEL: test_v2i32_post_imm_st2lane:
   5106 ; CHECK: st2.s { v0, v1 }[0], [x0], #8
   5107   call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   5108   %next = getelementptr i32, i32* %A, i32 2 ; 2 x 4 bytes = 8: one lane from each of the two vectors
   5109   ret i32* %next
   5110 }
   5111 
   5112 define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
   5113 ; CHECK-LABEL: test_v2i32_post_reg_st2lane:
   5114 ; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   5115   call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
   5116   %next = getelementptr i32, i32* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5117   ret i32* %next
   5118 }
   5119 
   5120 declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*)
   5121 
   5122 
   5123 define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
   5124 ; CHECK-LABEL: test_v2i64_post_imm_st2lane:
   5125 ; CHECK: st2.d { v0, v1 }[0], [x0], #16
   5126   call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   5127   %next = getelementptr i64, i64* %A, i64 2 ; 2 x 8 bytes = 16: one lane from each of the two vectors
   5128   ret i64* %next
   5129 }
   5130 
   5131 define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
   5132 ; CHECK-LABEL: test_v2i64_post_reg_st2lane:
   5133 ; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   5134   call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
   5135   %next = getelementptr i64, i64* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5136   ret i64* %next
   5137 }
   5138 
   5139 declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*)
   5140 
   5141 
   5142 define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
   5143 ; CHECK-LABEL: test_v1i64_post_imm_st2lane:
   5144 ; CHECK: st2.d { v0, v1 }[0], [x0], #16
   5145   call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   5146   %next = getelementptr i64, i64* %A, i64 2 ; 2 x 8 bytes = 16: one lane from each of the two vectors
   5147   ret i64* %next
   5148 }
   5149 
   5150 define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
   5151 ; CHECK-LABEL: test_v1i64_post_reg_st2lane:
   5152 ; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   5153   call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
   5154   %next = getelementptr i64, i64* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5155   ret i64* %next
   5156 }
   5157 
   5158 declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)
   5159 
   5160 
   5161 define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
   5162 ; CHECK-LABEL: test_v4f32_post_imm_st2lane:
   5163 ; CHECK: st2.s { v0, v1 }[0], [x0], #8
   5164   call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   5165   %next = getelementptr float, float* %A, i32 2 ; 2 x 4 bytes = 8: one lane from each of the two vectors
   5166   ret float* %next
   5167 }
   5168 
   5169 define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
   5170 ; CHECK-LABEL: test_v4f32_post_reg_st2lane:
   5171 ; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   5172   call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
   5173   %next = getelementptr float, float* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5174   ret float* %next
   5175 }
   5176 
   5177 declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*)
   5178 
   5179 
   5180 define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
   5181 ; CHECK-LABEL: test_v2f32_post_imm_st2lane:
   5182 ; CHECK: st2.s { v0, v1 }[0], [x0], #8
   5183   call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   5184   %next = getelementptr float, float* %A, i32 2 ; 2 x 4 bytes = 8: one lane from each of the two vectors
   5185   ret float* %next
   5186 }
   5187 
   5188 define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
   5189 ; CHECK-LABEL: test_v2f32_post_reg_st2lane:
   5190 ; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
   5191   call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
   5192   %next = getelementptr float, float* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5193   ret float* %next
   5194 }
   5195 
   5196 declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*)
   5197 
   5198 
   5199 define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
   5200 ; CHECK-LABEL: test_v2f64_post_imm_st2lane:
   5201 ; CHECK: st2.d { v0, v1 }[0], [x0], #16
   5202   call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   5203   %next = getelementptr double, double* %A, i64 2 ; 2 x 8 bytes = 16: one lane from each of the two vectors
   5204   ret double* %next
   5205 }
   5206 
   5207 define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
   5208 ; CHECK-LABEL: test_v2f64_post_reg_st2lane:
   5209 ; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   5210   call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
   5211   %next = getelementptr double, double* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5212   ret double* %next
   5213 }
   5214 
   5215 declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*)
   5216 
   5217 
   5218 define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
   5219 ; CHECK-LABEL: test_v1f64_post_imm_st2lane:
   5220 ; CHECK: st2.d { v0, v1 }[0], [x0], #16
   5221   call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   5222   %next = getelementptr double, double* %A, i64 2 ; 2 x 8 bytes = 16: one lane from each of the two vectors
   5223   ret double* %next
   5224 }
   5225 
   5226 define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
   5227 ; CHECK-LABEL: test_v1f64_post_reg_st2lane:
   5228 ; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
   5229   call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
   5230   %next = getelementptr double, double* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5231   ret double* %next
   5232 }
   5233 
   5234 declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*)
   5235 
   5236 
   5237 define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
   5238 ; CHECK-LABEL: test_v16i8_post_imm_st3lane:
   5239 ; CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
   5240   call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   5241   %next = getelementptr i8, i8* %A, i32 3 ; 3 x 1 byte = 3: one lane from each of the three vectors
   5242   ret i8* %next
   5243 }
   5244 
   5245 define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
   5246 ; CHECK-LABEL: test_v16i8_post_reg_st3lane:
   5247 ; CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5248   call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
   5249   %next = getelementptr i8, i8* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5250   ret i8* %next
   5251 }
   5252 
   5253 declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
   5254 
   5255 
   5256 define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
   5257 ; CHECK-LABEL: test_v8i8_post_imm_st3lane:
   5258 ; CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
   5259   call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   5260   %next = getelementptr i8, i8* %A, i32 3 ; 3 x 1 byte = 3: one lane from each of the three vectors
   5261   ret i8* %next
   5262 }
   5263 
   5264 define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
   5265 ; CHECK-LABEL: test_v8i8_post_reg_st3lane:
   5266 ; CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5267   call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
   5268   %next = getelementptr i8, i8* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5269   ret i8* %next
   5270 }
   5271 
   5272 declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
   5273 
   5274 
   5275 define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
   5276 ; CHECK-LABEL: test_v8i16_post_imm_st3lane:
   5277 ; CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
   5278   call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   5279   %next = getelementptr i16, i16* %A, i32 3 ; 3 x 2 bytes = 6: one lane from each of the three vectors
   5280   ret i16* %next
   5281 }
   5282 
   5283 define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
   5284 ; CHECK-LABEL: test_v8i16_post_reg_st3lane:
   5285 ; CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5286   call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
   5287   %next = getelementptr i16, i16* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5288   ret i16* %next
   5289 }
   5290 
   5291 declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
   5292 
   5293 
   5294 define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
   5295 ; CHECK-LABEL: test_v4i16_post_imm_st3lane:
   5296 ; CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
   5297   call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   5298   %next = getelementptr i16, i16* %A, i32 3 ; 3 x 2 bytes = 6: one lane from each of the three vectors
   5299   ret i16* %next
   5300 }
   5301 
   5302 define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
   5303 ; CHECK-LABEL: test_v4i16_post_reg_st3lane:
   5304 ; CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5305   call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
   5306   %next = getelementptr i16, i16* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5307   ret i16* %next
   5308 }
   5309 
   5310 declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
   5311 
   5312 
   5313 define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
   5314 ; CHECK-LABEL: test_v4i32_post_imm_st3lane:
   5315 ; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
   5316   call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   5317   %next = getelementptr i32, i32* %A, i32 3 ; 3 x 4 bytes = 12: one lane from each of the three vectors
   5318   ret i32* %next
   5319 }
   5320 
   5321 define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
   5322 ; CHECK-LABEL: test_v4i32_post_reg_st3lane:
   5323 ; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5324   call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
   5325   %next = getelementptr i32, i32* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5326   ret i32* %next
   5327 }
   5328 
   5329 declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
   5330 
   5331 
   5332 define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
   5333 ; CHECK-LABEL: test_v2i32_post_imm_st3lane:
   5334 ; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
   5335   call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   5336   %next = getelementptr i32, i32* %A, i32 3 ; 3 x 4 bytes = 12: one lane from each of the three vectors
   5337   ret i32* %next
   5338 }
   5339 
   5340 define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
   5341 ; CHECK-LABEL: test_v2i32_post_reg_st3lane:
   5342 ; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5343   call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
   5344   %next = getelementptr i32, i32* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5345   ret i32* %next
   5346 }
   5347 
   5348 declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
   5349 
   5350 
   5351 define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
   5352 ; CHECK-LABEL: test_v2i64_post_imm_st3lane:
   5353 ; CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
   5354   call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   5355   %next = getelementptr i64, i64* %A, i64 3 ; 3 x 8 bytes = 24: one lane from each of the three vectors
   5356   ret i64* %next
   5357 }
   5358 
   5359 define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
   5360 ; CHECK-LABEL: test_v2i64_post_reg_st3lane:
   5361 ; CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5362   call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
   5363   %next = getelementptr i64, i64* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5364   ret i64* %next
   5365 }
   5366 
   5367 declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
   5368 
   5369 
   5370 define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
   5371 ; CHECK-LABEL: test_v1i64_post_imm_st3lane:
   5372 ; CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
   5373   call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   5374   %next = getelementptr i64, i64* %A, i64 3 ; 3 x 8 bytes = 24: one lane from each of the three vectors
   5375   ret i64* %next
   5376 }
   5377 
   5378 define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
   5379 ; CHECK-LABEL: test_v1i64_post_reg_st3lane:
   5380 ; CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5381   call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
   5382   %next = getelementptr i64, i64* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5383   ret i64* %next
   5384 }
   5385 
   5386 declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
   5387 
   5388 
   5389 define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
   5390 ; CHECK-LABEL: test_v4f32_post_imm_st3lane:
   5391 ; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
   5392   call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   5393   %next = getelementptr float, float* %A, i32 3 ; 3 x 4 bytes = 12: one lane from each of the three vectors
   5394   ret float* %next
   5395 }
   5396 
   5397 define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
   5398 ; CHECK-LABEL: test_v4f32_post_reg_st3lane:
   5399 ; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5400   call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
   5401   %next = getelementptr float, float* %A, i64 %inc ; step is the runtime value %inc, so the post-index comes from a register
   5402   ret float* %next
   5403 }
   5404 
   5405 declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*)
   5406 
   5407 
   5408 define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
   5409 ; CHECK-LABEL: test_v2f32_post_imm_st3lane:
   5410 ; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
   5411   call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   5412   %next = getelementptr float, float* %A, i32 3 ; 3 x 4 bytes = 12: one lane from each of the three vectors
   5413   ret float* %next
   5414 }
   5415 
   5416 define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C and %D through %A via the
        ; st3lane intrinsic and returns %A advanced by %inc float elements. The
        ; expected assembly is a single st3.s on lane 0 that post-indexes x0 by an
        ; x register (any register number matches). %ptr is not used by this test.
   5417 ;CHECK-LABEL: test_v2f32_post_reg_st3lane:
   5418 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5419   call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
   5420   %tmp = getelementptr float, float* %A, i64 %inc
   5421   ret float* %tmp
   5422 }
   5423 
   5424 declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*)
   5425 
   5426 
   5427 define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
        ; Stores lane 0 of %B, %C and %D through %A via the st3lane intrinsic and
        ; returns %A advanced by 3 double elements (3 * 8 = 24 bytes). The expected
        ; assembly is a single st3.d on lane 0 with post-index immediate #24.
        ; %ptr is not used by this test.
   5428 ;CHECK-LABEL: test_v2f64_post_imm_st3lane:
   5429 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
   5430   call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   5431   %tmp = getelementptr double, double* %A, i64 3
   5432   ret double* %tmp
   5433 }
   5434 
   5435 define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
        ; Stores lane 0 of %B, %C and %D through %A via the st3lane intrinsic and
        ; returns %A advanced by %inc double elements. The expected assembly is a
        ; single st3.d on lane 0 that post-indexes x0 by an x register (any register
        ; number matches). %ptr is not used by this test.
   5436 ;CHECK-LABEL: test_v2f64_post_reg_st3lane:
   5437 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5438   call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
   5439   %tmp = getelementptr double, double* %A, i64 %inc
   5440   ret double* %tmp
   5441 }
   5442 
   5443 declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*)
   5444 
   5445 
   5446 define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
        ; Stores lane 0 of the single-element vectors %B, %C and %D through %A via
        ; the st3lane intrinsic and returns %A advanced by 3 double elements
        ; (24 bytes). The expected assembly is a single st3.d on lane 0 with
        ; post-index immediate #24. %ptr is not used by this test.
   5447 ;CHECK-LABEL: test_v1f64_post_imm_st3lane:
   5448 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
   5449   call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   5450   %tmp = getelementptr double, double* %A, i64 3
   5451   ret double* %tmp
   5452 }
   5453 
   5454 define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
        ; Stores lane 0 of the single-element vectors %B, %C and %D through %A via
        ; the st3lane intrinsic and returns %A advanced by %inc double elements.
        ; The expected assembly is a single st3.d on lane 0 that post-indexes x0 by
        ; an x register (any register number matches). %ptr is not used by this test.
   5455 ;CHECK-LABEL: test_v1f64_post_reg_st3lane:
   5456 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
   5457   call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
   5458   %tmp = getelementptr double, double* %A, i64 %inc
   5459   ret double* %tmp
   5460 }
   5461 
   5462 declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*)
   5463 
   5464 
   5465 define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by 4 i8 elements (4 bytes). The expected assembly
        ; is a single st4.b on lane 0 with post-index immediate #4.
        ; %ptr is not used by this test.
   5466 ;CHECK-LABEL: test_v16i8_post_imm_st4lane:
   5467 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
   5468   call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   5469   %tmp = getelementptr i8, i8* %A, i32 4
   5470   ret i8* %tmp
   5471 }
   5472 
   5473 define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by %inc i8 elements. The expected assembly is a
        ; single st4.b on lane 0 that post-indexes x0 by an x register (any register
        ; number matches). %ptr is not used by this test.
   5474 ;CHECK-LABEL: test_v16i8_post_reg_st4lane:
   5475 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5476   call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
   5477   %tmp = getelementptr i8, i8* %A, i64 %inc
   5478   ret i8* %tmp
   5479 }
   5480 
   5481 declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)
   5482 
   5483 
   5484 define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by 4 i8 elements (4 bytes). The
        ; expected assembly is a single st4.b on lane 0 with post-index immediate #4.
        ; %ptr is not used by this test.
   5485 ;CHECK-LABEL: test_v8i8_post_imm_st4lane:
   5486 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
   5487   call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   5488   %tmp = getelementptr i8, i8* %A, i32 4
   5489   ret i8* %tmp
   5490 }
   5491 
   5492 define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by %inc i8 elements. The expected
        ; assembly is a single st4.b on lane 0 that post-indexes x0 by an x register
        ; (any register number matches). %ptr is not used by this test.
   5493 ;CHECK-LABEL: test_v8i8_post_reg_st4lane:
   5494 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5495   call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
   5496   %tmp = getelementptr i8, i8* %A, i64 %inc
   5497   ret i8* %tmp
   5498 }
   5499 
   5500 declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
   5501 
   5502 
   5503 define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by 4 i16 elements (4 * 2 = 8 bytes). The expected
        ; assembly is a single st4.h on lane 0 with post-index immediate #8.
        ; %ptr is not used by this test.
   5504 ;CHECK-LABEL: test_v8i16_post_imm_st4lane:
   5505 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
   5506   call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   5507   %tmp = getelementptr i16, i16* %A, i32 4
   5508   ret i16* %tmp
   5509 }
   5510 
   5511 define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by %inc i16 elements. The expected assembly is a
        ; single st4.h on lane 0 that post-indexes x0 by an x register (any register
        ; number matches). %ptr is not used by this test.
   5512 ;CHECK-LABEL: test_v8i16_post_reg_st4lane:
   5513 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5514   call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
   5515   %tmp = getelementptr i16, i16* %A, i64 %inc
   5516   ret i16* %tmp
   5517 }
   5518 
   5519 declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
   5520 
   5521 
   5522 define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by 4 i16 elements (8 bytes). The
        ; expected assembly is a single st4.h on lane 0 with post-index immediate #8.
        ; %ptr is not used by this test.
   5523 ;CHECK-LABEL: test_v4i16_post_imm_st4lane:
   5524 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
   5525   call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   5526   %tmp = getelementptr i16, i16* %A, i32 4
   5527   ret i16* %tmp
   5528 }
   5529 
   5530 define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by %inc i16 elements. The
        ; expected assembly is a single st4.h on lane 0 that post-indexes x0 by an
        ; x register (any register number matches). %ptr is not used by this test.
   5531 ;CHECK-LABEL: test_v4i16_post_reg_st4lane:
   5532 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5533   call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
   5534   %tmp = getelementptr i16, i16* %A, i64 %inc
   5535   ret i16* %tmp
   5536 }
   5537 
   5538 declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
   5539 
   5540 
   5541 define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by 4 i32 elements (4 * 4 = 16 bytes). The expected
        ; assembly is a single st4.s on lane 0 with post-index immediate #16.
        ; %ptr is not used by this test.
   5542 ;CHECK-LABEL: test_v4i32_post_imm_st4lane:
   5543 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
   5544   call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   5545   %tmp = getelementptr i32, i32* %A, i32 4
   5546   ret i32* %tmp
   5547 }
   5548 
   5549 define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by %inc i32 elements. The expected assembly is a
        ; single st4.s on lane 0 that post-indexes x0 by an x register (any register
        ; number matches). %ptr is not used by this test.
   5550 ;CHECK-LABEL: test_v4i32_post_reg_st4lane:
   5551 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5552   call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
   5553   %tmp = getelementptr i32, i32* %A, i64 %inc
   5554   ret i32* %tmp
   5555 }
   5556 
   5557 declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
   5558 
   5559 
   5560 define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by 4 i32 elements (16 bytes).
        ; The expected assembly is a single st4.s on lane 0 with post-index
        ; immediate #16. %ptr is not used by this test.
   5561 ;CHECK-LABEL: test_v2i32_post_imm_st4lane:
   5562 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
   5563   call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   5564   %tmp = getelementptr i32, i32* %A, i32 4
   5565   ret i32* %tmp
   5566 }
   5567 
   5568 define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by %inc i32 elements. The
        ; expected assembly is a single st4.s on lane 0 that post-indexes x0 by an
        ; x register (any register number matches). %ptr is not used by this test.
   5569 ;CHECK-LABEL: test_v2i32_post_reg_st4lane:
   5570 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5571   call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
   5572   %tmp = getelementptr i32, i32* %A, i64 %inc
   5573   ret i32* %tmp
   5574 }
   5575 
   5576 declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
   5577 
   5578 
   5579 define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by 4 i64 elements (4 * 8 = 32 bytes). The expected
        ; assembly is a single st4.d on lane 0 with post-index immediate #32.
        ; %ptr is not used by this test.
   5580 ;CHECK-LABEL: test_v2i64_post_imm_st4lane:
   5581 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
   5582   call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   5583   %tmp = getelementptr i64, i64* %A, i64 4
   5584   ret i64* %tmp
   5585 }
   5586 
   5587 define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by %inc i64 elements. The expected assembly is a
        ; single st4.d on lane 0 that post-indexes x0 by an x register (any register
        ; number matches). %ptr is not used by this test.
   5588 ;CHECK-LABEL: test_v2i64_post_reg_st4lane:
   5589 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5590   call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
   5591   %tmp = getelementptr i64, i64* %A, i64 %inc
   5592   ret i64* %tmp
   5593 }
   5594 
   5595 declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
   5596 
   5597 
   5598 define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
        ; Stores lane 0 of the single-element vectors %B, %C, %D and %E through %A
        ; via the st4lane intrinsic and returns %A advanced by 4 i64 elements
        ; (32 bytes). The expected assembly is a single st4.d on lane 0 with
        ; post-index immediate #32. %ptr is not used by this test.
   5599 ;CHECK-LABEL: test_v1i64_post_imm_st4lane:
   5600 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
   5601   call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   5602   %tmp = getelementptr i64, i64* %A, i64 4
   5603   ret i64* %tmp
   5604 }
   5605 
   5606 define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
        ; Stores lane 0 of the single-element vectors %B, %C, %D and %E through %A
        ; via the st4lane intrinsic and returns %A advanced by %inc i64 elements.
        ; The expected assembly is a single st4.d on lane 0 that post-indexes x0 by
        ; an x register (any register number matches). %ptr is not used by this test.
   5607 ;CHECK-LABEL: test_v1i64_post_reg_st4lane:
   5608 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5609   call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
   5610   %tmp = getelementptr i64, i64* %A, i64 %inc
   5611   ret i64* %tmp
   5612 }
   5613 
   5614 declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
   5615 
   5616 
   5617 define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by 4 float elements (4 * 4 = 16 bytes). The
        ; expected assembly is a single st4.s on lane 0 with post-index
        ; immediate #16. %ptr is not used by this test.
   5618 ;CHECK-LABEL: test_v4f32_post_imm_st4lane:
   5619 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
   5620   call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   5621   %tmp = getelementptr float, float* %A, i32 4
   5622   ret float* %tmp
   5623 }
   5624 
   5625 define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by %inc float elements. The expected assembly is
        ; a single st4.s on lane 0 that post-indexes x0 by an x register (any
        ; register number matches). %ptr is not used by this test.
   5626 ;CHECK-LABEL: test_v4f32_post_reg_st4lane:
   5627 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5628   call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
   5629   %tmp = getelementptr float, float* %A, i64 %inc
   5630   ret float* %tmp
   5631 }
   5632 
   5633 declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*)
   5634 
   5635 
   5636 define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by 4 float elements (16 bytes).
        ; The expected assembly is a single st4.s on lane 0 with post-index
        ; immediate #16. %ptr is not used by this test.
   5637 ;CHECK-LABEL: test_v2f32_post_imm_st4lane:
   5638 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
   5639   call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   5640   %tmp = getelementptr float, float* %A, i32 4
   5641   ret float* %tmp
   5642 }
   5643 
   5644 define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
        ; Stores lane 0 of the 64-bit vectors %B, %C, %D and %E through %A via the
        ; st4lane intrinsic and returns %A advanced by %inc float elements. The
        ; expected assembly is a single st4.s on lane 0 that post-indexes x0 by an
        ; x register (any register number matches). %ptr is not used by this test.
   5645 ;CHECK-LABEL: test_v2f32_post_reg_st4lane:
   5646 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5647   call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
   5648   %tmp = getelementptr float, float* %A, i64 %inc
   5649   ret float* %tmp
   5650 }
   5651 
   5652 declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*)
   5653 
   5654 
   5655 define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by 4 double elements (4 * 8 = 32 bytes). The
        ; expected assembly is a single st4.d on lane 0 with post-index
        ; immediate #32. %ptr is not used by this test.
   5656 ;CHECK-LABEL: test_v2f64_post_imm_st4lane:
   5657 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
   5658   call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   5659   %tmp = getelementptr double, double* %A, i64 4
   5660   ret double* %tmp
   5661 }
   5662 
   5663 define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
        ; Stores lane 0 of %B, %C, %D and %E through %A via the st4lane intrinsic
        ; and returns %A advanced by %inc double elements. The expected assembly is
        ; a single st4.d on lane 0 that post-indexes x0 by an x register (any
        ; register number matches). %ptr is not used by this test.
   5664 ;CHECK-LABEL: test_v2f64_post_reg_st4lane:
   5665 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5666   call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
   5667   %tmp = getelementptr double, double* %A, i64 %inc
   5668   ret double* %tmp
   5669 }
   5670 
   5671 declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*)
   5672 
   5673 
   5674 define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
        ; Stores lane 0 of the single-element vectors %B, %C, %D and %E through %A
        ; via the st4lane intrinsic and returns %A advanced by 4 double elements
        ; (32 bytes). The expected assembly is a single st4.d on lane 0 with
        ; post-index immediate #32. %ptr is not used by this test.
   5675 ;CHECK-LABEL: test_v1f64_post_imm_st4lane:
   5676 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
   5677   call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   5678   %tmp = getelementptr double, double* %A, i64 4
   5679   ret double* %tmp
   5680 }
   5681 
   5682 define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
        ; Stores lane 0 of the single-element vectors %B, %C, %D and %E through %A
        ; via the st4lane intrinsic and returns %A advanced by %inc double elements.
        ; The expected assembly is a single st4.d on lane 0 that post-indexes x0 by
        ; an x register (any register number matches). %ptr is not used by this test.
   5683 ;CHECK-LABEL: test_v1f64_post_reg_st4lane:
   5684 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
   5685   call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
   5686   %tmp = getelementptr double, double* %A, i64 %inc
   5687   ret double* %tmp
   5688 }
   5689 
   5690 declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*)
   5691 
   5692 define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
        ; Loads one i8 from %bar and inserts it into all 16 lanes of an undef vector
        ; (a splat), then stores %bar advanced by 1 element (1 byte) to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.16b into v0
        ; with post-index immediate #1.
   5693 ; CHECK-LABEL: test_v16i8_post_imm_ld1r:
   5694 ; CHECK: ld1r.16b { v0 }, [x0], #1
   5695   %tmp1 = load i8, i8* %bar
   5696   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   5697   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
   5698   %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
   5699   %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
   5700   %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
   5701   %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
   5702   %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
   5703   %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
   5704   %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
   5705   %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
   5706   %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
   5707   %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
   5708   %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
   5709   %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
   5710   %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
   5711   %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
   5712   %tmp18 = getelementptr i8, i8* %bar, i64 1
   5713   store i8* %tmp18, i8** %ptr
   5714   ret <16 x i8> %tmp17
   5715 }
   5716 
   5717 define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
        ; Loads one i8 from %bar and inserts it into all 16 lanes of an undef vector
        ; (a splat), then stores %bar advanced by %inc elements to %ptr and returns
        ; the vector. The expected assembly is a single ld1r.16b into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5718 ; CHECK-LABEL: test_v16i8_post_reg_ld1r:
   5719 ; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
   5720   %tmp1 = load i8, i8* %bar
   5721   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   5722   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
   5723   %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
   5724   %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
   5725   %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
   5726   %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
   5727   %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
   5728   %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
   5729   %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
   5730   %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
   5731   %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
   5732   %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
   5733   %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
   5734   %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
   5735   %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
   5736   %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
   5737   %tmp18 = getelementptr i8, i8* %bar, i64 %inc
   5738   store i8* %tmp18, i8** %ptr
   5739   ret <16 x i8> %tmp17
   5740 }
   5741 
   5742 define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
        ; Loads one i8 from %bar and inserts it into all 8 lanes of an undef vector
        ; (a splat), then stores %bar advanced by 1 element (1 byte) to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.8b into v0
        ; with post-index immediate #1.
   5743 ; CHECK-LABEL: test_v8i8_post_imm_ld1r:
   5744 ; CHECK: ld1r.8b { v0 }, [x0], #1
   5745   %tmp1 = load i8, i8* %bar
   5746   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   5747   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
   5748   %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
   5749   %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
   5750   %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
   5751   %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
   5752   %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
   5753   %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
   5754   %tmp10 = getelementptr i8, i8* %bar, i64 1
   5755   store i8* %tmp10, i8** %ptr
   5756   ret <8 x i8> %tmp9
   5757 }
   5758 
   5759 define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
        ; Loads one i8 from %bar and inserts it into all 8 lanes of an undef vector
        ; (a splat), then stores %bar advanced by %inc elements to %ptr and returns
        ; the vector. The expected assembly is a single ld1r.8b into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5760 ; CHECK-LABEL: test_v8i8_post_reg_ld1r:
   5761 ; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
   5762   %tmp1 = load i8, i8* %bar
   5763   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   5764   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
   5765   %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
   5766   %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
   5767   %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
   5768   %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
   5769   %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
   5770   %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
   5771   %tmp10 = getelementptr i8, i8* %bar, i64 %inc
   5772   store i8* %tmp10, i8** %ptr
   5773   ret <8 x i8> %tmp9
   5774 }
   5775 
   5776 define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
        ; Loads one i16 from %bar and inserts it into all 8 lanes of an undef vector
        ; (a splat), then stores %bar advanced by 1 element (2 bytes) to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.8h into v0
        ; with post-index immediate #2.
   5777 ; CHECK-LABEL: test_v8i16_post_imm_ld1r:
   5778 ; CHECK: ld1r.8h { v0 }, [x0], #2
   5779   %tmp1 = load i16, i16* %bar
   5780   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   5781   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
   5782   %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
   5783   %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
   5784   %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
   5785   %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
   5786   %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
   5787   %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
   5788   %tmp10 = getelementptr i16, i16* %bar, i64 1
   5789   store i16* %tmp10, i16** %ptr
   5790   ret <8 x i16> %tmp9
   5791 }
   5792 
   5793 define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
        ; Loads one i16 from %bar and inserts it into all 8 lanes of an undef vector
        ; (a splat), then stores %bar advanced by %inc elements to %ptr and returns
        ; the vector. The expected assembly is a single ld1r.8h into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5794 ; CHECK-LABEL: test_v8i16_post_reg_ld1r:
   5795 ; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
   5796   %tmp1 = load i16, i16* %bar
   5797   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   5798   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
   5799   %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
   5800   %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
   5801   %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
   5802   %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
   5803   %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
   5804   %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
   5805   %tmp10 = getelementptr i16, i16* %bar, i64 %inc
   5806   store i16* %tmp10, i16** %ptr
   5807   ret <8 x i16> %tmp9
   5808 }
   5809 
   5810 define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
        ; Loads one i16 from %bar and inserts it into all 4 lanes of an undef vector
        ; (a splat), then stores %bar advanced by 1 element (2 bytes) to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.4h into v0
        ; with post-index immediate #2.
   5811 ; CHECK-LABEL: test_v4i16_post_imm_ld1r:
   5812 ; CHECK: ld1r.4h { v0 }, [x0], #2
   5813   %tmp1 = load i16, i16* %bar
   5814   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   5815   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
   5816   %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
   5817   %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
   5818   %tmp6 = getelementptr i16, i16* %bar, i64 1
   5819   store i16* %tmp6, i16** %ptr
   5820   ret <4 x i16> %tmp5
   5821 }
   5822 
   5823 define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
        ; Loads one i16 from %bar and inserts it into all 4 lanes of an undef vector
        ; (a splat), then stores %bar advanced by %inc elements to %ptr and returns
        ; the vector. The expected assembly is a single ld1r.4h into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5824 ; CHECK-LABEL: test_v4i16_post_reg_ld1r:
   5825 ; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
   5826   %tmp1 = load i16, i16* %bar
   5827   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   5828   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
   5829   %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
   5830   %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
   5831   %tmp6 = getelementptr i16, i16* %bar, i64 %inc
   5832   store i16* %tmp6, i16** %ptr
   5833   ret <4 x i16> %tmp5
   5834 }
   5835 
   5836 define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
        ; Loads one i32 from %bar and inserts it into all 4 lanes of an undef vector
        ; (a splat), then stores %bar advanced by 1 element (4 bytes) to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.4s into v0
        ; with post-index immediate #4.
   5837 ; CHECK-LABEL: test_v4i32_post_imm_ld1r:
   5838 ; CHECK: ld1r.4s { v0 }, [x0], #4
   5839   %tmp1 = load i32, i32* %bar
   5840   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   5841   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
   5842   %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
   5843   %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
   5844   %tmp6 = getelementptr i32, i32* %bar, i64 1
   5845   store i32* %tmp6, i32** %ptr
   5846   ret <4 x i32> %tmp5
   5847 }
   5848 
   5849 define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
        ; Loads one i32 from %bar and inserts it into all 4 lanes of an undef vector
        ; (a splat), then stores %bar advanced by %inc elements to %ptr and returns
        ; the vector. The expected assembly is a single ld1r.4s into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5850 ; CHECK-LABEL: test_v4i32_post_reg_ld1r:
   5851 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
   5852   %tmp1 = load i32, i32* %bar
   5853   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   5854   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
   5855   %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
   5856   %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
   5857   %tmp6 = getelementptr i32, i32* %bar, i64 %inc
   5858   store i32* %tmp6, i32** %ptr
   5859   ret <4 x i32> %tmp5
   5860 }
   5861 
   5862 define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
        ; Loads one i32 from %bar and inserts it into both lanes of an undef vector
        ; (a splat), then stores %bar advanced by 1 element (4 bytes) to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.2s into v0
        ; with post-index immediate #4.
   5863 ; CHECK-LABEL: test_v2i32_post_imm_ld1r:
   5864 ; CHECK: ld1r.2s { v0 }, [x0], #4
   5865   %tmp1 = load i32, i32* %bar
   5866   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   5867   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
   5868   %tmp4 = getelementptr i32, i32* %bar, i64 1
   5869   store i32* %tmp4, i32** %ptr
   5870   ret <2 x i32> %tmp3
   5871 }
   5872 
   5873 define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
        ; Loads one i32 from %bar and inserts it into both lanes of an undef vector
        ; (a splat), then stores %bar advanced by %inc elements to %ptr and returns
        ; the vector. The expected assembly is a single ld1r.2s into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5874 ; CHECK-LABEL: test_v2i32_post_reg_ld1r:
   5875 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
   5876   %tmp1 = load i32, i32* %bar
   5877   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   5878   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
   5879   %tmp4 = getelementptr i32, i32* %bar, i64 %inc
   5880   store i32* %tmp4, i32** %ptr
   5881   ret <2 x i32> %tmp3
   5882 }
   5883 
   5884 define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
        ; Loads one i64 from %bar and inserts it into both lanes of an undef vector
        ; (a splat), then stores %bar advanced by 1 element (8 bytes) to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.2d into v0
        ; with post-index immediate #8.
   5885 ; CHECK-LABEL: test_v2i64_post_imm_ld1r:
   5886 ; CHECK: ld1r.2d { v0 }, [x0], #8
   5887   %tmp1 = load i64, i64* %bar
   5888   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   5889   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
   5890   %tmp4 = getelementptr i64, i64* %bar, i64 1
   5891   store i64* %tmp4, i64** %ptr
   5892   ret <2 x i64> %tmp3
   5893 }
   5894 
   5895 define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
        ; Loads one i64 from %bar and inserts it into both lanes of an undef vector
        ; (a splat), then stores %bar advanced by %inc elements to %ptr and returns
        ; the vector. The expected assembly is a single ld1r.2d into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5896 ; CHECK-LABEL: test_v2i64_post_reg_ld1r:
   5897 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
   5898   %tmp1 = load i64, i64* %bar
   5899   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   5900   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
   5901   %tmp4 = getelementptr i64, i64* %bar, i64 %inc
   5902   store i64* %tmp4, i64** %ptr
   5903   ret <2 x i64> %tmp3
   5904 }
   5905 
   5906 define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
        ; Loads one float from %bar and inserts it into all 4 lanes of an undef
        ; vector (a splat), then stores %bar advanced by 1 element (4 bytes) to %ptr
        ; and returns the vector. The expected assembly is a single ld1r.4s into v0
        ; with post-index immediate #4.
   5907 ; CHECK-LABEL: test_v4f32_post_imm_ld1r:
   5908 ; CHECK: ld1r.4s { v0 }, [x0], #4
   5909   %tmp1 = load float, float* %bar
   5910   %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
   5911   %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
   5912   %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
   5913   %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
   5914   %tmp6 = getelementptr float, float* %bar, i64 1
   5915   store float* %tmp6, float** %ptr
   5916   ret <4 x float> %tmp5
   5917 }
   5918 
   5919 define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
        ; Loads one float from %bar and inserts it into all 4 lanes of an undef
        ; vector (a splat), then stores %bar advanced by %inc elements to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.4s into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5920 ; CHECK-LABEL: test_v4f32_post_reg_ld1r:
   5921 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
   5922   %tmp1 = load float, float* %bar
   5923   %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
   5924   %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
   5925   %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
   5926   %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
   5927   %tmp6 = getelementptr float, float* %bar, i64 %inc
   5928   store float* %tmp6, float** %ptr
   5929   ret <4 x float> %tmp5
   5930 }
   5931 
   5932 define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
        ; Loads one float from %bar and inserts it into both lanes of an undef
        ; vector (a splat), then stores %bar advanced by 1 element (4 bytes) to %ptr
        ; and returns the vector. The expected assembly is a single ld1r.2s into v0
        ; with post-index immediate #4.
   5933 ; CHECK-LABEL: test_v2f32_post_imm_ld1r:
   5934 ; CHECK: ld1r.2s { v0 }, [x0], #4
   5935   %tmp1 = load float, float* %bar
   5936   %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
   5937   %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
   5938   %tmp4 = getelementptr float, float* %bar, i64 1
   5939   store float* %tmp4, float** %ptr
   5940   ret <2 x float> %tmp3
   5941 }
   5942 
   5943 define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
        ; Loads one float from %bar and inserts it into both lanes of an undef
        ; vector (a splat), then stores %bar advanced by %inc elements to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.2s into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5944 ; CHECK-LABEL: test_v2f32_post_reg_ld1r:
   5945 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
   5946   %tmp1 = load float, float* %bar
   5947   %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
   5948   %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
   5949   %tmp4 = getelementptr float, float* %bar, i64 %inc
   5950   store float* %tmp4, float** %ptr
   5951   ret <2 x float> %tmp3
   5952 }
   5953 
   5954 define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
        ; Loads one double from %bar and inserts it into both lanes of an undef
        ; vector (a splat), then stores %bar advanced by 1 element (8 bytes) to %ptr
        ; and returns the vector. The expected assembly is a single ld1r.2d into v0
        ; with post-index immediate #8.
   5955 ; CHECK-LABEL: test_v2f64_post_imm_ld1r:
   5956 ; CHECK: ld1r.2d { v0 }, [x0], #8
   5957   %tmp1 = load double, double* %bar
   5958   %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
   5959   %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
   5960   %tmp4 = getelementptr double, double* %bar, i64 1
   5961   store double* %tmp4, double** %ptr
   5962   ret <2 x double> %tmp3
   5963 }
   5964 
   5965 define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
        ; Loads one double from %bar and inserts it into both lanes of an undef
        ; vector (a splat), then stores %bar advanced by %inc elements to %ptr and
        ; returns the vector. The expected assembly is a single ld1r.2d into v0 that
        ; post-indexes x0 by an x register (any register number matches).
   5966 ; CHECK-LABEL: test_v2f64_post_reg_ld1r:
   5967 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
   5968   %tmp1 = load double, double* %bar
   5969   %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
   5970   %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
   5971   %tmp4 = getelementptr double, double* %bar, i64 %inc
   5972   store double* %tmp4, double** %ptr
   5973   ret <2 x double> %tmp3
   5974 }
   5975 
   5976 define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
        ; Loads one i8 from %bar and inserts it into lane 1 of %A, then stores %bar
        ; advanced by 1 element (1 byte) to %ptr and returns the updated vector.
        ; The expected assembly is a single ld1.b into lane 1 of v0 with post-index
        ; immediate #1.
   5977 ; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
   5978 ; CHECK: ld1.b { v0 }[1], [x0], #1
   5979   %tmp1 = load i8, i8* %bar
   5980   %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
   5981   %tmp3 = getelementptr i8, i8* %bar, i64 1
   5982   store i8* %tmp3, i8** %ptr
   5983   ret <16 x i8> %tmp2
   5984 }
   5985 
   5986 define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
   5987 ; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
   5988 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
        ; Insert the i8 loaded from %bar into lane 1 of %A and write %bar advanced
        ; by %inc elements to %ptr; expected as a register post-indexed lane load.
   5989   %elt = load i8, i8* %bar
   5990   %vec = insertelement <16 x i8> %A, i8 %elt, i32 1
   5991   %next = getelementptr i8, i8* %bar, i64 %inc
   5992   store i8* %next, i8** %ptr
   5993   ret <16 x i8> %vec
   5994 }
   5995 
   5996 define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
   5997 ; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
   5998 ; CHECK: ld1.b { v0 }[1], [x0], #1
        ; 64-bit vector variant: insert the i8 loaded from %bar into lane 1 of %A
        ; and write %bar + 1 element to %ptr; expected post-indexed by #1.
   5999   %elt = load i8, i8* %bar
   6000   %vec = insertelement <8 x i8> %A, i8 %elt, i32 1
   6001   %next = getelementptr i8, i8* %bar, i64 1
   6002   store i8* %next, i8** %ptr
   6003   ret <8 x i8> %vec
   6004 }
   6005 
   6006 define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
   6007 ; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
   6008 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
        ; 64-bit vector variant: insert the i8 loaded from %bar into lane 1 of %A
        ; and write %bar advanced by %inc elements to %ptr (register post-index).
   6009   %elt = load i8, i8* %bar
   6010   %vec = insertelement <8 x i8> %A, i8 %elt, i32 1
   6011   %next = getelementptr i8, i8* %bar, i64 %inc
   6012   store i8* %next, i8** %ptr
   6013   ret <8 x i8> %vec
   6014 }
   6015 
   6016 define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
   6017 ; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
   6018 ; CHECK: ld1.h { v0 }[1], [x0], #2
        ; Insert the i16 loaded from %bar into lane 1 of %A and write %bar + 1
        ; element to %ptr; expected as a lane load post-indexed by #2.
   6019   %elt = load i16, i16* %bar
   6020   %vec = insertelement <8 x i16> %A, i16 %elt, i32 1
   6021   %next = getelementptr i16, i16* %bar, i64 1
   6022   store i16* %next, i16** %ptr
   6023   ret <8 x i16> %vec
   6024 }
   6025 
   6026 define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
   6027 ; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
   6028 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
        ; Insert the i16 loaded from %bar into lane 1 of %A and write %bar advanced
        ; by %inc elements to %ptr; expected as a register post-indexed lane load.
   6029   %elt = load i16, i16* %bar
   6030   %vec = insertelement <8 x i16> %A, i16 %elt, i32 1
   6031   %next = getelementptr i16, i16* %bar, i64 %inc
   6032   store i16* %next, i16** %ptr
   6033   ret <8 x i16> %vec
   6034 }
   6035 
   6036 define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
   6037 ; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
   6038 ; CHECK: ld1.h { v0 }[1], [x0], #2
        ; 64-bit vector variant: insert the i16 loaded from %bar into lane 1 of %A
        ; and write %bar + 1 element to %ptr; expected post-indexed by #2.
   6039   %elt = load i16, i16* %bar
   6040   %vec = insertelement <4 x i16> %A, i16 %elt, i32 1
   6041   %next = getelementptr i16, i16* %bar, i64 1
   6042   store i16* %next, i16** %ptr
   6043   ret <4 x i16> %vec
   6044 }
   6045 
   6046 define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
   6047 ; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
   6048 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
        ; 64-bit vector variant: insert the i16 loaded from %bar into lane 1 of %A
        ; and write %bar advanced by %inc elements to %ptr (register post-index).
   6049   %elt = load i16, i16* %bar
   6050   %vec = insertelement <4 x i16> %A, i16 %elt, i32 1
   6051   %next = getelementptr i16, i16* %bar, i64 %inc
   6052   store i16* %next, i16** %ptr
   6053   ret <4 x i16> %vec
   6054 }
   6055 
   6056 define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
   6057 ; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
   6058 ; CHECK: ld1.s { v0 }[1], [x0], #4
        ; Insert the i32 loaded from %bar into lane 1 of %A and write %bar + 1
        ; element to %ptr; expected as a lane load post-indexed by #4.
   6059   %elt = load i32, i32* %bar
   6060   %vec = insertelement <4 x i32> %A, i32 %elt, i32 1
   6061   %next = getelementptr i32, i32* %bar, i64 1
   6062   store i32* %next, i32** %ptr
   6063   ret <4 x i32> %vec
   6064 }
   6065 
   6066 define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
   6067 ; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
   6068 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
        ; Insert the i32 loaded from %bar into lane 1 of %A and write %bar advanced
        ; by %inc elements to %ptr; expected as a register post-indexed lane load.
   6069   %elt = load i32, i32* %bar
   6070   %vec = insertelement <4 x i32> %A, i32 %elt, i32 1
   6071   %next = getelementptr i32, i32* %bar, i64 %inc
   6072   store i32* %next, i32** %ptr
   6073   ret <4 x i32> %vec
   6074 }
   6075 
   6076 define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
   6077 ; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
   6078 ; CHECK: ld1.s { v0 }[1], [x0], #4
        ; 64-bit vector variant: insert the i32 loaded from %bar into lane 1 of %A
        ; and write %bar + 1 element to %ptr; expected post-indexed by #4.
   6079   %elt = load i32, i32* %bar
   6080   %vec = insertelement <2 x i32> %A, i32 %elt, i32 1
   6081   %next = getelementptr i32, i32* %bar, i64 1
   6082   store i32* %next, i32** %ptr
   6083   ret <2 x i32> %vec
   6084 }
   6085 
   6086 define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
   6087 ; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
   6088 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
        ; 64-bit vector variant: insert the i32 loaded from %bar into lane 1 of %A
        ; and write %bar advanced by %inc elements to %ptr (register post-index).
   6089   %elt = load i32, i32* %bar
   6090   %vec = insertelement <2 x i32> %A, i32 %elt, i32 1
   6091   %next = getelementptr i32, i32* %bar, i64 %inc
   6092   store i32* %next, i32** %ptr
   6093   ret <2 x i32> %vec
   6094 }
   6095 
   6096 define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
   6097 ; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
   6098 ; CHECK: ld1.d { v0 }[1], [x0], #8
        ; Insert the i64 loaded from %bar into lane 1 of %A and write %bar + 1
        ; element to %ptr; expected as a lane load post-indexed by #8.
   6099   %elt = load i64, i64* %bar
   6100   %vec = insertelement <2 x i64> %A, i64 %elt, i32 1
   6101   %next = getelementptr i64, i64* %bar, i64 1
   6102   store i64* %next, i64** %ptr
   6103   ret <2 x i64> %vec
   6104 }
   6105 
   6106 define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
   6107 ; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
   6108 ; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
        ; Insert the i64 loaded from %bar into lane 1 of %A and write %bar advanced
        ; by %inc elements to %ptr; expected as a register post-indexed lane load.
   6109   %elt = load i64, i64* %bar
   6110   %vec = insertelement <2 x i64> %A, i64 %elt, i32 1
   6111   %next = getelementptr i64, i64* %bar, i64 %inc
   6112   store i64* %next, i64** %ptr
   6113   ret <2 x i64> %vec
   6114 }
   6115 
   6116 define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
   6117 ; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
   6118 ; CHECK: ld1.s { v0 }[1], [x0], #4
        ; Insert the float loaded from %bar into lane 1 of %A and write %bar + 1
        ; element to %ptr; expected as a lane load post-indexed by #4.
   6119   %elt = load float, float* %bar
   6120   %vec = insertelement <4 x float> %A, float %elt, i32 1
   6121   %next = getelementptr float, float* %bar, i64 1
   6122   store float* %next, float** %ptr
   6123   ret <4 x float> %vec
   6124 }
   6125 
   6126 define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
   6127 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
   6128 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
        ; Insert the float loaded from %bar into lane 1 of %A and write %bar advanced
        ; by %inc elements to %ptr; expected as a register post-indexed lane load.
   6129   %elt = load float, float* %bar
   6130   %vec = insertelement <4 x float> %A, float %elt, i32 1
   6131   %next = getelementptr float, float* %bar, i64 %inc
   6132   store float* %next, float** %ptr
   6133   ret <4 x float> %vec
   6134 }
   6135 
   6136 define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
   6137 ; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
   6138 ; CHECK: ld1.s { v0 }[1], [x0], #4
        ; 64-bit vector variant: insert the float loaded from %bar into lane 1 of %A
        ; and write %bar + 1 element to %ptr; expected post-indexed by #4.
   6139   %elt = load float, float* %bar
   6140   %vec = insertelement <2 x float> %A, float %elt, i32 1
   6141   %next = getelementptr float, float* %bar, i64 1
   6142   store float* %next, float** %ptr
   6143   ret <2 x float> %vec
   6144 }
   6145 
   6146 define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
   6147 ; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
   6148 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
        ; 64-bit vector variant: insert the float loaded from %bar into lane 1 of %A
        ; and write %bar advanced by %inc elements to %ptr (register post-index).
   6149   %elt = load float, float* %bar
   6150   %vec = insertelement <2 x float> %A, float %elt, i32 1
   6151   %next = getelementptr float, float* %bar, i64 %inc
   6152   store float* %next, float** %ptr
   6153   ret <2 x float> %vec
   6154 }
   6155 
   6156 define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
   6157 ; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
   6158 ; CHECK: ld1.d { v0 }[1], [x0], #8
        ; Insert the double loaded from %bar into lane 1 of %A and write %bar + 1
        ; element to %ptr; expected as a lane load post-indexed by #8.
   6159   %elt = load double, double* %bar
   6160   %vec = insertelement <2 x double> %A, double %elt, i32 1
   6161   %next = getelementptr double, double* %bar, i64 1
   6162   store double* %next, double** %ptr
   6163   ret <2 x double> %vec
   6164 }
   6165 
   6166 define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
   6167 ; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
   6168 ; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
        ; Insert the double loaded from %bar into lane 1 of %A and write %bar advanced
        ; by %inc elements to %ptr; expected as a register post-indexed lane load.
   6169   %elt = load double, double* %bar
   6170   %vec = insertelement <2 x double> %A, double %elt, i32 1
   6171   %next = getelementptr double, double* %bar, i64 %inc
   6172   store double* %next, double** %ptr
   6173   ret <2 x double> %vec
   6174 }
   6175 
   6176 ; Check for dependencies between the vector and the scalar load.
        ; The scalar load from %bar is followed by a store to %dep_ptr_1, and the
        ; vector operand is only loaded from %dep_ptr_2 after that store. The
        ; expected output keeps a separate ldr + ins and a plain add for the pointer
        ; update rather than a post-indexed ld1 lane load (presumably because the
        ; store may alias the loads -- confirm against the LD1LANEpost combine).
        ; The POST capture accepts multi-digit register numbers, like every other
        ; register pattern in this file, so the test does not depend on the
        ; allocator picking x0-x9 for the incremented pointer.
   6177 define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2) {
   6178 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
   6179 ; CHECK: BB#0:
   6180 ; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
   6181 ; CHECK-NEXT: movi.2d v0, #0000000000000000
   6182 ; CHECK-NEXT: str q0, [x3]
   6183 ; CHECK-NEXT: ldr q0, [x4]
   6184 ; CHECK-NEXT: ins.s v0[1], v[[LD]][0]
   6185 ; CHECK-NEXT: add [[POST:x[0-9]+]], x0, x2, lsl #2
   6186 ; CHECK-NEXT: str [[POST]], [x1]
   6187 ; CHECK-NEXT: ret
   6188   %tmp1 = load float, float* %bar
   6189   store <4 x float> zeroinitializer, <4 x float>* %dep_ptr_1, align 16
   6190   %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
   6191   %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
   6192   %tmp3 = getelementptr float, float* %bar, i64 %inc
   6193   store float* %tmp3, float** %ptr
   6194   ret <4 x float> %tmp2
   6195 }
   6196 
   6197 ; Make sure that we test the narrow V64 code path.
   6198 ; The tests above don't, because there, 64-bit insert_vector_elt nodes will be
   6199 ; widened to 128-bit before the LD1LANEpost combine has the chance to run,
   6200 ; making it avoid narrow vector types.
   6201 ; One way to trick that combine into running early is to force the vector ops
   6202 ; legalizer to run.  We achieve that using the ctpop.
   6203 ; PR23265
   6204 define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
   6205 ; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
   6206 ; CHECK: ld1.h  { v0 }[1], [x0], x{{[0-9]+}}
        ; Same lane-1 insert and %inc pointer update as the plain v4i16 register
        ; test, followed by a load / ctpop / store round trip through %d.
   6207   %elt = load i16, i16* %bar
   6208   %vec = insertelement <4 x i16> %A, i16 %elt, i32 1
   6209   %next = getelementptr i16, i16* %bar, i64 %inc
   6210   store i16* %next, i16** %ptr
   6211   %pop_in = load <2 x i32>, <2 x i32>* %d
   6212   %pop_out = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %pop_in)
   6213   store <2 x i32> %pop_out, <2 x i32>* %d
   6214   ret <4 x i16> %vec
   6215 }
   6216 
   6217 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) ; called by test_v4i16_post_reg_ld1lane_forced_narrow
   6218 
   6219 ; CHECK-LABEL: test_ld1lane_build:
   6220 ; CHECK-DAG: ld1.s { [[REG0:v[0-9]+]] }[0], [x0]
   6221 ; CHECK-DAG: ld1.s { [[REG0]] }[1], [x1]
   6222 ; CHECK-DAG: ld1.s { [[REG1:v[0-9]+]] }[0], [x2]
   6223 ; CHECK-DAG: ld1.s { [[REG1]] }[1], [x3]
   6224 ; CHECK: sub.2s v[[REGNUM2:[0-9]+]], [[REG0]], [[REG1]]
   6225 ; CHECK-NEXT: str d[[REGNUM2]], [x4]
   6226 ; CHECK-NEXT: ret
        ; Builds two <2 x i32> vectors from four scalar loads, subtracts them and
        ; stores the result to %out. The lane 1 patterns above reuse the register
        ; captured by the matching lane 0 pattern (instead of re-defining it), so
        ; both lanes of each vector must be loaded into the same register, and that
        ; register must be the one fed to the sub.
   6227 define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) {
   6228   %load0 = load i32, i32* %ptr0, align 4
   6229   %load1 = load i32, i32* %ptr1, align 4
   6230   %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
   6231   %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1
   6232
   6233   %load2 = load i32, i32* %ptr2, align 4
   6234   %load3 = load i32, i32* %ptr3, align 4
   6235   %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0
   6236   %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1
   6237
   6238   %sub = sub nsw <2 x i32> %vec0_1, %vec1_1
   6239   store <2 x i32> %sub, <2 x i32>* %out, align 16
   6240   ret void
   6241 }
   6242