Home | History | Annotate | Download | only in AArch64
      1 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
      2 
      3 define void @st1lane_16b(<16 x i8> %A, i8* %D) {
        ; Lane 1 of a <16 x i8> stored directly to %D; expects a st1.b lane store.
      4 ; CHECK-LABEL: st1lane_16b:
      5 ; CHECK: st1.b
      6   %tmp = extractelement <16 x i8> %A, i32 1
      7   store i8 %tmp, i8* %D
      8   ret void
      9 }
     10 
     11 define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
        ; Lane 1 of a <16 x i8> stored at %D + %offset; expects the address add, then a st1.b lane store.
     12 ; CHECK-LABEL: st1lane_ro_16b:
     13 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     14 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
     15   %ptr = getelementptr i8, i8* %D, i64 %offset
     16   %tmp = extractelement <16 x i8> %A, i32 1
     17   store i8 %tmp, i8* %ptr
     18   ret void
     19 }
     20 
     21 define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
        ; Lane 0 of a <16 x i8> stored at %D + %offset; expects the address add, then st1.b of lane 0.
     22 ; CHECK-LABEL: st1lane0_ro_16b:
     23 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     24 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
     25   %ptr = getelementptr i8, i8* %D, i64 %offset
     26   %tmp = extractelement <16 x i8> %A, i32 0
     27   store i8 %tmp, i8* %ptr
     28   ret void
     29 }
     30 
     31 define void @st1lane_8h(<8 x i16> %A, i16* %D) {
        ; Lane 1 of a <8 x i16> stored directly to %D; expects a st1.h lane store.
     32 ; CHECK-LABEL: st1lane_8h:
     33 ; CHECK: st1.h
     34   %tmp = extractelement <8 x i16> %A, i32 1
     35   store i16 %tmp, i16* %D
     36   ret void
     37 }
     38 
     39 define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
        ; Lane 1 of a <8 x i16> stored at element %offset of %D; expects the address add, then a st1.h lane store.
     40 ; CHECK-LABEL: st1lane_ro_8h:
     41 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     42 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
     43   %ptr = getelementptr i16, i16* %D, i64 %offset
     44   %tmp = extractelement <8 x i16> %A, i32 1
     45   store i16 %tmp, i16* %ptr
     46   ret void
     47 }
     48 
     49 define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
        ; Lane 0 of a <8 x i16> stored at element %offset of %D; expects one scalar str of h0 with a scaled register offset.
     50 ; CHECK-LABEL: st1lane0_ro_8h:
     51 ; CHECK: str h0, [x0, x1, lsl #1]
     52   %ptr = getelementptr i16, i16* %D, i64 %offset
     53   %tmp = extractelement <8 x i16> %A, i32 0
     54   store i16 %tmp, i16* %ptr
     55   ret void
     56 }
     57 
     58 define void @st1lane_4s(<4 x i32> %A, i32* %D) {
        ; Lane 1 of a <4 x i32> stored directly to %D; expects a st1.s lane store.
     59 ; CHECK-LABEL: st1lane_4s:
     60 ; CHECK: st1.s
     61   %tmp = extractelement <4 x i32> %A, i32 1
     62   store i32 %tmp, i32* %D
     63   ret void
     64 }
     65 
     66 define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
        ; Lane 1 of a <4 x i32> stored at element %offset of %D; expects the address add, then a st1.s lane store.
     67 ; CHECK-LABEL: st1lane_ro_4s:
     68 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     69 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
     70   %ptr = getelementptr i32, i32* %D, i64 %offset
     71   %tmp = extractelement <4 x i32> %A, i32 1
     72   store i32 %tmp, i32* %ptr
     73   ret void
     74 }
     75 
     76 define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
        ; Lane 0 of a <4 x i32> stored at element %offset of %D; expects one scalar str of s0 with a scaled register offset.
     77 ; CHECK-LABEL: st1lane0_ro_4s:
     78 ; CHECK: str s0, [x0, x1, lsl #2]
     79   %ptr = getelementptr i32, i32* %D, i64 %offset
     80   %tmp = extractelement <4 x i32> %A, i32 0
     81   store i32 %tmp, i32* %ptr
     82   ret void
     83 }
     84 
     85 define void @st1lane_4s_float(<4 x float> %A, float* %D) {
        ; Lane 1 of a <4 x float> stored directly to %D; expects a st1.s lane store.
     86 ; CHECK-LABEL: st1lane_4s_float:
     87 ; CHECK: st1.s
     88   %tmp = extractelement <4 x float> %A, i32 1
     89   store float %tmp, float* %D
     90   ret void
     91 }
     92 
     93 define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
        ; Lane 1 of a <4 x float> stored at element %offset of %D; expects the address add, then a st1.s lane store.
     94 ; CHECK-LABEL: st1lane_ro_4s_float:
     95 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     96 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
     97   %ptr = getelementptr float, float* %D, i64 %offset
     98   %tmp = extractelement <4 x float> %A, i32 1
     99   store float %tmp, float* %ptr
    100   ret void
    101 }
    102 
    103 define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
        ; Lane 0 of a <4 x float> stored at element %offset of %D; expects one scalar str of s0 with a scaled register offset.
    104 ; CHECK-LABEL: st1lane0_ro_4s_float:
    105 ; CHECK: str s0, [x0, x1, lsl #2]
    106   %ptr = getelementptr float, float* %D, i64 %offset
    107   %tmp = extractelement <4 x float> %A, i32 0
    108   store float %tmp, float* %ptr
    109   ret void
    110 }
    111 
    112 define void @st1lane_2d(<2 x i64> %A, i64* %D) {
        ; Lane 1 of a <2 x i64> stored directly to %D; expects a st1.d lane store.
    113 ; CHECK-LABEL: st1lane_2d:
    114 ; CHECK: st1.d
    115   %tmp = extractelement <2 x i64> %A, i32 1
    116   store i64 %tmp, i64* %D
    117   ret void
    118 }
    119 
    120 define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
        ; Lane 1 of a <2 x i64> stored at element %offset of %D; expects the address add, then a st1.d lane store.
    121 ; CHECK-LABEL: st1lane_ro_2d:
    122 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    123 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
    124   %ptr = getelementptr i64, i64* %D, i64 %offset
    125   %tmp = extractelement <2 x i64> %A, i32 1
    126   store i64 %tmp, i64* %ptr
    127   ret void
    128 }
    129 
    130 define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
        ; Lane 0 of a <2 x i64> stored at element %offset of %D; expects one scalar str of d0 with a scaled register offset.
    131 ; CHECK-LABEL: st1lane0_ro_2d:
    132 ; CHECK: str d0, [x0, x1, lsl #3]
    133   %ptr = getelementptr i64, i64* %D, i64 %offset
    134   %tmp = extractelement <2 x i64> %A, i32 0
    135   store i64 %tmp, i64* %ptr
    136   ret void
    137 }
    138 
    139 define void @st1lane_2d_double(<2 x double> %A, double* %D) {
        ; Lane 1 of a <2 x double> stored directly to %D; expects a st1.d lane store.
    140 ; CHECK-LABEL: st1lane_2d_double:
    141 ; CHECK: st1.d
    142   %tmp = extractelement <2 x double> %A, i32 1
    143   store double %tmp, double* %D
    144   ret void
    145 }
    146 
    147 define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
        ; Lane 1 of a <2 x double> stored at element %offset of %D; expects the address add, then a st1.d lane store.
    148 ; CHECK-LABEL: st1lane_ro_2d_double:
    149 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    150 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
    151   %ptr = getelementptr double, double* %D, i64 %offset
    152   %tmp = extractelement <2 x double> %A, i32 1
    153   store double %tmp, double* %ptr
    154   ret void
    155 }
    156 
    157 define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
        ; Lane 0 of a <2 x double> stored at element %offset of %D; expects one scalar str of d0 with a scaled register offset.
    158 ; CHECK-LABEL: st1lane0_ro_2d_double:
    159 ; CHECK: str d0, [x0, x1, lsl #3]
    160   %ptr = getelementptr double, double* %D, i64 %offset
    161   %tmp = extractelement <2 x double> %A, i32 0
    162   store double %tmp, double* %ptr
    163   ret void
    164 }
    165 
    166 define void @st1lane_8b(<8 x i8> %A, i8* %D) {
        ; Lane 1 of a <8 x i8> stored directly to %D; expects a st1.b lane store.
    167 ; CHECK-LABEL: st1lane_8b:
    168 ; CHECK: st1.b
    169   %tmp = extractelement <8 x i8> %A, i32 1
    170   store i8 %tmp, i8* %D
    171   ret void
    172 }
    173 
    174 define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
        ; Lane 1 of a <8 x i8> stored at %D + %offset; expects the address add, then a st1.b lane store.
    175 ; CHECK-LABEL: st1lane_ro_8b:
    176 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    177 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
    178   %ptr = getelementptr i8, i8* %D, i64 %offset
    179   %tmp = extractelement <8 x i8> %A, i32 1
    180   store i8 %tmp, i8* %ptr
    181   ret void
    182 }
    183 
    184 define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
        ; Lane 0 of a <8 x i8> stored at %D + %offset; expects the address add, then st1.b of lane 0.
    185 ; CHECK-LABEL: st1lane0_ro_8b:
    186 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    187 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
    188   %ptr = getelementptr i8, i8* %D, i64 %offset
    189   %tmp = extractelement <8 x i8> %A, i32 0
    190   store i8 %tmp, i8* %ptr
    191   ret void
    192 }
    193 
    194 define void @st1lane_4h(<4 x i16> %A, i16* %D) {
        ; Lane 1 of a <4 x i16> stored directly to %D; expects a st1.h lane store.
    195 ; CHECK-LABEL: st1lane_4h:
    196 ; CHECK: st1.h
    197   %tmp = extractelement <4 x i16> %A, i32 1
    198   store i16 %tmp, i16* %D
    199   ret void
    200 }
    201 
    202 define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
        ; Lane 1 of a <4 x i16> stored at element %offset of %D; expects the address add, then a st1.h lane store.
    203 ; CHECK-LABEL: st1lane_ro_4h:
    204 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    205 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
    206   %ptr = getelementptr i16, i16* %D, i64 %offset
    207   %tmp = extractelement <4 x i16> %A, i32 1
    208   store i16 %tmp, i16* %ptr
    209   ret void
    210 }
    211 
    212 define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
        ; Lane 0 of a <4 x i16> stored at element %offset of %D; expects one scalar str of h0 with a scaled register offset.
    213 ; CHECK-LABEL: st1lane0_ro_4h:
    214 ; CHECK: str h0, [x0, x1, lsl #1]
    215   %ptr = getelementptr i16, i16* %D, i64 %offset
    216   %tmp = extractelement <4 x i16> %A, i32 0
    217   store i16 %tmp, i16* %ptr
    218   ret void
    219 }
    220 
    221 define void @st1lane_2s(<2 x i32> %A, i32* %D) {
        ; Lane 1 of a <2 x i32> stored directly to %D; expects a st1.s lane store.
    222 ; CHECK-LABEL: st1lane_2s:
    223 ; CHECK: st1.s
    224   %tmp = extractelement <2 x i32> %A, i32 1
    225   store i32 %tmp, i32* %D
    226   ret void
    227 }
    228 
    229 define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
        ; Lane 1 of a <2 x i32> stored at element %offset of %D; expects the address add, then a st1.s lane store.
    230 ; CHECK-LABEL: st1lane_ro_2s:
    231 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    232 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
    233   %ptr = getelementptr i32, i32* %D, i64 %offset
    234   %tmp = extractelement <2 x i32> %A, i32 1
    235   store i32 %tmp, i32* %ptr
    236   ret void
    237 }
    238 
    239 define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
        ; Lane 0 of a <2 x i32> stored at element %offset of %D; expects one scalar str of s0 with a scaled register offset.
    240 ; CHECK-LABEL: st1lane0_ro_2s:
    241 ; CHECK: str s0, [x0, x1, lsl #2]
    242   %ptr = getelementptr i32, i32* %D, i64 %offset
    243   %tmp = extractelement <2 x i32> %A, i32 0
    244   store i32 %tmp, i32* %ptr
    245   ret void
    246 }
    247 
    248 define void @st1lane_2s_float(<2 x float> %A, float* %D) {
        ; Lane 1 of a <2 x float> stored directly to %D; expects a st1.s lane store.
    249 ; CHECK-LABEL: st1lane_2s_float:
    250 ; CHECK: st1.s
    251   %tmp = extractelement <2 x float> %A, i32 1
    252   store float %tmp, float* %D
    253   ret void
    254 }
    255 
    256 define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
        ; Lane 1 of a <2 x float> stored at element %offset of %D; expects the address add, then a st1.s lane store.
    257 ; CHECK-LABEL: st1lane_ro_2s_float:
    258 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    259 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
    260   %ptr = getelementptr float, float* %D, i64 %offset
    261   %tmp = extractelement <2 x float> %A, i32 1
    262   store float %tmp, float* %ptr
    263   ret void
    264 }
    265 
    266 define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
        ; Lane 0 of a <2 x float> stored at element %offset of %D; expects one scalar str of s0 with a scaled register offset.
    267 ; CHECK-LABEL: st1lane0_ro_2s_float:
    268 ; CHECK: str s0, [x0, x1, lsl #2]
    269   %ptr = getelementptr float, float* %D, i64 %offset
    270   %tmp = extractelement <2 x float> %A, i32 0
    271   store float %tmp, float* %ptr
    272   ret void
    273 }
    274 
    275 define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
        ; st2lane intrinsic on lane 1 of two <16 x i8> vectors; expects a st2.b instruction.
    276 ; CHECK-LABEL: st2lane_16b:
    277 ; CHECK: st2.b
    278   call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
    279   ret void
    280 }
    281 
    282 define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
        ; st2lane intrinsic on lane 1 of two <8 x i16> vectors; expects a st2.h instruction.
    283 ; CHECK-LABEL: st2lane_8h:
    284 ; CHECK: st2.h
    285   call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
    286   ret void
    287 }
    288 
    289 define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
        ; st2lane intrinsic on lane 1 of two <4 x i32> vectors; expects a st2.s instruction.
    290 ; CHECK-LABEL: st2lane_4s:
    291 ; CHECK: st2.s
    292   call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
    293   ret void
    294 }
    295 
    296 define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
        ; st2lane intrinsic on lane 1 of two <2 x i64> vectors; expects a st2.d instruction.
    297 ; CHECK-LABEL: st2lane_2d:
    298 ; CHECK: st2.d
    299   call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
    300   ret void
    301 }
    302 
    303 declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind
    304 declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind
    305 declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind
    306 declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind
        ; The st2lane intrinsics above store through their pointer operand, so no read-only/read-none memory attribute is declared.
    307 
    308 define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
        ; st3lane intrinsic on lane 1 of three <16 x i8> vectors; expects a st3.b instruction.
    309 ; CHECK-LABEL: st3lane_16b:
    310 ; CHECK: st3.b
    311   call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
    312   ret void
    313 }
    314 
    315 define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
        ; st3lane intrinsic on lane 1 of three <8 x i16> vectors; expects a st3.h instruction.
    316 ; CHECK-LABEL: st3lane_8h:
    317 ; CHECK: st3.h
    318   call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
    319   ret void
    320 }
    321 
    322 define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
        ; st3lane intrinsic on lane 1 of three <4 x i32> vectors; expects a st3.s instruction.
    323 ; CHECK-LABEL: st3lane_4s:
    324 ; CHECK: st3.s
    325   call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
    326   ret void
    327 }
    328 
    329 define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
        ; st3lane intrinsic on lane 1 of three <2 x i64> vectors; expects a st3.d instruction.
    330 ; CHECK-LABEL: st3lane_2d:
    331 ; CHECK: st3.d
    332   call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
    333   ret void
    334 }
    335 
    336 declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
    337 declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
    338 declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
    339 declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind
        ; The st3lane intrinsics above store through their pointer operand, so no read-only/read-none memory attribute is declared.
    340 
    341 define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
        ; st4lane intrinsic on lane 1 of four <16 x i8> vectors; expects a st4.b instruction.
    342 ; CHECK-LABEL: st4lane_16b:
    343 ; CHECK: st4.b
    344   call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
    345   ret void
    346 }
    347 
    348 define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
        ; st4lane intrinsic on lane 1 of four <8 x i16> vectors; expects a st4.h instruction.
    349 ; CHECK-LABEL: st4lane_8h:
    350 ; CHECK: st4.h
    351   call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
    352   ret void
    353 }
    354 
    355 define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
        ; st4lane intrinsic on lane 1 of four <4 x i32> vectors; expects a st4.s instruction.
    356 ; CHECK-LABEL: st4lane_4s:
    357 ; CHECK: st4.s
    358   call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
    359   ret void
    360 }
    361 
    362 define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
        ; st4lane intrinsic on lane 1 of four <2 x i64> vectors; expects a st4.d instruction.
    363 ; CHECK-LABEL: st4lane_2d:
    364 ; CHECK: st4.d
    365   call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
    366   ret void
    367 }
    368 
    369 declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
    370 declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
    371 declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
    372 declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind
        ; The st4lane intrinsics above store through their pointer operand, so no read-only/read-none memory attribute is declared.
    373 
    374 
    375 define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
        ; st2 intrinsic on two <8 x i8> vectors; expects st2.8b.
    376 ; CHECK-LABEL: st2_8b:
    377 ; CHECK: st2.8b
    378 	call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
    379 	ret void
    380 }
    381 
    382 define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
        ; st3 intrinsic on three <8 x i8> vectors; expects st3.8b.
    383 ; CHECK-LABEL: st3_8b:
    384 ; CHECK: st3.8b
    385 	call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
    386 	ret void
    387 }
    388 
    389 define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
        ; st4 intrinsic on four <8 x i8> vectors; expects st4.8b.
    390 ; CHECK-LABEL: st4_8b:
    391 ; CHECK: st4.8b
    392 	call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
    393 	ret void
    394 }
    395 
    396 declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
    397 declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
    398 declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
        ; The v8i8 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    399 
    400 define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
        ; st2 intrinsic on two <16 x i8> vectors; expects st2.16b.
    401 ; CHECK-LABEL: st2_16b:
    402 ; CHECK: st2.16b
    403 	call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
    404 	ret void
    405 }
    406 
    407 define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
        ; st3 intrinsic on three <16 x i8> vectors; expects st3.16b.
    408 ; CHECK-LABEL: st3_16b:
    409 ; CHECK: st3.16b
    410 	call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
    411 	ret void
    412 }
    413 
    414 define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
        ; st4 intrinsic on four <16 x i8> vectors; expects st4.16b.
    415 ; CHECK-LABEL: st4_16b:
    416 ; CHECK: st4.16b
    417 	call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
    418 	ret void
    419 }
    420 
    421 declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
    422 declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
    423 declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
        ; The v16i8 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    424 
    425 define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
        ; st2 intrinsic on two <4 x i16> vectors; expects st2.4h.
    426 ; CHECK-LABEL: st2_4h:
    427 ; CHECK: st2.4h
    428 	call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
    429 	ret void
    430 }
    431 
    432 define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
        ; st3 intrinsic on three <4 x i16> vectors; expects st3.4h.
    433 ; CHECK-LABEL: st3_4h:
    434 ; CHECK: st3.4h
    435 	call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
    436 	ret void
    437 }
    438 
    439 define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
        ; st4 intrinsic on four <4 x i16> vectors; expects st4.4h.
    440 ; CHECK-LABEL: st4_4h:
    441 ; CHECK: st4.4h
    442 	call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
    443 	ret void
    444 }
    445 
    446 declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
    447 declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
    448 declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
        ; The v4i16 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    449 
    450 define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
        ; st2 intrinsic on two <8 x i16> vectors; expects st2.8h.
    451 ; CHECK-LABEL: st2_8h:
    452 ; CHECK: st2.8h
    453 	call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
    454 	ret void
    455 }
    456 
    457 define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
        ; st3 intrinsic on three <8 x i16> vectors; expects st3.8h.
    458 ; CHECK-LABEL: st3_8h:
    459 ; CHECK: st3.8h
    460 	call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
    461 	ret void
    462 }
    463 
    464 define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
        ; st4 intrinsic on four <8 x i16> vectors; expects st4.8h.
    465 ; CHECK-LABEL: st4_8h:
    466 ; CHECK: st4.8h
    467 	call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
    468 	ret void
    469 }
    470 
    471 declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
    472 declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
    473 declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
        ; The v8i16 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    474 
    475 define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
        ; st2 intrinsic on two <2 x i32> vectors; expects st2.2s.
    476 ; CHECK-LABEL: st2_2s:
    477 ; CHECK: st2.2s
    478 	call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
    479 	ret void
    480 }
    481 
    482 define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
        ; st3 intrinsic on three <2 x i32> vectors; expects st3.2s.
    483 ; CHECK-LABEL: st3_2s:
    484 ; CHECK: st3.2s
    485 	call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
    486 	ret void
    487 }
    488 
    489 define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
        ; st4 intrinsic on four <2 x i32> vectors; expects st4.2s.
    490 ; CHECK-LABEL: st4_2s:
    491 ; CHECK: st4.2s
    492 	call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
    493 	ret void
    494 }
    495 
    496 declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
    497 declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
    498 declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
        ; The v2i32 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    499 
    500 define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
        ; st2 intrinsic on two <4 x i32> vectors; expects st2.4s.
    501 ; CHECK-LABEL: st2_4s:
    502 ; CHECK: st2.4s
    503 	call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
    504 	ret void
    505 }
    506 
    507 define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
        ; st3 intrinsic on three <4 x i32> vectors; expects st3.4s.
    508 ; CHECK-LABEL: st3_4s:
    509 ; CHECK: st3.4s
    510 	call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
    511 	ret void
    512 }
    513 
    514 define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
        ; st4 intrinsic on four <4 x i32> vectors; expects st4.4s.
    515 ; CHECK-LABEL: st4_4s:
    516 ; CHECK: st4.4s
    517 	call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
    518 	ret void
    519 }
    520 
    521 declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
    522 declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
    523 declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
        ; The v4i32 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    524 
    525 ; If there's only one element per vector, st2/3/4 don't make much sense, so st1 is expected instead.
    526 define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
        ; st2 intrinsic on two <1 x i64> vectors; expects st1.1d rather than an st2 form.
    527 ; CHECK-LABEL: st2_1d:
    528 ; CHECK: st1.1d
    529 	call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
    530 	ret void
    531 }
    532 
    533 define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
        ; st3 intrinsic on three <1 x i64> vectors; expects st1.1d rather than an st3 form.
    534 ; CHECK-LABEL: st3_1d:
    535 ; CHECK: st1.1d
    536 	call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
    537 	ret void
    538 }
    539 
    540 define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
        ; st4 intrinsic on four <1 x i64> vectors; expects st1.1d rather than an st4 form.
    541 ; CHECK-LABEL: st4_1d:
    542 ; CHECK: st1.1d
    543 	call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
    544 	ret void
    545 }
    546 
    547 declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
    548 declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
    549 declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
        ; The v1i64 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    550 
    551 define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
        ; st2 intrinsic on two <2 x i64> vectors; expects st2.2d.
    552 ; CHECK-LABEL: st2_2d:
    553 ; CHECK: st2.2d
    554 	call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
    555 	ret void
    556 }
    557 
    558 define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
        ; st3 intrinsic on three <2 x i64> vectors; expects st3.2d.
    559 ; CHECK-LABEL: st3_2d:
    560 ; CHECK: st3.2d
    561 	call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
    562 	ret void
    563 }
    564 
    565 define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
        ; st4 intrinsic on four <2 x i64> vectors; expects st4.2d.
    566 ; CHECK-LABEL: st4_2d:
    567 ; CHECK: st4.2d
    568 	call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
    569 	ret void
    570 }
    571 
    572 declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
    573 declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
    574 declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
        ; The v2i64 st2/st3/st4 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    575 
    576 declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
    577 declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
    578 declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
    579 declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind
    580 declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
    581 declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind
        ; The 64-bit-vector st1x2 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    582 
    583 define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
        ; st1x2 intrinsic on two <8 x i8> vectors; expects one two-register st1.8b addressed by x0.
    584 ; CHECK-LABEL: st1_x2_v8i8:
    585 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    586   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
    587   ret void
    588 }
    589 
    590 define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
        ; st1x2 intrinsic on two <4 x i16> vectors; expects one two-register st1.4h addressed by x0.
    591 ; CHECK-LABEL: st1_x2_v4i16:
    592 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    593   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
    594   ret void
    595 }
    596 
    597 define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
        ; st1x2 intrinsic on two <2 x i32> vectors; expects one two-register st1.2s addressed by x0.
    598 ; CHECK-LABEL: st1_x2_v2i32:
    599 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    600   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
    601   ret void
    602 }
    603 
    604 define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
        ; st1x2 intrinsic on two <2 x float> vectors; expects one two-register st1.2s addressed by x0.
    605 ; CHECK-LABEL: st1_x2_v2f32:
    606 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    607   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
    608   ret void
    609 }
    610 
    611 define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
        ; st1x2 intrinsic on two <1 x i64> vectors; expects one two-register st1.1d addressed by x0.
    612 ; CHECK-LABEL: st1_x2_v1i64:
    613 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    614   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
    615   ret void
    616 }
    617 
    618 define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
        ; st1x2 intrinsic on two <1 x double> vectors; expects one two-register st1.1d addressed by x0.
    619 ; CHECK-LABEL: st1_x2_v1f64:
    620 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    621   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
    622   ret void
    623 }
    624 
    625 declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
    626 declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
    627 declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
    628 declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind
    629 declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
    630 declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind
        ; The 128-bit-vector st1x2 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    631 
    632 define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
        ; st1x2 intrinsic on two <16 x i8> vectors; expects one two-register st1.16b addressed by x0.
    633 ; CHECK-LABEL: st1_x2_v16i8:
    634 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    635   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
    636   ret void
    637 }
    638 
    639 define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
        ; st1x2 intrinsic on two <8 x i16> vectors; expects one two-register st1.8h addressed by x0.
    640 ; CHECK-LABEL: st1_x2_v8i16:
    641 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    642   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
    643   ret void
    644 }
    645 
    646 define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
        ; st1x2 intrinsic on two <4 x i32> vectors; expects one two-register st1.4s addressed by x0.
    647 ; CHECK-LABEL: st1_x2_v4i32:
    648 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    649   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
    650   ret void
    651 }
    652 
    653 define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
        ; st1x2 intrinsic on two <4 x float> vectors; expects one two-register st1.4s addressed by x0.
    654 ; CHECK-LABEL: st1_x2_v4f32:
    655 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    656   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
    657   ret void
    658 }
    659 
    660 define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
        ; st1x2 intrinsic on two <2 x i64> vectors; expects one two-register st1.2d addressed by x0.
    661 ; CHECK-LABEL: st1_x2_v2i64:
    662 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    663   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
    664   ret void
    665 }
    666 
    667 define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
        ; st1x2 intrinsic on two <2 x double> vectors; expects one two-register st1.2d addressed by x0.
    668 ; CHECK-LABEL: st1_x2_v2f64:
    669 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    670   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
    671   ret void
    672 }
    673 
    674 declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
    675 declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
    676 declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
    677 declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind
    678 declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
    679 declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind
        ; The 64-bit-vector st1x3 intrinsics above store through their pointer operand, so no read-only memory attribute is declared.
    680 
    681 define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
        ; st1x3 intrinsic on three <8 x i8> vectors; expects one three-register st1.8b addressed by x0.
    682 ; CHECK-LABEL: st1_x3_v8i8:
    683 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    684   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
    685   ret void
    686 }
    687 
    688 define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
        ; st1x3 intrinsic on three <4 x i16> vectors; expects one three-register st1.4h addressed by x0.
    689 ; CHECK-LABEL: st1_x3_v4i16:
    690 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    691   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
    692   ret void
    693 }
    694 
    695 define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
        ; st1x3 intrinsic on three <2 x i32> vectors; expects one three-register st1.2s addressed by x0.
    696 ; CHECK-LABEL: st1_x3_v2i32:
    697 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    698   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
    699   ret void
    700 }
    701 
    702 define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
        ; st1x3 intrinsic on three <2 x float> vectors; expects one three-register st1.2s addressed by x0.
    703 ; CHECK-LABEL: st1_x3_v2f32:
    704 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    705   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
    706   ret void
    707 }
    708 
    709 define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
        ; st1x3 intrinsic on three <1 x i64> vectors; expects one three-register st1.1d addressed by x0.
    710 ; CHECK-LABEL: st1_x3_v1i64:
    711 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    712   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
    713   ret void
    714 }
    715 
    716 define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) { ; Double variant of the st1x3 test; the expected arrangement is .1d (the same suffix the <1 x i64> test expects), three-register list, base [x0].
    717 ; CHECK-LABEL: st1_x3_v1f64:
    718 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    719   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
    720   ret void
    721 }
    722 
    723 declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind ; 128-bit st1x3 variants; st1x3 stores through its pointer operand, so these carry nounwind only (a store must not be marked readonly)
    724 declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
    725 declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
    726 declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind
    727 declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
    728 declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind
    729 
    730 define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) { ; Passes %A, %B, %C and %addr to the st1x3 intrinsic; expected output is st1.16b with a three-register list and base [x0].
    731 ; CHECK-LABEL: st1_x3_v16i8:
    732 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    733   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
    734   ret void
    735 }
    736 
    737 define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) { ; Passes %A, %B, %C and %addr to the st1x3 intrinsic; expected output is st1.8h with a three-register list and base [x0].
    738 ; CHECK-LABEL: st1_x3_v8i16:
    739 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    740   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
    741   ret void
    742 }
    743 
    744 define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) { ; Passes %A, %B, %C and %addr to the st1x3 intrinsic; expected output is st1.4s with a three-register list and base [x0].
    745 ; CHECK-LABEL: st1_x3_v4i32:
    746 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    747   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
    748   ret void
    749 }
    750 
    751 define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) { ; Float variant of the st1x3 test; the expected arrangement is .4s (the same suffix the <4 x i32> test expects), three-register list, base [x0].
    752 ; CHECK-LABEL: st1_x3_v4f32:
    753 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    754   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
    755   ret void
    756 }
    757 
    758 define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) { ; Passes %A, %B, %C and %addr to the st1x3 intrinsic; expected output is st1.2d with a three-register list and base [x0].
    759 ; CHECK-LABEL: st1_x3_v2i64:
    760 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    761   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
    762   ret void
    763 }
    764 
    765 define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) { ; Double variant of the st1x3 test; the expected arrangement is .2d (the same suffix the <2 x i64> test expects), three-register list, base [x0].
    766 ; CHECK-LABEL: st1_x3_v2f64:
    767 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    768   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr)
    769   ret void
    770 }
    771 
    772 
    773 declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind ; 64-bit st1x4 variants; st1x4 stores through its pointer operand, so these carry nounwind only (a store must not be marked readonly)
    774 declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
    775 declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
    776 declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind
    777 declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
    778 declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind
    779 
    780 define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.8b with a four-register list and base [x0].
    781 ; CHECK-LABEL: st1_x4_v8i8:
    782 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    783   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr)
    784   ret void
    785 }
    786 
    787 define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.4h with a four-register list and base [x0].
    788 ; CHECK-LABEL: st1_x4_v4i16:
    789 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    790   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
    791   ret void
    792 }
    793 
    794 define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.2s with a four-register list and base [x0].
    795 ; CHECK-LABEL: st1_x4_v2i32:
    796 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    797   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
    798   ret void
    799 }
    800 
    801 define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) { ; Float variant of the st1x4 test; the expected arrangement is .2s (the same suffix the <2 x i32> test expects), four-register list, base [x0].
    802 ; CHECK-LABEL: st1_x4_v2f32:
    803 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    804   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
    805   ret void
    806 }
    807 
    808 define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.1d with a four-register list and base [x0].
    809 ; CHECK-LABEL: st1_x4_v1i64:
    810 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    811   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
    812   ret void
    813 }
    814 
    815 define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) { ; Double variant of the st1x4 test; the expected arrangement is .1d (the same suffix the <1 x i64> test expects), four-register list, base [x0].
    816 ; CHECK-LABEL: st1_x4_v1f64:
    817 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    818   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
    819   ret void
    820 }
    821 
    822 declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind ; 128-bit st1x4 variants; st1x4 stores through its pointer operand, so these carry nounwind only (a store must not be marked readonly)
    823 declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
    824 declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
    825 declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind
    826 declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
    827 declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind
    828 
    829 define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.16b with a four-register list and base [x0].
    830 ; CHECK-LABEL: st1_x4_v16i8:
    831 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    832   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
    833   ret void
    834 }
    835 
    836 define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.8h with a four-register list and base [x0].
    837 ; CHECK-LABEL: st1_x4_v8i16:
    838 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    839   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr)
    840   ret void
    841 }
    842 
    843 define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.4s with a four-register list and base [x0].
    844 ; CHECK-LABEL: st1_x4_v4i32:
    845 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    846   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr)
    847   ret void
    848 }
    849 
    850 define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) { ; Float variant of the st1x4 test; the expected arrangement is .4s (the same suffix the <4 x i32> test expects), four-register list, base [x0].
    851 ; CHECK-LABEL: st1_x4_v4f32:
    852 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    853   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr)
    854   ret void
    855 }
    856 
    857 define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) { ; Passes %A, %B, %C, %D and %addr to the st1x4 intrinsic; expected output is st1.2d with a four-register list and base [x0].
    858 ; CHECK-LABEL: st1_x4_v2i64:
    859 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    860   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr)
    861   ret void
    862 }
    863 
    864 define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) { ; Double variant of the st1x4 test; the expected arrangement is .2d (the same suffix the <2 x i64> test expects), four-register list, base [x0].
    865 ; CHECK-LABEL: st1_x4_v2f64:
    866 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    867   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr)
    868   ret void
    869 }
    870