; Codegen tests for AArch64 NEON stores: st1 single-lane stores, st2/st3/st4 lane stores and st2/st3/st4 multi-vector stores.
      1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
      2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s
; The instruction latencies of Exynos-M1 trigger the transform verified by the second run line: st2/st4 are replaced by zip1/zip2 sequences followed by stp.
      4 
      5 define void @st1lane_16b(<16 x i8> %A, i8* %D) {
      6 ; CHECK-LABEL: st1lane_16b
      7 ; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
      8   %ptr = getelementptr i8, i8* %D, i64 1
      9   %tmp = extractelement <16 x i8> %A, i32 1
     10   store i8 %tmp, i8* %ptr
     11   ret void
     12 }
     13 
     14 define void @st1lane0_16b(<16 x i8> %A, i8* %D) {
     15 ; CHECK-LABEL: st1lane0_16b
     16 ; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
     17   %ptr = getelementptr i8, i8* %D, i64 1
     18   %tmp = extractelement <16 x i8> %A, i32 0
     19   store i8 %tmp, i8* %ptr
     20   ret void
     21 }
     22 
     23 define void @st1lane0u_16b(<16 x i8> %A, i8* %D) {
     24 ; CHECK-LABEL: st1lane0u_16b
     25 ; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
     26   %ptr = getelementptr i8, i8* %D, i64 -1
     27   %tmp = extractelement <16 x i8> %A, i32 0
     28   store i8 %tmp, i8* %ptr
     29   ret void
     30 }
     31 
     32 define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
     33 ; CHECK-LABEL: st1lane_ro_16b
     34 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     35 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
     36   %ptr = getelementptr i8, i8* %D, i64 %offset
     37   %tmp = extractelement <16 x i8> %A, i32 1
     38   store i8 %tmp, i8* %ptr
     39   ret void
     40 }
     41 
     42 define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
     43 ; CHECK-LABEL: st1lane0_ro_16b
     44 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     45 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
     46   %ptr = getelementptr i8, i8* %D, i64 %offset
     47   %tmp = extractelement <16 x i8> %A, i32 0
     48   store i8 %tmp, i8* %ptr
     49   ret void
     50 }
     51 
     52 define void @st1lane_8h(<8 x i16> %A, i16* %D) {
     53 ; CHECK-LABEL: st1lane_8h
     54 ; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
     55   %ptr = getelementptr i16, i16* %D, i64 1
     56   %tmp = extractelement <8 x i16> %A, i32 1
     57   store i16 %tmp, i16* %ptr
     58   ret void
     59 }
     60 
     61 define void @st1lane0_8h(<8 x i16> %A, i16* %D) {
     62 ; CHECK-LABEL: st1lane0_8h
     63 ; CHECK: str h0, [x0, #2]
     64   %ptr = getelementptr i16, i16* %D, i64 1
     65   %tmp = extractelement <8 x i16> %A, i32 0
     66   store i16 %tmp, i16* %ptr
     67   ret void
     68 }
     69 
     70 define void @st1lane0u_8h(<8 x i16> %A, i16* %D) {
     71 ; CHECK-LABEL: st1lane0u_8h
     72 ; CHECK: stur h0, [x0, #-2]
     73   %ptr = getelementptr i16, i16* %D, i64 -1
     74   %tmp = extractelement <8 x i16> %A, i32 0
     75   store i16 %tmp, i16* %ptr
     76   ret void
     77 }
     78 
     79 define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
     80 ; CHECK-LABEL: st1lane_ro_8h
     81 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
     82 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
     83   %ptr = getelementptr i16, i16* %D, i64 %offset
     84   %tmp = extractelement <8 x i16> %A, i32 1
     85   store i16 %tmp, i16* %ptr
     86   ret void
     87 }
     88 
     89 define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
     90 ; CHECK-LABEL: st1lane0_ro_8h
     91 ; CHECK: str h0, [x0, x1, lsl #1]
     92   %ptr = getelementptr i16, i16* %D, i64 %offset
     93   %tmp = extractelement <8 x i16> %A, i32 0
     94   store i16 %tmp, i16* %ptr
     95   ret void
     96 }
     97 
     98 define void @st1lane_4s(<4 x i32> %A, i32* %D) {
     99 ; CHECK-LABEL: st1lane_4s
    100 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
    101   %ptr = getelementptr i32, i32* %D, i64 1
    102   %tmp = extractelement <4 x i32> %A, i32 1
    103   store i32 %tmp, i32* %ptr
    104   ret void
    105 }
    106 
    107 define void @st1lane0_4s(<4 x i32> %A, i32* %D) {
    108 ; CHECK-LABEL: st1lane0_4s
    109 ; CHECK: str s0, [x0, #4]
    110   %ptr = getelementptr i32, i32* %D, i64 1
    111   %tmp = extractelement <4 x i32> %A, i32 0
    112   store i32 %tmp, i32* %ptr
    113   ret void
    114 }
    115 
    116 define void @st1lane0u_4s(<4 x i32> %A, i32* %D) {
    117 ; CHECK-LABEL: st1lane0u_4s
    118 ; CHECK: stur s0, [x0, #-4]
    119   %ptr = getelementptr i32, i32* %D, i64 -1
    120   %tmp = extractelement <4 x i32> %A, i32 0
    121   store i32 %tmp, i32* %ptr
    122   ret void
    123 }
    124 
    125 define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
    126 ; CHECK-LABEL: st1lane_ro_4s
    127 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    128 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
    129   %ptr = getelementptr i32, i32* %D, i64 %offset
    130   %tmp = extractelement <4 x i32> %A, i32 1
    131   store i32 %tmp, i32* %ptr
    132   ret void
    133 }
    134 
    135 define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
    136 ; CHECK-LABEL: st1lane0_ro_4s
    137 ; CHECK: str s0, [x0, x1, lsl #2]
    138   %ptr = getelementptr i32, i32* %D, i64 %offset
    139   %tmp = extractelement <4 x i32> %A, i32 0
    140   store i32 %tmp, i32* %ptr
    141   ret void
    142 }
    143 
    144 define void @st1lane_4s_float(<4 x float> %A, float* %D) {
    145 ; CHECK-LABEL: st1lane_4s_float
    146 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
    147   %ptr = getelementptr float, float* %D, i64 1
    148   %tmp = extractelement <4 x float> %A, i32 1
    149   store float %tmp, float* %ptr
    150   ret void
    151 }
    152 
    153 define void @st1lane0_4s_float(<4 x float> %A, float* %D) {
    154 ; CHECK-LABEL: st1lane0_4s_float
    155 ; CHECK: str s0, [x0, #4]
    156   %ptr = getelementptr float, float* %D, i64 1
    157   %tmp = extractelement <4 x float> %A, i32 0
    158   store float %tmp, float* %ptr
    159   ret void
    160 }
    161 
    162 define void @st1lane0u_4s_float(<4 x float> %A, float* %D) {
    163 ; CHECK-LABEL: st1lane0u_4s_float
    164 ; CHECK: stur s0, [x0, #-4]
    165   %ptr = getelementptr float, float* %D, i64 -1
    166   %tmp = extractelement <4 x float> %A, i32 0
    167   store float %tmp, float* %ptr
    168   ret void
    169 }
    170 
    171 define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
    172 ; CHECK-LABEL: st1lane_ro_4s_float
    173 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    174 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
    175   %ptr = getelementptr float, float* %D, i64 %offset
    176   %tmp = extractelement <4 x float> %A, i32 1
    177   store float %tmp, float* %ptr
    178   ret void
    179 }
    180 
    181 define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
    182 ; CHECK-LABEL: st1lane0_ro_4s_float
    183 ; CHECK: str s0, [x0, x1, lsl #2]
    184   %ptr = getelementptr float, float* %D, i64 %offset
    185   %tmp = extractelement <4 x float> %A, i32 0
    186   store float %tmp, float* %ptr
    187   ret void
    188 }
    189 
    190 define void @st1lane_2d(<2 x i64> %A, i64* %D) {
    191 ; CHECK-LABEL: st1lane_2d
    192 ; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
    193   %ptr = getelementptr i64, i64* %D, i64 1
    194   %tmp = extractelement <2 x i64> %A, i32 1
    195   store i64 %tmp, i64* %ptr
    196   ret void
    197 }
    198 
    199 define void @st1lane0_2d(<2 x i64> %A, i64* %D) {
    200 ; CHECK-LABEL: st1lane0_2d
    201 ; CHECK: str d0, [x0, #8]
    202   %ptr = getelementptr i64, i64* %D, i64 1
    203   %tmp = extractelement <2 x i64> %A, i32 0
    204   store i64 %tmp, i64* %ptr
    205   ret void
    206 }
    207 
    208 define void @st1lane0u_2d(<2 x i64> %A, i64* %D) {
    209 ; CHECK-LABEL: st1lane0u_2d
    210 ; CHECK: stur d0, [x0, #-8]
    211   %ptr = getelementptr i64, i64* %D, i64 -1
    212   %tmp = extractelement <2 x i64> %A, i32 0
    213   store i64 %tmp, i64* %ptr
    214   ret void
    215 }
    216 
    217 define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
    218 ; CHECK-LABEL: st1lane_ro_2d
    219 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    220 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
    221   %ptr = getelementptr i64, i64* %D, i64 %offset
    222   %tmp = extractelement <2 x i64> %A, i32 1
    223   store i64 %tmp, i64* %ptr
    224   ret void
    225 }
    226 
    227 define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
    228 ; CHECK-LABEL: st1lane0_ro_2d
    229 ; CHECK: str d0, [x0, x1, lsl #3]
    230   %ptr = getelementptr i64, i64* %D, i64 %offset
    231   %tmp = extractelement <2 x i64> %A, i32 0
    232   store i64 %tmp, i64* %ptr
    233   ret void
    234 }
    235 
    236 define void @st1lane_2d_double(<2 x double> %A, double* %D) {
    237 ; CHECK-LABEL: st1lane_2d_double
    238 ; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
    239   %ptr = getelementptr double, double* %D, i64 1
    240   %tmp = extractelement <2 x double> %A, i32 1
    241   store double %tmp, double* %ptr
    242   ret void
    243 }
    244 
    245 define void @st1lane0_2d_double(<2 x double> %A, double* %D) {
    246 ; CHECK-LABEL: st1lane0_2d_double
    247 ; CHECK: str d0, [x0, #8]
    248   %ptr = getelementptr double, double* %D, i64 1
    249   %tmp = extractelement <2 x double> %A, i32 0
    250   store double %tmp, double* %ptr
    251   ret void
    252 }
    253 
    254 define void @st1lane0u_2d_double(<2 x double> %A, double* %D) {
    255 ; CHECK-LABEL: st1lane0u_2d_double
    256 ; CHECK: stur d0, [x0, #-8]
    257   %ptr = getelementptr double, double* %D, i64 -1
    258   %tmp = extractelement <2 x double> %A, i32 0
    259   store double %tmp, double* %ptr
    260   ret void
    261 }
    262 
    263 define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
    264 ; CHECK-LABEL: st1lane_ro_2d_double
    265 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    266 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
    267   %ptr = getelementptr double, double* %D, i64 %offset
    268   %tmp = extractelement <2 x double> %A, i32 1
    269   store double %tmp, double* %ptr
    270   ret void
    271 }
    272 
    273 define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
    274 ; CHECK-LABEL: st1lane0_ro_2d_double
    275 ; CHECK: str d0, [x0, x1, lsl #3]
    276   %ptr = getelementptr double, double* %D, i64 %offset
    277   %tmp = extractelement <2 x double> %A, i32 0
    278   store double %tmp, double* %ptr
    279   ret void
    280 }
    281 
    282 define void @st1lane_8b(<8 x i8> %A, i8* %D) {
    283 ; CHECK-LABEL: st1lane_8b
    284 ; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
    285   %ptr = getelementptr i8, i8* %D, i64 1
    286   %tmp = extractelement <8 x i8> %A, i32 1
    287   store i8 %tmp, i8* %ptr
    288   ret void
    289 }
    290 
    291 define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
    292 ; CHECK-LABEL: st1lane_ro_8b
    293 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    294 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
    295   %ptr = getelementptr i8, i8* %D, i64 %offset
    296   %tmp = extractelement <8 x i8> %A, i32 1
    297   store i8 %tmp, i8* %ptr
    298   ret void
    299 }
    300 
    301 define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
    302 ; CHECK-LABEL: st1lane0_ro_8b
    303 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    304 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
    305   %ptr = getelementptr i8, i8* %D, i64 %offset
    306   %tmp = extractelement <8 x i8> %A, i32 0
    307   store i8 %tmp, i8* %ptr
    308   ret void
    309 }
    310 
    311 define void @st1lane_4h(<4 x i16> %A, i16* %D) {
    312 ; CHECK-LABEL: st1lane_4h
    313 ; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
    314   %ptr = getelementptr i16, i16* %D, i64 1
    315   %tmp = extractelement <4 x i16> %A, i32 1
    316   store i16 %tmp, i16* %ptr
    317   ret void
    318 }
    319 
    320 define void @st1lane0_4h(<4 x i16> %A, i16* %D) {
    321 ; CHECK-LABEL: st1lane0_4h
    322 ; CHECK: str h0, [x0, #2]
    323   %ptr = getelementptr i16, i16* %D, i64 1
    324   %tmp = extractelement <4 x i16> %A, i32 0
    325   store i16 %tmp, i16* %ptr
    326   ret void
    327 }
    328 
    329 define void @st1lane0u_4h(<4 x i16> %A, i16* %D) {
    330 ; CHECK-LABEL: st1lane0u_4h
    331 ; CHECK: stur h0, [x0, #-2]
    332   %ptr = getelementptr i16, i16* %D, i64 -1
    333   %tmp = extractelement <4 x i16> %A, i32 0
    334   store i16 %tmp, i16* %ptr
    335   ret void
    336 }
    337 
    338 define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
    339 ; CHECK-LABEL: st1lane_ro_4h
    340 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    341 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
    342   %ptr = getelementptr i16, i16* %D, i64 %offset
    343   %tmp = extractelement <4 x i16> %A, i32 1
    344   store i16 %tmp, i16* %ptr
    345   ret void
    346 }
    347 
    348 define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
    349 ; CHECK-LABEL: st1lane0_ro_4h
    350 ; CHECK: str h0, [x0, x1, lsl #1]
    351   %ptr = getelementptr i16, i16* %D, i64 %offset
    352   %tmp = extractelement <4 x i16> %A, i32 0
    353   store i16 %tmp, i16* %ptr
    354   ret void
    355 }
    356 
    357 define void @st1lane_2s(<2 x i32> %A, i32* %D) {
    358 ; CHECK-LABEL: st1lane_2s
    359 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
    360   %ptr = getelementptr i32, i32* %D, i64 1
    361   %tmp = extractelement <2 x i32> %A, i32 1
    362   store i32 %tmp, i32* %ptr
    363   ret void
    364 }
    365 
    366 define void @st1lane0_2s(<2 x i32> %A, i32* %D) {
    367 ; CHECK-LABEL: st1lane0_2s
    368 ; CHECK: str s0, [x0, #4]
    369   %ptr = getelementptr i32, i32* %D, i64 1
    370   %tmp = extractelement <2 x i32> %A, i32 0
    371   store i32 %tmp, i32* %ptr
    372   ret void
    373 }
    374 
    375 define void @st1lane0u_2s(<2 x i32> %A, i32* %D) {
    376 ; CHECK-LABEL: st1lane0u_2s
    377 ; CHECK: stur s0, [x0, #-4]
    378   %ptr = getelementptr i32, i32* %D, i64 -1
    379   %tmp = extractelement <2 x i32> %A, i32 0
    380   store i32 %tmp, i32* %ptr
    381   ret void
    382 }
    383 
    384 define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
    385 ; CHECK-LABEL: st1lane_ro_2s
    386 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    387 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
    388   %ptr = getelementptr i32, i32* %D, i64 %offset
    389   %tmp = extractelement <2 x i32> %A, i32 1
    390   store i32 %tmp, i32* %ptr
    391   ret void
    392 }
    393 
    394 define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
    395 ; CHECK-LABEL: st1lane0_ro_2s
    396 ; CHECK: str s0, [x0, x1, lsl #2]
    397   %ptr = getelementptr i32, i32* %D, i64 %offset
    398   %tmp = extractelement <2 x i32> %A, i32 0
    399   store i32 %tmp, i32* %ptr
    400   ret void
    401 }
    402 
    403 define void @st1lane_2s_float(<2 x float> %A, float* %D) {
    404 ; CHECK-LABEL: st1lane_2s_float
    405 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
    406   %ptr = getelementptr float, float* %D, i64 1
    407   %tmp = extractelement <2 x float> %A, i32 1
    408   store float %tmp, float* %ptr
    409   ret void
    410 }
    411 
    412 define void @st1lane0_2s_float(<2 x float> %A, float* %D) {
    413 ; CHECK-LABEL: st1lane0_2s_float
    414 ; CHECK: str s0, [x0, #4]
    415   %ptr = getelementptr float, float* %D, i64 1
    416   %tmp = extractelement <2 x float> %A, i32 0
    417   store float %tmp, float* %ptr
    418   ret void
    419 }
    420 
    421 define void @st1lane0u_2s_float(<2 x float> %A, float* %D) {
    422 ; CHECK-LABEL: st1lane0u_2s_float
    423 ; CHECK: stur s0, [x0, #-4]
    424   %ptr = getelementptr float, float* %D, i64 -1
    425   %tmp = extractelement <2 x float> %A, i32 0
    426   store float %tmp, float* %ptr
    427   ret void
    428 }
    429 
    430 define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
    431 ; CHECK-LABEL: st1lane_ro_2s_float
    432 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
    433 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
    434   %ptr = getelementptr float, float* %D, i64 %offset
    435   %tmp = extractelement <2 x float> %A, i32 1
    436   store float %tmp, float* %ptr
    437   ret void
    438 }
    439 
    440 define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
    441 ; CHECK-LABEL: st1lane0_ro_2s_float
    442 ; CHECK: str s0, [x0, x1, lsl #2]
    443   %ptr = getelementptr float, float* %D, i64 %offset
    444   %tmp = extractelement <2 x float> %A, i32 0
    445   store float %tmp, float* %ptr
    446   ret void
    447 }
    448 
    449 define void @st1lane0_1d(<1 x i64> %A, i64* %D) {
    450 ; CHECK-LABEL: st1lane0_1d
    451 ; CHECK: str d0, [x0, #8]
    452   %ptr = getelementptr i64, i64* %D, i64 1
    453   %tmp = extractelement <1 x i64> %A, i32 0
    454   store i64 %tmp, i64* %ptr
    455   ret void
    456 }
    457 
    458 define void @st1lane0u_1d(<1 x i64> %A, i64* %D) {
    459 ; CHECK-LABEL: st1lane0u_1d
    460 ; CHECK: stur d0, [x0, #-8]
    461   %ptr = getelementptr i64, i64* %D, i64 -1
    462   %tmp = extractelement <1 x i64> %A, i32 0
    463   store i64 %tmp, i64* %ptr
    464   ret void
    465 }
    466 
    467 define void @st1lane0_ro_1d(<1 x i64> %A, i64* %D, i64 %offset) {
    468 ; CHECK-LABEL: st1lane0_ro_1d
    469 ; CHECK: str d0, [x0, x1, lsl #3]
    470   %ptr = getelementptr i64, i64* %D, i64 %offset
    471   %tmp = extractelement <1 x i64> %A, i32 0
    472   store i64 %tmp, i64* %ptr
    473   ret void
    474 }
    475 
    476 define void @st1lane0_1d_double(<1 x double> %A, double* %D) {
    477 ; CHECK-LABEL: st1lane0_1d_double
    478 ; CHECK: str d0, [x0, #8]
    479   %ptr = getelementptr double, double* %D, i64 1
    480   %tmp = extractelement <1 x double> %A, i32 0
    481   store double %tmp, double* %ptr
    482   ret void
    483 }
    484 
    485 define void @st1lane0u_1d_double(<1 x double> %A, double* %D) {
    486 ; CHECK-LABEL: st1lane0u_1d_double
    487 ; CHECK: stur d0, [x0, #-8]
    488   %ptr = getelementptr double, double* %D, i64 -1
    489   %tmp = extractelement <1 x double> %A, i32 0
    490   store double %tmp, double* %ptr
    491   ret void
    492 }
    493 
    494 define void @st1lane0_ro_1d_double(<1 x double> %A, double* %D, i64 %offset) {
    495 ; CHECK-LABEL: st1lane0_ro_1d_double
    496 ; CHECK: str d0, [x0, x1, lsl #3]
    497   %ptr = getelementptr double, double* %D, i64 %offset
    498   %tmp = extractelement <1 x double> %A, i32 0
    499   store double %tmp, double* %ptr
    500   ret void
    501 }
    502 
    503 define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
    504 ; CHECK-LABEL: st2lane_16b
    505 ; CHECK: st2.b
    506   call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
    507   ret void
    508 }
    509 
    510 define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
    511 ; CHECK-LABEL: st2lane_8h
    512 ; CHECK: st2.h
    513   call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
    514   ret void
    515 }
    516 
    517 define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
    518 ; CHECK-LABEL: st2lane_4s
    519 ; CHECK: st2.s
    520   call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
    521   ret void
    522 }
    523 
    524 define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
    525 ; CHECK-LABEL: st2lane_2d
    526 ; CHECK: st2.d
    527   call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
    528   ret void
    529 }
    530 
    531 declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
    532 declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
    533 declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
    534 declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
    535 
    536 define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
    537 ; CHECK-LABEL: st3lane_16b
    538 ; CHECK: st3.b
    539   call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
    540   ret void
    541 }
    542 
    543 define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
    544 ; CHECK-LABEL: st3lane_8h
    545 ; CHECK: st3.h
    546   call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
    547   ret void
    548 }
    549 
    550 define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
    551 ; CHECK-LABEL: st3lane_4s
    552 ; CHECK: st3.s
    553   call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
    554   ret void
    555 }
    556 
    557 define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
    558 ; CHECK-LABEL: st3lane_2d
    559 ; CHECK: st3.d
    560   call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
    561   ret void
    562 }
    563 
    564 declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
    565 declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
    566 declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
    567 declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
    568 
    569 define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
    570 ; CHECK-LABEL: st4lane_16b
    571 ; CHECK: st4.b
    572   call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
    573   ret void
    574 }
    575 
    576 define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
    577 ; CHECK-LABEL: st4lane_8h
    578 ; CHECK: st4.h
    579   call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
    580   ret void
    581 }
    582 
    583 define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
    584 ; CHECK-LABEL: st4lane_4s
    585 ; CHECK: st4.s
    586   call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
    587   ret void
    588 }
    589 
    590 define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
    591 ; CHECK-LABEL: st4lane_2d
    592 ; CHECK: st4.d
    593   call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
    594   ret void
    595 }
    596 
    597 declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
    598 declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
    599 declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
    600 declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
    601 
    602 
    603 define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
    604 ; CHECK-LABEL: st2_8b
    605 ; CHECK: st2.8b
    606 ; EXYNOS-LABEL: st2_8b
    607 ; EXYNOS: zip1.8b
    608 ; EXYNOS: zip2.8b
    609 ; EXYNOS: stp
    610 	call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
    611 	ret void
    612 }
    613 
    614 define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
    615 ; CHECK-LABEL: st3_8b
    616 ; CHECK: st3.8b
    617 	call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
    618 	ret void
    619 }
    620 
    621 define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
    622 ; CHECK-LABEL: st4_8b
    623 ; CHECK: st4.8b
    624 ; EXYNOS-LABEL: st4_8b
    625 ; EXYNOS: zip1.8b
    626 ; EXYNOS: zip2.8b
    627 ; EXYNOS: zip1.8b
    628 ; EXYNOS: zip2.8b
    629 ; EXYNOS: zip1.8b
    630 ; EXYNOS: zip2.8b
    631 ; EXYNOS: stp
    632 ; EXYNOS: zip1.8b
    633 ; EXYNOS: zip2.8b
    634 ; EXYNOS: stp
    635 	call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
    636 	ret void
    637 }
    638 
    639 declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly
    640 declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
    641 declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
    642 
    643 define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
    644 ; CHECK-LABEL: st2_16b
    645 ; CHECK: st2.16b
    646 ; EXYNOS-LABEL: st2_16b
    647 ; EXYNOS: zip1.16b
    648 ; EXYNOS: zip2.16b
    649 ; EXYNOS: stp
    650 	call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
    651 	ret void
    652 }
    653 
    654 define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
    655 ; CHECK-LABEL: st3_16b
    656 ; CHECK: st3.16b
    657 	call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
    658 	ret void
    659 }
    660 
    661 define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
    662 ; CHECK-LABEL: st4_16b
    663 ; CHECK: st4.16b
    664 ; EXYNOS-LABEL: st4_16b
    665 ; EXYNOS: zip1.16b
    666 ; EXYNOS: zip2.16b
    667 ; EXYNOS: zip1.16b
    668 ; EXYNOS: zip2.16b
    669 ; EXYNOS: zip1.16b
    670 ; EXYNOS: zip2.16b
    671 ; EXYNOS: stp
    672 ; EXYNOS: zip1.16b
    673 ; EXYNOS: zip2.16b
    674 ; EXYNOS: stp
    675 	call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
    676 	ret void
    677 }
    678 
    679 declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly
    680 declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
    681 declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
    682 
    683 define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
    684 ; CHECK-LABEL: st2_4h
    685 ; CHECK: st2.4h
    686 ; EXYNOS-LABEL: st2_4h
    687 ; EXYNOS: zip1.4h
    688 ; EXYNOS: zip2.4h
    689 ; EXYNOS: stp
    690 	call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
    691 	ret void
    692 }
    693 
    694 define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
    695 ; CHECK-LABEL: st3_4h
    696 ; CHECK: st3.4h
    697 	call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
    698 	ret void
    699 }
    700 
    701 define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
    702 ; CHECK-LABEL: st4_4h
    703 ; CHECK: st4.4h
    704 ; EXYNOS-LABEL: st4_4h
    705 ; EXYNOS: zip1.4h
    706 ; EXYNOS: zip2.4h
    707 ; EXYNOS: zip1.4h
    708 ; EXYNOS: zip2.4h
    709 ; EXYNOS: zip1.4h
    710 ; EXYNOS: zip2.4h
    711 ; EXYNOS: stp
    712 ; EXYNOS: zip1.4h
    713 ; EXYNOS: zip2.4h
    714 ; EXYNOS: stp
    715 	call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
    716 	ret void
    717 }
    718 
    719 declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly
    720 declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
    721 declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
    722 
    723 define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
    724 ; CHECK-LABEL: st2_8h
    725 ; CHECK: st2.8h
    726 ; EXYNOS-LABEL: st2_8h
    727 ; EXYNOS: zip1.8h
    728 ; EXYNOS: zip2.8h
    729 ; EXYNOS: stp
    730 	call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
    731 	ret void
    732 }
    733 
    734 define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
    735 ; CHECK-LABEL: st3_8h
    736 ; CHECK: st3.8h
    737 	call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
    738 	ret void
    739 }
    740 
    741 define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
    742 ; CHECK-LABEL: st4_8h
    743 ; CHECK: st4.8h
    744 ; EXYNOS-LABEL: st4_8h
    745 ; EXYNOS: zip1.8h
    746 ; EXYNOS: zip2.8h
    747 ; EXYNOS: zip1.8h
    748 ; EXYNOS: zip2.8h
    749 ; EXYNOS: zip1.8h
    750 ; EXYNOS: zip2.8h
    751 ; EXYNOS: stp
    752 ; EXYNOS: zip1.8h
    753 ; EXYNOS: zip2.8h
    754 ; EXYNOS: stp
    755 	call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
    756 	ret void
    757 }
    758 
    759 declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly
    760 declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
    761 declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
    762 
    763 define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
    764 ; CHECK-LABEL: st2_2s
    765 ; CHECK: st2.2s
    766 ; EXYNOS-LABEL: st2_2s
    767 ; EXYNOS: zip1.2s
    768 ; EXYNOS: zip2.2s
    769 ; EXYNOS: stp
    770 	call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
    771 	ret void
    772 }
    773 
    774 define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
    775 ; CHECK-LABEL: st3_2s
    776 ; CHECK: st3.2s
    777 	call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
    778 	ret void
    779 }
    780 
    781 define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
    782 ; CHECK-LABEL: st4_2s
    783 ; CHECK: st4.2s
    784 ; EXYNOS-LABEL: st4_2s
    785 ; EXYNOS: zip1.2s
    786 ; EXYNOS: zip2.2s
    787 ; EXYNOS: zip1.2s
    788 ; EXYNOS: zip2.2s
    789 ; EXYNOS: zip1.2s
    790 ; EXYNOS: zip2.2s
    791 ; EXYNOS: stp
    792 ; EXYNOS: zip1.2s
    793 ; EXYNOS: zip2.2s
    794 ; EXYNOS: stp
    795 	call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
    796 	ret void
    797 }
    798 
    799 declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly
    800 declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
    801 declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
    802 
    ; st2 intrinsic call storing two <4 x i32> vectors through %P.
    ; The default run expects st2.4s; the Exynos-M1 run expects zip1.4s, zip2.4s, then stp.
    803 define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
    804 ; CHECK-LABEL: st2_4s
    805 ; CHECK: st2.4s
    806 ; EXYNOS-LABEL: st2_4s
    807 ; EXYNOS: zip1.4s
    808 ; EXYNOS: zip2.4s
    809 ; EXYNOS: stp
    810 	call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
    811 	ret void
    812 }
    813 
    ; st3 intrinsic call storing three <4 x i32> vectors through %P; expects st3.4s.
    ; Only the default prefix is checked; nothing is asserted for the Exynos-M1 run.
    814 define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
    815 ; CHECK-LABEL: st3_4s
    816 ; CHECK: st3.4s
    817 	call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
    818 	ret void
    819 }
    820 
    ; st4 intrinsic call storing four <4 x i32> vectors through %P.
    ; The default run expects st4.4s; the Exynos-M1 run expects three zip1.4s/zip2.4s
    ; pairs, an stp, one more zip1.4s/zip2.4s pair and a final stp, in that order.
    821 define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
    822 ; CHECK-LABEL: st4_4s
    823 ; CHECK: st4.4s
    824 ; EXYNOS-LABEL: st4_4s
    825 ; EXYNOS: zip1.4s
    826 ; EXYNOS: zip2.4s
    827 ; EXYNOS: zip1.4s
    828 ; EXYNOS: zip2.4s
    829 ; EXYNOS: zip1.4s
    830 ; EXYNOS: zip2.4s
    831 ; EXYNOS: stp
    832 ; EXYNOS: zip1.4s
    833 ; EXYNOS: zip2.4s
    834 ; EXYNOS: stp
    835 	call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
    836 	ret void
    837 }
    838 
    ; st2/st3/st4 write through their pointer operand, so the declarations carry only
    ; `nounwind`; `readonly` would claim the call never writes memory.
    839 declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
    840 declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
    841 declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
    842 
    843 ; If there's only one element per vector, st2/3/4 don't make much sense; the 1d tests below expect st1 instead.
    ; st2 intrinsic call on two <1 x i64> vectors through %P; the expected instruction
    ; is st1.1d rather than an st2 form.
    844 define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
    845 ; CHECK-LABEL: st2_1d
    846 ; CHECK: st1.1d
    847 	call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
    848 	ret void
    849 }
    850 
    ; st3 intrinsic call on three <1 x i64> vectors through %P; the expected instruction
    ; is st1.1d rather than an st3 form.
    851 define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
    852 ; CHECK-LABEL: st3_1d
    853 ; CHECK: st1.1d
    854 	call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
    855 	ret void
    856 }
    857 
    ; st4 intrinsic call on four <1 x i64> vectors through %P; the expected instruction
    ; is st1.1d rather than an st4 form.
    858 define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
    859 ; CHECK-LABEL: st4_1d
    860 ; CHECK: st1.1d
    861 	call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
    862 	ret void
    863 }
    864 
    ; st2/st3/st4 write through their pointer operand, so the declarations carry only
    ; `nounwind`; `readonly` would claim the call never writes memory.
    865 declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
    866 declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
    867 declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
    868 
    ; st2 intrinsic call storing two <2 x i64> vectors through %P.
    ; The default run expects st2.2d; the Exynos-M1 run expects zip1.2d, zip2.2d, then stp.
    869 define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
    870 ; CHECK-LABEL: st2_2d
    871 ; CHECK: st2.2d
    872 ; EXYNOS-LABEL: st2_2d
    873 ; EXYNOS: zip1.2d
    874 ; EXYNOS: zip2.2d
    875 ; EXYNOS: stp
    876 	call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
    877 	ret void
    878 }
    879 
    ; st3 intrinsic call storing three <2 x i64> vectors through %P; expects st3.2d.
    ; Only the default prefix is checked; nothing is asserted for the Exynos-M1 run.
    880 define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
    881 ; CHECK-LABEL: st3_2d
    882 ; CHECK: st3.2d
    883 	call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
    884 	ret void
    885 }
    886 
    ; st4 intrinsic call storing four <2 x i64> vectors through %P.
    ; The default run expects st4.2d; the Exynos-M1 run expects three zip1.2d/zip2.2d
    ; pairs, an stp, one more zip1.2d/zip2.2d pair and a final stp, in that order.
    887 define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
    888 ; CHECK-LABEL: st4_2d
    889 ; CHECK: st4.2d
    890 ; EXYNOS-LABEL: st4_2d
    891 ; EXYNOS: zip1.2d
    892 ; EXYNOS: zip2.2d
    893 ; EXYNOS: zip1.2d
    894 ; EXYNOS: zip2.2d
    895 ; EXYNOS: zip1.2d
    896 ; EXYNOS: zip2.2d
    897 ; EXYNOS: stp
    898 ; EXYNOS: zip1.2d
    899 ; EXYNOS: zip2.2d
    900 ; EXYNOS: stp
    901 	call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
    902 	ret void
    903 }
    904 
    ; st2/st3/st4 write through their pointer operand, so the declarations carry only
    ; `nounwind`; `readonly` would claim the call never writes memory.
    905 declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
    906 declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
    907 declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
    908 
    ; st1x2 declarations for the 64-bit vector types. The intrinsic writes through its
    ; pointer operand, so only `nounwind` is attached; `readonly` would claim the call
    ; never writes memory.
    909 declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
    910 declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
    911 declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
    912 declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind
    913 declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
    914 declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind
    915 
    ; st1x2 of two <8 x i8> vectors through %addr: expects one st1.8b with a
    ; two-register list addressed via x0.
    916 define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
    917 ; CHECK-LABEL: st1_x2_v8i8:
    918 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    919   call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
    920   ret void
    921 }
    922 
    ; st1x2 of two <4 x i16> vectors through %addr: expects one st1.4h with a
    ; two-register list addressed via x0.
    923 define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
    924 ; CHECK-LABEL: st1_x2_v4i16:
    925 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    926   call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
    927   ret void
    928 }
    929 
    ; st1x2 of two <2 x i32> vectors through %addr: expects one st1.2s with a
    ; two-register list addressed via x0.
    930 define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
    931 ; CHECK-LABEL: st1_x2_v2i32:
    932 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    933   call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
    934   ret void
    935 }
    936 
    ; st1x2 of two <2 x float> vectors through %addr: expects one st1.2s with a
    ; two-register list addressed via x0.
    937 define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
    938 ; CHECK-LABEL: st1_x2_v2f32:
    939 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    940   call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
    941   ret void
    942 }
    943 
    ; st1x2 of two <1 x i64> vectors through %addr: expects one st1.1d with a
    ; two-register list addressed via x0.
    944 define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
    945 ; CHECK-LABEL: st1_x2_v1i64:
    946 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    947   call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
    948   ret void
    949 }
    950 
    ; st1x2 of two <1 x double> vectors through %addr: expects one st1.1d with a
    ; two-register list addressed via x0.
    951 define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
    952 ; CHECK-LABEL: st1_x2_v1f64:
    953 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    954   call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
    955   ret void
    956 }
    957 
    ; st1x2 declarations for the 128-bit vector types. The intrinsic writes through its
    ; pointer operand, so only `nounwind` is attached; `readonly` would claim the call
    ; never writes memory.
    958 declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
    959 declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
    960 declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
    961 declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind
    962 declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
    963 declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind
    964 
    ; st1x2 of two <16 x i8> vectors through %addr: expects one st1.16b with a
    ; two-register list addressed via x0.
    965 define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
    966 ; CHECK-LABEL: st1_x2_v16i8:
    967 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    968   call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
    969   ret void
    970 }
    971 
    ; st1x2 of two <8 x i16> vectors through %addr: expects one st1.8h with a
    ; two-register list addressed via x0.
    972 define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
    973 ; CHECK-LABEL: st1_x2_v8i16:
    974 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    975   call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
    976   ret void
    977 }
    978 
    ; st1x2 of two <4 x i32> vectors through %addr: expects one st1.4s with a
    ; two-register list addressed via x0.
    979 define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
    980 ; CHECK-LABEL: st1_x2_v4i32:
    981 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    982   call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
    983   ret void
    984 }
    985 
    ; st1x2 of two <4 x float> vectors through %addr: expects one st1.4s with a
    ; two-register list addressed via x0.
    986 define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
    987 ; CHECK-LABEL: st1_x2_v4f32:
    988 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    989   call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
    990   ret void
    991 }
    992 
    ; st1x2 of two <2 x i64> vectors through %addr: expects one st1.2d with a
    ; two-register list addressed via x0.
    993 define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
    994 ; CHECK-LABEL: st1_x2_v2i64:
    995 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
    996   call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
    997   ret void
    998 }
    999 
   ; st1x2 of two <2 x double> vectors through %addr: expects one st1.2d with a
   ; two-register list addressed via x0.
   1000 define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
   1001 ; CHECK-LABEL: st1_x2_v2f64:
   1002 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1003   call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
   1004   ret void
   1005 }
   1006 
   ; st1x3 declarations for the 64-bit vector types. The intrinsic writes through its
   ; pointer operand, so only `nounwind` is attached; `readonly` would claim the call
   ; never writes memory.
   1007 declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
   1008 declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
   1009 declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
   1010 declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind
   1011 declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
   1012 declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind
   1013 
   ; st1x3 of three <8 x i8> vectors through %addr: expects one st1.8b with a
   ; three-register list addressed via x0.
   1014 define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
   1015 ; CHECK-LABEL: st1_x3_v8i8:
   1016 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1017   call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
   1018   ret void
   1019 }
   1020 
   ; st1x3 of three <4 x i16> vectors through %addr: expects one st1.4h with a
   ; three-register list addressed via x0.
   1021 define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
   1022 ; CHECK-LABEL: st1_x3_v4i16:
   1023 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1024   call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
   1025   ret void
   1026 }
   1027 
   ; st1x3 of three <2 x i32> vectors through %addr: expects one st1.2s with a
   ; three-register list addressed via x0.
   1028 define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
   1029 ; CHECK-LABEL: st1_x3_v2i32:
   1030 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1031   call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
   1032   ret void
   1033 }
   1034 
   ; st1x3 of three <2 x float> vectors through %addr: expects one st1.2s with a
   ; three-register list addressed via x0.
   1035 define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
   1036 ; CHECK-LABEL: st1_x3_v2f32:
   1037 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1038   call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
   1039   ret void
   1040 }
   1041 
   ; st1x3 of three <1 x i64> vectors through %addr: expects one st1.1d with a
   ; three-register list addressed via x0.
   1042 define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
   1043 ; CHECK-LABEL: st1_x3_v1i64:
   1044 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1045   call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
   1046   ret void
   1047 }
   1048 
   ; st1x3 of three <1 x double> vectors through %addr: expects one st1.1d with a
   ; three-register list addressed via x0.
   1049 define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) {
   1050 ; CHECK-LABEL: st1_x3_v1f64:
   1051 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1052   call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
   1053   ret void
   1054 }
   1055 
   ; st1x3 declarations for the 128-bit vector types. The intrinsic writes through its
   ; pointer operand, so only `nounwind` is attached; `readonly` would claim the call
   ; never writes memory.
   1056 declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
   1057 declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
   1058 declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
   1059 declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind
   1060 declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
   1061 declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind
   1062 
   ; st1x3 of three <16 x i8> vectors through %addr: expects one st1.16b with a
   ; three-register list addressed via x0.
   1063 define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) {
   1064 ; CHECK-LABEL: st1_x3_v16i8:
   1065 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1066   call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
   1067   ret void
   1068 }
   1069 
   ; st1x3 of three <8 x i16> vectors through %addr: expects one st1.8h with a
   ; three-register list addressed via x0.
   1070 define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) {
   1071 ; CHECK-LABEL: st1_x3_v8i16:
   1072 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1073   call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
   1074   ret void
   1075 }
   1076 
   ; st1x3 of three <4 x i32> vectors through %addr: expects one st1.4s with a
   ; three-register list addressed via x0.
   1077 define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) {
   1078 ; CHECK-LABEL: st1_x3_v4i32:
   1079 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1080   call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
   1081   ret void
   1082 }
   1083 
   ; st1x3 of three <4 x float> vectors through %addr: expects one st1.4s with a
   ; three-register list addressed via x0.
   1084 define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) {
   1085 ; CHECK-LABEL: st1_x3_v4f32:
   1086 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1087   call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
   1088   ret void
   1089 }
   1090 
   ; st1x3 of three <2 x i64> vectors through %addr: expects one st1.2d with a
   ; three-register list addressed via x0.
   1091 define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) {
   1092 ; CHECK-LABEL: st1_x3_v2i64:
   1093 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1094   call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
   1095   ret void
   1096 }
   1097 
   ; st1x3 of three <2 x double> vectors through %addr: expects one st1.2d with a
   ; three-register list addressed via x0.
   1098 define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) {
   1099 ; CHECK-LABEL: st1_x3_v2f64:
   1100 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1101   call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr)
   1102   ret void
   1103 }
   1104 
   1105 
   ; st1x4 declarations for the 64-bit vector types. The intrinsic writes through its
   ; pointer operand, so only `nounwind` is attached; `readonly` would claim the call
   ; never writes memory.
   1106 declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
   1107 declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
   1108 declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
   1109 declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind
   1110 declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
   1111 declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind
   1112 
   ; st1x4 of four <8 x i8> vectors through %addr: expects one st1.8b with a
   ; four-register list addressed via x0.
   1113 define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) {
   1114 ; CHECK-LABEL: st1_x4_v8i8:
   1115 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1116   call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr)
   1117   ret void
   1118 }
   1119 
   ; st1x4 of four <4 x i16> vectors through %addr: expects one st1.4h with a
   ; four-register list addressed via x0.
   1120 define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) {
   1121 ; CHECK-LABEL: st1_x4_v4i16:
   1122 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1123   call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
   1124   ret void
   1125 }
   1126 
   ; st1x4 of four <2 x i32> vectors through %addr: expects one st1.2s with a
   ; four-register list addressed via x0.
   1127 define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) {
   1128 ; CHECK-LABEL: st1_x4_v2i32:
   1129 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1130   call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
   1131   ret void
   1132 }
   1133 
   ; st1x4 of four <2 x float> vectors through %addr: expects one st1.2s with a
   ; four-register list addressed via x0.
   1134 define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) {
   1135 ; CHECK-LABEL: st1_x4_v2f32:
   1136 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1137   call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
   1138   ret void
   1139 }
   1140 
   ; st1x4 of four <1 x i64> vectors through %addr: expects one st1.1d with a
   ; four-register list addressed via x0.
   1141 define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) {
   1142 ; CHECK-LABEL: st1_x4_v1i64:
   1143 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1144   call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
   1145   ret void
   1146 }
   1147 
   ; st1x4 of four <1 x double> vectors through %addr: expects one st1.1d with a
   ; four-register list addressed via x0.
   1148 define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) {
   1149 ; CHECK-LABEL: st1_x4_v1f64:
   1150 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1151   call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
   1152   ret void
   1153 }
   1154 
   ; st1x4 declarations for the 128-bit vector types. The intrinsic writes through its
   ; pointer operand, so only `nounwind` is attached; `readonly` would claim the call
   ; never writes memory.
   1155 declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
   1156 declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
   1157 declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
   1158 declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind
   1159 declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
   1160 declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind
   1161 
   ; st1x4 of four <16 x i8> vectors through %addr: expects one st1.16b with a
   ; four-register list addressed via x0.
   1162 define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) {
   1163 ; CHECK-LABEL: st1_x4_v16i8:
   1164 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1165   call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
   1166   ret void
   1167 }
   1168 
   ; st1x4 of four <8 x i16> vectors through %addr: expects one st1.8h with a
   ; four-register list addressed via x0.
   1169 define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) {
   1170 ; CHECK-LABEL: st1_x4_v8i16:
   1171 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1172   call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr)
   1173   ret void
   1174 }
   1175 
   ; st1x4 of four <4 x i32> vectors through %addr: expects one st1.4s with a
   ; four-register list addressed via x0.
   1176 define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) {
   1177 ; CHECK-LABEL: st1_x4_v4i32:
   1178 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1179   call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr)
   1180   ret void
   1181 }
   1182 
   ; st1x4 of four <4 x float> vectors through %addr: expects one st1.4s with a
   ; four-register list addressed via x0.
   1183 define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) {
   1184 ; CHECK-LABEL: st1_x4_v4f32:
   1185 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1186   call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr)
   1187   ret void
   1188 }
   1189 
   ; st1x4 of four <2 x i64> vectors through %addr: expects one st1.2d with a
   ; four-register list addressed via x0.
   1190 define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) {
   1191 ; CHECK-LABEL: st1_x4_v2i64:
   1192 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1193   call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr)
   1194   ret void
   1195 }
   1196 
   ; st1x4 of four <2 x double> vectors through %addr: expects one st1.2d with a
   ; four-register list addressed via x0.
   1197 define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) {
   1198 ; CHECK-LABEL: st1_x4_v2f64:
   1199 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
   1200   call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr)
   1201   ret void
   1202 }
   1203