; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s

; Codegen checks for AArch64 NEON structure stores, printed in the Apple NEON
; syntax selected on the llc command line above.
;
; Naming used by the functions below:
;   st1lane_<T>      - extract lane 1 of a vector and store it through %D.
;   st1lane_ro_<T>   - same, but the address is %D indexed by a variable %offset.
;   st1lane0_ro_<T>  - as the previous form, but lane 0 is extracted.
;   st<N>lane_<T>    - call the st<N>lane intrinsic with lane index 1.
;   st<N>_<T>        - call the whole-vector st<N> intrinsic.
;
; Every label check ends in ':' so it can only match the function's own label
; line and never a longer symbol that shares the same prefix.

;-----------------------------------------------------------------------------
; Single-lane stores from 128-bit vectors.
; For the variable-offset forms the checks accept an add of x0 and x1 into a
; scratch register followed by the lane store through that register. For lane
; 0 of 16/32/64-bit elements the checks instead expect a scalar str of
; h0/s0/d0 with a scaled register offset.
;-----------------------------------------------------------------------------

define void @st1lane_16b(<16 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_16b:
; CHECK: st1.b
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %D
  ret void
}

define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane_8h(<8 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_8h:
; CHECK: st1.h
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %D
  ret void
}

define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane_4s(<4 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_4s:
; CHECK: st1.s
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %D
  ret void
}

define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_4s_float:
; CHECK: st1.s
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %D
  ret void
}

define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

define void @st1lane_2d(<2 x i64> %A, i64* %D) {
; CHECK-LABEL: st1lane_2d:
; CHECK: st1.d
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %D
  ret void
}

define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, i64* %ptr
  ret void
}

define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, i64* %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, i64* %ptr
  ret void
}

define void @st1lane_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane_2d_double:
; CHECK: st1.d
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %D
  ret void
}

define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d_double:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, double* %ptr
  ret void
}

define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, double* %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, double* %ptr
  ret void
}

;-----------------------------------------------------------------------------
; Single-lane stores from 64-bit vectors. The expectations mirror the 128-bit
; cases above.
;-----------------------------------------------------------------------------

define void @st1lane_8b(<8 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_8b:
; CHECK: st1.b
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %D
  ret void
}

define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, i8* %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 0
  store i8 %tmp, i8* %ptr
  ret void
}

define void @st1lane_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_4h:
; CHECK: st1.h
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %D
  ret void
}

define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, i16* %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, i16* %ptr
  ret void
}

define void @st1lane_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_2s:
; CHECK: st1.s
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %D
  ret void
}

define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, i32* %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, i32* %ptr
  ret void
}

define void @st1lane_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_2s_float:
; CHECK: st1.s
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %D
  ret void
}

define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, float* %ptr
  ret void
}

define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, float* %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, float* %ptr
  ret void
}

;-----------------------------------------------------------------------------
; st2/st3/st4 single-lane intrinsics, always called with lane index 1.
; These intrinsics store to memory, so their declarations carry only
; 'nounwind' and no read-only / no-access memory attribute.
;-----------------------------------------------------------------------------

define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
; CHECK-LABEL: st2lane_16b:
; CHECK: st2.b
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
  ret void
}

define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
; CHECK-LABEL: st2lane_8h:
; CHECK: st2.h
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
  ret void
}

define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
; CHECK-LABEL: st2lane_4s:
; CHECK: st2.s
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
  ret void
}

define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
; CHECK-LABEL: st2lane_2d:
; CHECK: st2.d
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
  ret void
}

declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind

define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
; CHECK-LABEL: st3lane_16b:
; CHECK: st3.b
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
  ret void
}

define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
; CHECK-LABEL: st3lane_8h:
; CHECK: st3.h
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
  ret void
}

define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
; CHECK-LABEL: st3lane_4s:
; CHECK: st3.s
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
  ret void
}

define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
; CHECK-LABEL: st3lane_2d:
; CHECK: st3.d
  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
  ret void
}

declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind

define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
; CHECK-LABEL: st4lane_16b:
; CHECK: st4.b
  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
  ret void
}

define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
; CHECK-LABEL: st4lane_8h:
; CHECK: st4.h
  call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
  ret void
}

define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
; CHECK-LABEL: st4lane_4s:
; CHECK: st4.s
  call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
  ret void
}

define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
; CHECK-LABEL: st4lane_2d:
; CHECK: st4.d
  call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
  ret void
}

declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind
declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind

;-----------------------------------------------------------------------------
; Whole-vector st2/st3/st4 intrinsics, one group per vector arrangement.
; Each check expects the mnemonic suffixed with that arrangement. As above,
; the declarations carry only 'nounwind' because the intrinsics store to
; memory.
;-----------------------------------------------------------------------------

define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_8b:
; CHECK: st2.8b
  call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
  ret void
}

define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_8b:
; CHECK: st3.8b
  call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
  ret void
}

define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_8b:
; CHECK: st4.8b
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind

define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
; CHECK-LABEL: st2_16b:
; CHECK: st2.16b
  call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
  ret void
}

define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
; CHECK-LABEL: st3_16b:
; CHECK: st3.16b
  call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
  ret void
}

define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
; CHECK-LABEL: st4_16b:
; CHECK: st4.16b
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind

define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_4h:
; CHECK: st2.4h
  call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
  ret void
}

define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_4h:
; CHECK: st3.4h
  call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
  ret void
}

define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_4h:
; CHECK: st4.4h
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind

define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
; CHECK-LABEL: st2_8h:
; CHECK: st2.8h
  call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
  ret void
}

define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
; CHECK-LABEL: st3_8h:
; CHECK: st3.8h
  call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
  ret void
}

define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
; CHECK-LABEL: st4_8h:
; CHECK: st4.8h
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind

define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_2s:
; CHECK: st2.2s
  call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
  ret void
}

define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_2s:
; CHECK: st3.2s
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
  ret void
}

define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_2s:
; CHECK: st4.2s
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind

define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
; CHECK-LABEL: st2_4s:
; CHECK: st2.4s
  call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
  ret void
}

define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
; CHECK-LABEL: st3_4s:
; CHECK: st3.4s
  call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
  ret void
}

define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
; CHECK-LABEL: st4_4s:
; CHECK: st4.4s
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind

; If there's only one element, st2/3/4 don't make much sense, stick to st1.
; The <1 x i64> forms of the st2/st3/st4 intrinsics are expected to be emitted
; as st1.1d. The declarations in this part of the file carry only 'nounwind'
; because every one of these intrinsics stores to memory, and the label checks
; end in ':' so they can only match the function's own label line.

define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
  ret void
}

define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
  ret void
}

define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind

define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
; CHECK-LABEL: st2_2d:
; CHECK: st2.2d
  call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
  ret void
}

define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
; CHECK-LABEL: st3_2d:
; CHECK: st3.2d
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
  ret void
}

define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
; CHECK-LABEL: st4_2d:
; CHECK: st4.2d
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind

;-----------------------------------------------------------------------------
; st1x2: store two vectors with a single st1. Each check expects a
; two-register list and the unmodified pointer argument in x0.
;-----------------------------------------------------------------------------

; 64-bit vector element types.
declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind

define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
; CHECK-LABEL: st1_x2_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
  ret void
}

define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
; CHECK-LABEL: st1_x2_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
  ret void
}

define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
; CHECK-LABEL: st1_x2_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
  ret void
}

define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
; CHECK-LABEL: st1_x2_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
  ret void
}

define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
; CHECK-LABEL: st1_x2_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
  ret void
}

define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
; CHECK-LABEL: st1_x2_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
  ret void
}

; 128-bit vector element types.
declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind

define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
; CHECK-LABEL: st1_x2_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
  ret void
}

define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
; CHECK-LABEL: st1_x2_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
  ret void
}

define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
; CHECK-LABEL: st1_x2_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
  ret void
}

define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
; CHECK-LABEL: st1_x2_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
  ret void
}

define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
; CHECK-LABEL: st1_x2_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
  ret void
}

define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
; CHECK-LABEL: st1_x2_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
  ret void
}

;-----------------------------------------------------------------------------
; st1x3: store three vectors with a single st1. Each check expects a
; three-register list and the unmodified pointer argument in x0.
;-----------------------------------------------------------------------------

; 64-bit vector element types.
declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind

define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
; CHECK-LABEL: st1_x3_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
  ret void
}

define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
; CHECK-LABEL: st1_x3_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
  ret void
}

define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
; CHECK-LABEL: st1_x3_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
  ret void
}

define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
; CHECK-LABEL: st1_x3_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
  ret void
}

define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
; CHECK-LABEL: st1_x3_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
  ret void
}

define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) {
; CHECK-LABEL: st1_x3_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
  ret void
}

; 128-bit vector element types.
declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind

define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) {
; CHECK-LABEL: st1_x3_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
  ret void
}

define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) {
; CHECK-LABEL: st1_x3_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
  ret void
}

define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) {
; CHECK-LABEL: st1_x3_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
  ret void
}

define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) {
; CHECK-LABEL: st1_x3_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
  ret void
}

define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) {
; CHECK-LABEL: st1_x3_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
  ret void
}

define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) {
; CHECK-LABEL: st1_x3_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, 
double* %addr) 769 ret void 770 } 771 772 773 declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly 774 declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly 775 declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly 776 declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly 777 declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly 778 declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly 779 780 define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) { 781 ; CHECK-LABEL: st1_x4_v8i8: 782 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 783 call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) 784 ret void 785 } 786 787 define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) { 788 ; CHECK-LABEL: st1_x4_v4i16: 789 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 790 call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) 791 ret void 792 } 793 794 define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) { 795 ; CHECK-LABEL: st1_x4_v2i32: 796 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 797 call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) 798 ret void 799 } 800 801 define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) { 802 ; CHECK-LABEL: st1_x4_v2f32: 803 ; 
CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 804 call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) 805 ret void 806 } 807 808 define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) { 809 ; CHECK-LABEL: st1_x4_v1i64: 810 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 811 call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) 812 ret void 813 } 814 815 define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) { 816 ; CHECK-LABEL: st1_x4_v1f64: 817 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 818 call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) 819 ret void 820 } 821 822 declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly 823 declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly 824 declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly 825 declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly 826 declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly 827 declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly 828 829 define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) { 830 ; CHECK-LABEL: st1_x4_v16i8: 831 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 832 call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x 
i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) 833 ret void 834 } 835 836 define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) { 837 ; CHECK-LABEL: st1_x4_v8i16: 838 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 839 call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) 840 ret void 841 } 842 843 define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) { 844 ; CHECK-LABEL: st1_x4_v4i32: 845 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 846 call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) 847 ret void 848 } 849 850 define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) { 851 ; CHECK-LABEL: st1_x4_v4f32: 852 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 853 call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) 854 ret void 855 } 856 857 define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) { 858 ; CHECK-LABEL: st1_x4_v2i64: 859 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 860 call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) 861 ret void 862 } 863 864 define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) { 865 ; CHECK-LABEL: st1_x4_v2f64: 866 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 867 call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) 868 ret void 869 } 870