; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s

; Codegen test for indexed (address-writeback) vector memory operations.
;
; Every function pairs a vector memory access with a pointer increment whose
; result stays live (it is either stored to a pointer or returned), and the
; FileCheck directives require that the access and the increment are emitted
; as one pre-indexed ("[x0, #imm]!") or post-indexed ("[x0], #imm" /
; "[x0], xN") instruction.

; Global slot that receives the incremented address in the ldr/str tests, so
; that the updated pointer is observable.
@ptr = global i8* null

;===------------------------------------------------------------------------===
; Whole-vector ldr/str, 8-byte vector types (d registers).
;
; Each function advances %addr by 5 vectors (5 * 8 = 40 bytes):
;   *_pre_*  access the incremented address -> expected "[x0, #40]!"
;   *_post_* access the original address    -> expected "[x0], #40"
;===------------------------------------------------------------------------===

define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  %val = load <8 x i8>, <8 x i8>* %newaddr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret <8 x i8> %val
}

define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  %val = load <8 x i8>, <8 x i8>* %addr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret <8 x i8> %val
}

define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  store <8 x i8> %in, <8 x i8>* %newaddr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret void
}

define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
; CHECK-LABEL: test_v8i8_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
  store <8 x i8> %in, <8 x i8>* %addr, align 8
  store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
  ret void
}

define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  %val = load <4 x i16>, <4 x i16>* %newaddr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret <4 x i16> %val
}

define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  %val = load <4 x i16>, <4 x i16>* %addr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret <4 x i16> %val
}

define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  store <4 x i16> %in, <4 x i16>* %newaddr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret void
}

define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
; CHECK-LABEL: test_v4i16_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
  store <4 x i16> %in, <4 x i16>* %addr, align 8
  store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
  ret void
}

define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  %val = load <2 x i32>, <2 x i32>* %newaddr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret <2 x i32> %val
}

define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  %val = load <2 x i32>, <2 x i32>* %addr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret <2 x i32> %val
}

define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  store <2 x i32> %in, <2 x i32>* %newaddr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret void
}

define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
; CHECK-LABEL: test_v2i32_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
  store <2 x i32> %in, <2 x i32>* %addr, align 8
  store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
  ret void
}

define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  %val = load <2 x float>, <2 x float>* %newaddr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret <2 x float> %val
}

define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  %val = load <2 x float>, <2 x float>* %addr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret <2 x float> %val
}

define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  store <2 x float> %in, <2 x float>* %newaddr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret void
}

define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
; CHECK-LABEL: test_v2f32_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
  store <2 x float> %in, <2 x float>* %addr, align 8
  store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
  ret void
}

define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_load:
; CHECK: ldr d0, [x0, #40]!
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  %val = load <1 x i64>, <1 x i64>* %newaddr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret <1 x i64> %val
}

define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_load:
; CHECK: ldr d0, [x0], #40
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  %val = load <1 x i64>, <1 x i64>* %addr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret <1 x i64> %val
}

define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_pre_store:
; CHECK: str d0, [x0, #40]!
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  store <1 x i64> %in, <1 x i64>* %newaddr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret void
}

define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
; CHECK-LABEL: test_v1i64_post_store:
; CHECK: str d0, [x0], #40
  %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
  store <1 x i64> %in, <1 x i64>* %addr, align 8
  store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
  ret void
}

;===------------------------------------------------------------------------===
; Whole-vector ldr/str, 16-byte vector types (q registers).
;
; Same four shapes as above; 5 vectors * 16 bytes gives the expected #80.
;===------------------------------------------------------------------------===

define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  %val = load <16 x i8>, <16 x i8>* %newaddr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret <16 x i8> %val
}

define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  %val = load <16 x i8>, <16 x i8>* %addr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret <16 x i8> %val
}

define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  store <16 x i8> %in, <16 x i8>* %newaddr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret void
}

define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
; CHECK-LABEL: test_v16i8_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
  store <16 x i8> %in, <16 x i8>* %addr, align 8
  store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
  ret void
}

define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  %val = load <8 x i16>, <8 x i16>* %newaddr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret <8 x i16> %val
}

define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  %val = load <8 x i16>, <8 x i16>* %addr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret <8 x i16> %val
}

define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  store <8 x i16> %in, <8 x i16>* %newaddr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret void
}

define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
; CHECK-LABEL: test_v8i16_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
  store <8 x i16> %in, <8 x i16>* %addr, align 8
  store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
  ret void
}

define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  %val = load <4 x i32>, <4 x i32>* %newaddr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret <4 x i32> %val
}

define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  %val = load <4 x i32>, <4 x i32>* %addr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret <4 x i32> %val
}

define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  store <4 x i32> %in, <4 x i32>* %newaddr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret void
}

define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
; CHECK-LABEL: test_v4i32_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
  store <4 x i32> %in, <4 x i32>* %addr, align 8
  store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
  ret void
}


define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  %val = load <4 x float>, <4 x float>* %newaddr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret <4 x float> %val
}

define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  %val = load <4 x float>, <4 x float>* %addr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret <4 x float> %val
}

define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  store <4 x float> %in, <4 x float>* %newaddr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret void
}

define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
; CHECK-LABEL: test_v4f32_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
  store <4 x float> %in, <4 x float>* %addr, align 8
  store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
  ret void
}


define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  %val = load <2 x i64>, <2 x i64>* %newaddr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret <2 x i64> %val
}

define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  %val = load <2 x i64>, <2 x i64>* %addr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret <2 x i64> %val
}

define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  store <2 x i64> %in, <2 x i64>* %newaddr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret void
}

define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
; CHECK-LABEL: test_v2i64_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
  store <2 x i64> %in, <2 x i64>* %addr, align 8
  store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
  ret void
}


define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_load:
; CHECK: ldr q0, [x0, #80]!
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  %val = load <2 x double>, <2 x double>* %newaddr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret <2 x double> %val
}

define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_load:
; CHECK: ldr q0, [x0], #80
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  %val = load <2 x double>, <2 x double>* %addr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret <2 x double> %val
}

define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_pre_store:
; CHECK: str q0, [x0, #80]!
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  store <2 x double> %in, <2 x double>* %newaddr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret void
}

define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
; CHECK-LABEL: test_v2f64_post_store:
; CHECK: str q0, [x0], #80
  %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
  store <2 x double> %in, <2 x double>* %addr, align 8
  store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
  ret void
}

;===------------------------------------------------------------------------===
; Single-lane stores (extractelement + scalar store) with post-increment.
;
; Each function stores one extracted lane and returns the advanced pointer.
;   *_post_imm_* advance by one element: the expected st1 carries the element
;                size as an immediate.
;   *_post_reg_* advance by two elements: the expected code first materializes
;                the byte offset with "orr wN, wzr, #imm" and then uses the
;                register post-index form of st1.
;===------------------------------------------------------------------------===

define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 1
  ret i8* %newaddr
}

define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <16 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 2
  ret i8* %newaddr
}


define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 1
  ret i16* %newaddr
}

define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <8 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 2
  ret i16* %newaddr
}

define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 1
  ret i32* %newaddr
}

define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x i32> %in, i32 3
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 2
  ret i32* %newaddr
}

define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[3], [x0], #4
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 1
  ret float* %newaddr
}

define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x float> %in, i32 3
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 2
  ret float* %newaddr
}

define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, i64* %addr

  %newaddr = getelementptr i64, i64* %addr, i64 1
  ret i64* %newaddr
}

define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x i64> %in, i64 1
  store i64 %elt, i64* %addr

  %newaddr = getelementptr i64, i64* %addr, i64 2
  ret i64* %newaddr
}

define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
; CHECK: st1.d { v0 }[1], [x0], #8
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, double* %addr

  %newaddr = getelementptr double, double* %addr, i32 1
  ret double* %newaddr
}

define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x10
; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x double> %in, i32 1
  store double %elt, double* %addr

  %newaddr = getelementptr double, double* %addr, i32 2
  ret double* %newaddr
}

define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
; CHECK: st1.b { v0 }[3], [x0], #1
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 1
  ret i8* %newaddr
}

define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x2
; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <8 x i8> %in, i32 3
  store i8 %elt, i8* %addr

  %newaddr = getelementptr i8, i8* %addr, i32 2
  ret i8* %newaddr
}

define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
; CHECK: st1.h { v0 }[3], [x0], #2
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 1
  ret i16* %newaddr
}

define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x4
; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
  %elt = extractelement <4 x i16> %in, i32 3
  store i16 %elt, i16* %addr

  %newaddr = getelementptr i16, i16* %addr, i32 2
  ret i16* %newaddr
}

define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 1
  ret i32* %newaddr
}

define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x i32> %in, i32 1
  store i32 %elt, i32* %addr

  %newaddr = getelementptr i32, i32* %addr, i32 2
  ret i32* %newaddr
}

define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
; CHECK: st1.s { v0 }[1], [x0], #4
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 1
  ret float* %newaddr
}

define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
; CHECK: orr w[[OFFSET:[0-9]+]], wzr, #0x8
; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
  %elt = extractelement <2 x float> %in, i32 1
  store float %elt, float* %addr

  %newaddr = getelementptr float, float* %addr, i32 2
  ret float* %newaddr
}

;===------------------------------------------------------------------------===
; llvm.aarch64.neon.ld2 with post-increment.
;
; Each function calls the ld2 intrinsic on %A and stores the advanced pointer
; through %ptr.
;   *_post_imm_* advance by the total size of the two loaded vectors (the
;                immediate in the expected instruction).
;   *_post_reg_* advance by the runtime value %inc: the expected instruction
;                uses the register post-index form.
; For the one-element vector types (v1i64, v1f64) the expected instruction is
; a two-register ld1.1d.
;===------------------------------------------------------------------------===

define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], #32
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i32 32
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld2:
;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %ld2
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)


define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], #16
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i32 16
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld2:
;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i64 %inc
  store i8* %tmp, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %ld2
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)


define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], #32
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i32 16
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld2:
;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %ld2
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)


define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], #16
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i32 8
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld2:
;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  %tmp = getelementptr i16, i16* %A, i64 %inc
  store i16* %tmp, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %ld2
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)


define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i32 8
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %ld2
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)


define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i32 4
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  %tmp = getelementptr i32, i32* %A, i64 %inc
  store i32* %tmp, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %ld2
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)


define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i32 4
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %ld2
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)


define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i32 2
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  %tmp = getelementptr i64, i64* %A, i64 %inc
  store i64* %tmp, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %ld2
}

declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)


define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], #32
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i32 8
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld2:
;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <4 x float>, <4 x float> } %ld2
}

declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)


define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], #16
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i32 4
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld2:
;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float> } %ld2
}

declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)


define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], #32
  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
  %tmp = getelementptr double, double* %A, i32 4
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld2:
;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
  %tmp = getelementptr double, double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <2 x double>, <2 x double> } %ld2
}

declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*)


define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  %tmp = getelementptr double, double* %A, i32 2
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  %tmp = getelementptr double, double* %A, i64 %inc
  store double* %tmp, double** %ptr
  ret { <1 x double>, <1 x double> } %ld2
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*)

;===------------------------------------------------------------------------===
; llvm.aarch64.neon.ld3 with post-increment (same shapes as the ld2 tests,
; with three result vectors).
;===------------------------------------------------------------------------===

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld3:
;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
  %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  %tmp = getelementptr i8, i8* %A, i32 48
  store i8* %tmp, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
}

define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld3:
;CHECK: ld3.16b { v0, v1, v2 },
[x0], x{{[0-9]+}} 879 %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A) 880 %tmp = getelementptr i8, i8* %A, i64 %inc 881 store i8* %tmp, i8** %ptr 882 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3 883 } 884 885 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) 886 887 888 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) { 889 ;CHECK-LABEL: test_v8i8_post_imm_ld3: 890 ;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24 891 %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) 892 %tmp = getelementptr i8, i8* %A, i32 24 893 store i8* %tmp, i8** %ptr 894 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 895 } 896 897 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) { 898 ;CHECK-LABEL: test_v8i8_post_reg_ld3: 899 ;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} 900 %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A) 901 %tmp = getelementptr i8, i8* %A, i64 %inc 902 store i8* %tmp, i8** %ptr 903 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3 904 } 905 906 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) 907 908 909 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) { 910 ;CHECK-LABEL: test_v8i16_post_imm_ld3: 911 ;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48 912 %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) 913 %tmp = getelementptr i16, i16* %A, i32 24 914 store i16* %tmp, i16** %ptr 915 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 916 } 917 918 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { 919 ;CHECK-LABEL: test_v8i16_post_reg_ld3: 920 ;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} 921 %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A) 922 %tmp = 
getelementptr i16, i16* %A, i64 %inc 923 store i16* %tmp, i16** %ptr 924 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3 925 } 926 927 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) 928 929 930 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) { 931 ;CHECK-LABEL: test_v4i16_post_imm_ld3: 932 ;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24 933 %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) 934 %tmp = getelementptr i16, i16* %A, i32 12 935 store i16* %tmp, i16** %ptr 936 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 937 } 938 939 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) { 940 ;CHECK-LABEL: test_v4i16_post_reg_ld3: 941 ;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} 942 %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A) 943 %tmp = getelementptr i16, i16* %A, i64 %inc 944 store i16* %tmp, i16** %ptr 945 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3 946 } 947 948 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) 949 950 951 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) { 952 ;CHECK-LABEL: test_v4i32_post_imm_ld3: 953 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 954 %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) 955 %tmp = getelementptr i32, i32* %A, i32 12 956 store i32* %tmp, i32** %ptr 957 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3 958 } 959 960 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { 961 ;CHECK-LABEL: test_v4i32_post_reg_ld3: 962 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} 963 %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A) 964 %tmp = getelementptr i32, i32* %A, i64 %inc 965 store i32* %tmp, i32** %ptr 966 ret { <4 
x i32>, <4 x i32>, <4 x i32> } %ld3 967 } 968 969 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) 970 971 972 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) { 973 ;CHECK-LABEL: test_v2i32_post_imm_ld3: 974 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24 975 %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) 976 %tmp = getelementptr i32, i32* %A, i32 6 977 store i32* %tmp, i32** %ptr 978 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 979 } 980 981 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) { 982 ;CHECK-LABEL: test_v2i32_post_reg_ld3: 983 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} 984 %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A) 985 %tmp = getelementptr i32, i32* %A, i64 %inc 986 store i32* %tmp, i32** %ptr 987 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3 988 } 989 990 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) 991 992 993 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) { 994 ;CHECK-LABEL: test_v2i64_post_imm_ld3: 995 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48 996 %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) 997 %tmp = getelementptr i64, i64* %A, i32 6 998 store i64* %tmp, i64** %ptr 999 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 1000 } 1001 1002 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { 1003 ;CHECK-LABEL: test_v2i64_post_reg_ld3: 1004 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} 1005 %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A) 1006 %tmp = getelementptr i64, i64* %A, i64 %inc 1007 store i64* %tmp, i64** %ptr 1008 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3 1009 } 1010 1011 declare { <2 x i64>, 
<2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) 1012 1013 1014 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) { 1015 ;CHECK-LABEL: test_v1i64_post_imm_ld3: 1016 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 1017 %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) 1018 %tmp = getelementptr i64, i64* %A, i32 3 1019 store i64* %tmp, i64** %ptr 1020 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 1021 } 1022 1023 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) { 1024 ;CHECK-LABEL: test_v1i64_post_reg_ld3: 1025 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} 1026 %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A) 1027 %tmp = getelementptr i64, i64* %A, i64 %inc 1028 store i64* %tmp, i64** %ptr 1029 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3 1030 } 1031 1032 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) 1033 1034 1035 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) { 1036 ;CHECK-LABEL: test_v4f32_post_imm_ld3: 1037 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48 1038 %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) 1039 %tmp = getelementptr float, float* %A, i32 12 1040 store float* %tmp, float** %ptr 1041 ret { <4 x float>, <4 x float>, <4 x float> } %ld3 1042 } 1043 1044 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) { 1045 ;CHECK-LABEL: test_v4f32_post_reg_ld3: 1046 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} 1047 %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A) 1048 %tmp = getelementptr float, float* %A, i64 %inc 1049 store float* %tmp, float** %ptr 1050 ret { <4 x float>, <4 x float>, <4 x float> } %ld3 1051 } 1052 
declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)


; Shape shared by the tests below: a NEON structured-load intrinsic reads from
; %A, a getelementptr advances %A, and the advanced pointer is stored to %ptr.
; The expected assembly is a single load that also writes the base register
; back: "_imm" tests expect a literal byte offset, "_reg" tests expect an
; x-register offset (any x register is accepted).

; <2 x float> ld3, immediate post-index: 6 x float = 24 bytes.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 6
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %vecs
}

; <2 x float> ld3, register post-index.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld3:
;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %vecs
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*)


; <2 x double> ld3, immediate post-index: 6 x double = 48 bytes.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 6
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %vecs
}

; <2 x double> ld3, register post-index.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld3:
;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %vecs
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*)


; <1 x double> ld3, immediate post-index: 3 x double = 24 bytes.
; The expected instruction is a three-register ld1.1d, not an ld3.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 3
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %vecs
}

; <1 x double> ld3, register post-index; also expected as a three-register ld1.1d.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld3:
;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %vecs
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*)


; ---- ld4: four result vectors per load ----

; <16 x i8> ld4, immediate post-index: 64 x i8 = 64 bytes.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 64
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vecs
}

; <16 x i8> ld4, register post-index.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld4:
;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vecs
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)


; <8 x i8> ld4, immediate post-index: 32 x i8 = 32 bytes.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 32
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}

; <8 x i8> ld4, register post-index.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld4:
;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %vecs
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*)


; <8 x i16> ld4, immediate post-index: 32 x i16 = 64 bytes.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 32
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}

; <8 x i16> ld4, register post-index.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld4:
;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %vecs
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*)


; <4 x i16> ld4, immediate post-index: 16 x i16 = 32 bytes.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 16
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}

; <4 x i16> ld4, register post-index.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld4:
;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %vecs
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)


; <4 x i32> ld4, immediate post-index: 16 x i32 = 64 bytes.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 16
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}

; <4 x i32> ld4, register post-index.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vecs
}

declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*)


; <2 x i32> ld4, immediate post-index: 8 x i32 = 32 bytes.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 8
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}

; <2 x i32> ld4, register post-index.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %vecs
}

declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*)


; <2 x i64> ld4, immediate post-index: 8 x i64 = 64 bytes.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 8
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}

; <2 x i64> ld4, register post-index.
define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %vecs
}

declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*)


; <1 x i64> ld4, immediate post-index: 4 x i64 = 32 bytes.
; The expected instruction is a four-register ld1.1d, not an ld4.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}

; <1 x i64> ld4, register post-index; also expected as a four-register ld1.1d.
define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %vecs
}

declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*)


; <4 x float> ld4, immediate post-index: 16 x float = 64 bytes.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 16
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vecs
}

; <4 x float> ld4, register post-index.
define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld4:
;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %vecs
}

declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*)


; <2 x float> ld4, immediate post-index: 8 x float = 32 bytes.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 8
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vecs
}

; <2 x float> ld4, register post-index.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld4:
;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %vecs
}

declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*)


; <2 x double> ld4, immediate post-index: 8 x double = 64 bytes.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 8
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vecs
}

; <2 x double> ld4, register post-index.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld4:
;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %vecs
}

declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*)


; <1 x double> ld4, immediate post-index: 4 x double = 32 bytes.
; The expected instruction is a four-register ld1.1d, not an ld4.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vecs
}

; <1 x double> ld4, register post-index; also expected as a four-register ld1.1d.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %vecs
}

declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)

; ---- ld1x2: two consecutive vectors loaded by one multi-register ld1 ----

; <16 x i8> ld1x2, immediate post-index: 32 x i8 = 32 bytes.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], #32
  %vecs = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 32
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %vecs
}

; <16 x i8> ld1x2, register post-index.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %vecs
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)


; <8 x i8> ld1x2, immediate post-index: 16 x i8 = 16 bytes.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], #16
  %vecs = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 16
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %vecs
}

; <8 x i8> ld1x2, register post-index.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %vecs
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)


; <8 x i16> ld1x2, immediate post-index: 16 x i16 = 32 bytes.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], #32
  %vecs = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 16
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %vecs
}

; <8 x i16> ld1x2, register post-index.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %vecs
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)


; <4 x i16> ld1x2, immediate post-index: 8 x i16 = 16 bytes.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], #16
  %vecs = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 8
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %vecs
}

; <4 x i16> ld1x2, register post-index.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %vecs
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)


; <4 x i32> ld1x2, immediate post-index: 8 x i32 = 32 bytes.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
  %vecs = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 8
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %vecs
}

; <4 x i32> ld1x2, register post-index.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %vecs
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)


; <2 x i32> ld1x2, immediate post-index: 4 x i32 = 16 bytes.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
  %vecs = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %vecs
}

; <2 x i32> ld1x2, register post-index.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %vecs
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)


; <2 x i64> ld1x2, immediate post-index: 4 x i64 = 32 bytes.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
  %vecs = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 4
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %vecs
}

; <2 x i64> ld1x2, register post-index.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %vecs
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)


; <1 x i64> ld1x2, immediate post-index: 2 x i64 = 16 bytes.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], #16
  %vecs = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %vecs
}

; <1 x i64> ld1x2, register post-index.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %vecs
}

declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)


; <4 x float> ld1x2, immediate post-index: 8 x float = 32 bytes.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], #32
  %vecs = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 8
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %vecs
}

; <4 x float> ld1x2, register post-index.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %vecs
}

declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*)


; <2 x float> ld1x2, immediate post-index: 4 x float = 16 bytes.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], #16
  %vecs = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 4
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %vecs
}

; <2 x float> ld1x2, register post-index.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %vecs = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %vecs
}

declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*)


define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
;CHECK: ld1.2d { v0, v1 }, [x0], #32
  %ld1x2 = tail call { <2 x double>, <2 x double> }
@llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) 1584 %tmp = getelementptr double, double* %A, i32 4 1585 store double* %tmp, double** %ptr 1586 ret { <2 x double>, <2 x double> } %ld1x2 1587 } 1588 1589 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { 1590 ;CHECK-LABEL: test_v2f64_post_reg_ld1x2: 1591 ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}} 1592 %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A) 1593 %tmp = getelementptr double, double* %A, i64 %inc 1594 store double* %tmp, double** %ptr 1595 ret { <2 x double>, <2 x double> } %ld1x2 1596 } 1597 1598 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) 1599 1600 1601 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) { 1602 ;CHECK-LABEL: test_v1f64_post_imm_ld1x2: 1603 ;CHECK: ld1.1d { v0, v1 }, [x0], #16 1604 %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) 1605 %tmp = getelementptr double, double* %A, i32 2 1606 store double* %tmp, double** %ptr 1607 ret { <1 x double>, <1 x double> } %ld1x2 1608 } 1609 1610 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) { 1611 ;CHECK-LABEL: test_v1f64_post_reg_ld1x2: 1612 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}} 1613 %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A) 1614 %tmp = getelementptr double, double* %A, i64 %inc 1615 store double* %tmp, double** %ptr 1616 ret { <1 x double>, <1 x double> } %ld1x2 1617 } 1618 1619 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) 1620 1621 1622 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) { 1623 ;CHECK-LABEL: test_v16i8_post_imm_ld1x3: 1624 ;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48 1625 %ld1x3 = tail call { <16 x i8>, <16 x 
i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) 1626 %tmp = getelementptr i8, i8* %A, i32 48 1627 store i8* %tmp, i8** %ptr 1628 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3 1629 } 1630 1631 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { 1632 ;CHECK-LABEL: test_v16i8_post_reg_ld1x3: 1633 ;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} 1634 %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A) 1635 %tmp = getelementptr i8, i8* %A, i64 %inc 1636 store i8* %tmp, i8** %ptr 1637 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3 1638 } 1639 1640 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) 1641 1642 1643 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) { 1644 ;CHECK-LABEL: test_v8i8_post_imm_ld1x3: 1645 ;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24 1646 %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) 1647 %tmp = getelementptr i8, i8* %A, i32 24 1648 store i8* %tmp, i8** %ptr 1649 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3 1650 } 1651 1652 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) { 1653 ;CHECK-LABEL: test_v8i8_post_reg_ld1x3: 1654 ;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} 1655 %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A) 1656 %tmp = getelementptr i8, i8* %A, i64 %inc 1657 store i8* %tmp, i8** %ptr 1658 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3 1659 } 1660 1661 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) 1662 1663 1664 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) { 1665 ;CHECK-LABEL: test_v8i16_post_imm_ld1x3: 1666 ;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48 1667 %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } 
@llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) 1668 %tmp = getelementptr i16, i16* %A, i32 24 1669 store i16* %tmp, i16** %ptr 1670 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3 1671 } 1672 1673 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { 1674 ;CHECK-LABEL: test_v8i16_post_reg_ld1x3: 1675 ;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}} 1676 %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A) 1677 %tmp = getelementptr i16, i16* %A, i64 %inc 1678 store i16* %tmp, i16** %ptr 1679 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3 1680 } 1681 1682 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) 1683 1684 1685 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) { 1686 ;CHECK-LABEL: test_v4i16_post_imm_ld1x3: 1687 ;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24 1688 %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) 1689 %tmp = getelementptr i16, i16* %A, i32 12 1690 store i16* %tmp, i16** %ptr 1691 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3 1692 } 1693 1694 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) { 1695 ;CHECK-LABEL: test_v4i16_post_reg_ld1x3: 1696 ;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} 1697 %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A) 1698 %tmp = getelementptr i16, i16* %A, i64 %inc 1699 store i16* %tmp, i16** %ptr 1700 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3 1701 } 1702 1703 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) 1704 1705 1706 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) { 1707 ;CHECK-LABEL: test_v4i32_post_imm_ld1x3: 1708 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 1709 %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x 
i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) 1710 %tmp = getelementptr i32, i32* %A, i32 12 1711 store i32* %tmp, i32** %ptr 1712 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3 1713 } 1714 1715 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { 1716 ;CHECK-LABEL: test_v4i32_post_reg_ld1x3: 1717 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} 1718 %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A) 1719 %tmp = getelementptr i32, i32* %A, i64 %inc 1720 store i32* %tmp, i32** %ptr 1721 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3 1722 } 1723 1724 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) 1725 1726 1727 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) { 1728 ;CHECK-LABEL: test_v2i32_post_imm_ld1x3: 1729 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 1730 %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) 1731 %tmp = getelementptr i32, i32* %A, i32 6 1732 store i32* %tmp, i32** %ptr 1733 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3 1734 } 1735 1736 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) { 1737 ;CHECK-LABEL: test_v2i32_post_reg_ld1x3: 1738 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} 1739 %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A) 1740 %tmp = getelementptr i32, i32* %A, i64 %inc 1741 store i32* %tmp, i32** %ptr 1742 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3 1743 } 1744 1745 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) 1746 1747 1748 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) { 1749 ;CHECK-LABEL: test_v2i64_post_imm_ld1x3: 1750 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 1751 %ld1x3 = tail call { <2 x i64>, <2 x i64>, 
<2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) 1752 %tmp = getelementptr i64, i64* %A, i32 6 1753 store i64* %tmp, i64** %ptr 1754 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3 1755 } 1756 1757 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { 1758 ;CHECK-LABEL: test_v2i64_post_reg_ld1x3: 1759 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} 1760 %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A) 1761 %tmp = getelementptr i64, i64* %A, i64 %inc 1762 store i64* %tmp, i64** %ptr 1763 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3 1764 } 1765 1766 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) 1767 1768 1769 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) { 1770 ;CHECK-LABEL: test_v1i64_post_imm_ld1x3: 1771 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 1772 %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) 1773 %tmp = getelementptr i64, i64* %A, i32 3 1774 store i64* %tmp, i64** %ptr 1775 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3 1776 } 1777 1778 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) { 1779 ;CHECK-LABEL: test_v1i64_post_reg_ld1x3: 1780 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} 1781 %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A) 1782 %tmp = getelementptr i64, i64* %A, i64 %inc 1783 store i64* %tmp, i64** %ptr 1784 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3 1785 } 1786 1787 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) 1788 1789 1790 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) { 1791 ;CHECK-LABEL: test_v4f32_post_imm_ld1x3: 1792 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48 1793 %ld1x3 = tail call { <4 x 
float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) 1794 %tmp = getelementptr float, float* %A, i32 12 1795 store float* %tmp, float** %ptr 1796 ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3 1797 } 1798 1799 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { 1800 ;CHECK-LABEL: test_v4f32_post_reg_ld1x3: 1801 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} 1802 %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A) 1803 %tmp = getelementptr float, float* %A, i64 %inc 1804 store float* %tmp, float** %ptr 1805 ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3 1806 } 1807 1808 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) 1809 1810 1811 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) { 1812 ;CHECK-LABEL: test_v2f32_post_imm_ld1x3: 1813 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24 1814 %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) 1815 %tmp = getelementptr float, float* %A, i32 6 1816 store float* %tmp, float** %ptr 1817 ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3 1818 } 1819 1820 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) { 1821 ;CHECK-LABEL: test_v2f32_post_reg_ld1x3: 1822 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}} 1823 %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A) 1824 %tmp = getelementptr float, float* %A, i64 %inc 1825 store float* %tmp, float** %ptr 1826 ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3 1827 } 1828 1829 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) 1830 1831 1832 define { <2 x double>, <2 x double>, <2 x double> } 
@test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) { 1833 ;CHECK-LABEL: test_v2f64_post_imm_ld1x3: 1834 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48 1835 %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) 1836 %tmp = getelementptr double, double* %A, i32 6 1837 store double* %tmp, double** %ptr 1838 ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3 1839 } 1840 1841 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { 1842 ;CHECK-LABEL: test_v2f64_post_reg_ld1x3: 1843 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}} 1844 %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A) 1845 %tmp = getelementptr double, double* %A, i64 %inc 1846 store double* %tmp, double** %ptr 1847 ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3 1848 } 1849 1850 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) 1851 1852 1853 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) { 1854 ;CHECK-LABEL: test_v1f64_post_imm_ld1x3: 1855 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24 1856 %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) 1857 %tmp = getelementptr double, double* %A, i32 3 1858 store double* %tmp, double** %ptr 1859 ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3 1860 } 1861 1862 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) { 1863 ;CHECK-LABEL: test_v1f64_post_reg_ld1x3: 1864 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}} 1865 %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A) 1866 %tmp = getelementptr double, double* %A, i64 %inc 1867 store double* %tmp, double** %ptr 1868 ret { <1 x double>, 
<1 x double>, <1 x double> } %ld1x3 1869 } 1870 1871 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) 1872 1873 1874 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) { 1875 ;CHECK-LABEL: test_v16i8_post_imm_ld1x4: 1876 ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64 1877 %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) 1878 %tmp = getelementptr i8, i8* %A, i32 64 1879 store i8* %tmp, i8** %ptr 1880 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4 1881 } 1882 1883 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { 1884 ;CHECK-LABEL: test_v16i8_post_reg_ld1x4: 1885 ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 1886 %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A) 1887 %tmp = getelementptr i8, i8* %A, i64 %inc 1888 store i8* %tmp, i8** %ptr 1889 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4 1890 } 1891 1892 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) 1893 1894 1895 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) { 1896 ;CHECK-LABEL: test_v8i8_post_imm_ld1x4: 1897 ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32 1898 %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) 1899 %tmp = getelementptr i8, i8* %A, i32 32 1900 store i8* %tmp, i8** %ptr 1901 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4 1902 } 1903 1904 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) { 1905 ;CHECK-LABEL: test_v8i8_post_reg_ld1x4: 1906 ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 1907 %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } 
@llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A) 1908 %tmp = getelementptr i8, i8* %A, i64 %inc 1909 store i8* %tmp, i8** %ptr 1910 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4 1911 } 1912 1913 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) 1914 1915 1916 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) { 1917 ;CHECK-LABEL: test_v8i16_post_imm_ld1x4: 1918 ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64 1919 %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) 1920 %tmp = getelementptr i16, i16* %A, i32 32 1921 store i16* %tmp, i16** %ptr 1922 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4 1923 } 1924 1925 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { 1926 ;CHECK-LABEL: test_v8i16_post_reg_ld1x4: 1927 ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 1928 %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A) 1929 %tmp = getelementptr i16, i16* %A, i64 %inc 1930 store i16* %tmp, i16** %ptr 1931 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4 1932 } 1933 1934 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) 1935 1936 1937 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) { 1938 ;CHECK-LABEL: test_v4i16_post_imm_ld1x4: 1939 ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32 1940 %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) 1941 %tmp = getelementptr i16, i16* %A, i32 16 1942 store i16* %tmp, i16** %ptr 1943 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4 1944 } 1945 1946 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) { 1947 ;CHECK-LABEL: 
test_v4i16_post_reg_ld1x4: 1948 ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 1949 %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A) 1950 %tmp = getelementptr i16, i16* %A, i64 %inc 1951 store i16* %tmp, i16** %ptr 1952 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4 1953 } 1954 1955 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) 1956 1957 1958 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) { 1959 ;CHECK-LABEL: test_v4i32_post_imm_ld1x4: 1960 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 1961 %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) 1962 %tmp = getelementptr i32, i32* %A, i32 16 1963 store i32* %tmp, i32** %ptr 1964 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4 1965 } 1966 1967 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { 1968 ;CHECK-LABEL: test_v4i32_post_reg_ld1x4: 1969 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 1970 %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A) 1971 %tmp = getelementptr i32, i32* %A, i64 %inc 1972 store i32* %tmp, i32** %ptr 1973 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4 1974 } 1975 1976 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) 1977 1978 1979 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) { 1980 ;CHECK-LABEL: test_v2i32_post_imm_ld1x4: 1981 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32 1982 %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) 1983 %tmp = getelementptr i32, i32* %A, i32 8 1984 store i32* %tmp, i32** %ptr 1985 ret { <2 x i32>, <2 x i32>, <2 x 
i32>, <2 x i32> } %ld1x4 1986 } 1987 1988 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) { 1989 ;CHECK-LABEL: test_v2i32_post_reg_ld1x4: 1990 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 1991 %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A) 1992 %tmp = getelementptr i32, i32* %A, i64 %inc 1993 store i32* %tmp, i32** %ptr 1994 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4 1995 } 1996 1997 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) 1998 1999 2000 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) { 2001 ;CHECK-LABEL: test_v2i64_post_imm_ld1x4: 2002 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64 2003 %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) 2004 %tmp = getelementptr i64, i64* %A, i32 8 2005 store i64* %tmp, i64** %ptr 2006 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4 2007 } 2008 2009 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { 2010 ;CHECK-LABEL: test_v2i64_post_reg_ld1x4: 2011 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 2012 %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A) 2013 %tmp = getelementptr i64, i64* %A, i64 %inc 2014 store i64* %tmp, i64** %ptr 2015 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4 2016 } 2017 2018 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) 2019 2020 2021 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) { 2022 ;CHECK-LABEL: test_v1i64_post_imm_ld1x4: 2023 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32 2024 %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 
x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) 2025 %tmp = getelementptr i64, i64* %A, i32 4 2026 store i64* %tmp, i64** %ptr 2027 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4 2028 } 2029 2030 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) { 2031 ;CHECK-LABEL: test_v1i64_post_reg_ld1x4: 2032 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 2033 %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A) 2034 %tmp = getelementptr i64, i64* %A, i64 %inc 2035 store i64* %tmp, i64** %ptr 2036 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4 2037 } 2038 2039 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) 2040 2041 2042 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) { 2043 ;CHECK-LABEL: test_v4f32_post_imm_ld1x4: 2044 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64 2045 %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) 2046 %tmp = getelementptr float, float* %A, i32 16 2047 store float* %tmp, float** %ptr 2048 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4 2049 } 2050 2051 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) { 2052 ;CHECK-LABEL: test_v4f32_post_reg_ld1x4: 2053 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 2054 %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A) 2055 %tmp = getelementptr float, float* %A, i64 %inc 2056 store float* %tmp, float** %ptr 2057 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4 2058 } 2059 2060 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) 2061 2062 2063 
; ld1x4 of four <2 x float> vectors from %A. The pointer written through %ptr is
; %A advanced by 8 floats (32 bytes), so the load is expected to be emitted in
; its post-indexed form with an immediate of #32.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 8
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}

; Same load as above, but the stored pointer is %A advanced by the run-time
; element count %inc; the expected output only requires that the post-index
; operand is some x register.
define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %quad
}

; Intrinsic used by the two v2f32 ld1x4 tests above.
declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*)


; ld1x4 of four <2 x double> vectors from %A. The stored pointer is %A advanced
; by 8 doubles (64 bytes), matching the expected #64 post-index immediate.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
  %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 8
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}

; Register-increment variant: the stored pointer is %A advanced by %inc
; doubles, and the expected post-index operand is some x register.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}

; Intrinsic used by the two v2f64 ld1x4 tests above.
declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*)


; ld1x4 of four <1 x double> vectors from %A. The stored pointer is %A advanced
; by 4 doubles (32 bytes), matching the expected #32 post-index immediate.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
  %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}

; Register-increment variant: the stored pointer is %A advanced by %inc
; doubles, and the expected post-index operand is some x register.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}

; Intrinsic used by the two v1f64 ld1x4 tests above.
declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*)


; ld2r producing two <16 x i8> vectors from %A. The stored pointer is %A
; advanced by 2 bytes, matching the expected #2 post-index immediate.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_ld2r:
;CHECK: ld2r.16b { v0, v1 }, [x0], #2
  %pair = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 2
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %pair
}

; ld2r.16b, pointer bumped by %inc elements: register writeback.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_ld2r:
;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %dup2
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly

; ld2r.8b, pointer bumped by 2 x i8 (2 bytes): immediate writeback.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld2r:
;CHECK: ld2r.8b { v0, v1 }, [x0], #2
  %dup2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 2
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %dup2
}

; ld2r.8b, pointer bumped by %inc elements: register writeback.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld2r:
;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %dup2
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly

; ld2r.8h, pointer bumped by 2 x i16 (4 bytes): immediate writeback.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld2r:
;CHECK: ld2r.8h { v0, v1 }, [x0], #4
  %dup2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 2
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %dup2
}

; ld2r.8h, pointer bumped by %inc elements: register writeback.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld2r:
;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %dup2
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly

; ld2r.4h, pointer bumped by 2 x i16 (4 bytes): immediate writeback.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_ld2r:
;CHECK: ld2r.4h { v0, v1 }, [x0], #4
  %dup2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 2
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %dup2
}

; ld2r.4h, pointer bumped by %inc elements: register writeback.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_ld2r:
;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %dup2
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly

; ld2r.4s, pointer bumped by 2 x i32 (8 bytes): immediate writeback.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], #8
  %dup2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 2
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %dup2
}

; ld2r.4s, pointer bumped by %inc elements: register writeback.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %dup2
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly

; ld2r.2s, pointer bumped by 2 x i32 (8 bytes): immediate writeback.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], #8
  %dup2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 2
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %dup2
}

; ld2r.2s, pointer bumped by %inc elements: register writeback.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %dup2
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly

; ld2r.2d, pointer bumped by 2 x i64 (16 bytes): immediate writeback.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], #16
  %dup2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %dup2
}

; ld2r.2d, pointer bumped by %inc elements: register writeback.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %dup2
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly

; ld2r.1d, pointer bumped by 2 x i64 (16 bytes): immediate writeback.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], #16
  %dup2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %dup2
}

; ld2r.1d, pointer bumped by %inc elements: register writeback.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %dup2
}

declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly

; ld2r.4s (float), pointer bumped by 2 x float (8 bytes): immediate writeback.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], #8
  %dup2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 2
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %dup2
}

; ld2r.4s (float), pointer bumped by %inc elements: register writeback.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_ld2r:
;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %dup2
}

declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly

; ld2r.2s (float), pointer bumped by 2 x float (8 bytes): immediate writeback.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], #8
  %dup2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 2
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %dup2
}

; ld2r.2s (float), pointer bumped by %inc elements: register writeback.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_ld2r:
;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %dup2
}

declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly

; ld2r.2d (double), pointer bumped by 2 x double (16 bytes): immediate writeback.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], #16
  %dup2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 2
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %dup2
}

; ld2r.2d (double), pointer bumped by %inc elements: register writeback.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_ld2r:
;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %dup2
}

declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly

; ld2r.1d (double), pointer bumped by 2 x double (16 bytes): immediate writeback.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], #16
  %dup2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 2
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %dup2
}

; ld2r.1d (double), pointer bumped by %inc elements: register writeback.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_ld2r:
;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
  %dup2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %dup2
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly

; ld3r.16b, pointer bumped by 3 x i8 (3 bytes): immediate writeback.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_ld3r:
;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
  %dup3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 3
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %dup3
}

; ld3r.16b, pointer bumped by %inc elements: register writeback.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_ld3r:
;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %dup3
}

declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly

; ld3r.8b, pointer bumped by 3 x i8 (3 bytes): immediate writeback.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld3r:
;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
  %dup3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 3
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %dup3
}

; ld3r.8b, pointer bumped by %inc elements: register writeback.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld3r:
;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %dup3
}

declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly

; ld3r.8h, pointer bumped by 3 x i16 (6 bytes): immediate writeback.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld3r:
;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
  %dup3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 3
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %dup3
}

; ld3r.8h, pointer bumped by %inc elements: register writeback.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld3r:
;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %dup3
}

declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly

; ld3r.4h, pointer bumped by 3 x i16 (6 bytes): immediate writeback.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_ld3r:
;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
  %dup3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 3
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %dup3
}

; ld3r.4h, pointer bumped by %inc elements: register writeback.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_ld3r:
;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %dup3
}

declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly

; ld3r.4s, pointer bumped by 3 x i32 (12 bytes): immediate writeback.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
  %dup3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 3
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %dup3
}

; ld3r.4s, pointer bumped by %inc elements: register writeback.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %dup3
}

declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly

; ld3r.2s, pointer bumped by 3 x i32 (12 bytes): immediate writeback.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
  %dup3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 3
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %dup3
}

; ld3r.2s, pointer bumped by %inc elements: register writeback.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %dup3
}

declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly

; ld3r.2d, pointer bumped by 3 x i64 (24 bytes): immediate writeback.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
  %dup3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %dup3
}

; ld3r.2d, pointer bumped by %inc elements: register writeback.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %dup3
}

declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly

; ld3r.1d, pointer bumped by 3 x i64 (24 bytes): immediate writeback.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
  %dup3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %dup3
}

; ld3r.1d, pointer bumped by %inc elements: register writeback.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %dup3
}

declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly

; ld3r.4s (float), pointer bumped by 3 x float (12 bytes): immediate writeback.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
  %dup3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 3
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %dup3
}

; ld3r.4s (float), pointer bumped by %inc elements: register writeback.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_ld3r:
;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %dup3
}

declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly

; ld3r.2s (float), pointer bumped by 3 x float (12 bytes): immediate writeback.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
  %dup3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i32 3
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %dup3
}

; ld3r.2s (float), pointer bumped by %inc elements: register writeback.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_ld3r:
;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %dup3
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly

; ld3r.2d (double), pointer bumped by 3 x double (24 bytes): immediate writeback.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
  %dup3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 3
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %dup3
}

; ld3r.2d (double), pointer bumped by %inc elements: register writeback.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_ld3r:
;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %dup3
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly

; ld3r.1d (double), pointer bumped by 3 x double (24 bytes): immediate writeback.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
  %dup3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 3
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %dup3
}

; ld3r.1d (double), pointer bumped by %inc elements: register writeback.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_ld3r:
;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  %dup3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %dup3
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly

; ld4r.16b, pointer bumped by 4 x i8 (4 bytes): immediate writeback.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_ld4r:
;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
  %dup4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 4
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %dup4
}

; ld4r.16b, pointer bumped by %inc elements: register writeback.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_ld4r:
;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %dup4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %dup4
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly

; ld4r.8b, pointer bumped by 4 x i8 (4 bytes): immediate writeback.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld4r:
;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
  %dup4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i32 4
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %dup4
}

; ld4r.8b, pointer bumped by %inc elements: register writeback.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld4r:
;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %dup4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %dup4
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly

; ld4r.8h, pointer bumped by 4 x i16 (8 bytes): immediate writeback.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld4r:
;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
  %dup4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %dup4
}

; ld4r.8h, pointer bumped by %inc elements: register writeback.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld4r:
;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %dup4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %dup4
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly

; ld4r.4h, pointer bumped by 4 x i16 (8 bytes): immediate writeback.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_ld4r:
;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
  %dup4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %dup4
}

; ld4r.4h, pointer bumped by %inc elements: register writeback.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_ld4r:
;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %dup4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %dup4
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly

; ld4r.4s, pointer bumped by 4 x i32 (16 bytes): immediate writeback.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_ld4r:
;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
  %dup4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %dup4
}

; ld4r.4s, pointer bumped by %inc elements: register writeback.
define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_ld4r:
;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %dup4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %dup4
}

declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly

; ld4r.2s, pointer bumped by 4 x i32 (16 bytes): immediate writeback.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_ld4r:
;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
  %dup4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i32 4
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %dup4
}

; ld4r.2s, pointer bumped by %inc elements: register writeback.
define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_ld4r:
;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %dup4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %dup4
}

declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly

2747 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { 2748 ;CHECK-LABEL: test_v2i64_post_imm_ld4r: 2749 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32 2750 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) 2751 %tmp = getelementptr i64, i64* %A, i32 4 2752 store i64* %tmp, i64** %ptr 2753 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 2754 } 2755 2756 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { 2757 ;CHECK-LABEL: test_v2i64_post_reg_ld4r: 2758 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 2759 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A) 2760 %tmp = getelementptr i64, i64* %A, i64 %inc 2761 store i64* %tmp, i64** %ptr 2762 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 2763 } 2764 2765 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly 2766 2767 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind { 2768 ;CHECK-LABEL: test_v1i64_post_imm_ld4r: 2769 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32 2770 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) 2771 %tmp = getelementptr i64, i64* %A, i32 4 2772 store i64* %tmp, i64** %ptr 2773 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 2774 } 2775 2776 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind { 2777 ;CHECK-LABEL: test_v1i64_post_reg_ld4r: 2778 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 2779 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A) 2780 %tmp = getelementptr i64, i64* %A, i64 %inc 2781 store i64* %tmp, i64** %ptr 2782 ret 
{ <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 2783 } 2784 2785 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly 2786 2787 2788 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { 2789 ;CHECK-LABEL: test_v4f32_post_imm_ld4r: 2790 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16 2791 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) 2792 %tmp = getelementptr float, float* %A, i32 4 2793 store float* %tmp, float** %ptr 2794 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 2795 } 2796 2797 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind { 2798 ;CHECK-LABEL: test_v4f32_post_reg_ld4r: 2799 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}} 2800 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A) 2801 %tmp = getelementptr float, float* %A, i64 %inc 2802 store float* %tmp, float** %ptr 2803 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 2804 } 2805 2806 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly 2807 2808 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind { 2809 ;CHECK-LABEL: test_v2f32_post_imm_ld4r: 2810 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16 2811 %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A) 2812 %tmp = getelementptr float, float* %A, i32 4 2813 store float* %tmp, float** %ptr 2814 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 2815 } 2816 2817 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** 
%ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_ld4r:
;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
  %tmp = getelementptr float, float* %A, i64 %inc
  store float* %tmp, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
}

declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly


; ld4r, <2 x double>: %A stepped by 4 x double -> immediate post-index.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_ld4r:
;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
  %quad = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}

; ld4r, <2 x double>: %A stepped by %inc x double -> register post-index.
define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_ld4r:
;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %quad
}

declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly

; ld4r, <1 x double>: %A stepped by 4 x double -> immediate post-index.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_ld4r:
;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
  %quad = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i32 4
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}

; ld4r, <1 x double>: %A stepped by %inc x double -> register post-index.
define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_ld4r:
;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  %quad = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %quad
}

declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly


; ld2lane (lane 0), <16 x i8>: %A stepped by 2 x i8 -> immediate post-index.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], #2
  %pair = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i32 2
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %pair
}

; ld2lane (lane 0), <16 x i8>: %A stepped by %inc x i8 -> register post-index.
define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8> } %pair
}

declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly


; ld2lane (lane 0), <8 x i8>: %A stepped by 2 x i8 -> immediate post-index.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], #2
  %pair = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i32 2
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %pair
}

; ld2lane (lane 0), <8 x i8>: %A stepped by %inc x i8 -> register post-index.
define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8> } %pair
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly


; ld2lane (lane 0), <8 x i16>: %A stepped by 2 x i16 -> immediate post-index.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], #4
  %pair = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i32 2
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %pair
}

; ld2lane (lane 0), <8 x i16>: %A stepped by %inc x i16 -> register post-index.
define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16> } %pair
}

declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly


; ld2lane (lane 0), <4 x i16>: %A stepped by 2 x i16 -> immediate post-index.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], #4
  %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i32 2
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %pair
}

; ld2lane (lane 0), <4 x i16>: %A stepped by %inc x i16 -> register post-index.
define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16> } %pair
}

declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly


; ld2lane (lane 0), <4 x i32>: %A stepped by 2 x i32 -> immediate post-index.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
  %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i32 2
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %pair
}

; ld2lane (lane 0), <4 x i32>: %A stepped by %inc x i32 -> register post-index.
define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32> } %pair
}

declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly


; ld2lane (lane 0), <2 x i32>: %A stepped by 2 x i32 -> immediate post-index.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
  %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i32 2
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %pair
}

; ld2lane (lane 0), <2 x i32>: %A stepped by %inc x i32 -> register post-index.
define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32> } %pair
}

declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly


; ld2lane (lane 0), <2 x i64>: %A stepped by 2 x i64 -> immediate post-index.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
  %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %pair
}

; ld2lane (lane 0), <2 x i64>: %A stepped by %inc x i64 -> register post-index.
define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64> } %pair
}

declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly


; ld2lane (lane 0), <1 x i64>: %A stepped by 2 x i64 -> immediate post-index.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
  %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i32 2
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %pair
}

; ld2lane (lane 0), <1 x i64>: %A stepped by %inc x i64 -> register post-index.
define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64> } %pair
}

declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly


; ld2lane (lane 0), <4 x float>: %A stepped by 2 x float -> immediate post-index.
define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
  %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
  %next = getelementptr float, float* %A, i32 2
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}

; ld2lane (lane 0), <4 x float>: %A stepped by %inc x float -> register post-index.
define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float> } %pair
}

declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly


; ld2lane (lane 0), <2 x float>: %A stepped by 2 x float -> immediate post-index.
define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], #8
  %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
  %next = getelementptr float, float* %A, i32 2
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}

; ld2lane (lane 0), <2 x float>: %A stepped by %inc x float -> register post-index.
define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float> } %pair
}

declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly


; ld2lane (lane 0), <2 x double>: %A stepped by 2 x double -> immediate post-index.
define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
  %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
  %next = getelementptr double, double* %A, i32 2
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}

; ld2lane (lane 0), <2 x double>: %A stepped by %inc x double -> register post-index.
define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double> } %pair
}

declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly


; ld2lane (lane 0), <1 x double>: %A stepped by 2 x double -> immediate post-index.
define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], #16
  %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
  %next = getelementptr double, double* %A, i32 2
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}

; ld2lane (lane 0), <1 x double>: %A stepped by %inc x double -> register post-index.
define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  %pair = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double> } %pair
}

declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly


; ld3lane (lane 0), <16 x i8>: %A stepped by 3 x i8 -> immediate post-index.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
  %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i32 3
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}

; ld3lane (lane 0), <16 x i8>: %A stepped by %inc x i8 -> register post-index.
define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8> } %triple
}

declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly


; ld3lane (lane 0), <8 x i8>: %A stepped by 3 x i8 -> immediate post-index.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
  %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i32 3
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}

; ld3lane (lane 0), <8 x i8>: %A stepped by %inc x i8 -> register post-index.
define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8> } %triple
}

declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly


; ld3lane (lane 0), <8 x i16>: %A stepped by 3 x i16 -> immediate post-index.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
  %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i32 3
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}

; ld3lane (lane 0), <8 x i16>: %A stepped by %inc x i16 -> register post-index.
define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16> } %triple
}

declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly


; ld3lane (lane 0), <4 x i16>: %A stepped by 3 x i16 -> immediate post-index.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
  %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i32 3
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}

; ld3lane (lane 0), <4 x i16>: %A stepped by %inc x i16 -> register post-index.
define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16> } %triple
}

declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly


; ld3lane (lane 0), <4 x i32>: %A stepped by 3 x i32 -> immediate post-index.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
  %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i32 3
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}

; ld3lane (lane 0), <4 x i32>: %A stepped by %inc x i32 -> register post-index.
define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <4 x i32>, <4 x i32>, <4 x i32> } %triple
}

declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly


; ld3lane (lane 0), <2 x i32>: %A stepped by 3 x i32 -> immediate post-index.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
  %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i32 3
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}

; ld3lane (lane 0), <2 x i32>: %A stepped by %inc x i32 -> register post-index.
define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  store i32* %next, i32** %ptr
  ret { <2 x i32>, <2 x i32>, <2 x i32> } %triple
}

declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly


; ld3lane (lane 0), <2 x i64>: %A stepped by 3 x i64 -> immediate post-index.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
  %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}

; ld3lane (lane 0), <2 x i64>: %A stepped by %inc x i64 -> register post-index.
define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <2 x i64>, <2 x i64>, <2 x i64> } %triple
}

declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly


; ld3lane (lane 0), <1 x i64>: %A stepped by 3 x i64 -> immediate post-index.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
  %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i32 3
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}

; ld3lane (lane 0), <1 x i64>: %A stepped by %inc x i64 -> register post-index.
define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  store i64* %next, i64** %ptr
  ret { <1 x i64>, <1 x i64>, <1 x i64> } %triple
}

declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly


; ld3lane (lane 0), <4 x float>: %A stepped by 3 x float -> immediate post-index.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
  %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
  %next = getelementptr float, float* %A, i32 3
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}

; ld3lane (lane 0), <4 x float>: %A stepped by %inc x float -> register post-index.
define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <4 x float>, <4 x float>, <4 x float> } %triple
}

declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly


; ld3lane (lane 0), <2 x float>: %A stepped by 3 x float -> immediate post-index.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
  %triple = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
  %next = getelementptr float, float* %A, i32 3
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}

; ld3lane (lane 0), <2 x float>: %A stepped by %inc x float -> register post-index.
define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  store float* %next, float** %ptr
  ret { <2 x float>, <2 x float>, <2 x float> } %triple
}

declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly


; ld3lane (lane 0), <2 x double>: %A stepped by 3 x double -> immediate post-index.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
  %triple = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
  %next = getelementptr double, double* %A, i32 3
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}

; ld3lane (lane 0), <2 x double>: %A stepped by %inc x double -> register post-index.
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <2 x double>, <2 x double>, <2 x double> } %triple
}

declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly


; ld3lane (lane 0), <1 x double>: %A stepped by 3 x double -> immediate post-index.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
  %triple = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
  %next = getelementptr double, double* %A, i32 3
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}

; ld3lane (lane 0), <1 x double>: %A stepped by %inc x double -> register post-index.
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  %triple = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  store double* %next, double** %ptr
  ret { <1 x double>, <1 x double>, <1 x double> } %triple
}

declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly


; ld4lane (lane 0), <16 x i8>: %A stepped by 4 x i8 -> immediate post-index.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
  %quad = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i32 4
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}

; ld4lane (lane 0), <16 x i8>: %A stepped by %inc x i8 -> register post-index.
define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %quad = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %quad
}

declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly


; ld4lane (lane 0), <8 x i8>: %A stepped by 4 x i8 -> immediate post-index.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
  %quad = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i32 4
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}

; ld4lane (lane 0), <8 x i8>: %A stepped by %inc x i8 -> register post-index.
define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %quad = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  store i8* %next, i8** %ptr
  ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %quad
}

declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly


; ld4lane (lane 0), <8 x i16>: %A stepped by 4 x i16 -> immediate post-index.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
  %quad = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}

; ld4lane (lane 0), <8 x i16>: %A stepped by %inc x i16 -> register post-index.
define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %quad = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %quad
}

declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly


; ld4lane (lane 0), <4 x i16>: %A stepped by 4 x i16 -> immediate post-index.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
  %quad = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i32 4
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}

; ld4lane (lane 0), <4 x i16>: %A stepped by %inc x i16 -> register post-index.
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  %quad = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  store i16* %next, i16** %ptr
  ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %quad
}

declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly


define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
%ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) 3462 %tmp = getelementptr i32, i32* %A, i32 4 3463 store i32* %tmp, i32** %ptr 3464 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 3465 } 3466 3467 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { 3468 ;CHECK-LABEL: test_v4i32_post_reg_ld4lane: 3469 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 3470 %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) 3471 %tmp = getelementptr i32, i32* %A, i64 %inc 3472 store i32* %tmp, i32** %ptr 3473 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4 3474 } 3475 3476 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly 3477 3478 3479 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { 3480 ;CHECK-LABEL: test_v2i32_post_imm_ld4lane: 3481 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 3482 %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) 3483 %tmp = getelementptr i32, i32* %A, i32 4 3484 store i32* %tmp, i32** %ptr 3485 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 3486 } 3487 3488 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { 3489 ;CHECK-LABEL: test_v2i32_post_reg_ld4lane: 3490 ;CHECK: ld4.s { v0, v1, v2, 
v3 }[0], [x0], x{{[0-9]+}} 3491 %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) 3492 %tmp = getelementptr i32, i32* %A, i64 %inc 3493 store i32* %tmp, i32** %ptr 3494 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4 3495 } 3496 3497 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly 3498 3499 3500 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { 3501 ;CHECK-LABEL: test_v2i64_post_imm_ld4lane: 3502 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 3503 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) 3504 %tmp = getelementptr i64, i64* %A, i32 4 3505 store i64* %tmp, i64** %ptr 3506 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 3507 } 3508 3509 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { 3510 ;CHECK-LABEL: test_v2i64_post_reg_ld4lane: 3511 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 3512 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) 3513 %tmp = getelementptr i64, i64* %A, i64 %inc 3514 store i64* %tmp, i64** %ptr 3515 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4 3516 } 3517 3518 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly 3519 3520 3521 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x 
i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { 3522 ;CHECK-LABEL: test_v1i64_post_imm_ld4lane: 3523 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 3524 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) 3525 %tmp = getelementptr i64, i64* %A, i32 4 3526 store i64* %tmp, i64** %ptr 3527 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 3528 } 3529 3530 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { 3531 ;CHECK-LABEL: test_v1i64_post_reg_ld4lane: 3532 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 3533 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) 3534 %tmp = getelementptr i64, i64* %A, i64 %inc 3535 store i64* %tmp, i64** %ptr 3536 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4 3537 } 3538 3539 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly 3540 3541 3542 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { 3543 ;CHECK-LABEL: test_v4f32_post_imm_ld4lane: 3544 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 3545 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) 3546 %tmp = getelementptr float, float* %A, i32 4 3547 store float* %tmp, float** %ptr 3548 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } 
%ld4 3549 } 3550 3551 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { 3552 ;CHECK-LABEL: test_v4f32_post_reg_ld4lane: 3553 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 3554 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) 3555 %tmp = getelementptr float, float* %A, i64 %inc 3556 store float* %tmp, float** %ptr 3557 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4 3558 } 3559 3560 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly 3561 3562 3563 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { 3564 ;CHECK-LABEL: test_v2f32_post_imm_ld4lane: 3565 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 3566 %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) 3567 %tmp = getelementptr float, float* %A, i32 4 3568 store float* %tmp, float** %ptr 3569 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 3570 } 3571 3572 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { 3573 ;CHECK-LABEL: test_v2f32_post_reg_ld4lane: 3574 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 3575 %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x 
float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) 3576 %tmp = getelementptr float, float* %A, i64 %inc 3577 store float* %tmp, float** %ptr 3578 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4 3579 } 3580 3581 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly 3582 3583 3584 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { 3585 ;CHECK-LABEL: test_v2f64_post_imm_ld4lane: 3586 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 3587 %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) 3588 %tmp = getelementptr double, double* %A, i32 4 3589 store double* %tmp, double** %ptr 3590 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 3591 } 3592 3593 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { 3594 ;CHECK-LABEL: test_v2f64_post_reg_ld4lane: 3595 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 3596 %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) 3597 %tmp = getelementptr double, double* %A, i64 %inc 3598 store double* %tmp, double** %ptr 3599 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4 3600 } 3601 3602 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, 
double*) nounwind readonly 3603 3604 3605 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { 3606 ;CHECK-LABEL: test_v1f64_post_imm_ld4lane: 3607 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 3608 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) 3609 %tmp = getelementptr double, double* %A, i32 4 3610 store double* %tmp, double** %ptr 3611 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 3612 } 3613 3614 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { 3615 ;CHECK-LABEL: test_v1f64_post_reg_ld4lane: 3616 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 3617 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) 3618 %tmp = getelementptr double, double* %A, i64 %inc 3619 store double* %tmp, double** %ptr 3620 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4 3621 } 3622 3623 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly 3624 3625 3626 define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { 3627 ;CHECK-LABEL: test_v16i8_post_imm_st2: 3628 ;CHECK: st2.16b { v0, v1 }, [x0], #32 3629 call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) 3630 %tmp = getelementptr i8, i8* %A, i32 32 3631 ret i8* %tmp 3632 } 3633 3634 define i8* 
@test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { 3635 ;CHECK-LABEL: test_v16i8_post_reg_st2: 3636 ;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}} 3637 call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A) 3638 %tmp = getelementptr i8, i8* %A, i64 %inc 3639 ret i8* %tmp 3640 } 3641 3642 declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) 3643 3644 3645 define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { 3646 ;CHECK-LABEL: test_v8i8_post_imm_st2: 3647 ;CHECK: st2.8b { v0, v1 }, [x0], #16 3648 call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) 3649 %tmp = getelementptr i8, i8* %A, i32 16 3650 ret i8* %tmp 3651 } 3652 3653 define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { 3654 ;CHECK-LABEL: test_v8i8_post_reg_st2: 3655 ;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}} 3656 call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A) 3657 %tmp = getelementptr i8, i8* %A, i64 %inc 3658 ret i8* %tmp 3659 } 3660 3661 declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) 3662 3663 3664 define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { 3665 ;CHECK-LABEL: test_v8i16_post_imm_st2: 3666 ;CHECK: st2.8h { v0, v1 }, [x0], #32 3667 call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) 3668 %tmp = getelementptr i16, i16* %A, i32 16 3669 ret i16* %tmp 3670 } 3671 3672 define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind { 3673 ;CHECK-LABEL: test_v8i16_post_reg_st2: 3674 ;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}} 3675 call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A) 3676 %tmp = getelementptr i16, i16* %A, i64 %inc 3677 ret i16* %tmp 3678 } 3679 3680 declare void 
@llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) 3681 3682 3683 define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { 3684 ;CHECK-LABEL: test_v4i16_post_imm_st2: 3685 ;CHECK: st2.4h { v0, v1 }, [x0], #16 3686 call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) 3687 %tmp = getelementptr i16, i16* %A, i32 8 3688 ret i16* %tmp 3689 } 3690 3691 define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind { 3692 ;CHECK-LABEL: test_v4i16_post_reg_st2: 3693 ;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}} 3694 call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A) 3695 %tmp = getelementptr i16, i16* %A, i64 %inc 3696 ret i16* %tmp 3697 } 3698 3699 declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) 3700 3701 3702 define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { 3703 ;CHECK-LABEL: test_v4i32_post_imm_st2: 3704 ;CHECK: st2.4s { v0, v1 }, [x0], #32 3705 call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) 3706 %tmp = getelementptr i32, i32* %A, i32 8 3707 ret i32* %tmp 3708 } 3709 3710 define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind { 3711 ;CHECK-LABEL: test_v4i32_post_reg_st2: 3712 ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} 3713 call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A) 3714 %tmp = getelementptr i32, i32* %A, i64 %inc 3715 ret i32* %tmp 3716 } 3717 3718 declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) 3719 3720 3721 define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { 3722 ;CHECK-LABEL: test_v2i32_post_imm_st2: 3723 ;CHECK: st2.2s { v0, v1 }, [x0], #16 3724 call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) 3725 %tmp = 
getelementptr i32, i32* %A, i32 4 3726 ret i32* %tmp 3727 } 3728 3729 define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind { 3730 ;CHECK-LABEL: test_v2i32_post_reg_st2: 3731 ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}} 3732 call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A) 3733 %tmp = getelementptr i32, i32* %A, i64 %inc 3734 ret i32* %tmp 3735 } 3736 3737 declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) 3738 3739 3740 define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { 3741 ;CHECK-LABEL: test_v2i64_post_imm_st2: 3742 ;CHECK: st2.2d { v0, v1 }, [x0], #32 3743 call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) 3744 %tmp = getelementptr i64, i64* %A, i64 4 3745 ret i64* %tmp 3746 } 3747 3748 define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind { 3749 ;CHECK-LABEL: test_v2i64_post_reg_st2: 3750 ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} 3751 call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A) 3752 %tmp = getelementptr i64, i64* %A, i64 %inc 3753 ret i64* %tmp 3754 } 3755 3756 declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) 3757 3758 3759 define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { 3760 ;CHECK-LABEL: test_v1i64_post_imm_st2: 3761 ;CHECK: st1.1d { v0, v1 }, [x0], #16 3762 call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) 3763 %tmp = getelementptr i64, i64* %A, i64 2 3764 ret i64* %tmp 3765 } 3766 3767 define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind { 3768 ;CHECK-LABEL: test_v1i64_post_reg_st2: 3769 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} 3770 call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A) 3771 %tmp 
= getelementptr i64, i64* %A, i64 %inc 3772 ret i64* %tmp 3773 } 3774 3775 declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) 3776 3777 3778 define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind { 3779 ;CHECK-LABEL: test_v4f32_post_imm_st2: 3780 ;CHECK: st2.4s { v0, v1 }, [x0], #32 3781 call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) 3782 %tmp = getelementptr float, float* %A, i32 8 3783 ret float* %tmp 3784 } 3785 3786 define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind { 3787 ;CHECK-LABEL: test_v4f32_post_reg_st2: 3788 ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}} 3789 call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A) 3790 %tmp = getelementptr float, float* %A, i64 %inc 3791 ret float* %tmp 3792 } 3793 3794 declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*) 3795 3796 3797 define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind { 3798 ;CHECK-LABEL: test_v2f32_post_imm_st2: 3799 ;CHECK: st2.2s { v0, v1 }, [x0], #16 3800 call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) 3801 %tmp = getelementptr float, float* %A, i32 4 3802 ret float* %tmp 3803 } 3804 3805 define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind { 3806 ;CHECK-LABEL: test_v2f32_post_reg_st2: 3807 ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}} 3808 call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A) 3809 %tmp = getelementptr float, float* %A, i64 %inc 3810 ret float* %tmp 3811 } 3812 3813 declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*) 3814 3815 3816 define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) 
nounwind { 3817 ;CHECK-LABEL: test_v2f64_post_imm_st2: 3818 ;CHECK: st2.2d { v0, v1 }, [x0], #32 3819 call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) 3820 %tmp = getelementptr double, double* %A, i64 4 3821 ret double* %tmp 3822 } 3823 3824 define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind { 3825 ;CHECK-LABEL: test_v2f64_post_reg_st2: 3826 ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}} 3827 call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A) 3828 %tmp = getelementptr double, double* %A, i64 %inc 3829 ret double* %tmp 3830 } 3831 3832 declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*) 3833 3834 3835 define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind { 3836 ;CHECK-LABEL: test_v1f64_post_imm_st2: 3837 ;CHECK: st1.1d { v0, v1 }, [x0], #16 3838 call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) 3839 %tmp = getelementptr double, double* %A, i64 2 3840 ret double* %tmp 3841 } 3842 3843 define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind { 3844 ;CHECK-LABEL: test_v1f64_post_reg_st2: 3845 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}} 3846 call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A) 3847 %tmp = getelementptr double, double* %A, i64 %inc 3848 ret double* %tmp 3849 } 3850 3851 declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*) 3852 3853 3854 define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { 3855 ;CHECK-LABEL: test_v16i8_post_imm_st3: 3856 ;CHECK: st3.16b { v0, v1, v2 }, [x0], #48 3857 call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) 3858 %tmp = 
getelementptr i8, i8* %A, i32 48 3859 ret i8* %tmp 3860 } 3861 3862 define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { 3863 ;CHECK-LABEL: test_v16i8_post_reg_st3: 3864 ;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}} 3865 call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A) 3866 %tmp = getelementptr i8, i8* %A, i64 %inc 3867 ret i8* %tmp 3868 } 3869 3870 declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) 3871 3872 3873 define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { 3874 ;CHECK-LABEL: test_v8i8_post_imm_st3: 3875 ;CHECK: st3.8b { v0, v1, v2 }, [x0], #24 3876 call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) 3877 %tmp = getelementptr i8, i8* %A, i32 24 3878 ret i8* %tmp 3879 } 3880 3881 define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { 3882 ;CHECK-LABEL: test_v8i8_post_reg_st3: 3883 ;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}} 3884 call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A) 3885 %tmp = getelementptr i8, i8* %A, i64 %inc 3886 ret i8* %tmp 3887 } 3888 3889 declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) 3890 3891 3892 define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { 3893 ;CHECK-LABEL: test_v8i16_post_imm_st3: 3894 ;CHECK: st3.8h { v0, v1, v2 }, [x0], #48 3895 call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) 3896 %tmp = getelementptr i16, i16* %A, i32 24 3897 ret i16* %tmp 3898 } 3899 3900 define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { 3901 ;CHECK-LABEL: test_v8i16_post_reg_st3: 3902 ;CHECK: st3.8h { v0, v1, 
v2 }, [x0], x{{[0-9]+}} 3903 call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A) 3904 %tmp = getelementptr i16, i16* %A, i64 %inc 3905 ret i16* %tmp 3906 } 3907 3908 declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) 3909 3910 3911 define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { 3912 ;CHECK-LABEL: test_v4i16_post_imm_st3: 3913 ;CHECK: st3.4h { v0, v1, v2 }, [x0], #24 3914 call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) 3915 %tmp = getelementptr i16, i16* %A, i32 12 3916 ret i16* %tmp 3917 } 3918 3919 define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { 3920 ;CHECK-LABEL: test_v4i16_post_reg_st3: 3921 ;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}} 3922 call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A) 3923 %tmp = getelementptr i16, i16* %A, i64 %inc 3924 ret i16* %tmp 3925 } 3926 3927 declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) 3928 3929 3930 define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { 3931 ;CHECK-LABEL: test_v4i32_post_imm_st3: 3932 ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48 3933 call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) 3934 %tmp = getelementptr i32, i32* %A, i32 12 3935 ret i32* %tmp 3936 } 3937 3938 define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { 3939 ;CHECK-LABEL: test_v4i32_post_reg_st3: 3940 ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}} 3941 call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A) 3942 %tmp = getelementptr i32, i32* %A, i64 %inc 3943 ret i32* %tmp 3944 } 3945 3946 
; Intrinsic declaration for the <4 x i32> st3 tests that precede this point.
declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)


; ---------------------------------------------------------------------------
; st3 with pointer write-back, remaining element types.
;
; Each "post_imm" function stores through %A and returns %A advanced by a
; constant element count whose byte size equals the amount of data stored; the
; expected assembly uses the post-indexed immediate form. Each "post_reg"
; function advances %A by the runtime value %inc instead, and the expected
; assembly takes the increment from an x register. %ptr is not referenced by
; any body in this group.
; ---------------------------------------------------------------------------

; Three <2 x i32> vectors, pointer bumped by 6 x i32 = 24 bytes.
define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
;CHECK-LABEL: test_v2i32_post_imm_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %next = getelementptr i32, i32* %A, i32 6
  ret i32* %next
}

; Three <2 x i32> vectors, pointer bumped by %inc elements.
define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i32_post_reg_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  ret i32* %next
}

declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)


; Three <2 x i64> vectors, pointer bumped by 6 x i64 = 48 bytes.
define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
;CHECK-LABEL: test_v2i64_post_imm_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 6
  ret i64* %next
}

; Three <2 x i64> vectors, pointer bumped by %inc elements.
define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2i64_post_reg_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)


; Three <1 x i64> vectors, pointer bumped by 3 x i64 = 24 bytes. The expected
; instruction for single-element vectors is a three-register st1.1d.
define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
;CHECK-LABEL: test_v1i64_post_imm_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 3
  ret i64* %next
}

; Three <1 x i64> vectors, pointer bumped by %inc elements (st1.1d expected).
define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v1i64_post_reg_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)


; Three <4 x float> vectors, pointer bumped by 12 x float = 48 bytes.
define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
;CHECK-LABEL: test_v4f32_post_imm_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %next = getelementptr float, float* %A, i32 12
  ret float* %next
}

; Three <4 x float> vectors, pointer bumped by %inc elements.
define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v4f32_post_reg_st3:
;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  ret float* %next
}

declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)


; Three <2 x float> vectors, pointer bumped by 6 x float = 24 bytes.
define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
;CHECK-LABEL: test_v2f32_post_imm_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %next = getelementptr float, float* %A, i32 6
  ret float* %next
}

; Three <2 x float> vectors, pointer bumped by %inc elements.
define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f32_post_reg_st3:
;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  ret float* %next
}

declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)


; Three <2 x double> vectors, pointer bumped by 6 x double = 48 bytes.
define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
;CHECK-LABEL: test_v2f64_post_imm_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %next = getelementptr double, double* %A, i64 6
  ret double* %next
}

; Three <2 x double> vectors, pointer bumped by %inc elements.
define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v2f64_post_reg_st3:
;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  ret double* %next
}

declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)


; Three <1 x double> vectors, pointer bumped by 3 x double = 24 bytes. The
; expected instruction for single-element vectors is a three-register st1.1d.
define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
;CHECK-LABEL: test_v1f64_post_imm_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %next = getelementptr double, double* %A, i64 3
  ret double* %next
}

; Three <1 x double> vectors, pointer bumped by %inc elements (st1.1d expected).
define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
;CHECK-LABEL: test_v1f64_post_reg_st3:
;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  ret double* %next
}

declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)


; ---------------------------------------------------------------------------
; st4 with pointer write-back. Same pattern as the st3 tests, with a fourth
; vector operand %E.
; ---------------------------------------------------------------------------

; Four <16 x i8> vectors, pointer bumped by 64 x i8 = 64 bytes.
define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
;CHECK-LABEL: test_v16i8_post_imm_st4:
;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %next = getelementptr i8, i8* %A, i32 64
  ret i8* %next
}

; Four <16 x i8> vectors, pointer bumped by %inc elements.
define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
;CHECK-LABEL: test_v16i8_post_reg_st4:
;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  ret i8* %next
}

declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)


; Four <8 x i8> vectors, pointer bumped by 32 x i8 = 32 bytes.
define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
;CHECK-LABEL: test_v8i8_post_imm_st4:
;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %next = getelementptr i8, i8* %A, i32 32
  ret i8* %next
}

; st4 of four <8 x i8> vectors, register post-index: returns %A + %inc elements.
define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st4:
; CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  ret i8* %next
}

declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)


; st4 of four <8 x i16> vectors, immediate post-index: returns %A + 32 elements (64 bytes).
define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st4:
; CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %next = getelementptr i16, i16* %A, i32 32
  ret i16* %next
}

; st4 of four <8 x i16> vectors, register post-index: returns %A + %inc elements.
define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st4:
; CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  ret i16* %next
}

declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)


; st4 of four <4 x i16> vectors, immediate post-index: returns %A + 16 elements (32 bytes).
define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st4:
; CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %next = getelementptr i16, i16* %A, i32 16
  ret i16* %next
}

; st4 of four <4 x i16> vectors, register post-index: returns %A + %inc elements.
define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st4:
; CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  ret i16* %next
}

declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)


; st4 of four <4 x i32> vectors, immediate post-index: returns %A + 16 elements (64 bytes).
define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st4:
; CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %next = getelementptr i32, i32* %A, i32 16
  ret i32* %next
}

; st4 of four <4 x i32> vectors, register post-index: returns %A + %inc elements.
define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st4:
; CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  ret i32* %next
}

declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)


; st4 of four <2 x i32> vectors, immediate post-index: returns %A + 8 elements (32 bytes).
define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st4:
; CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %next = getelementptr i32, i32* %A, i32 8
  ret i32* %next
}

; st4 of four <2 x i32> vectors, register post-index: returns %A + %inc elements.
define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st4:
; CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  ret i32* %next
}

declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)


; st4 of four <2 x i64> vectors, immediate post-index: returns %A + 8 elements (64 bytes).
define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st4:
; CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %next = getelementptr i64, i64* %A, i64 8
  ret i64* %next
}

; st4 of four <2 x i64> vectors, register post-index: returns %A + %inc elements.
define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st4:
; CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)


; st4 intrinsic on four <1 x i64> vectors, immediate post-index: returns %A + 4 elements (32 bytes).
; The expected instruction for this single-element type is st1.1d.
define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %next = getelementptr i64, i64* %A, i64 4
  ret i64* %next
}

; st4 intrinsic on four <1 x i64> vectors, register post-index: returns %A + %inc elements.
; The expected instruction for this single-element type is st1.1d.
define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)


; st4 of four <4 x float> vectors, immediate post-index: returns %A + 16 elements (64 bytes).
define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st4:
; CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %next = getelementptr float, float* %A, i32 16
  ret float* %next
}

; st4 of four <4 x float> vectors, register post-index: returns %A + %inc elements.
define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st4:
; CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  ret float* %next
}

declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)


; st4 of four <2 x float> vectors, immediate post-index: returns %A + 8 elements (32 bytes).
define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st4:
; CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %next = getelementptr float, float* %A, i32 8
  ret float* %next
}

; st4 of four <2 x float> vectors, register post-index: returns %A + %inc elements.
define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st4:
; CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  ret float* %next
}

declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)


; st4 of four <2 x double> vectors, immediate post-index: returns %A + 8 elements (64 bytes).
define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st4:
; CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %next = getelementptr double, double* %A, i64 8
  ret double* %next
}

; st4 of four <2 x double> vectors, register post-index: returns %A + %inc elements.
define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st4:
; CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  ret double* %next
}

declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)


; st4 intrinsic on four <1 x double> vectors, immediate post-index: returns %A + 4 elements (32 bytes).
; The expected instruction for this single-element type is st1.1d.
define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %next = getelementptr double, double* %A, i64 4
  ret double* %next
}

; st4 intrinsic on four <1 x double> vectors, register post-index: returns %A + %inc elements.
; The expected instruction for this single-element type is st1.1d.
define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  ret double* %next
}

declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)


; st1x2 of two <16 x i8> vectors, immediate post-index: returns %A + 32 elements (32 bytes).
define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st1x2:
; CHECK: st1.16b { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %next = getelementptr i8, i8* %A, i32 32
  ret i8* %next
}

; st1x2 of two <16 x i8> vectors, register post-index: returns %A + %inc elements.
define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st1x2:
; CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  ret i8* %next
}

declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)


; st1x2 of two <8 x i8> vectors, immediate post-index: returns %A + 16 elements (16 bytes).
define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st1x2:
; CHECK: st1.8b { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %next = getelementptr i8, i8* %A, i32 16
  ret i8* %next
}

; st1x2 of two <8 x i8> vectors, register post-index: returns %A + %inc elements.
define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st1x2:
; CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  ret i8* %next
}

declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)


; st1x2 of two <8 x i16> vectors, immediate post-index: returns %A + 16 elements (32 bytes).
define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st1x2:
; CHECK: st1.8h { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %next = getelementptr i16, i16* %A, i32 16
  ret i16* %next
}

; st1x2 of two <8 x i16> vectors, register post-index: returns %A + %inc elements.
define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st1x2:
; CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  ret i16* %next
}

declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)


; st1x2 of two <4 x i16> vectors, immediate post-index: returns %A + 8 elements (16 bytes).
define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st1x2:
; CHECK: st1.4h { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %next = getelementptr i16, i16* %A, i32 8
  ret i16* %next
}

; st1x2 of two <4 x i16> vectors, register post-index: returns %A + %inc elements.
define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st1x2:
; CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  ret i16* %next
}

declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)


; st1x2 of two <4 x i32> vectors, immediate post-index: returns %A + 8 elements (32 bytes).
define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st1x2:
; CHECK: st1.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %next = getelementptr i32, i32* %A, i32 8
  ret i32* %next
}

; st1x2 of two <4 x i32> vectors, register post-index: returns %A + %inc elements.
define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st1x2:
; CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  ret i32* %next
}

declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)


; st1x2 of two <2 x i32> vectors, immediate post-index: returns %A + 4 elements (16 bytes).
define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st1x2:
; CHECK: st1.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %next = getelementptr i32, i32* %A, i32 4
  ret i32* %next
}

; st1x2 of two <2 x i32> vectors, register post-index: returns %A + %inc elements.
define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st1x2:
; CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  ret i32* %next
}

declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)


; st1x2 of two <2 x i64> vectors, immediate post-index: returns %A + 4 elements (32 bytes).
define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st1x2:
; CHECK: st1.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %next = getelementptr i64, i64* %A, i64 4
  ret i64* %next
}

; st1x2 of two <2 x i64> vectors, register post-index: returns %A + %inc elements.
define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st1x2:
; CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)


; st1x2 of two <1 x i64> vectors, immediate post-index: returns %A + 2 elements (16 bytes).
define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st1x2:
; CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %next = getelementptr i64, i64* %A, i64 2
  ret i64* %next
}

; st1x2 of two <1 x i64> vectors, register post-index: returns %A + %inc elements.
define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st1x2:
; CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)


; st1x2 of two <4 x float> vectors, immediate post-index: returns %A + 8 elements (32 bytes).
define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st1x2:
; CHECK: st1.4s { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %next = getelementptr float, float* %A, i32 8
  ret float* %next
}

; st1x2 of two <4 x float> vectors, register post-index: returns %A + %inc elements.
define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st1x2:
; CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  ret float* %next
}

declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)


; st1x2 of two <2 x float> vectors, immediate post-index: returns %A + 4 elements (16 bytes).
define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st1x2:
; CHECK: st1.2s { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %next = getelementptr float, float* %A, i32 4
  ret float* %next
}

; st1x2 of two <2 x float> vectors, register post-index: returns %A + %inc elements.
define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st1x2:
; CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
  %next = getelementptr float, float* %A, i64 %inc
  ret float* %next
}

declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*)


; st1x2 of two <2 x double> vectors, immediate post-index: returns %A + 4 elements (32 bytes).
define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st1x2:
; CHECK: st1.2d { v0, v1 }, [x0], #32
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %next = getelementptr double, double* %A, i64 4
  ret double* %next
}

; st1x2 of two <2 x double> vectors, register post-index: returns %A + %inc elements.
define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st1x2:
; CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  ret double* %next
}

declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*)


; st1x2 of two <1 x double> vectors, immediate post-index: returns %A + 2 elements (16 bytes).
define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st1x2:
; CHECK: st1.1d { v0, v1 }, [x0], #16
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %next = getelementptr double, double* %A, i64 2
  ret double* %next
}

; st1x2 of two <1 x double> vectors, register post-index: returns %A + %inc elements.
define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st1x2:
; CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
  %next = getelementptr double, double* %A, i64 %inc
  ret double* %next
}

declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*)


; st1x3 of three <16 x i8> vectors, immediate post-index: returns %A + 48 elements (48 bytes).
define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st1x3:
; CHECK: st1.16b { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %next = getelementptr i8, i8* %A, i32 48
  ret i8* %next
}

; st1x3 of three <16 x i8> vectors, register post-index: returns %A + %inc elements.
define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st1x3:
; CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  ret i8* %next
}

declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)


; st1x3 of three <8 x i8> vectors, immediate post-index: returns %A + 24 elements (24 bytes).
define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st1x3:
; CHECK: st1.8b { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %next = getelementptr i8, i8* %A, i32 24
  ret i8* %next
}

; st1x3 of three <8 x i8> vectors, register post-index: returns %A + %inc elements.
define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st1x3:
; CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
  %next = getelementptr i8, i8* %A, i64 %inc
  ret i8* %next
}

declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)


; st1x3 of three <8 x i16> vectors, immediate post-index: returns %A + 24 elements (48 bytes).
define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st1x3:
; CHECK: st1.8h { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %next = getelementptr i16, i16* %A, i32 24
  ret i16* %next
}

; st1x3 of three <8 x i16> vectors, register post-index: returns %A + %inc elements.
define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st1x3:
; CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  ret i16* %next
}

declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)


; st1x3 of three <4 x i16> vectors, immediate post-index: returns %A + 12 elements (24 bytes).
define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st1x3:
; CHECK: st1.4h { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
  %next = getelementptr i16, i16* %A, i32 12
  ret i16* %next
}

; st1x3 of three <4 x i16> vectors, register post-index: returns %A + %inc elements.
define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st1x3:
; CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
  %next = getelementptr i16, i16* %A, i64 %inc
  ret i16* %next
}

declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)


; st1x3 of three <4 x i32> vectors, immediate post-index: returns %A + 12 elements (48 bytes).
define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st1x3:
; CHECK: st1.4s { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
  %next = getelementptr i32, i32* %A, i32 12
  ret i32* %next
}

; st1x3 of three <4 x i32> vectors, register post-index: returns %A + %inc elements.
define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st1x3:
; CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  ret i32* %next
}

declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)


; st1x3 of three <2 x i32> vectors, immediate post-index: returns %A + 6 elements (24 bytes).
define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st1x3:
; CHECK: st1.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %next = getelementptr i32, i32* %A, i32 6
  ret i32* %next
}

; st1x3 of three <2 x i32> vectors, register post-index: returns %A + %inc elements.
define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st1x3:
; CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
  %next = getelementptr i32, i32* %A, i64 %inc
  ret i32* %next
}

declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)


; st1x3 of three <2 x i64> vectors, immediate post-index: returns %A + 6 elements (48 bytes).
define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st1x3:
; CHECK: st1.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 6
  ret i64* %next
}

; st1x3 of three <2 x i64> vectors, register post-index: returns %A + %inc elements.
define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st1x3:
; CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)


; st1x3 of three <1 x i64> vectors, immediate post-index: returns %A + 3 elements (24 bytes).
define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 3
  ret i64* %next
}

; st1x3 of three <1 x i64> vectors, register post-index: returns %A + %inc elements.
define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
  %next = getelementptr i64, i64* %A, i64 %inc
  ret i64* %next
}

declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)


; ---------------------------------------------------------------------------
; st1x3: three-register ST1 with post-indexed addressing, floating-point
; element types.
;
; Pattern shared by every test in this section:
;   * "_post_imm_" functions advance %A by exactly the number of bytes the
;     store writes and expect the immediate post-index form.
;   * "_post_reg_" functions advance %A by %inc elements and expect the
;     register post-index form.
;   * The %ptr argument is not used by any of the bodies.
; ---------------------------------------------------------------------------

define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st1x3:
; CHECK: st1.4s { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %newaddr = getelementptr float, float* %A, i32 12
  ret float* %newaddr
}

define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st1x3:
; CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)

define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st1x3:
; CHECK: st1.2s { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %newaddr = getelementptr float, float* %A, i32 6
  ret float* %newaddr
}

define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st1x3:
; CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)

define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st1x3:
; CHECK: st1.2d { v0, v1, v2 }, [x0], #48
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %newaddr = getelementptr double, double* %A, i64 6
  ret double* %newaddr
}

define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st1x3:
; CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)

define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], #24
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %newaddr = getelementptr double, double* %A, i64 3
  ret double* %newaddr
}

define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st1x3:
; CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)

; ---------------------------------------------------------------------------
; st1x4: four-register ST1 with post-indexed addressing. 128-bit vector
; variants expect a #64 immediate, 64-bit vector variants expect #32.
; ---------------------------------------------------------------------------

define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st1x4:
; CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 64
  ret i8* %newaddr
}

define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st1x4:
; CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)

define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st1x4:
; CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 32
  ret i8* %newaddr
}

define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st1x4:
; CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)

define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st1x4:
; CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i32 32
  ret i16* %newaddr
}

define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st1x4:
; CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i64 %inc
  ret i16* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)

define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st1x4:
; CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i32 16
  ret i16* %newaddr
}

define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st1x4:
; CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i64 %inc
  ret i16* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)

define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i32 16
  ret i32* %newaddr
}

define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i64 %inc
  ret i32* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)

define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i32 8
  ret i32* %newaddr
}

define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i64 %inc
  ret i32* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)

define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 8
  ret i64* %newaddr
}

define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 %inc
  ret i64* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)

define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 4
  ret i64* %newaddr
}

define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 %inc
  ret i64* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)

define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %newaddr = getelementptr float, float* %A, i32 16
  ret float* %newaddr
}

define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st1x4:
; CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)

define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %newaddr = getelementptr float, float* %A, i32 8
  ret float* %newaddr
}

define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st1x4:
; CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)

define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %newaddr = getelementptr double, double* %A, i64 8
  ret double* %newaddr
}

define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st1x4:
; CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)

define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %newaddr = getelementptr double, double* %A, i64 4
  ret double* %newaddr
}

define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st1x4:
; CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)

; ---------------------------------------------------------------------------
; st2lanelane: these two functions carry no FileCheck directives, so the only
; thing they exercise is that llc accepts the input. The callee is declared
; "nounwind readnone" and takes two lane indices.
; NOTE(review): presumably "st2lanelane" is intentionally not a real NEON
; intrinsic and this guards against mis-selecting it as a post-indexed store;
; confirm against the commit that introduced it.
; ---------------------------------------------------------------------------

define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) {
  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 2
  ret i8* %newaddr
}

define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) {
  call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone

; ---------------------------------------------------------------------------
; st2lane: store lane 0 of two vectors with post-indexed addressing. The
; expected immediate is two elements' worth of bytes (2, 4, 8 or 16).
; ---------------------------------------------------------------------------

define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], #2
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 2
  ret i8* %newaddr
}

define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*)

define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], #2
  call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 2
  ret i8* %newaddr
}

define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st2lane:
; CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*)

define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], #4
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i32 2
  ret i16* %newaddr
}

define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i64 %inc
  ret i16* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*)

define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], #4
  call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i32 2
  ret i16* %newaddr
}

define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st2lane:
; CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i64 %inc
  ret i16* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*)

define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i32 2
  ret i32* %newaddr
}

define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i64 %inc
  ret i32* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)

define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i32 2
  ret i32* %newaddr
}

define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i64 %inc
  ret i32* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*)

define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 2
  ret i64* %newaddr
}

define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 %inc
  ret i64* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*)

define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 2
  ret i64* %newaddr
}

define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 %inc
  ret i64* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*)

define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i32 2
  ret float* %newaddr
}

; ---------------------------------------------------------------------------
; st2lane, floating-point element types: store lane 0 of two vectors with
; post-indexed addressing. "_post_imm_" tests advance %A by two elements and
; expect the matching immediate; "_post_reg_" tests advance %A by %inc
; elements and expect the register post-index form. %ptr is unused.
; ---------------------------------------------------------------------------

define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*)

define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], #8
  call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i32 2
  ret float* %newaddr
}

define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st2lane:
; CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*)

define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 2
  ret double* %newaddr
}

define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*)

define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], #16
  call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 2
  ret double* %newaddr
}

define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st2lane:
; CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*)

; ---------------------------------------------------------------------------
; st3lane, integer element types i8/i16/i32: store lane 0 of three vectors
; with post-indexed addressing. The expected immediate is three elements'
; worth of bytes (3, 6 or 12).
; ---------------------------------------------------------------------------

define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 3
  ret i8* %newaddr
}

define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)

define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
; CHECK-LABEL: test_v8i8_post_imm_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
  call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 3
  ret i8* %newaddr
}

define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i8_post_reg_st3lane:
; CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)

define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
; CHECK-LABEL: test_v8i16_post_imm_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i32 3
  ret i16* %newaddr
}

define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v8i16_post_reg_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i64 %inc
  ret i16* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)

define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
; CHECK-LABEL: test_v4i16_post_imm_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
  call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i32 3
  ret i16* %newaddr
}

define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i16_post_reg_st3lane:
; CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
  %newaddr = getelementptr i16, i16* %A, i64 %inc
  ret i16* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)

define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
; CHECK-LABEL: test_v4i32_post_imm_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i32 3
  ret i32* %newaddr
}

define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4i32_post_reg_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i64 %inc
  ret i32* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)

; ---------------------------------------------------------------------------
; st3lane, remaining element types (v2i32, i64, f32, f64): store lane 0 of
; three vectors with post-indexed addressing. "_post_imm_" tests advance %A
; by three elements and expect the matching immediate (12 or 24 bytes);
; "_post_reg_" tests advance %A by %inc elements and expect the register
; post-index form. %ptr is unused.
; ---------------------------------------------------------------------------

define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
; CHECK-LABEL: test_v2i32_post_imm_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
  call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i32 3
  ret i32* %newaddr
}

define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i32_post_reg_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
  %newaddr = getelementptr i32, i32* %A, i64 %inc
  ret i32* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)

define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
; CHECK-LABEL: test_v2i64_post_imm_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 3
  ret i64* %newaddr
}

define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2i64_post_reg_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 %inc
  ret i64* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)

define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
; CHECK-LABEL: test_v1i64_post_imm_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
  call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 3
  ret i64* %newaddr
}

define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v1i64_post_reg_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
  %newaddr = getelementptr i64, i64* %A, i64 %inc
  ret i64* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)

define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
; CHECK-LABEL: test_v4f32_post_imm_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
  call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i32 3
  ret float* %newaddr
}

define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v4f32_post_reg_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*)

define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
; CHECK-LABEL: test_v2f32_post_imm_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
  call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i32 3
  ret float* %newaddr
}

define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f32_post_reg_st3lane:
; CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
  %newaddr = getelementptr float, float* %A, i64 %inc
  ret float* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*)

define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
; CHECK-LABEL: test_v2f64_post_imm_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
  call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 3
  ret double* %newaddr
}

define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v2f64_post_reg_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*)

define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
; CHECK-LABEL: test_v1f64_post_imm_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
  call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 3
  ret double* %newaddr
}

define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
; CHECK-LABEL: test_v1f64_post_reg_st3lane:
; CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
  %newaddr = getelementptr double, double* %A, i64 %inc
  ret double* %newaddr
}

declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*)

; ---------------------------------------------------------------------------
; st4lane: store lane 0 of four vectors with post-indexed addressing. For
; byte elements the expected immediate is #4.
; ---------------------------------------------------------------------------

define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
; CHECK-LABEL: test_v16i8_post_imm_st4lane:
; CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i32 4
  ret i8* %newaddr
}

define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
; CHECK-LABEL: test_v16i8_post_reg_st4lane:
; CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
  %newaddr = getelementptr i8, i8* %A, i64 %inc
  ret i8* %newaddr
}

declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*)

define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8>
%C, <8 x i8> %D, <8 x i8> %E) nounwind { 5485 ;CHECK-LABEL: test_v8i8_post_imm_st4lane: 5486 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4 5487 call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) 5488 %tmp = getelementptr i8, i8* %A, i32 4 5489 ret i8* %tmp 5490 } 5491 5492 define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { 5493 ;CHECK-LABEL: test_v8i8_post_reg_st4lane: 5494 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5495 call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A) 5496 %tmp = getelementptr i8, i8* %A, i64 %inc 5497 ret i8* %tmp 5498 } 5499 5500 declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) 5501 5502 5503 define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { 5504 ;CHECK-LABEL: test_v8i16_post_imm_st4lane: 5505 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 5506 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) 5507 %tmp = getelementptr i16, i16* %A, i32 4 5508 ret i16* %tmp 5509 } 5510 5511 define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { 5512 ;CHECK-LABEL: test_v8i16_post_reg_st4lane: 5513 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5514 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A) 5515 %tmp = getelementptr i16, i16* %A, i64 %inc 5516 ret i16* %tmp 5517 } 5518 5519 declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) 5520 5521 5522 define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 
x i16> %D, <4 x i16> %E) nounwind { 5523 ;CHECK-LABEL: test_v4i16_post_imm_st4lane: 5524 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8 5525 call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) 5526 %tmp = getelementptr i16, i16* %A, i32 4 5527 ret i16* %tmp 5528 } 5529 5530 define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { 5531 ;CHECK-LABEL: test_v4i16_post_reg_st4lane: 5532 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5533 call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A) 5534 %tmp = getelementptr i16, i16* %A, i64 %inc 5535 ret i16* %tmp 5536 } 5537 5538 declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) 5539 5540 5541 define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { 5542 ;CHECK-LABEL: test_v4i32_post_imm_st4lane: 5543 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 5544 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) 5545 %tmp = getelementptr i32, i32* %A, i32 4 5546 ret i32* %tmp 5547 } 5548 5549 define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { 5550 ;CHECK-LABEL: test_v4i32_post_reg_st4lane: 5551 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5552 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A) 5553 %tmp = getelementptr i32, i32* %A, i64 %inc 5554 ret i32* %tmp 5555 } 5556 5557 declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) 5558 5559 5560 define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** 
%ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { 5561 ;CHECK-LABEL: test_v2i32_post_imm_st4lane: 5562 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 5563 call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) 5564 %tmp = getelementptr i32, i32* %A, i32 4 5565 ret i32* %tmp 5566 } 5567 5568 define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { 5569 ;CHECK-LABEL: test_v2i32_post_reg_st4lane: 5570 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5571 call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A) 5572 %tmp = getelementptr i32, i32* %A, i64 %inc 5573 ret i32* %tmp 5574 } 5575 5576 declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) 5577 5578 5579 define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { 5580 ;CHECK-LABEL: test_v2i64_post_imm_st4lane: 5581 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 5582 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) 5583 %tmp = getelementptr i64, i64* %A, i64 4 5584 ret i64* %tmp 5585 } 5586 5587 define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { 5588 ;CHECK-LABEL: test_v2i64_post_reg_st4lane: 5589 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5590 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A) 5591 %tmp = getelementptr i64, i64* %A, i64 %inc 5592 ret i64* %tmp 5593 } 5594 5595 declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) 5596 5597 5598 define i64* 
@test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { 5599 ;CHECK-LABEL: test_v1i64_post_imm_st4lane: 5600 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 5601 call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) 5602 %tmp = getelementptr i64, i64* %A, i64 4 5603 ret i64* %tmp 5604 } 5605 5606 define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { 5607 ;CHECK-LABEL: test_v1i64_post_reg_st4lane: 5608 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5609 call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A) 5610 %tmp = getelementptr i64, i64* %A, i64 %inc 5611 ret i64* %tmp 5612 } 5613 5614 declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) 5615 5616 5617 define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { 5618 ;CHECK-LABEL: test_v4f32_post_imm_st4lane: 5619 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 5620 call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) 5621 %tmp = getelementptr float, float* %A, i32 4 5622 ret float* %tmp 5623 } 5624 5625 define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { 5626 ;CHECK-LABEL: test_v4f32_post_reg_st4lane: 5627 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5628 call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A) 5629 %tmp = getelementptr float, float* %A, i64 %inc 5630 ret float* %tmp 5631 } 5632 5633 declare void 
@llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) 5634 5635 5636 define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { 5637 ;CHECK-LABEL: test_v2f32_post_imm_st4lane: 5638 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16 5639 call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) 5640 %tmp = getelementptr float, float* %A, i32 4 5641 ret float* %tmp 5642 } 5643 5644 define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { 5645 ;CHECK-LABEL: test_v2f32_post_reg_st4lane: 5646 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5647 call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A) 5648 %tmp = getelementptr float, float* %A, i64 %inc 5649 ret float* %tmp 5650 } 5651 5652 declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) 5653 5654 5655 define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { 5656 ;CHECK-LABEL: test_v2f64_post_imm_st4lane: 5657 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 5658 call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) 5659 %tmp = getelementptr double, double* %A, i64 4 5660 ret double* %tmp 5661 } 5662 5663 define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { 5664 ;CHECK-LABEL: test_v2f64_post_reg_st4lane: 5665 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5666 call void 
@llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A) 5667 %tmp = getelementptr double, double* %A, i64 %inc 5668 ret double* %tmp 5669 } 5670 5671 declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) 5672 5673 5674 define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { 5675 ;CHECK-LABEL: test_v1f64_post_imm_st4lane: 5676 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32 5677 call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) 5678 %tmp = getelementptr double, double* %A, i64 4 5679 ret double* %tmp 5680 } 5681 5682 define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { 5683 ;CHECK-LABEL: test_v1f64_post_reg_st4lane: 5684 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}} 5685 call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A) 5686 %tmp = getelementptr double, double* %A, i64 %inc 5687 ret double* %tmp 5688 } 5689 5690 declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) 5691 5692 define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) { 5693 ; CHECK-LABEL: test_v16i8_post_imm_ld1r: 5694 ; CHECK: ld1r.16b { v0 }, [x0], #1 5695 %tmp1 = load i8, i8* %bar 5696 %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0 5697 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 5698 %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2 5699 
%tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3 5700 %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4 5701 %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5 5702 %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6 5703 %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7 5704 %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8 5705 %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9 5706 %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10 5707 %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11 5708 %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12 5709 %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13 5710 %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14 5711 %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15 5712 %tmp18 = getelementptr i8, i8* %bar, i64 1 5713 store i8* %tmp18, i8** %ptr 5714 ret <16 x i8> %tmp17 5715 } 5716 5717 define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) { 5718 ; CHECK-LABEL: test_v16i8_post_reg_ld1r: 5719 ; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}} 5720 %tmp1 = load i8, i8* %bar 5721 %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0 5722 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1 5723 %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2 5724 %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3 5725 %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4 5726 %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5 5727 %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6 5728 %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7 5729 %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8 5730 %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9 5731 %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10 5732 %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, 
i32 11 5733 %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12 5734 %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13 5735 %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14 5736 %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15 5737 %tmp18 = getelementptr i8, i8* %bar, i64 %inc 5738 store i8* %tmp18, i8** %ptr 5739 ret <16 x i8> %tmp17 5740 } 5741 5742 define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) { 5743 ; CHECK-LABEL: test_v8i8_post_imm_ld1r: 5744 ; CHECK: ld1r.8b { v0 }, [x0], #1 5745 %tmp1 = load i8, i8* %bar 5746 %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0 5747 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 5748 %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2 5749 %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3 5750 %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4 5751 %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5 5752 %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6 5753 %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7 5754 %tmp10 = getelementptr i8, i8* %bar, i64 1 5755 store i8* %tmp10, i8** %ptr 5756 ret <8 x i8> %tmp9 5757 } 5758 5759 define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) { 5760 ; CHECK-LABEL: test_v8i8_post_reg_ld1r: 5761 ; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}} 5762 %tmp1 = load i8, i8* %bar 5763 %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0 5764 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1 5765 %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2 5766 %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3 5767 %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4 5768 %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5 5769 %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6 5770 %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7 5771 %tmp10 = 
getelementptr i8, i8* %bar, i64 %inc 5772 store i8* %tmp10, i8** %ptr 5773 ret <8 x i8> %tmp9 5774 } 5775 5776 define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) { 5777 ; CHECK-LABEL: test_v8i16_post_imm_ld1r: 5778 ; CHECK: ld1r.8h { v0 }, [x0], #2 5779 %tmp1 = load i16, i16* %bar 5780 %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0 5781 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1 5782 %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2 5783 %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3 5784 %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4 5785 %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5 5786 %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6 5787 %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7 5788 %tmp10 = getelementptr i16, i16* %bar, i64 1 5789 store i16* %tmp10, i16** %ptr 5790 ret <8 x i16> %tmp9 5791 } 5792 5793 define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) { 5794 ; CHECK-LABEL: test_v8i16_post_reg_ld1r: 5795 ; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}} 5796 %tmp1 = load i16, i16* %bar 5797 %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0 5798 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1 5799 %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2 5800 %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3 5801 %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4 5802 %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5 5803 %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6 5804 %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7 5805 %tmp10 = getelementptr i16, i16* %bar, i64 %inc 5806 store i16* %tmp10, i16** %ptr 5807 ret <8 x i16> %tmp9 5808 } 5809 5810 define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) { 5811 ; CHECK-LABEL: 
test_v4i16_post_imm_ld1r: 5812 ; CHECK: ld1r.4h { v0 }, [x0], #2 5813 %tmp1 = load i16, i16* %bar 5814 %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0 5815 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1 5816 %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2 5817 %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3 5818 %tmp6 = getelementptr i16, i16* %bar, i64 1 5819 store i16* %tmp6, i16** %ptr 5820 ret <4 x i16> %tmp5 5821 } 5822 5823 define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) { 5824 ; CHECK-LABEL: test_v4i16_post_reg_ld1r: 5825 ; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}} 5826 %tmp1 = load i16, i16* %bar 5827 %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0 5828 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1 5829 %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2 5830 %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3 5831 %tmp6 = getelementptr i16, i16* %bar, i64 %inc 5832 store i16* %tmp6, i16** %ptr 5833 ret <4 x i16> %tmp5 5834 } 5835 5836 define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) { 5837 ; CHECK-LABEL: test_v4i32_post_imm_ld1r: 5838 ; CHECK: ld1r.4s { v0 }, [x0], #4 5839 %tmp1 = load i32, i32* %bar 5840 %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0 5841 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1 5842 %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2 5843 %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3 5844 %tmp6 = getelementptr i32, i32* %bar, i64 1 5845 store i32* %tmp6, i32** %ptr 5846 ret <4 x i32> %tmp5 5847 } 5848 5849 define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) { 5850 ; CHECK-LABEL: test_v4i32_post_reg_ld1r: 5851 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}} 5852 %tmp1 = load i32, i32* %bar 5853 %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 
undef>, i32 %tmp1, i32 0 5854 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1 5855 %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2 5856 %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3 5857 %tmp6 = getelementptr i32, i32* %bar, i64 %inc 5858 store i32* %tmp6, i32** %ptr 5859 ret <4 x i32> %tmp5 5860 } 5861 5862 define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) { 5863 ; CHECK-LABEL: test_v2i32_post_imm_ld1r: 5864 ; CHECK: ld1r.2s { v0 }, [x0], #4 5865 %tmp1 = load i32, i32* %bar 5866 %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0 5867 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1 5868 %tmp4 = getelementptr i32, i32* %bar, i64 1 5869 store i32* %tmp4, i32** %ptr 5870 ret <2 x i32> %tmp3 5871 } 5872 5873 define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) { 5874 ; CHECK-LABEL: test_v2i32_post_reg_ld1r: 5875 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}} 5876 %tmp1 = load i32, i32* %bar 5877 %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0 5878 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1 5879 %tmp4 = getelementptr i32, i32* %bar, i64 %inc 5880 store i32* %tmp4, i32** %ptr 5881 ret <2 x i32> %tmp3 5882 } 5883 5884 define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) { 5885 ; CHECK-LABEL: test_v2i64_post_imm_ld1r: 5886 ; CHECK: ld1r.2d { v0 }, [x0], #8 5887 %tmp1 = load i64, i64* %bar 5888 %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0 5889 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1 5890 %tmp4 = getelementptr i64, i64* %bar, i64 1 5891 store i64* %tmp4, i64** %ptr 5892 ret <2 x i64> %tmp3 5893 } 5894 5895 define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) { 5896 ; CHECK-LABEL: test_v2i64_post_reg_ld1r: 5897 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}} 5898 %tmp1 = load i64, i64* %bar 5899 %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0 5900 
%tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1 5901 %tmp4 = getelementptr i64, i64* %bar, i64 %inc 5902 store i64* %tmp4, i64** %ptr 5903 ret <2 x i64> %tmp3 5904 } 5905 5906 define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) { 5907 ; CHECK-LABEL: test_v4f32_post_imm_ld1r: 5908 ; CHECK: ld1r.4s { v0 }, [x0], #4 5909 %tmp1 = load float, float* %bar 5910 %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0 5911 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1 5912 %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2 5913 %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3 5914 %tmp6 = getelementptr float, float* %bar, i64 1 5915 store float* %tmp6, float** %ptr 5916 ret <4 x float> %tmp5 5917 } 5918 5919 define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) { 5920 ; CHECK-LABEL: test_v4f32_post_reg_ld1r: 5921 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}} 5922 %tmp1 = load float, float* %bar 5923 %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0 5924 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1 5925 %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2 5926 %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3 5927 %tmp6 = getelementptr float, float* %bar, i64 %inc 5928 store float* %tmp6, float** %ptr 5929 ret <4 x float> %tmp5 5930 } 5931 5932 define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) { 5933 ; CHECK-LABEL: test_v2f32_post_imm_ld1r: 5934 ; CHECK: ld1r.2s { v0 }, [x0], #4 5935 %tmp1 = load float, float* %bar 5936 %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0 5937 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1 5938 %tmp4 = getelementptr float, float* %bar, i64 1 5939 store float* %tmp4, float** %ptr 5940 ret <2 x float> %tmp3 5941 } 5942 5943 define <2 x float> 
@test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) { 5944 ; CHECK-LABEL: test_v2f32_post_reg_ld1r: 5945 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}} 5946 %tmp1 = load float, float* %bar 5947 %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0 5948 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1 5949 %tmp4 = getelementptr float, float* %bar, i64 %inc 5950 store float* %tmp4, float** %ptr 5951 ret <2 x float> %tmp3 5952 } 5953 5954 define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) { 5955 ; CHECK-LABEL: test_v2f64_post_imm_ld1r: 5956 ; CHECK: ld1r.2d { v0 }, [x0], #8 5957 %tmp1 = load double, double* %bar 5958 %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0 5959 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1 5960 %tmp4 = getelementptr double, double* %bar, i64 1 5961 store double* %tmp4, double** %ptr 5962 ret <2 x double> %tmp3 5963 } 5964 5965 define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) { 5966 ; CHECK-LABEL: test_v2f64_post_reg_ld1r: 5967 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}} 5968 %tmp1 = load double, double* %bar 5969 %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0 5970 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1 5971 %tmp4 = getelementptr double, double* %bar, i64 %inc 5972 store double* %tmp4, double** %ptr 5973 ret <2 x double> %tmp3 5974 } 5975 5976 define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) { 5977 ; CHECK-LABEL: test_v16i8_post_imm_ld1lane: 5978 ; CHECK: ld1.b { v0 }[1], [x0], #1 5979 %tmp1 = load i8, i8* %bar 5980 %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1 5981 %tmp3 = getelementptr i8, i8* %bar, i64 1 5982 store i8* %tmp3, i8** %ptr 5983 ret <16 x i8> %tmp2 5984 } 5985 5986 define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) { 5987 ; CHECK-LABEL: 
test_v16i8_post_reg_ld1lane: 5988 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}} 5989 %tmp1 = load i8, i8* %bar 5990 %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1 5991 %tmp3 = getelementptr i8, i8* %bar, i64 %inc 5992 store i8* %tmp3, i8** %ptr 5993 ret <16 x i8> %tmp2 5994 } 5995 5996 define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) { 5997 ; CHECK-LABEL: test_v8i8_post_imm_ld1lane: 5998 ; CHECK: ld1.b { v0 }[1], [x0], #1 5999 %tmp1 = load i8, i8* %bar 6000 %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1 6001 %tmp3 = getelementptr i8, i8* %bar, i64 1 6002 store i8* %tmp3, i8** %ptr 6003 ret <8 x i8> %tmp2 6004 } 6005 6006 define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) { 6007 ; CHECK-LABEL: test_v8i8_post_reg_ld1lane: 6008 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}} 6009 %tmp1 = load i8, i8* %bar 6010 %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1 6011 %tmp3 = getelementptr i8, i8* %bar, i64 %inc 6012 store i8* %tmp3, i8** %ptr 6013 ret <8 x i8> %tmp2 6014 } 6015 6016 define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) { 6017 ; CHECK-LABEL: test_v8i16_post_imm_ld1lane: 6018 ; CHECK: ld1.h { v0 }[1], [x0], #2 6019 %tmp1 = load i16, i16* %bar 6020 %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1 6021 %tmp3 = getelementptr i16, i16* %bar, i64 1 6022 store i16* %tmp3, i16** %ptr 6023 ret <8 x i16> %tmp2 6024 } 6025 6026 define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) { 6027 ; CHECK-LABEL: test_v8i16_post_reg_ld1lane: 6028 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}} 6029 %tmp1 = load i16, i16* %bar 6030 %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1 6031 %tmp3 = getelementptr i16, i16* %bar, i64 %inc 6032 store i16* %tmp3, i16** %ptr 6033 ret <8 x i16> %tmp2 6034 } 6035 6036 define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) { 6037 ; CHECK-LABEL: test_v4i16_post_imm_ld1lane: 
; CHECK: ld1.h { v0 }[1], [x0], #2
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
  %tmp3 = getelementptr i16, i16* %bar, i64 1
  store i16* %tmp3, i16** %ptr
  ret <4 x i16> %tmp2
}

; <4 x i16>, lane 1: the element is loaded from %bar and %bar advanced by %inc
; elements is written back through %ptr. The register post-index form of the
; lane load is expected.
define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i16, i16* %bar
  %vec = insertelement <4 x i16> %A, i16 %elt, i32 1
  %next = getelementptr i16, i16* %bar, i64 %inc
  store i16* %next, i16** %ptr
  ret <4 x i16> %vec
}

; <4 x i32>, lane 1: pointer advanced by exactly one i32, so the immediate
; post-index form with a 4-byte step is expected.
define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load i32, i32* %bar
  %vec = insertelement <4 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 1
  store i32* %next, i32** %ptr
  ret <4 x i32> %vec
}

; <4 x i32>, lane 1: pointer advanced by a run-time element count (%inc), so
; the register post-index form is expected.
define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i32, i32* %bar
  %vec = insertelement <4 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 %inc
  store i32* %next, i32** %ptr
  ret <4 x i32> %vec
}

; <2 x i32> (64-bit vector), lane 1: one-element step, immediate post-index
; of 4 bytes expected.
define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load i32, i32* %bar
  %vec = insertelement <2 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 1
  store i32* %next, i32** %ptr
  ret <2 x i32> %vec
}

; <2 x i32> (64-bit vector), lane 1: step given by %inc, register post-index
; form expected.
define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i32, i32* %bar
  %vec = insertelement <2 x i32> %A, i32 %elt, i32 1
  %next = getelementptr i32, i32* %bar, i64 %inc
  store i32* %next, i32** %ptr
  ret <2 x i32> %vec
}

; <2 x i64>, lane 1: one-element step, immediate post-index of 8 bytes
; expected.
define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
  %elt = load i64, i64* %bar
  %vec = insertelement <2 x i64> %A, i64 %elt, i32 1
  %next = getelementptr i64, i64* %bar, i64 1
  store i64* %next, i64** %ptr
  ret <2 x i64> %vec
}

; <2 x i64>, lane 1: step given by %inc, register post-index form expected.
define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load i64, i64* %bar
  %vec = insertelement <2 x i64> %A, i64 %elt, i32 1
  %next = getelementptr i64, i64* %bar, i64 %inc
  store i64* %next, i64** %ptr
  ret <2 x i64> %vec
}

; <4 x float>, lane 1: one-element step, immediate post-index of 4 bytes
; expected.
define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load float, float* %bar
  %vec = insertelement <4 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 1
  store float* %next, float** %ptr
  ret <4 x float> %vec
}

; <4 x float>, lane 1: step given by %inc, register post-index form expected.
define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load float, float* %bar
  %vec = insertelement <4 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 %inc
  store float* %next, float** %ptr
  ret <4 x float> %vec
}

; <2 x float> (64-bit vector), lane 1: one-element step, immediate post-index
; of 4 bytes expected.
define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], #4
  %elt = load float, float* %bar
  %vec = insertelement <2 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 1
  store float* %next, float** %ptr
  ret <2 x float> %vec
}

; <2 x float> (64-bit vector), lane 1: step given by %inc, register
; post-index form expected.
define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load float, float* %bar
  %vec = insertelement <2 x float> %A, float %elt, i32 1
  %next = getelementptr float, float* %bar, i64 %inc
  store float* %next, float** %ptr
  ret <2 x float> %vec
}

; <2 x double>, lane 1: one-element step, immediate post-index of 8 bytes
; expected.
define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], #8
  %elt = load double, double* %bar
  %vec = insertelement <2 x double> %A, double %elt, i32 1
  %next = getelementptr double, double* %bar, i64 1
  store double* %next, double** %ptr
  ret <2 x double> %vec
}

; <2 x double>, lane 1: step given by %inc, register post-index form expected.
define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
  %elt = load double, double* %bar
  %vec = insertelement <2 x double> %A, double %elt, i32 1
  %next = getelementptr double, double* %bar, i64 %inc
  store double* %next, double** %ptr
  ret <2 x double> %vec
}

; Check for dependencies between the vector and the scalar load.
; In this function the scalar load is separated from its insertelement by a
; vector store to %dep_ptr_1 and a vector load from %dep_ptr_2. The expected
; output keeps the scalar load as a plain ldr, inserts the lane with mov.s and
; computes the updated pointer with a separate add, i.e. no post-index lane
; load is formed.
; The scratch register holding the updated pointer is captured with a
; multi-digit pattern so the test does not depend on it being x0-x9.
define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2, <4 x float> %vec) {
; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
; CHECK: %bb.0:
; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
; CHECK-NEXT: str q0, [x3]
; CHECK-NEXT: ldr q0, [x4]
; CHECK-NEXT: mov.s v0[1], v[[LD]][0]
; CHECK-NEXT: add [[POST:x[0-9]+]], x0, x2, lsl #2
; CHECK-NEXT: str [[POST]], [x1]
; CHECK-NEXT: ret
  %tmp1 = load float, float* %bar
  store <4 x float> %vec, <4 x float>* %dep_ptr_1, align 16
  %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
  %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
  %tmp3 = getelementptr float, float* %bar, i64 %inc
  store float* %tmp3, float** %ptr
  ret <4 x float> %tmp2
}

; Make sure that we test the narrow V64 code path.
; The tests above don't, because there, 64-bit insert_vector_elt nodes will be
; widened to 128-bit before the LD1LANEpost combine has the chance to run,
; making it avoid narrow vector types.
; One way to trick that combine into running early is to force the vector ops
; legalizer to run. We achieve that using the ctpop.
; PR23265
define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
  %tmp3 = getelementptr i16, i16* %bar, i64 %inc
  store i16* %tmp3, i16** %ptr
  ; The ctpop round-trip through %d does not feed the returned vector; it is
  ; the operation used to force the vector ops legalizer to run.
  %dl = load <2 x i32>, <2 x i32>* %d
  %dr = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %dl)
  store <2 x i32> %dr, <2 x i32>* %d
  ret <4 x i16> %tmp2
}

declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)

; Two <2 x i32> vectors are each assembled from two scalar loads and then
; subtracted. Lane 0 of each vector is expected to come from a scalar ldr and
; lane 1 from a lane load into the same register, so the lane-load patterns
; reference the register captured by the matching ldr instead of capturing a
; new one.
; CHECK-LABEL: test_ld1lane_build:
; CHECK-DAG: ldr s[[REGNUM0:[0-9]+]], [x0]
; CHECK-DAG: ld1.s { v[[REGNUM0]] }[1], [x1]
; CHECK-DAG: ldr s[[REGNUM1:[0-9]+]], [x2]
; CHECK-DAG: ld1.s { v[[REGNUM1]] }[1], [x3]
; CHECK: sub.2s v[[REGNUM2:[0-9]+]], v[[REGNUM0]], v[[REGNUM1]]
; CHECK-NEXT: str d[[REGNUM2]], [x4]
; CHECK-NEXT: ret
define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) {
  %load0 = load i32, i32* %ptr0, align 4
  %load1 = load i32, i32* %ptr1, align 4
  %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
  %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1

  %load2 = load i32, i32* %ptr2, align 4
  %load3 = load i32, i32* %ptr3, align 4
  %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0
  %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1

  %sub = sub nsw <2 x i32> %vec0_1, %vec1_1
  store <2 x i32> %sub, <2 x i32>* %out, align 16
  ret void
}

; A <4 x i16> is assembled from four scalar loads and subtracted from by %e.
; Lane 0 is expected from a scalar ldr, lanes 1-3 from lane loads into the
; same register.
; CHECK-LABEL: test_ld1lane_build_i16:
; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0]
; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1]
; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2]
; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3]
; CHECK: sub.4h v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
; CHECK-NEXT: str d[[REGNUM2]], [x4]
; CHECK-NEXT: ret
define void @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i16> %e, <4 x i16>* %p) {
  %ld.a = load i16, i16* %a
  %ld.b = load i16, i16* %b
  %ld.c = load i16, i16* %c
  %ld.d = load i16, i16* %d
  %v.a = insertelement <4 x i16> undef, i16 %ld.a, i64 0
  %v.b = insertelement <4 x i16> %v.a, i16 %ld.b, i64 1
  %v.c = insertelement <4 x i16> %v.b, i16 %ld.c, i64 2
  %v = insertelement <4 x i16> %v.c, i16 %ld.d, i64 3
  %sub = sub nsw <4 x i16> %v, %e
  store <4 x i16> %sub, <4 x i16>* %p
  ret void
}

; Same construction with half elements. The subtract is expected to be done in
; single precision: both operands are widened with fcvtl, subtracted as .4s,
; and the result is narrowed with fcvtn. The value stored is the narrowed
; result, so the store pattern is tied to the fcvtn destination register.
; CHECK-LABEL: test_ld1lane_build_half:
; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0]
; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1]
; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2]
; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3]
; CHECK-DAG: fcvtl v[[REGNUM01:[0-9]+]].4s, v0.4h
; CHECK-DAG: fcvtl v[[REGNUM11:[0-9]+]].4s, v[[REGNUM1]].4h
; CHECK: fsub.4s v[[REGNUM2:[0-9]+]], v[[REGNUM11]], v[[REGNUM01]]
; CHECK-DAG: fcvtn v[[REGNUM3:[0-9]+]].4h, v[[REGNUM2]].4s
; CHECK-NEXT: str d[[REGNUM3]], [x4]
; CHECK-NEXT: ret
define void @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4 x half> %e, <4 x half>* %p) {
  %ld.a = load half, half* %a
  %ld.b = load half, half* %b
  %ld.c = load half, half* %c
  %ld.d = load half, half* %d
  %v.a = insertelement <4 x half> undef, half %ld.a, i64 0
  %v.b = insertelement <4 x half> %v.a, half %ld.b, i64 1
  %v.c = insertelement <4 x half> %v.b, half %ld.c, i64 2
  %v = insertelement <4 x half> %v.c, half %ld.d, i64 3
  %sub = fsub <4 x half> %v, %e
  store <4 x half> %sub, <4 x half>* %p
  ret void
}

; An <8 x i8> is assembled from eight scalar loads (pointers in x0-x7 per the
; patterns below) and subtracted from by %v. The register holding %p is not
; pinned by the test, so the store address is matched as any x register.
; CHECK-LABEL: test_ld1lane_build_i8:
; CHECK-DAG: ldr b[[REGNUM1:[0-9]+]], [x0]
; CHECK-DAG: ld1.b { v[[REGNUM1]] }[1], [x1]
; CHECK-DAG: ld1.b { v[[REGNUM1]] }[2], [x2]
; CHECK-DAG: ld1.b { v[[REGNUM1]] }[3], [x3]
; CHECK-DAG: ld1.b { v[[REGNUM1]] }[4], [x4]
; CHECK-DAG: ld1.b { v[[REGNUM1]] }[5], [x5]
; CHECK-DAG: ld1.b { v[[REGNUM1]] }[6], [x6]
; CHECK-DAG: ld1.b { v[[REGNUM1]] }[7], [x7]
; CHECK: sub.8b v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
; CHECK-NEXT: str d[[REGNUM2]], [x{{[0-9]+}}]
; CHECK-NEXT: ret
define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* %f, i8* %g, i8* %h, <8 x i8> %v, <8 x i8>* %p) {
  %ld.a = load i8, i8* %a
  %ld.b = load i8, i8* %b
  %ld.c = load i8, i8* %c
  %ld.d = load i8, i8* %d
  %ld.e = load i8, i8* %e
  %ld.f = load i8, i8* %f
  %ld.g = load i8, i8* %g
  %ld.h = load i8, i8* %h
  %v.a = insertelement <8 x i8> undef, i8 %ld.a, i64 0
  %v.b = insertelement <8 x i8> %v.a, i8 %ld.b, i64 1
  %v.c = insertelement <8 x i8> %v.b, i8 %ld.c, i64 2
  %v.d = insertelement <8 x i8> %v.c, i8 %ld.d, i64 3
  %v.e = insertelement <8 x i8> %v.d, i8 %ld.e, i64 4
  %v.f = insertelement <8 x i8> %v.e, i8 %ld.f, i64 5
  %v.g = insertelement <8 x i8> %v.f, i8 %ld.g, i64 6
  %v1 = insertelement <8 x i8> %v.g, i8 %ld.h, i64 7
  %sub = sub nsw <8 x i8> %v1, %v
  store <8 x i8> %sub, <8 x i8>* %p
  ret void
}